Example #1
def main2():
    for zzz in LANGUAGE_LIST:
        lang = zzz[0]
        # get original embed
        system(
            "wget -nc -O %s/wiki.%s.vec https://s3-us-west-1.amazonaws.com/fasttext-vectors/wiki.%s.vec"
            % (OUT_DIR, lang, lang),
            pp=True)
        # project with LIB-matrix
        lang_dict = FastVector(vector_file='%s/wiki.%s.vec' % (OUT_DIR, lang))
        lang_dict.apply_transform("%s/alignment_matrices/%s.txt" %
                                  (LIB_DIR, lang))
        lang_dict.export("%s/wiki.multi.%s.vec" % (OUT_DIR, lang))
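For orientation: apply_transform right-multiplies every word vector by a square alignment matrix. A minimal sketch of the idea, assuming the files under alignment_matrices/ are plain-text arrays readable with np.loadtxt (the helper name apply_alignment is made up for illustration):

import numpy as np

def apply_alignment(embeddings, matrix_path):
    # embeddings: (vocab_size, dim) array; matrix_path: text file holding a dim x dim matrix
    transform = np.loadtxt(matrix_path)
    # rotate every row vector into the shared (English-anchored) space
    return np.matmul(embeddings, transform)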
Example #2
def main():
    # first get the English one
    lang = "en"
    system(
        "wget -nc -O %s/wiki.%s.vec https://s3-us-west-1.amazonaws.com/fasttext-vectors/wiki.%s.vec"
        % (OUT_DIR, lang, lang),
        pp=True)
    # en_dict = FastVector(vector_file='%s/wiki.en.vec' % OUT_DIR)
    for zzz in LANGUAGE_LIST:
        lang, fnames = zzz[0], zzz[1]
        printing("Dealing with lang %s." % lang)
        for curf in ["train", "dev", "test"]:
            out_fname = "%s/%s_%s.conllu" % (OUT_DIR, lang, curf)
            fout = zopen(out_fname, "w")
            for fname in fnames:
                last_name = fname.split("-")[-1].lower()
                path_name = "%s/%s/%s_%s-ud-%s.conllu" % (UD2_DIR, fname, lang,
                                                          last_name, curf)
                if os.path.exists(path_name):
                    with zopen(path_name) as fin:
                        deal_conll_file(fin, fout)
            fout.close()
            # stats: count sentence separators (blank lines), token/comment lines,
            # and tokens whose column-5 tag is neither PUNCT nor SYM
            system('cat %s | grep -E "^$" | wc' % out_fname, pp=True)
            system('cat %s | grep -Ev "^$" | wc' % out_fname, pp=True)
            system(
                "cat %s | grep -Ev '^$' | cut -f 5 | grep -Ev 'PUNCT|SYM' | wc"
                % out_fname,
                pp=True)
        # get original embed
        system(
            "wget -nc -O %s/wiki.%s.vec https://s3-us-west-1.amazonaws.com/fasttext-vectors/wiki.%s.vec"
            % (OUT_DIR, lang, lang),
            pp=True)
        # project with LIB-matrix
        lang_dict = FastVector(vector_file='%s/wiki.%s.vec' % (OUT_DIR, lang))
        lang_dict.apply_transform("%s/alignment_matrices/%s.txt" %
                                  (LIB_DIR, lang))
        lang_dict.export("%s/wiki.multi.%s.vec" % (OUT_DIR, lang))
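The three shell pipelines under the stats comment above count sentence separators, token lines, and non-punctuation tokens. A pure-Python equivalent, as a rough sketch (the function name conllu_stats is made up; unlike the raw pipelines, comment lines are skipped in the third count):

def conllu_stats(path):
    # blank lines separate sentences; non-blank lines are token (or comment) lines;
    # column 5 holds the tag that is filtered against PUNCT/SYM
    n_sent = n_lines = n_content = 0
    with open(path, encoding="utf-8") as fin:
        for line in fin:
            line = line.rstrip("\n")
            if not line:
                n_sent += 1
                continue
            n_lines += 1
            fields = line.split("\t")
            if len(fields) >= 5 and fields[4] not in ("PUNCT", "SYM"):
                n_content += 1
    return n_sent, n_lines, n_content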
Example #3

    # return orthogonal transformation which aligns source language to the target
    return np.matmul(U, V)
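The return statement above is the tail of learn_transformation; the SVD that produces U and V was lost in extraction. A hedged reconstruction of the standard orthogonal-Procrustes solution it implies (the body below is an educated guess, not the snippet's verbatim code):

import numpy as np

def learn_transformation(source_matrix, target_matrix):
    # solve the orthogonal Procrustes problem: the rotation minimizing
    # ||source @ W - target|| comes from the SVD of source^T @ target
    product = np.matmul(source_matrix.transpose(), target_matrix)
    U, s, V = np.linalg.svd(product)
    # return orthogonal transformation which aligns source language to the target
    return np.matmul(U, V)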


lang1_dictionary = FastVector(vector_file=args.lang1)
lang2_dictionary = FastVector(vector_file=args.lang2)

bilingual_dictionary = []
with open(args.dict, "r") as file_object:
    for line in file_object:
        w_lang2, w_lang1 = line.rstrip('\n').split('\t')
        if w_lang1 in lang1_dictionary.word2id and w_lang2 in lang2_dictionary.word2id:
            bilingual_dictionary.append((w_lang2, w_lang1))

print("Dictionary size: %d" % len(bilingual_dictionary))

# form the training matrices
source_matrix, target_matrix = make_training_matrices(lang1_dictionary,
                                                      lang2_dictionary,
                                                      bilingual_dictionary)
# learn and apply the transformation
transform = learn_transformation(source_matrix, target_matrix)
lang1_dictionary.apply_transform(transform)

lang1_dictionary.export(args.out1)
lang2_dictionary.export(args.out2)
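make_training_matrices is not defined in these snippets. A plausible minimal sketch, assuming FastVector exposes word2id and item lookup as used elsewhere on this page, and that pairs are ordered (source_word, target_word):

import numpy as np

def make_training_matrices(source_dictionary, target_dictionary, bilingual_dictionary):
    # stack the vectors of every translation pair into two row-aligned matrices
    source_rows, target_rows = [], []
    for source_word, target_word in bilingual_dictionary:
        if source_word in source_dictionary.word2id and target_word in target_dictionary.word2id:
            source_rows.append(source_dictionary[source_word])
            target_rows.append(target_dictionary[target_word])
    return np.array(source_rows), np.array(target_rows)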
Example #4
en_vector = en_dictionary["love"]
zh_vector = zh_dictionary["爱"]

# going to print 0.0004326613965749648
print(FastVector.cosine_similarity(en_vector, zh_vector))

zh_words = set(zh_dictionary.word2id.keys())
en_words = set(en_dictionary.word2id.keys())
overlap = list(zh_words & en_words)
bilingual_dictionary = [(entry, entry) for entry in overlap]

# form the training matrices
source_matrix, target_matrix = make_training_matrices(en_dictionary,
                                                      zh_dictionary,
                                                      bilingual_dictionary)

# learn and apply the transformation
transform = learn_transformation(source_matrix, target_matrix)
en_dictionary.apply_transform(transform)

en_vector = en_dictionary["love"]
zh_vector = zh_dictionary["爱"]

# going to print 0.18727020978991674
print(FastVector.cosine_similarity(en_vector, zh_vector))

en_dictionary.export("cc.en.aligned.to.zh.vec")

embedding = gluonnlp.embedding.FastText.from_file('cc.en.aligned.to.zh.vec')
embedding.serialize('cc.en.300.aligned.to.zh.vec.npz')
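Judging by the values printed above, FastVector.cosine_similarity is the usual normalized dot product; a reference sketch:

import numpy as np

def cosine_similarity(vec_a, vec_b):
    # dot product scaled by both norms; 1.0 means identical direction
    return np.dot(vec_a, vec_b) / (np.linalg.norm(vec_a) * np.linalg.norm(vec_b))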
Example #5
print "Readling Dictionary"
BI_DICT = codecs.open("o.s2t_f", "r").readlines()
BI_DICT = parse_BI(BI_DICT)
print "Readling Dictionary (END)"

# SRC_WORD = "昨天"
# TGT_WORD = "yesterday"
SRC_WORD = "钥匙"
TGT_WORD = "keys"
en_dictionary = FastVector(vector_file='en.emb.orig.vec')
other_dictionary = FastVector(vector_file='tizh.emb.orig.vec')

test_word(other_dictionary, en_dictionary, SRC_WORD, TGT_WORD)

# form the training matrices
print "Learning SVD"
source_matrix, target_matrix = make_training_matrices(other_dictionary,
                                                      en_dictionary, BI_DICT)

# learn and apply the transformation
transform = learn_transformation(source_matrix, target_matrix)
other_dictionary.apply_transform(transform)
# zh
test_word(other_dictionary, en_dictionary, SRC_WORD, TGT_WORD)
# ti
SRC_WORD = "กุญ"
test_word(other_dictionary, en_dictionary, SRC_WORD, TGT_WORD)

print "Writing transform Qe out"
other_dictionary.export("Qe")
Example #6
# coding=utf-8
from fasttext import FastVector

# note: despite the fr_ prefix, this loads the English vectors and simply
# re-exports them as plain text
fr_dictionary = FastVector(vector_file='wiki.en.vec')
fr_dictionary.export('fr.vec.txt')
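For reference, export writes the plain-text word2vec format: a header with vocabulary size and dimension, then one word and its vector per line. A minimal sketch of an equivalent writer, with made-up names (word2id maps words to row indices of embed):

def export_vectors(word2id, embed, out_path):
    # header: "<n_words> <dim>", then "word v1 v2 ... vd" per line
    n_words, dim = embed.shape
    with open(out_path, "w", encoding="utf-8") as fout:
        fout.write("%d %d\n" % (n_words, dim))
        for word, idx in sorted(word2id.items(), key=lambda kv: kv[1]):
            fout.write(word + " " + " ".join("%.4f" % x for x in embed[idx]) + "\n")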