def command_linear_trans(args_):
    """Compute the linear transformation between the ES and PT vector
    spaces and save it to ``args_.translation_matrix_file_name``.

    The bilingual lexicon used as anchor pairs is chosen by mutually
    exclusive CLI flags: ``random_pair_per_synset``, ``most_frequent``,
    or the plain default lexicon.
    """
    model_es = word_vectors.load_model(args_.model_es_file_name)
    model_pt = word_vectors.load_model(args_.model_pt_file_name)

    # Select the lexicon source according to the flags.
    if args_.random_pair_per_synset:
        lexicon = bilingual_lexicon.random_pair_per_synset_bilingual_lexicon()
    elif args_.most_frequent:
        lexicon = bilingual_lexicon.most_frequent_bilingual_lexicon_based_on_external_count(
            model_es.vocab, model_pt.vocab)
    else:
        lexicon = bilingual_lexicon.bilingual_lexicon()

    # Paired (ES vector, PT vector) tuples for every lexicon entry.
    vector_pairs = word_vectors.bilingual_lexicon_vectors(
        model_es, model_pt, bilingual_lexicon=lexicon)
    X, Y = zip(*vector_pairs)

    # `backwards` presumably flips the direction of the mapping — confirm
    # in linear_trans.linear_transformation.
    T = linear_trans.linear_transformation(X, Y, args_.backwards)
    linear_trans.save_linear_transformation(
        args_.translation_matrix_file_name, T)
Example #2
0
# -*- coding: utf-8 -*-
# Script: list words spelled identically in the ES and PT vocabularies
# (false-friend candidates) and prepare a classifier over them.
import logging
import random

from sklearn import svm

from falsefriends import classifier
from falsefriends import linear_trans
from falsefriends import util, word_vectors

logging.basicConfig(format='%(asctime)s : %(levelname)s : %(message)s',
                    level=logging.INFO)

# Labeled friend/false-friend pairs used later as training data.
# NOTE(review): exact file format not visible here — see util.read_words.
training_friend_pairs = util.read_words('resources/sepulveda2011_training.txt')

# Pre-trained word-vector models (presumably 100-dimensional, per the
# file names) for Spanish and Portuguese.
model_es = word_vectors.load_model('resources/big/vectors_es_100.bin')
model_pt = word_vectors.load_model('resources/big/vectors_pt_100.bin')

logging.info("computing equal words...")
# Set intersection: words present, with identical spelling, in both models.
equal_words = model_es.vocab.keys() & model_pt.vocab.keys()

print("Equal words number in the Wikipedia's:", len(equal_words))

# Print a small random sample of the identically-spelled words.
SAMPLE_SIZE = 20
print("Sample", SAMPLE_SIZE, "equal words found:",
      random.sample(equal_words, SAMPLE_SIZE))

# Previously computed linear transformation between the two vector spaces.
T = linear_trans.load_linear_transformation(
    'resources/big/trans_es_100_pt_100.npz')

# NOTE(review): this example appears truncated after the next line.
clf = classifier.build_classifier()
# -*- coding: utf-8 -*-
# Script: build the paired ES/PT vector matrices from a most-frequent-words
# bilingual lexicon, as input to a linear transformation.
import logging
import os
import sys

import numpy as np

# Make the project root importable when the script is run from a subdirectory.
PARENT_DIR = os.path.abspath(
    os.path.dirname(os.path.realpath(__file__)) + '/..')
sys.path.insert(0, PARENT_DIR)

from falsefriends import bilingual_lexicon, classifier, linear_trans, util, word_vectors

# logging.basicConfig(format='%(asctime)s : %(levelname)s : %(message)s', level=logging.INFO)

# Pre-trained word-vector models (presumably 100-dimensional, per the names).
model_es = word_vectors.load_model(PARENT_DIR +
                                   '/resources/big/vectors_es_100.bin')
model_pt = word_vectors.load_model(PARENT_DIR +
                                   '/resources/big/vectors_pt_100.bin')

# Bilingual lexicon restricted by external word-frequency counts.
lexicon = bilingual_lexicon.most_frequent_bilingual_lexicon_based_on_external_count(
    model_es.vocab, model_pt.vocab)

logging.info("getting vector pairs")
# X and Y are tuples of corresponding vectors, one pair per lexicon entry.
X, Y = zip(*word_vectors.bilingual_lexicon_vectors(
    model_es, model_pt, bilingual_lexicon=lexicon))

# Stack the vector tuples into 2-D arrays (one row per lexicon pair).
X_array = np.vstack(X)
Y_array = np.vstack(Y)

# NOTE(review): this example is truncated here — the logging.info call
# below is missing its closing parenthesis in the excerpt.
logging.info(
    "Computing linear transformations and classifying with cross-validation..."
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# Script: same equal-words analysis as the 100-dim example, but using the
# "jairo" variant of the pre-trained models and a plain SVC classifier.
import logging
import random

from sklearn import svm

from falsefriends import classifier
from falsefriends import linear_trans
from falsefriends import util, word_vectors

logging.basicConfig(format='%(asctime)s : %(levelname)s : %(message)s', level=logging.INFO)

# Labeled friend/false-friend training pairs.
training_friend_pairs = util.read_words('resources/sepulveda2011_training.txt')

# Pre-trained word-vector models from the resources/big/jairo directory.
model_es = word_vectors.load_model('resources/big/jairo/vectors_es.bin')
model_pt = word_vectors.load_model('resources/big/jairo/vectors_pt.bin')

logging.info("computing equal words...")
# Words with identical spelling in both vocabularies.
equal_words = model_es.vocab.keys() & model_pt.vocab.keys()

print("Equal words number in the Wikipedia's:", len(equal_words))

# Print a small random sample of the identically-spelled words.
SAMPLE_SIZE = 20
print("Sample", SAMPLE_SIZE, "equal words found:", random.sample(equal_words, SAMPLE_SIZE))

# Previously computed linear transformation between the two vector spaces.
T = linear_trans.load_linear_transformation('resources/big/jairo/linear_trans.npz')

clf = svm.SVC()

# Features, labels and fitted scaler derived from the training pairs.
# NOTE(review): the example appears truncated after this line.
X_train, y_train, scaler = classifier.features_labels_and_scaler(training_friend_pairs, model_es, model_pt, T)
Example #5
0
def read_models(_args):
    """Load and return the (Spanish, Portuguese) word-vector models named
    in the parsed CLI arguments."""
    return (word_vectors.load_model(_args.model_es_file_name),
            word_vectors.load_model(_args.model_pt_file_name))
 def command_linear_trans(_args):
     """Compute the linear transformation between the ES and PT vector
     spaces from the default bilingual lexicon and save it.

     `backwards` presumably flips the mapping direction — confirm in
     linear_trans.linear_transformation.
     """
     model_es = word_vectors.load_model(_args.model_es_file_name)
     model_pt = word_vectors.load_model(_args.model_pt_file_name)
     # Paired (ES vector, PT vector) tuples, one per lexicon entry.
     X, Y = zip(*word_vectors.bilingual_lexicon_vectors(model_es, model_pt))
     T = linear_trans.linear_transformation(X, Y, _args.backwards)
     linear_trans.save_linear_transformation(_args.translation_matrix_file_name, T)
Example #7
0
    os.path.dirname(os.path.realpath(__file__)) + '/..')
sys.path.insert(0, PARENT_DIR)  # make the project root importable

from falsefriends import bilingual_lexicon, classifier, linear_trans, util, word_vectors

# logging.basicConfig(format='%(asctime)s : %(levelname)s : %(message)s', level=logging.INFO)

# Header for the per-method accuracy table printed by the (truncated) loop body.
print("Method\t\t Acc")

# Friend pairs read from the original Sepúlveda (2011) resource file.
# NOTE(review): how they are used is not visible in this excerpt.
friend_pairs = util.read_words(PARENT_DIR +
                               '/resources/sepulveda2011_original.txt')

# Grid of vector dimensionalities to compare.
VECTOR_SIZES = [100, 200, 400, 800]

# Evaluate every (ES size, PT size) combination of pre-trained models.
for size_es in VECTOR_SIZES:
    model_es = word_vectors.load_model(
        PARENT_DIR + '/resources/big/vectors_es_{}.bin'.format(size_es))
    for size_pt in VECTOR_SIZES:
        model_pt = word_vectors.load_model(
            PARENT_DIR + '/resources/big/vectors_pt_{}.bin'.format(size_pt))

        clf = classifier.build_classifier()

        # Reuse a cached linear transformation when one exists on disk;
        # otherwise build the lexicon and vector pairs needed to derive it
        # (the derivation itself continues past this excerpt).
        T_path = PARENT_DIR + '/resources/big/trans_es_{}_pt_{}.npz'.format(
            size_es, size_pt)
        if os.path.exists(T_path):
            T = linear_trans.load_linear_transformation(T_path)
        else:
            lexicon = bilingual_lexicon.most_frequent_bilingual_lexicon_based_on_external_count(
                model_es.vocab, model_pt.vocab)
            X, Y = zip(*word_vectors.bilingual_lexicon_vectors(
                model_es, model_pt, bilingual_lexicon=lexicon))
Example #8
0
def read_models(args_):
    """Load the Spanish and Portuguese word-vector models whose file
    names come from the parsed CLI arguments."""
    es_model = word_vectors.load_model(args_.model_es_file_name)
    pt_model = word_vectors.load_model(args_.model_pt_file_name)
    return es_model, pt_model