Ejemplo n.º 1
0
def test_3words():
    """Train a NeuralTranslationModel on two seed pairs and print its scores.

    This is a smoke test: it builds the model from the two 'three-words'
    VSM fixtures, trains it on the seed pairs, and prints the result of
    evaluating the model on that same training data. No assertion is made
    yet (see the TODO below).
    """
    data = [('here', 'there'), ('now', 'then')]

    model = NeuralTranslationModel(
        load_vsm_fixture('three-words/vsm-a-window5-min0-dim5'),
        load_vsm_fixture('three-words/vsm-b-window5-min0-dim5'),
        hidden_layer_size=5, learning_rate=0.01)
    model.train(data)

    # TODO how to assert good enough performance here? Maybe train an ensemble?
    # NOTE(review): evaluate_model's result is consumed elsewhere by iterating
    # it, so it may be a lazy iterator -- materialise it so the printed output
    # shows the scores rather than an object repr. The parenthesised
    # single-argument print behaves the same on Python 2 and Python 3.
    print(list(evaluate_model(model, data)))
Ejemplo n.º 2
0
def test_3words():
    """Check that a LinearTranslationModel scores 1.0 on its training pairs.

    The model is built from the two 'three-words' VSM fixtures, trained on
    the seed pairs, and then evaluated on those same pairs; every score
    produced by the evaluation must equal 1.0.
    """
    seed_pairs = [('here', 'there'), ('now', 'then')]

    vsm_a = load_vsm_fixture('three-words/vsm-a-window5-min0-dim5')
    vsm_b = load_vsm_fixture('three-words/vsm-b-window5-min0-dim5')
    translator = LinearTranslationModel(vsm_a, vsm_b)
    translator.train(seed_pairs)

    failure_message = "Should see 100% performance on training data"
    for observed in evaluate_model(translator, seed_pairs):
        assert_equal(1.0, observed, failure_message)
Ejemplo n.º 3
0
def main(arguments):
    """Load two vector spaces, build a translation model, then train/evaluate.

    Args:
        arguments: Parsed options object (presumably an
            ``argparse.Namespace`` -- confirm against the caller). The
            attributes read here are ``vsm_binary``, ``vsm_source``,
            ``vsm_target``, ``model``, ``model_arguments``, ``data_train``,
            ``data_test`` and ``model_file``.

    Behaviour:
        * Both vector spaces are loaded either via
          ``Word2Vec.load_word2vec_format`` (when ``vsm_binary`` is truthy)
          or via ``Word2Vec.load_normalized``.
        * The model class is looked up in ``MODEL_MAPPING`` and instantiated
          with the two vector spaces plus ``model_arguments`` as keywords.
        * If ``model_file`` is given, the model is loaded from it; a model
          that raises ``NotImplementedError`` on load makes the process exit
          with status 1.
        * If training data is given, the model is trained and saved via
          ``save_model``.
        * If test data is given, the mean and standard deviation of the
          evaluation scores are printed on one line.
    """
    if arguments.vsm_binary:
        vsm_source = Word2Vec.load_word2vec_format(arguments.vsm_source,
                                                   binary=True, norm_only=True)
        vsm_target = Word2Vec.load_word2vec_format(arguments.vsm_target,
                                                   binary=True, norm_only=True)
    else:
        vsm_source = Word2Vec.load_normalized(arguments.vsm_source)
        vsm_target = Word2Vec.load_normalized(arguments.vsm_target)

        # Make sure the normalised vectors exist before the model uses them.
        if vsm_source.syn0norm is None:
            vsm_source.init_sims()
        if vsm_target.syn0norm is None:
            vsm_target.init_sims()

    # Instantiate model
    model_class = MODEL_MAPPING[arguments.model]
    model = model_class(vsm_source, vsm_target, **arguments.model_arguments)

    # Load seed data
    data_train = (load_seed_data(arguments.data_train)
                  if arguments.data_train else None)
    data_test = (load_seed_data(arguments.data_test)
                 if arguments.data_test else None)

    # Attempt to load model from file
    if arguments.model_file is not None:
        logging.debug("Loading model from file '%s'", arguments.model_file)

        try:
            model.load(arguments.model_file)
        except NotImplementedError:
            logging.error("Requested model does not support loading from "
                          "saved files")
            sys.exit(1)

    # Train model
    if data_train is not None:
        model.train(data_train)

        # vars() hands back the object's live __dict__; copy it so that
        # adding the 'extra' entry does not mutate ``arguments`` itself.
        save_arguments = dict(vars(arguments))
        save_arguments['extra'] = {
            'training_pairs': data_train
        }

        save_model(model, save_arguments)

    # Test model
    if data_test is not None:
        scores = list(evaluate_model(model, data_test))
        # Single pre-formatted argument: prints "<mean> <std>" identically
        # under the Python 2 print statement and the Python 3 print function.
        print("%s %s" % (mean(scores), std(scores)))