def test_3words():
    """Smoke-test NeuralTranslationModel on the two-pair 'three-words' fixture.

    Trains on two seed pairs and prints whatever ``evaluate_model`` returns
    for the same pairs. Nothing is asserted yet (see the TODO below).
    """
    # NOTE(review): another ``test_3words`` is visible nearby in this source;
    # if both live in the same module the later definition shadows this one.
    # Confirm they belong to separate test files.
    data = [('here', 'there'), ('now', 'then')]

    model = NeuralTranslationModel(
        load_vsm_fixture('three-words/vsm-a-window5-min0-dim5'),
        load_vsm_fixture('three-words/vsm-b-window5-min0-dim5'),
        hidden_layer_size=5,
        learning_rate=0.01)
    model.train(data)

    # TODO how to assert good enough performance here? Maybe train an ensemble?
    # Single-argument parenthesized form: prints the same text under the
    # Python 2 print statement and is also valid Python 3.
    print(evaluate_model(model, data))
def test_3words():
    """LinearTranslationModel must score 1.0 on every pair it was trained on.

    Uses the 'three-words' VSM fixtures and two seed pairs; each score
    yielded by ``evaluate_model`` on the training pairs is compared to 1.0.
    """
    vsm_a = load_vsm_fixture('three-words/vsm-a-window5-min0-dim5')
    vsm_b = load_vsm_fixture('three-words/vsm-b-window5-min0-dim5')
    seed_pairs = [('here', 'there'), ('now', 'then')]

    translator = LinearTranslationModel(vsm_a, vsm_b)
    translator.train(seed_pairs)

    failure_message = "Should see 100% performance on training data"
    for pair_score in evaluate_model(translator, seed_pairs):
        assert_equal(1.0, pair_score, failure_message)
def _load_vsm(path, binary):
    """Load one vector space model from `path` and make sure `syn0norm` is set.

    With `binary` truthy the model is read via
    ``Word2Vec.load_word2vec_format(..., binary=True, norm_only=True)``;
    otherwise via ``Word2Vec.load_normalized``.
    """
    if binary:
        vsm = Word2Vec.load_word2vec_format(path, binary=True, norm_only=True)
    else:
        vsm = Word2Vec.load_normalized(path)

    # presumably init_sims() populates syn0norm (normalized vectors) --
    # TODO confirm against the Word2Vec implementation in use
    if vsm.syn0norm is None:
        vsm.init_sims()

    return vsm


def main(arguments):
    """Build a translation model, then optionally load, train/save and test it.

    `arguments` is an attribute namespace (anything ``vars()`` accepts) from
    which the following are read:

    - ``vsm_source``, ``vsm_target``: paths of the two vector space models
    - ``vsm_binary``: selects the loader used for both models
    - ``model``: key into ``MODEL_MAPPING`` choosing the model class
    - ``model_arguments``: dict of extra keyword arguments for that class
    - ``model_file``: saved model to load, or None to skip loading
    - ``data_train``, ``data_test``: seed data sources passed to
      ``load_seed_data``; a falsy value skips the corresponding step

    Exits the process with status 1 if ``model_file`` is given but the
    model's ``load`` raises NotImplementedError.
    """
    vsm_source = _load_vsm(arguments.vsm_source, arguments.vsm_binary)
    vsm_target = _load_vsm(arguments.vsm_target, arguments.vsm_binary)

    # Instantiate model
    model_class = MODEL_MAPPING[arguments.model]
    model = model_class(vsm_source, vsm_target, **arguments.model_arguments)

    # Load seed data
    data_train = (load_seed_data(arguments.data_train)
                  if arguments.data_train else None)
    data_test = (load_seed_data(arguments.data_test)
                 if arguments.data_test else None)

    # Attempt to load model from file
    if arguments.model_file is not None:
        logging.debug("Loading model from file '%s'", arguments.model_file)
        try:
            model.load(arguments.model_file)
        except NotImplementedError:
            logging.error("Requested model does not support loading from "
                          "saved files")
            sys.exit(1)

    # Train model
    if data_train is not None:
        model.train(data_train)

        # vars() returns the namespace's own __dict__; copy it so that adding
        # the 'extra' entry does not also add an attribute to the caller's
        # `arguments` object.
        save_arguments = dict(vars(arguments))
        save_arguments['extra'] = {'training_pairs': data_train}
        save_model(model, save_arguments)

    # Test model
    if data_test is not None:
        scores = list(evaluate_model(model, data_test))
        # "%s %s" gives the same text as the Python 2 statement
        # `print a, b` and is also valid on Python 3.
        print("%s %s" % (mean(scores), std(scores)))