def apply_model(in_file, out_dir, model, trained_model, arg_space_files,
                alpha, beta, lambda_, out_format):
    """Compose the phrases listed in ``in_file`` and save the composed space.

    The composition model is built with ``create_model`` when ``model`` is
    given; otherwise it is loaded from the ``trained_model`` file.

    Args:
        in_file: Path of the file the 3-field tuples are read from. Its last
            "/"-separated component becomes part of the output file name.
        out_dir: Directory prefix of the output files.
        model: Model name handed to ``create_model``, or None to load a
            previously saved model instead.
        trained_model: Path of a saved ``CompositionModel``; only read when
            ``model`` is None.
        arg_space_files: Sequence of paths to argument spaces. The first is
            always loaded; a second one is loaded only when exactly two
            paths are given.
        alpha, beta, lambda_: Forwarded unchanged to ``create_model``.
        out_format: When not None, the composed space is additionally
            exported in this format.
    """
    # print(...) with a single argument behaves the same on Python 2 and 3.
    print("Reading in data...")

    in_descr = in_file.split("/")[-1]

    if model is not None:
        model_obj = create_model(model, alpha, beta, lambda_)
    else:
        model_obj = io_utils.load(trained_model, CompositionModel)

    model_descr = type(model_obj).__name__

    arg_space = io_utils.load(arg_space_files[0], Space)
    arg_space2 = None
    if len(arg_space_files) == 2:
        arg_space2 = io_utils.load(arg_space_files[1], Space)

    data = io_utils.read_tuple_list(in_file, fields=[0, 1, 2])

    print("Applying composition model:%s" % model_descr)
    # A LexicalFunction is always composed against the first space only,
    # even when a second argument space was supplied. The exact type check
    # is kept as in the original (subclasses are not treated specially).
    if arg_space2 is None or type(model_obj) is LexicalFunction:
        composed_space = model_obj.compose(data, arg_space)
    else:
        composed_space = model_obj.compose(data, (arg_space, arg_space2))

    print("Printing...")
    out_file = ".".join([out_dir + "/COMPOSED_SS", model_descr, in_descr])
    io_utils.save(composed_space, "%s.pkl" % out_file)

    if out_format is not None:
        composed_space.export(out_file, format=out_format)
def train_model(in_file, out_dir, model, arg_space_files, phrase_space_file,
                regression, crossvalid, intercept, param, param_range,
                export_params):
    """Train a composition model on the tuples in ``in_file`` and save it.

    Args:
        in_file: Path of the training file (3-field tuples). Its last
            "/"-separated component becomes part of the output file name.
        out_dir: Directory prefix of the output files.
        model: One of "weighted_add", "full_add", "lexical_func" or
            "dilation".
        arg_space_files: Sequence of paths to argument spaces. The first is
            always loaded; a second one is loaded only when exactly two
            paths are given.
        phrase_space_file: Path of the phrase space used as training target.
        regression: "ridge" or "lstsq" selects the learner for models other
            than weighted_add/dilation; any other value makes the model use
            its no-argument constructor.
        crossvalid, param, param_range: Forwarded to the ridge learner.
        intercept: Forwarded to the ridge or lstsq learner.
        export_params: When true, the trained parameters are also exported
            to a ".params" file.

    Raises:
        ValueError: If ``model`` is not one of the supported names.
    """
    # print(...) with a single argument behaves the same on Python 2 and 3.
    print("Reading in data...")
    in_descr = in_file.split("/")[-1]

    model_dict = {
        "weighted_add": WeightedAdditive,
        "full_add": FullAdditive,
        "lexical_func": LexicalFunction,
        "dilation": Dilation,
    }
    learner_dict = {
        "ridge": RidgeRegressionLearner,
        "lstsq": LstsqRegressionLearner,
    }

    # Validate the model name before loading any space, so a bad name is
    # reported immediately instead of after the file loads.
    if model not in model_dict:
        raise ValueError("Invalid model:%s for training" % model)
    model_cls = model_dict[model]

    arg_space = io_utils.load(arg_space_files[0], Space)
    arg_space2 = None
    if len(arg_space_files) == 2:
        arg_space2 = io_utils.load(arg_space_files[1], Space)

    phrase_space = io_utils.load(phrase_space_file, Space)

    if model_cls in (WeightedAdditive, Dilation):
        # These two models are built without a regression learner.
        model_obj = model_cls()
    elif regression == "ridge":
        regression_obj = learner_dict[regression](
            crossvalidation=crossvalid,
            intercept=intercept,
            param=param,
            param_range=param_range)
        model_obj = model_cls(learner=regression_obj)
    elif regression == "lstsq":
        regression_obj = learner_dict[regression](intercept=intercept)
        model_obj = model_cls(learner=regression_obj)
    else:
        model_obj = model_cls()

    train_data = io_utils.read_tuple_list(in_file, fields=[0, 1, 2])

    print("Training %s model" % model)
    # lexical_func is always trained on the first argument space only.
    if arg_space2 is None or model == "lexical_func":
        model_obj.train(train_data, arg_space, phrase_space)
    else:
        model_obj.train(train_data, (arg_space, arg_space2), phrase_space)

    print("Printing...")
    out_file = ".".join([out_dir + "/TRAINED_COMP_MODEL", model, in_descr])
    io_utils.save(model_obj, "%s.pkl" % out_file)
    if export_params:
        model_obj.export("%s.params" % out_file)
def apply_model(in_file, out_dir, model, trained_model, arg_space_files,
                alpha, beta, lambda_, out_format):
    """Apply a composition model to the tuples in ``in_file`` and save the result.

    When ``model`` is None the model is loaded from ``trained_model``;
    otherwise it is created through ``create_model`` with ``alpha``,
    ``beta`` and ``lambda_``. The composed space is saved as a ".pkl" file
    under ``out_dir`` and, if ``out_format`` is not None, exported in that
    format as well.
    """
    print("Reading in data...")
    input_name = in_file.split("/")[-1]

    if model is None:
        composer = io_utils.load(trained_model, CompositionModel)
    else:
        composer = create_model(model, alpha, beta, lambda_)
    composer_name = type(composer).__name__

    # The second argument space is optional: it is only loaded when exactly
    # two space files were passed.
    first_space = io_utils.load(arg_space_files[0], Space)
    second_space = (io_utils.load(arg_space_files[1], Space)
                    if len(arg_space_files) == 2 else None)

    phrases = io_utils.read_tuple_list(in_file, fields=[0, 1, 2])

    print("Applying composition model:%s" % composer_name)
    # Both spaces are passed as a pair unless the model is exactly a
    # LexicalFunction or no second space was loaded.
    use_both_spaces = (second_space is not None
                       and type(composer) is not LexicalFunction)
    if use_both_spaces:
        spaces = (first_space, second_space)
    else:
        spaces = first_space
    result = composer.compose(phrases, spaces)

    print("Printing...")
    out_file = ".".join([out_dir + "/COMPOSED_SS", composer_name, input_name])
    io_utils.save(result, "%s.pkl" % out_file)
    if out_format is not None:
        result.export(out_file, format=out_format)
def train_model(in_file, out_dir, model, arg_space_files, phrase_space_file,
                regression, crossvalid, intercept, param, param_range,
                export_params):
    """Train the composition model named ``model`` and save it to ``out_dir``.

    Args:
        in_file: Path of the training file (3-field tuples). Its last
            "/"-separated component becomes part of the output file name.
        out_dir: Directory prefix of the output files.
        model: "weighted_add", "full_add", "lexical_func" or "dilation".
        arg_space_files: Paths to argument spaces; the first is always
            loaded, the second only when exactly two paths are given.
        phrase_space_file: Path of the phrase space passed to ``train``.
        regression: "ridge" or "lstsq" to pick a learner. Ignored for
            weighted_add and dilation; any other value falls back to the
            model's no-argument constructor.
        crossvalid, param, param_range: Ridge learner settings.
        intercept: Passed to whichever learner is selected.
        export_params: When true, also write a ".params" export.

    Raises:
        ValueError: If ``model`` is not a supported model name.
    """
    # Single-argument print(...) is valid on both Python 2 and Python 3.
    print("Reading in data...")
    in_descr = in_file.split("/")[-1]

    model_dict = {"weighted_add": WeightedAdditive,
                  "full_add": FullAdditive,
                  "lexical_func": LexicalFunction,
                  "dilation": Dilation}
    learner_dict = {"ridge": RidgeRegressionLearner,
                    "lstsq": LstsqRegressionLearner}

    # Reject an unknown model name up front rather than after loading the
    # spaces.
    if model not in model_dict:
        raise ValueError("Invalid model:%s for training" % model)
    model_cls = model_dict[model]

    arg_space = io_utils.load(arg_space_files[0], Space)
    arg_space2 = None
    if len(arg_space_files) == 2:
        arg_space2 = io_utils.load(arg_space_files[1], Space)
    phrase_space = io_utils.load(phrase_space_file, Space)

    # Keyword arguments for the learner; None means "no learner requested".
    learner_kwargs = None
    if regression == "ridge":
        learner_kwargs = dict(crossvalidation=crossvalid,
                              intercept=intercept,
                              param=param,
                              param_range=param_range)
    elif regression == "lstsq":
        learner_kwargs = dict(intercept=intercept)

    if model_cls in (WeightedAdditive, Dilation) or learner_kwargs is None:
        # weighted_add / dilation never receive a learner.
        model_obj = model_cls()
    else:
        regression_obj = learner_dict[regression](**learner_kwargs)
        model_obj = model_cls(learner=regression_obj)

    train_data = io_utils.read_tuple_list(in_file, fields=[0, 1, 2])

    print("Training %s model" % model)
    # lexical_func only ever uses the first argument space.
    if arg_space2 is None or model == "lexical_func":
        model_obj.train(train_data, arg_space, phrase_space)
    else:
        model_obj.train(train_data, (arg_space, arg_space2), phrase_space)

    print("Printing...")
    out_file = ".".join([out_dir + "/TRAINED_COMP_MODEL", model, in_descr])
    io_utils.save(model_obj, "%s.pkl" % out_file)
    if export_params:
        model_obj.export("%s.params" % out_file)
from composes.composition.multiplicative import Multiplicative
from composes.transformation.scaling.row_normalization import RowNormalization
import numpy as np
import sys

# Read in a space (path given as first command-line argument) and apply
# RowNormalization to it.
my_space = io_utils.load(sys.argv[1])
my_space = my_space.apply(RowNormalization())

add = WeightedAdditive(alpha=1, beta=1)
mult = Multiplicative()

# Compose each word pair from the file given as second command-line
# argument (only the additive model is applied below) and print the 10
# nearest neighbours of the composed vector.
fname = sys.argv[2]
word_pairs = io_utils.read_tuple_list(fname, fields=[0, 1])

lengths = []
for wp in word_pairs:
    # Reset the flag for every pair. It used to be set once before the
    # loop, so a single missing word suppressed all later pairs.
    found = True
    try:
        # Row lookups are only a presence check for both words.
        v1 = my_space.get_row(wp[0])
        v2 = my_space.get_row(wp[1])
    except KeyError:
        # One of the two words is not in the space: skip this pair quietly.
        found = False

    if found:
        composed_space = add.compose([(wp[0], wp[1], "_composed_")], my_space)
        neighbours = composed_space.get_neighbours("_composed_", 10,
                                                   CosSimilarity(),
                                                   space2=my_space)
        # Formatted into one string so the output is the same under
        # Python 2 and Python 3.
        print("%s %s" % (wp[0], wp[1]))
        print(neighbours)
# NOTE: `space` and `data_path` are used here without being assigned, so
# they must already be defined by the time this code runs.

# Single-argument print(...) is valid on both Python 2 and Python 3.
print("Applying feature selection...")
space = space.apply(TopFeatureSelection(2000))

print("Applying SVD...")
space = space.apply(Svd(100))

print("Creating peripheral space..")
per_space = PeripheralSpace.build(space,
                                  data=data_path + "per.raw.SV.sm",
                                  cols=data_path + "per.raw.SV.cols",
                                  format="sm")

# Read in the training data (first three fields of each line).
train_data_file = data_path + "ML08_SV_train.txt"
train_data = io_utils.read_tuple_list(train_data_file, fields=[0, 1, 2])

print("Training Lexical Function composition model...")
comp_model = LexicalFunction(learner=RidgeRegressionLearner(param=2))
comp_model.train(train_data, space, per_space)

print("Composing phrases...")
test_phrases_file = data_path + "ML08nvs_test.txt"
test_phrases = io_utils.read_tuple_list(test_phrases_file, fields=[0, 1, 2])
composed_space = comp_model.compose(test_phrases, space)

print("Reading similarity test data...")
# The same file supplies the pairs (fields 0 and 1) and the gold values
# (field 2).
test_similarity_file = data_path + "ML08data_new.txt"
test_pairs = io_utils.read_tuple_list(test_similarity_file, fields=[0, 1])
gold = io_utils.read_list(test_similarity_file, field=2)
#ex20.py
#-------
from composes.utils import io_utils
from composes.utils import scoring_utils
from composes.similarity.cos import CosSimilarity

#read in a space
my_space = io_utils.load("data/out/ex01.pkl")

#compute cosine similarities of a list of word pairs (fields 0 and 1)
fname = "data/in/word_sims.txt"
word_pairs = io_utils.read_tuple_list(fname, fields=[0, 1])
predicted = my_space.get_sims(word_pairs, CosSimilarity())

#compute correlations against the gold values (field 2 of the same file)
gold = io_utils.read_list(fname, field=2)
#single-argument print(...) behaves the same on Python 2 and Python 3
print("Spearman")
print(scoring_utils.score(gold, predicted, "spearman"))
print("Pearson")
print(scoring_utils.score(gold, predicted, "pearson"))
# NOTE: this code reads `space` and `data_path` without assigning them, so
# both must be bound before it runs.

# print(...) with one argument works identically on Python 2 and Python 3.
print("Applying feature selection...")
space = space.apply(TopFeatureSelection(2000))

print("Applying SVD...")
space = space.apply(Svd(100))

print("Creating peripheral space..")
per_space = PeripheralSpace.build(
    space,
    data=data_path + "per.raw.SV.sm",
    cols=data_path + "per.raw.SV.cols",
    format="sm",
)

#reading in train data
train_data_file = data_path + "ML08_SV_train.txt"
train_data = io_utils.read_tuple_list(train_data_file, fields=[0, 1, 2])

print("Training Lexical Function composition model...")
ridge_learner = RidgeRegressionLearner(param=2)
comp_model = LexicalFunction(learner=ridge_learner)
comp_model.train(train_data, space, per_space)

print("Composing phrases...")
test_phrases_file = data_path + "ML08nvs_test.txt"
test_phrases = io_utils.read_tuple_list(test_phrases_file, fields=[0, 1, 2])
composed_space = comp_model.compose(test_phrases, space)

print("Reading similarity test data...")
test_similarity_file = data_path + "ML08data_new.txt"
# Fields 0 and 1 are read as the pairs, field 2 as the gold values.
test_pairs = io_utils.read_tuple_list(test_similarity_file, fields=[0, 1])
gold = io_utils.read_list(test_similarity_file, field=2)