def compute_neighbours(in_file, no_neighbours, out_dir, sim_measure, space_files):
    """Write the neighbours of every word listed in ``in_file`` to a file.

    The output file is
    ``<out_dir>/NEIGHBOURS.<in_file name>.<space description>.<sim_measure>``,
    where the space description is built from the space file name(s) with
    their last extension removed. For each word, the word itself is written
    on one line, followed by one tab-indented ``neighbour similarity`` line
    per item returned by ``space.get_neighbours``.

    Args:
        in_file: path of the word list, read with ``io_utils.read_list``.
        no_neighbours: forwarded unchanged to ``Space.get_neighbours``
            (presumably the number of neighbours wanted per word).
        out_dir: directory in which the output file is created.
        sim_measure: one of ``"cos"``, ``"lin"``, ``"dot_prod"``,
            ``"euclidean"``.
        space_files: sequence of space file paths. The first is always
            loaded; when there are exactly two, the second is loaded as
            well and handed to ``get_neighbours`` as an extra space.

    Raises:
        ValueError: if ``sim_measure`` is not one of the supported names.
    """
    sim_dict = {
        "cos": CosSimilarity(),
        "lin": LinSimilarity(),
        "dot_prod": DotProdSimilarity(),
        "euclidean": EuclideanSimilarity(),
    }

    if sim_measure not in sim_dict:
        raise ValueError("Similarity measure:%s not defined" % sim_measure)

    space = io_utils.load(space_files[0], Space)
    space2 = None
    # File name of the space without its directory and last extension.
    space_descr = ".".join(space_files[0].split("/")[-1].split(".")[0:-1])

    if len(space_files) == 2:
        space2 = io_utils.load(space_files[1], Space)
        space_descr = ".".join(
            [space_descr] + space_files[1].split("/")[-1].split(".")[0:-1]
        )

    sim = sim_dict[sim_measure]

    descr = ".".join(["NEIGHBOURS", in_file.split("/")[-1], space_descr])
    out_file = '%s/%s.%s' % (out_dir, descr, sim_measure)
    io_utils.create_parent_directories(out_file)

    data = io_utils.read_list(in_file)

    # Single-argument call form: prints the same text on Python 2 and 3.
    print("Computing neighbours: %s" % sim_measure)
    with open(out_file, "w") as out_stream:
        for word in data:
            out_stream.write("%s\n" % word)
            result = space.get_neighbours(word, no_neighbours, sim, space2)
            for neighbour, neighbour_sim in result:
                out_stream.write("\t%s %s\n" % (neighbour, neighbour_sim))
def compute_neighbours(in_file, no_neighbours, out_dir, sim_measure, space_files):
    """Look up neighbours for each word in ``in_file`` and save them.

    Results go to
    ``<out_dir>/NEIGHBOURS.<in_file name>.<space description>.<sim_measure>``.
    Every query word is written on its own line and is followed by one
    tab-indented ``neighbour similarity`` line for each pair yielded by
    ``space.get_neighbours``.

    Args:
        in_file: path of the word list (read through ``io_utils.read_list``).
        no_neighbours: passed straight to ``Space.get_neighbours``
            (presumably how many neighbours to retrieve).
        out_dir: directory that receives the output file.
        sim_measure: ``"cos"``, ``"lin"``, ``"dot_prod"`` or ``"euclidean"``.
        space_files: sequence of space file paths; the first is always
            loaded, and a second one is loaded only when exactly two paths
            are given.

    Raises:
        ValueError: when ``sim_measure`` is not a known similarity name.
    """

    def _name_parts(path):
        # File name of ``path`` split on "." with the final piece dropped.
        return path.split("/")[-1].split(".")[0:-1]

    measures = {
        "cos": CosSimilarity(),
        "lin": LinSimilarity(),
        "dot_prod": DotProdSimilarity(),
        "euclidean": EuclideanSimilarity(),
    }
    if not (sim_measure in measures):
        raise ValueError("Similarity measure:%s not defined" % sim_measure)
    similarity = measures[sim_measure]

    first_path = space_files[0]
    space = io_utils.load(first_path, Space)
    descr_parts = [".".join(_name_parts(first_path))]

    second_space = None
    if len(space_files) == 2:
        second_path = space_files[1]
        second_space = io_utils.load(second_path, Space)
        descr_parts.extend(_name_parts(second_path))
    space_descr = ".".join(descr_parts)

    target = '%s/%s.%s' % (
        out_dir,
        ".".join(["NEIGHBOURS", in_file.split("/")[-1], space_descr]),
        sim_measure,
    )
    io_utils.create_parent_directories(target)

    words = io_utils.read_list(in_file)

    print("Computing neighbours: %s" % sim_measure)
    with open(target, "w") as sink:
        for query in words:
            # The query word is written before its neighbours are computed.
            sink.write("%s\n" % query)
            hits = space.get_neighbours(
                query, no_neighbours, similarity, second_space
            )
            for hit, score in hits:
                sink.write("\t%s %s\n" % (hit, score))
# NOTE: `data_path`, `space` and `per_space` are not defined in this
# fragment; they must already exist when these statements run.

# Training tuples: fields 0, 1 and 2 of every line of the training file.
train_data_file = data_path + "ML08_SV_train.txt"
train_data = io_utils.read_tuple_list(train_data_file, fields=[0, 1, 2])

# Single-argument print(...) calls behave the same on Python 2 and 3.
print("Training Lexical Function composition model...")
comp_model = LexicalFunction(learner=RidgeRegressionLearner(param=2))
comp_model.train(train_data, space, per_space)

print("Composing phrases...")
test_phrases_file = data_path + "ML08nvs_test.txt"
test_phrases = io_utils.read_tuple_list(test_phrases_file, fields=[0, 1, 2])
composed_space = comp_model.compose(test_phrases, space)

# Fields 0 and 1 are the items to compare; field 2 is the gold score.
print("Reading similarity test data...")
test_similarity_file = data_path + "ML08data_new.txt"
test_pairs = io_utils.read_tuple_list(test_similarity_file, fields=[0, 1])
gold = io_utils.read_list(test_similarity_file, field=2)

print("Computing similarity with lexical function...")
# use this composed space to assign similarities
pred = composed_space.get_sims(test_pairs, CosSimilarity())

print("Scoring lexical function...")
print(scoring_utils.score(gold, pred, "spearman"))

# Same train / compose / score sequence with the Full Additive model,
# reusing the training data, test phrases and test pairs loaded above.
print("Training Full Additive composition model...")
comp_model = FullAdditive(learner=RidgeRegressionLearner(param=2))
comp_model.train(train_data, space, per_space)
composed_space = comp_model.compose(test_phrases, space)
pred = composed_space.get_sims(test_pairs, CosSimilarity())
print(scoring_utils.score(gold, pred, "spearman"))
# ex20.py
# -------
from composes.utils import io_utils
from composes.utils import scoring_utils
from composes.similarity.cos import CosSimilarity

# read in a space
my_space = io_utils.load("data/out/ex01.pkl")

# compute similarities of a list of word pairs
# (fields 0 and 1 of each line are the two items of a pair)
fname = "data/in/word_sims.txt"
word_pairs = io_utils.read_tuple_list(fname, fields=[0, 1])
predicted = my_space.get_sims(word_pairs, CosSimilarity())

# compute correlations against field 2 of the same file
gold = io_utils.read_list(fname, field=2)
# Single-argument print(...) calls behave the same on Python 2 and 3.
print("Spearman")
print(scoring_utils.score(gold, predicted, "spearman"))
print("Pearson")
print(scoring_utils.score(gold, predicted, "pearson"))
# NOTE: `data_path`, `space` and `per_space` are not defined in this
# fragment; they must already exist when these statements run.

# Training tuples: fields 0, 1 and 2 of every line of the training file.
train_data_file = data_path + "ML08_SV_train.txt"
train_data = io_utils.read_tuple_list(train_data_file, fields=[0, 1, 2])

# Single-argument print(...) calls behave the same on Python 2 and 3.
print("Training Lexical Function composition model...")
comp_model = LexicalFunction(learner=RidgeRegressionLearner(param=2))
comp_model.train(train_data, space, per_space)

print("Composing phrases...")
test_phrases_file = data_path + "ML08nvs_test.txt"
test_phrases = io_utils.read_tuple_list(test_phrases_file, fields=[0, 1, 2])
composed_space = comp_model.compose(test_phrases, space)

# Fields 0 and 1 are the items to compare; field 2 is the gold score.
print("Reading similarity test data...")
test_similarity_file = data_path + "ML08data_new.txt"
test_pairs = io_utils.read_tuple_list(test_similarity_file, fields=[0, 1])
gold = io_utils.read_list(test_similarity_file, field=2)

print("Computing similarity with lexical function...")
# use this composed space to assign similarities
pred = composed_space.get_sims(test_pairs, CosSimilarity())

print("Scoring lexical function...")
print(scoring_utils.score(gold, pred, "spearman"))

# Same train / compose / score sequence with the Full Additive model,
# reusing the training data, test phrases and test pairs loaded above.
print("Training Full Additive composition model...")
comp_model = FullAdditive(learner=RidgeRegressionLearner(param=2))
comp_model.train(train_data, space, per_space)
composed_space = comp_model.compose(test_phrases, space)
pred = composed_space.get_sims(test_pairs, CosSimilarity())
print(scoring_utils.score(gold, pred, "spearman"))
# ex20.py
# -------
from composes.utils import io_utils
from composes.utils import scoring_utils
from composes.similarity.cos import CosSimilarity

# read in a space
my_space = io_utils.load("data/out/ex01.pkl")

# compute similarities of a list of word pairs
# (fields 0 and 1 of each line are the two items of a pair)
fname = "data/in/word_sims.txt"
word_pairs = io_utils.read_tuple_list(fname, fields=[0, 1])
predicted = my_space.get_sims(word_pairs, CosSimilarity())

# compute correlations against field 2 of the same file
gold = io_utils.read_list(fname, field=2)
# Single-argument print(...) calls behave the same on Python 2 and 3.
print("Spearman")
print(scoring_utils.score(gold, predicted, "spearman"))
print("Pearson")
print(scoring_utils.score(gold, predicted, "pearson"))