def compute_neighbours(in_file, no_neighbours, out_dir, sim_measure, space_files):
    """Write the neighbours of every word listed in ``in_file`` to a file.

    The output file is ``<out_dir>/NEIGHBOURS.<in_file name>.<space
    description>.<sim_measure>``. For each word read from ``in_file`` it
    contains the word on its own line, followed by one tab-indented
    ``neighbour similarity`` line per result of ``space.get_neighbours``.

    Parameters:
        in_file: path of the word list, read with ``io_utils.read_list``.
        no_neighbours: passed to ``get_neighbours`` as the neighbour count.
        out_dir: directory of the output file (parent directories are
            created through ``io_utils.create_parent_directories``).
        sim_measure: one of "cos", "lin", "dot_prod", "euclidean".
        space_files: sequence of paths; the first is loaded as the space
            that is queried. When it holds exactly two paths, the second is
            loaded too and handed to ``get_neighbours`` as its last argument.

    Raises:
        ValueError: if ``sim_measure`` is not one of the known keys.
    """
    def file_stem(path):
        # Base name of a "/"-separated path without its last extension.
        # A name containing no "." is kept whole; previously such a name
        # collapsed to an empty description and vanished from the output
        # file name.
        base_name = path.split("/")[-1]
        stem, dot, _ = base_name.rpartition(".")
        return stem if dot else base_name

    similarities = {"cos": CosSimilarity(),
                    "lin": LinSimilarity(),
                    "dot_prod": DotProdSimilarity(),
                    "euclidean": EuclideanSimilarity()}

    # Validate before loading anything, so a bad measure fails immediately.
    if sim_measure not in similarities:
        raise ValueError("Similarity measure:%s not defined" % sim_measure)
    sim = similarities[sim_measure]

    space = io_utils.load(space_files[0], Space)
    space2 = None
    space_descr = file_stem(space_files[0])
    if len(space_files) == 2:
        space2 = io_utils.load(space_files[1], Space)
        space_descr = ".".join([space_descr, file_stem(space_files[1])])

    descr = ".".join(["NEIGHBOURS", in_file.split("/")[-1], space_descr])
    out_file = '%s/%s.%s' % (out_dir, descr, sim_measure)
    io_utils.create_parent_directories(out_file)

    data = io_utils.read_list(in_file)

    # Parenthesised single-argument form prints the same on Python 2 and 3.
    print("Computing neighbours: %s" % sim_measure)
    with open(out_file, "w") as out_stream:
        for word in data:
            out_stream.write("%s\n" % word)
            result = space.get_neighbours(word, no_neighbours, sim, space2)
            for neighbour, neighbour_sim in result:
                out_stream.write("\t%s %s\n" % (neighbour, neighbour_sim))
Esempio n. 2
0
def _strip_extension(path):
    """Return the base name of a "/"-separated path minus its last extension.

    A base name without any "." is returned unchanged.
    """
    base_name = path.split("/")[-1]
    stem, dot, _ = base_name.rpartition(".")
    if not dot:
        return base_name
    return stem


def compute_neighbours(in_file, no_neighbours, out_dir, sim_measure,
                       space_files):
    """Compute the neighbours of the words in ``in_file`` and save them.

    Results go to ``<out_dir>/NEIGHBOURS.<in_file name>.<space
    description>.<sim_measure>``: each word on its own line, then one
    tab-indented ``neighbour similarity`` line for every pair returned by
    ``space.get_neighbours``.

    Parameters:
        in_file: path of the word list (read via ``io_utils.read_list``).
        no_neighbours: neighbour count forwarded to ``get_neighbours``.
        out_dir: directory that receives the output file.
        sim_measure: "cos", "lin", "dot_prod" or "euclidean".
        space_files: sequence of space file paths. The first one is the
            space that is queried; if there are exactly two, the second is
            loaded and passed to ``get_neighbours`` as its last argument.

    Raises:
        ValueError: if ``sim_measure`` is not a known similarity key.
    """
    sim_dict = {
        "cos": CosSimilarity(),
        "lin": LinSimilarity(),
        "dot_prod": DotProdSimilarity(),
        "euclidean": EuclideanSimilarity(),
    }

    if sim_measure not in sim_dict:
        raise ValueError("Similarity measure:%s not defined" % sim_measure)
    sim = sim_dict[sim_measure]

    space = io_utils.load(space_files[0], Space)
    space2 = None
    # Extensionless file names used to produce an empty description here,
    # dropping the space name from the output file name.
    space_descr = _strip_extension(space_files[0])
    if len(space_files) == 2:
        space2 = io_utils.load(space_files[1], Space)
        space_descr = "%s.%s" % (space_descr,
                                 _strip_extension(space_files[1]))

    descr = ".".join(["NEIGHBOURS", in_file.split("/")[-1], space_descr])
    out_file = '%s/%s.%s' % (out_dir, descr, sim_measure)
    io_utils.create_parent_directories(out_file)

    data = io_utils.read_list(in_file)

    print("Computing neighbours: %s" % sim_measure)
    with open(out_file, "w") as out_stream:
        for word in data:
            out_stream.write("%s\n" % word)
            result = space.get_neighbours(word, no_neighbours, sim, space2)
            for neighbour, neighbour_sim in result:
                out_stream.write("\t%s %s\n" % (neighbour, neighbour_sim))
Esempio n. 3
0
# Train two composition models (Lexical Function, then Full Additive) on the
# same data and score each against gold similarity ratings.
# NOTE(review): `data_path`, `space` and `per_space` are not defined in this
# excerpt; they must be set up earlier in the script.
# Print calls use the parenthesised single-argument form, which prints the
# same under Python 2 and parses under Python 3.
train_data_file = data_path + "ML08_SV_train.txt"
train_data = io_utils.read_tuple_list(train_data_file, fields=[0, 1, 2])

print("Training Lexical Function composition model...")
comp_model = LexicalFunction(learner=RidgeRegressionLearner(param=2))
comp_model.train(train_data, space, per_space)

print("Composing phrases...")
test_phrases_file = data_path + "ML08nvs_test.txt"
test_phrases = io_utils.read_tuple_list(test_phrases_file, fields=[0, 1, 2])
composed_space = comp_model.compose(test_phrases, space)

print("Reading similarity test data...")
test_similarity_file = data_path + "ML08data_new.txt"
# Fields 0 and 1 of each line form the pair; field 2 is the gold value.
test_pairs = io_utils.read_tuple_list(test_similarity_file, fields=[0, 1])
gold = io_utils.read_list(test_similarity_file, field=2)

print("Computing similarity with lexical function...")
pred = composed_space.get_sims(test_pairs, CosSimilarity())

# use this composed space to assign similarities
print("Scoring lexical function...")
print(scoring_utils.score(gold, pred, "spearman"))

# Same train / compose / score steps, with the Full Additive model.
print("Training Full Additive composition model...")
comp_model = FullAdditive(learner=RidgeRegressionLearner(param=2))
comp_model.train(train_data, space, per_space)
composed_space = comp_model.compose(test_phrases, space)
pred = composed_space.get_sims(test_pairs, CosSimilarity())
print(scoring_utils.score(gold, pred, "spearman"))
Esempio n. 4
0
#ex20.py
#-------
from composes.utils import io_utils
from composes.utils import scoring_utils
from composes.similarity.cos import CosSimilarity

# read in a space
# NOTE(review): the ".pkl" name suggests a pickled object - only load files
# from a trusted source; confirm against io_utils.load.
my_space = io_utils.load("data/out/ex01.pkl")

# compute similarities of a list of word pairs
# (fields 0 and 1 of each line in the file form one pair)
fname = "data/in/word_sims.txt"
word_pairs = io_utils.read_tuple_list(fname, fields=[0, 1])
predicted = my_space.get_sims(word_pairs, CosSimilarity())

# compute correlations between field 2 of the same file and the predictions
# (print calls are written so they work on both Python 2 and Python 3)
gold = io_utils.read_list(fname, field=2)
print("Spearman")
print(scoring_utils.score(gold, predicted, "spearman"))
print("Pearson")
print(scoring_utils.score(gold, predicted, "pearson"))
Esempio n. 5
0
# Evaluation script: a Lexical Function model and a Full Additive model are
# trained on the same tuples, used to compose the test phrases, and scored
# with Spearman correlation against the gold column of the test file.
# NOTE(review): relies on `data_path`, `space` and `per_space` being defined
# before this point; they are not visible in this excerpt.
train_data_file = data_path + "ML08_SV_train.txt"
train_data = io_utils.read_tuple_list(train_data_file, fields=[0, 1, 2])

# print(...) with a single argument behaves identically on Python 2 and 3.
print("Training Lexical Function composition model...")
comp_model = LexicalFunction(learner=RidgeRegressionLearner(param=2))
comp_model.train(train_data, space, per_space)

print("Composing phrases...")
test_phrases_file = data_path + "ML08nvs_test.txt"
test_phrases = io_utils.read_tuple_list(test_phrases_file, fields=[0, 1, 2])
composed_space = comp_model.compose(test_phrases, space)

print("Reading similarity test data...")
test_similarity_file = data_path + "ML08data_new.txt"
test_pairs = io_utils.read_tuple_list(test_similarity_file, fields=[0, 1])
gold = io_utils.read_list(test_similarity_file, field=2)

print("Computing similarity with lexical function...")
pred = composed_space.get_sims(test_pairs, CosSimilarity())

# use this composed space to assign similarities
print("Scoring lexical function...")
print(scoring_utils.score(gold, pred, "spearman"))


# Repeat the pipeline with the Full Additive model on the same data.
print("Training Full Additive composition model...")
comp_model = FullAdditive(learner=RidgeRegressionLearner(param=2))
comp_model.train(train_data, space, per_space)
composed_space = comp_model.compose(test_phrases, space)
pred = composed_space.get_sims(test_pairs, CosSimilarity())
print(scoring_utils.score(gold, pred, "spearman"))
Esempio n. 6
0
# ex20.py
# -------
from composes.utils import io_utils
from composes.utils import scoring_utils
from composes.similarity.cos import CosSimilarity

# read in a space
# NOTE(review): presumably a pickled space given the ".pkl" name; load only
# trusted files - verify against io_utils.load.
my_space = io_utils.load("data/out/ex01.pkl")

# compute similarities of a list of word pairs
# (each pair is built from fields 0 and 1 of a line in `fname`)
fname = "data/in/word_sims.txt"
word_pairs = io_utils.read_tuple_list(fname, fields=[0, 1])
predicted = my_space.get_sims(word_pairs, CosSimilarity())

# compute correlations; the gold values are field 2 of the same file
gold = io_utils.read_list(fname, field=2)
# Single-argument print(...) gives the same output on Python 2 and Python 3.
print("Spearman")
print(scoring_utils.score(gold, predicted, "spearman"))
print("Pearson")
print(scoring_utils.score(gold, predicted, "pearson"))