Ejemplo n.º 1
0
class SynsetExpansion:
    """Grow a set of seed concepts along hypernym/hyponym links.

    A neighbour is kept when its similarity to the originating seed reaches
    the threshold ``th``; every kept neighbour is expanded in turn.
    """

    def __init__(self, th, sim):
        """Store the expansion settings.

        :param th: minimum similarity to the seed a concept needs to be kept.
        :param sim: value handed to ``SemanticSimilarity.sim`` to obtain the
            similarity callable.
        """
        self.semsim = SemanticSimilarity()
        self.th = th
        self.sim = sim

    def expansion(self, seeds):
        """Expand every seed and return what was collected.

        Both result lists are shared by all seeds, so a concept collected
        for one seed is not collected again as a neighbour of a later seed.
        A seed itself is always appended, with the score ``1.0``.

        :param seeds: iterable of concepts exposing ``hypernyms()`` and
            ``hyponyms()``.
        :return: ``(concepts, scored)`` -- the kept concepts in visiting
            order and the matching ``(concept, score)`` pairs.
        """
        concepts = []
        scored = []
        # The measure does not depend on the seed, so resolve it only once.
        measure = self.semsim.sim(self.sim)
        for seed in seeds:
            self.expander((seed, 1.0), seed, self.th, measure, concepts, scored)
        return concepts, scored

    def _neighbours(self, concept):
        """Yield the hypernyms of ``concept`` and then its hyponyms, lazily."""
        for parent in concept.hypernyms():
            yield parent
        for child in concept.hyponyms():
            yield child

    def expander(self, c, s, th, sim, result_lst, result_tuple):
        """Collect ``c`` plus every reachable neighbour similar enough to ``s``.

        The walk is depth-first, hypernyms before hyponyms. It uses an
        explicit stack instead of recursion, so a long chain of concepts
        cannot exhaust the interpreter's recursion limit.

        :param c: ``(concept, score)`` pair to start from; appended as is.
        :param s: the seed every candidate is compared with.
        :param th: minimum value of ``sim(s, candidate)`` to keep a candidate.
        :param sim: two-argument similarity callable.
        :param result_lst: list extended in place with the kept concepts;
            also used to recognise concepts that were already collected.
        :param result_tuple: list extended in place with the matching
            ``(concept, score)`` pairs.
        """
        concept, _ = c
        result_tuple.append(c)
        result_lst.append(concept)

        # Each stack entry is the partially consumed neighbour stream of a
        # concept that is still being expanded.
        pending = [self._neighbours(concept)]
        while pending:
            for candidate in pending[-1]:
                # Check membership first: no similarity needs to be computed
                # for a concept that has already been collected.
                if candidate in result_lst:
                    continue
                sim_score = sim(s, candidate)
                if sim_score >= th:
                    result_tuple.append((candidate, sim_score))
                    result_lst.append(candidate)
                    # Descend into the new concept before looking at the
                    # remaining siblings (same order as a recursive walk).
                    pending.append(self._neighbours(candidate))
                    break
            else:
                # Neighbour stream exhausted: resume the parent's stream.
                pending.pop()

    def synsets_exapnsion(self, term):
        """Map ``term`` to its seed synsets and expand them.

        :param term: word passed to ``synsets_mapping``.
        :return: the ``(concepts, scored)`` pair produced by ``expansion``.
        """
        seeds = self.synsets_mapping(term)
        return self.expansion(seeds)

    # Correctly spelled name; the misspelled original stays available so
    # existing callers keep working.
    synsets_expansion = synsets_exapnsion

    def synsets_mapping(self, term):
        """Return ``wn.synsets(term, pos=wn.NOUN)`` for ``term``."""
        return wn.synsets(term, pos=wn.NOUN)
Ejemplo n.º 2
0
 def __init__(self):
     """Set up the dataset file list and the similarity backend."""
     # Dataset file names, kept in their declared order.
     dataset_files = [
         "combined.csv",
         "EN-MC-30.txt",
         "EN-RG-65.txt",
         "MEN_dataset_lemma_form_full",
         "Mtruk.csv",
         "rw.txt",
         "SimLex-999.txt",
         "wordsim_relatedness_goldstandard.txt",
         "wordsim_similarity_goldstandard.txt",
     ]
     self.datasets = dataset_files
     self.semsim = SemanticSimilarity()
Ejemplo n.º 3
0
class Evaluation:
    """Helpers for scoring word pairs and correlating the scores with the
    reference ratings stored in the dataset files."""

    def __init__(self):
        """List the known dataset files and build the similarity backend."""
        self.datasets = [
            "combined.csv",
            "EN-MC-30.txt",
            "EN-RG-65.txt",
            "MEN_dataset_lemma_form_full",
            "Mtruk.csv",
            "rw.txt",
            "SimLex-999.txt",
            "wordsim_relatedness_goldstandard.txt",
            "wordsim_similarity_goldstandard.txt",
        ]
        self.semsim = SemanticSimilarity()

    def process_dataset(self, name, output="eval-datasets/lexical/7.txt"):
        """Copy the first three tab-separated columns of a file.

        :param name: path of the input file, read with
            ``FileIO.read_list_file``.
        :param output: path handed to ``FileIO.save_list_file``; the default
            is the location this method always wrote to before the
            parameter existed.
        :raises IndexError: if a line has fewer than three tab-separated
            fields.
        """
        rows = []
        for line in FileIO.read_list_file(name):
            fields = line.split("\t")
            rows.append("\t".join([fields[0], fields[1], fields[2]]))
        FileIO.save_list_file(output, rows)

    def load_dataset(self, dataset_id):
        """Load the word pairs and ratings of one prepared dataset.

        Every line of ``eval-datasets/lexical/<dataset_id>.txt`` is split on
        whitespace: fields 0 and 1 form the word pair, field 2 the rating.

        :param dataset_id: value substituted into the file name.
        :return: ``(word_pairs, human)`` -- a list of ``(word1, word2)``
            tuples and a list of float ratings in the same order.
        :raises IndexError: if a line has fewer than three fields.
        :raises ValueError: if a rating is not a valid number.
        """
        data = FileIO.read_list_file("eval-datasets/lexical/%s.txt" % dataset_id)
        rows = [line.split() for line in data]
        # Real lists (not a lazy ``map``) so the pairs can be traversed more
        # than once on Python 3 as well.
        word_pairs = [(row[0], row[1]) for row in rows]
        # Ratings are parsed to numbers: left as strings they cannot be
        # correlated numerically with the computed scores.
        human = [float(row[2]) for row in rows]
        return word_pairs, human

    def correlation(self, name):
        """Return a two-argument callable that dispatches to a method.

        :param name: name of a method of this object taking ``(x, y)``,
            e.g. ``"spearman"``, ``"pearson"`` or ``"kendal"``. The lookup
            happens when the returned callable is invoked.
        """
        def function(x, y):
            return getattr(self, name)(x, y)

        return function

    def spearman(self, x, y):
        """Return ``spearmanr(x, y)``."""
        return spearmanr(x, y)

    def pearson(self, x, y):
        """Return ``pearsonr(x, y)``."""
        return pearsonr(x, y)

    def kendal(self, x, y):
        """Return ``kendalltau(x, y)``."""
        return kendalltau(x, y)

    # Correctly spelled name; ``kendal`` stays for existing callers.
    kendall = kendal

    def sim_measure(self, word_pairs, method):
        """Apply ``method`` to every pair and return the list of results.

        :param word_pairs: iterable of two-item pairs.
        :param method: two-argument callable.
        """
        scores = [method(x, y) for x, y in word_pairs]
        return scores

    def wordnet(self, name):
        """Build a word-level similarity function from a synset measure.

        The returned callable looks up the measure with
        ``self.semsim.sim(name)``, applies it to every combination of the
        two words' synsets and returns the highest score. The words and the
        resulting score are printed on each call.

        :param name: value handed to ``self.semsim.sim``.
        :return: callable ``(x, y) -> score``; ``0.0`` when either word has
            no synsets, instead of failing on an empty ``max``.
        """
        def function(x, y):
            sim = self.semsim.sim(name)
            # Formatted before printing so Python 2 and 3 emit the same text.
            print("%s %s" % (x, y))
            synsets1 = wn.synsets(x)
            synsets2 = wn.synsets(y)
            scores = [sim(syn1, syn2) for syn1 in synsets1 for syn2 in synsets2]
            # No synset combination means there is nothing to compare.
            best = max(scores) if scores else 0.0
            print(best)
            return best

        return function

    def compare_correlation(self, cor_method, scores, human):
        """Return the first element of the chosen correlation result.

        :param cor_method: method name understood by ``correlation``.
        :param scores: computed scores.
        :param human: reference ratings, aligned with ``scores``.
        """
        cor = self.correlation(cor_method)(scores, human)
        return cor[0]
Ejemplo n.º 4
0
 def __init__(self, th, sim):
     """Create the similarity backend and remember the two settings.

     :param th: stored unchanged as ``self.th``.
     :param sim: stored unchanged as ``self.sim``.
     """
     backend = SemanticSimilarity()
     self.semsim = backend
     self.th, self.sim = th, sim