Exemple #1
0
    def __maxSimC(self, vecs_a, context_a, vecs_b, context_b):
        """Compare the context-selected vectors of two words.

        For each word, ``__closestSenseContext`` picks the vector from
        ``vecs_a`` / ``vecs_b`` that scores highest against that word's own
        context vector. The two picks are then scored against each other
        with ``VectorManipulation.cosine_similarity``.

        Returns:
            Whatever ``cosine_similarity`` returns for the two picks.
        """
        ops = VectorManipulation()
        pick_a = self.__closestSenseContext(vecs_a, context_a)
        pick_b = self.__closestSenseContext(vecs_b, context_b)
        return ops.cosine_similarity(pick_a, pick_b)
Exemple #2
0
    def globalSim(self, vecs_a, vecs_b):
        """Score two vector collections by their element-wise averages.

        Each collection is averaged along axis 0 with ``numpy.average`` and
        the two averages are compared with
        ``VectorManipulation.cosine_similarity``.

        Returns:
            ``0.0`` when either collection is falsy (e.g. an empty list),
            otherwise the cosine similarity of the two averaged vectors.
        """
        ops = VectorManipulation()
        if vecs_a and vecs_b:
            mean_a = numpy.average(vecs_a, axis=0)
            mean_b = numpy.average(vecs_b, axis=0)
            return ops.cosine_similarity(mean_a, mean_b)

        # At least one side has no vectors: nothing to compare.
        return 0.0
Exemple #3
0
    def maxSim(self, vecs_a, vecs_b):
        """Return the highest pairwise score between two vector collections.

        Every vector of ``vecs_a`` is scored against every vector of
        ``vecs_b`` with ``VectorManipulation.cosine_similarity``.

        Returns:
            The largest score found, or ``-1.0`` when no pair scores strictly
            above ``-1.0`` (including when either collection is empty).
        """
        ops = VectorManipulation()
        pair_scores = (
            ops.cosine_similarity(left, right)
            for left in vecs_a
            for right in vecs_b
        )

        # Seed with -1.0 and only move on a strictly greater score.
        best = -1.0
        for score in pair_scores:
            if score > best:
                best = score
        return best
Exemple #4
0
 def test_SpearmanGlobal(self):
     """Check Spearman results for the global-similarity score files.

     Reads 'scws.txt' as the gold data and compares it against
     'scws_globsim.txt' and 'scws_globsimc.txt'. Only the first element of
     each pair returned by ``spearmanCorrelation`` is asserted.
     """
     reader = FileManipulation()
     stats = VectorManipulation()

     gold = reader.readFileLine('scws.txt')
     plain_scores = reader.readFileLine('scws_globsim.txt')
     context_scores = reader.readFileLine('scws_globsimc.txt')

     # The second element of each returned pair is deliberately discarded.
     plain_corr, _unused = stats.spearmanCorrelation(gold, plain_scores)
     context_corr, _unused = stats.spearmanCorrelation(gold, context_scores)

     # First value is checked via its string form, second as a float64.
     self.assertEqual('0.6670118503142607', str(plain_corr))
     self.assertEqual(numpy.float64('0.2969117412433547'), context_corr)
Exemple #5
0
 def test_SpearmanMax(self):
     """Check Spearman results for the max-similarity score files.

     Reads 'scws.txt' as the gold data and compares it against
     'scws_maxsim.txt' and 'scws_maxsimc.txt'. Only the first element of
     each pair returned by ``spearmanCorrelation`` is asserted.
     """
     reader = FileManipulation()
     stats = VectorManipulation()

     gold = reader.readFileLine('scws.txt')
     plain_scores = reader.readFileLine('scws_maxsim.txt')
     context_scores = reader.readFileLine('scws_maxsimc.txt')

     # The second element of each returned pair is deliberately discarded.
     plain_corr, _unused = stats.spearmanCorrelation(gold, plain_scores)
     context_corr, _unused = stats.spearmanCorrelation(gold, context_scores)

     # First value is checked via its string form, second as a float64.
     self.assertEqual('0.6127420529962664', str(plain_corr))
     self.assertEqual(numpy.float64('0.6367583108796157'), context_corr)
Exemple #6
0
 def test_SpearmanAvg(self):
     """Check Spearman results for the average-similarity score files.

     Reads 'scws.txt' as the gold data and compares it against
     'scws_avgsim.txt' and 'scws_avgsimc.txt'. Only the first element of
     each pair returned by ``spearmanCorrelation`` is asserted.
     """
     reader = FileManipulation()
     stats = VectorManipulation()

     gold = reader.readFileLine('scws.txt')
     plain_scores = reader.readFileLine('scws_avgsim.txt')
     context_scores = reader.readFileLine('scws_avgsimc.txt')

     # The second element of each returned pair is deliberately discarded.
     plain_corr, _unused = stats.spearmanCorrelation(gold, plain_scores)
     context_corr, _unused = stats.spearmanCorrelation(gold, context_scores)

     # First value is checked via its string form, second as a float64.
     self.assertEqual('0.6672948584312471', str(plain_corr))
     self.assertEqual(numpy.float64('0.5809138966365319'), context_corr)
Exemple #7
0
    def __closestSenseContext(self, synset_vecs, contextvec):
        """Pick the vector in ``synset_vecs`` that best matches ``contextvec``.

        Each candidate is scored against ``contextvec`` with
        ``VectorManipulation.cosine_similarity``; the first candidate to
        reach the highest score wins (ties keep the earlier one).

        Returns:
            The winning candidate, or an empty list when no candidate scores
            strictly above ``-1.0`` (including when ``synset_vecs`` is empty).
        """
        ops = VectorManipulation()
        winner = []
        winning_score = -1.0

        for candidate in synset_vecs:
            score = ops.cosine_similarity(candidate, contextvec)
            if not score > winning_score:
                # Not strictly better than the current best: keep the winner.
                continue
            winning_score, winner = score, candidate

        return winner
Exemple #8
0
    def avgSim(self, vecs_a, vecs_b):
        """Return the mean pairwise score between two vector collections.

        Every vector of ``vecs_a`` is scored against every vector of
        ``vecs_b`` with ``VectorManipulation.cosine_similarity`` and the
        scores are summed.

        Returns:
            ``0.0`` when either collection is falsy (e.g. an empty list),
            otherwise the score sum divided by
            ``len(vecs_a) * len(vecs_b)``.
        """
        ops = VectorManipulation()
        running_total = 0.0

        for left in vecs_a:
            for right in vecs_b:
                running_total += ops.cosine_similarity(left, right)

        # The emptiness check intentionally comes after the accumulation.
        if vecs_a and vecs_b:
            pair_count = len(vecs_a) * len(vecs_b)
            return running_total / pair_count
        return 0.0
Exemple #9
0
    def __avgSimC(self, vecs_a, context_a, vecs_b, context_b):
        """Return the context-weighted mean pairwise score of two collections.

        For every pair ``(vec_a, vec_b)`` the product of three
        ``VectorManipulation.cosine_similarity`` scores is accumulated:
        ``vec_a`` vs ``context_a``, ``vec_b`` vs ``context_b`` and
        ``vec_a`` vs ``vec_b``.

        Args:
            vecs_a: Candidate vectors of the first word.
            context_a: Context vector of the first word.
            vecs_b: Candidate vectors of the second word.
            context_b: Context vector of the second word.

        Returns:
            ``0.0`` when either collection is falsy (empty or ``None``),
            otherwise the accumulated sum divided by
            ``len(vecs_a) * len(vecs_b)``.
        """
        # Nothing to average: bail out before doing any similarity work.
        if not vecs_a or not vecs_b:
            return 0.0

        vecmanip = VectorManipulation()

        # The similarity of each vec_b to its context does not depend on
        # vec_a, so compute it once per vec_b instead of once per pair.
        # Assumes cosine_similarity has no side effects and is deterministic.
        weighted_b = [
            (vec_b, vecmanip.cosine_similarity(vec_b, context_b))
            for vec_b in vecs_b
        ]

        partial_sim = 0.0
        for vec_a in vecs_a:
            pcwa = vecmanip.cosine_similarity(vec_a, context_a)
            for vec_b, pcwb in weighted_b:
                dwab = vecmanip.cosine_similarity(vec_a, vec_b)
                # Same multiplication and accumulation order as before the
                # hoist, so the floating-point result is unchanged.
                partial_sim += pcwa * pcwb * dwab

        return partial_sim / (len(vecs_a) * len(vecs_b))
Exemple #10
0
    def __contextParser(self, text_items, trained_model):
        """Build one context vector by averaging the vectors of all senses.

        For every item in ``text_items`` the synsets returned by
        ``wn.synsets`` are turned into model keys with
        ``SynsetParserVector.keyParser``. Each key that ``trained_model``
        knows contributes its vector; unknown keys are skipped.

        Args:
            text_items: Iterable of tokens making up the context.
            trained_model: Object exposing ``word_vec(key)`` that raises
                ``KeyError`` for unknown keys.

        Returns:
            ``numpy.average`` over axis 0 of all collected vectors.
            NOTE(review): when no vector is collected, ``numpy.average`` is
            called on an empty list, which is expected to yield NaN with a
            runtime warning. Confirm that callers tolerate this.
        """
        track_synset = SynsetParserVector()
        context_vector = []

        for text_item in text_items:
            for synset in wn.synsets(text_item):
                key = track_synset.keyParser(text_item, synset)
                try:
                    key_vector = trained_model.word_vec(key)
                except KeyError:
                    # The model has no vector for this sense key: skip it.
                    continue
                # Pool the vectors of every known sense in the context.
                context_vector.append(key_vector)

        return numpy.average(context_vector, axis=0)
Exemple #11
0
 def test_cosine_similarity(self):
     """Two opposite vectors on the same axis must score exactly -1."""
     ops = VectorManipulation()
     forward, backward = [1, 0], [-1, 0]
     score = ops.cosine_similarity(forward, backward)
     self.assertEqual(-1, score)
Exemple #12
0
    def __globalSimC(self, context_a, context_b):
        """Score two context vectors directly against each other.

        Returns:
            The result of ``VectorManipulation.cosine_similarity`` for
            ``context_a`` and ``context_b``.
        """
        return VectorManipulation().cosine_similarity(context_a, context_b)
Exemple #13
0
# Absolute path of the directory containing this module, and of that
# directory's parent.
pydir_name = os.path.dirname(os.path.abspath(__file__))
ppydir_name = os.path.dirname(pydir_name)

# Append "<this file's directory>/.." to sys.path.
# NOTE(review): presumably this is what lets the local `utilities` and
# `vecmanip` imports that follow resolve - confirm against the project layout.
sys.path.append(os.path.join(os.path.dirname(__file__), os.path.pardir))

# local-imports
from utilities.commandLine import CommandLineStats
from utilities.fileOperations import FileManipulation
from vecmanip.vectorOperations import VectorManipulation

if __name__ == '__main__':
    params = CommandLineStats()  # command line parameter validation
    fio = FileManipulation()
    stats_metrics = VectorManipulation()

    gold_path = os.path.join(ppydir_name, params.gold_input)
    ruby_path = os.path.join(ppydir_name, params.ruby_input)
    ou_loc = os.path.join(ppydir_name, params.output_folder)

    gold = fio.readFileLine(gold_path)
    docs = fio.doclist_multifolder(ruby_path)
    result = "Metric\tSpearman\tS-Rho\n"

    for doc in docs:
        ruby = fio.readFileLine(doc)
        fname = doc.split(os.sep)
        fname = fname[-1]
        tmp_sp, tmp_rho = stats_metrics.spearmanCorrelation(gold, ruby)
        result += fname[:-3] + '\t' + str(tmp_sp) + '\t' + str(tmp_rho) + '\n'