Example #1
0
    def _all_pairs_similarity(self, machine1, machine2):
        """Score two machines from the pairwise similarities of their nodes.

        The node sets of both machines are collected with
        ``MachineTraverser.get_nodes`` (passing ``self.stopwords`` as
        ``exclude_words``).  Every node of ``machine1`` is compared with
        every node of ``machine2`` through ``self.word_similarity`` using
        ``sim_type="strict_links_and_nodes"``.  For each word the ``my_max``
        of its pair scores is taken, these values are averaged per machine,
        and the two per-machine results are averaged again.

        Returns:
            Whatever ``average`` yields for the two per-machine values.  A
            truthy result is additionally reported through ``self.log``.
        """
        words1 = set(
            MachineTraverser.get_nodes(machine1, exclude_words=self.stopwords))
        words2 = set(
            MachineTraverser.get_nodes(machine2, exclude_words=self.stopwords))

        # Symmetric score table: pair_sims_by_word[a][b] == [b][a].
        pair_sims_by_word = defaultdict(dict)
        for word1 in words1:
            for word2 in words2:
                # Any falsy score (None, 0, ...) is stored as 0.0.
                pair_sim = self.word_similarity(
                    word1, word2, -1, -1,
                    sim_type="strict_links_and_nodes") or 0.0
                pair_sims_by_word[word1][word2] = pair_sim
                pair_sims_by_word[word2][word1] = pair_sim

        # ``.values()`` instead of the Python-2-only ``.itervalues()`` so the
        # method runs unchanged on both Python 2 and Python 3.
        max_sims_by_word = dict(
            (word, my_max(pair_sims_by_word[word].values()))
            for word in words1 | words2)

        # NOTE: a disabled variant used the max of the per-machine maxima
        # here instead of the average of the per-machine averages.
        sim = average((average((max_sims_by_word[w] for w in words1)),
                       average((max_sims_by_word[w] for w in words2))))
        if sim:
            self.log(
                "{0} - {1} all_pairs similarity: {2} based on: {3}".format(
                    machine1.printname(), machine2.printname(), sim,
                    pair_sims_by_word))
        return sim
Example #2
0
    def _all_pairs_similarity(self, machine1, machine2):
        """Compute an all-pairs node similarity between two machines.

        Nodes are gathered from each machine via
        ``MachineTraverser.get_nodes`` with ``exclude_words=self.stopwords``.
        Each node of ``machine1`` is scored against each node of
        ``machine2`` by ``self.word_similarity`` (called with
        ``sim_type="strict_links_and_nodes"``).  Per word, ``my_max`` picks
        from its pair scores; those picks are averaged within each machine
        and the two averages are averaged once more.

        Returns:
            The value produced by the final ``average`` call.  When that
            value is truthy it is also written out with ``self.log``.
        """
        words1 = set(MachineTraverser.get_nodes(machine1,
                                                exclude_words=self.stopwords))
        words2 = set(MachineTraverser.get_nodes(machine2,
                                                exclude_words=self.stopwords))

        # Scores are recorded in both directions so either word can be used
        # as the outer key.
        pair_sims_by_word = defaultdict(dict)
        for word1 in words1:
            for word2 in words2:
                # A falsy score (None, 0, ...) is normalised to 0.0.
                pair_sim = self.word_similarity(
                    word1, word2, -1, -1,
                    sim_type="strict_links_and_nodes") or 0.0
                pair_sims_by_word[word1][word2] = pair_sim
                pair_sims_by_word[word2][word1] = pair_sim

        # ``dict.itervalues()`` does not exist on Python 3; ``.values()`` is
        # available on both major versions.
        max_sims_by_word = dict(
            (word, my_max(pair_sims_by_word[word].values()))
            for word in words1 | words2)

        # NOTE: an earlier, disabled variant took the max of the per-machine
        # maxima rather than averaging.
        sim = average((average((max_sims_by_word[w] for w in words1)),
                       average((max_sims_by_word[w] for w in words2))))
        if sim:
            self.log(
                "{0} - {1} all_pairs similarity: {2} based on: {3}".format(
                    machine1.printname(), machine2.printname(), sim,
                    pair_sims_by_word))
        return sim
Example #3
0
 def directional_sen_similarity(self, sen1, sen2, fallback):
     """Score ``sen1`` against ``sen2`` in one direction.

     For every entry of ``sen1`` the ``'token'`` value is compared with
     the ``'token'`` value of every entry of ``sen2`` through
     ``self.word_sim.word_similarity`` (``fallback`` is forwarded
     unchanged).  ``my_max`` is applied to each entry's scores and
     ``average`` to the resulting per-entry values.
     """
     def best_match(source_word):
         # Lazily score one sen1 entry against all of sen2.
         scores = (
             self.word_sim.word_similarity(
                 source_word['token'], target_word['token'], -1, -1,
                 fallback=fallback)
             for target_word in sen2)
         return my_max(scores)

     return average(best_match(source_word) for source_word in sen1)
Example #4
0
 def directional_sen_similarity(self, sen1, sen2, fallback):
     """Return the ``average`` of per-entry best scores of ``sen1`` vs ``sen2``.

     Each entry of both sentences is indexed with ``'token'``; the token
     pairs are scored by ``self.word_sim.word_similarity`` with
     ``fallback`` passed through.  ``my_max`` reduces the scores of one
     ``sen1`` entry over all ``sen2`` entries, and ``average`` combines
     those reductions.
     """
     # Generator of one my_max result per sen1 entry; nothing is evaluated
     # until average() consumes it.
     per_word_best = (
         my_max(
             self.word_sim.word_similarity(
                 left['token'], right['token'], -1, -1, fallback=fallback)
             for right in sen2)
         for left in sen1)
     return average(per_word_best)