def directional_sen_similarity(self, sen1, sen2, fallback):
    """Compute the one-directional similarity of ``sen1`` towards ``sen2``.

    Every word of ``sen1`` is compared with every word of ``sen2`` through
    ``self.word_sim.word_similarity``; the scores obtained for one word of
    ``sen1`` are reduced with ``my_max`` and the per-word results are then
    combined with ``average``. The roles of the two sentences differ
    (``sen1`` drives the outer iteration), so swapping the arguments may
    give a different result.

    Args:
        sen1: iterable of items indexable by ``'token'``.
        sen2: iterable of items indexable by ``'token'``; iterated once
            per word of ``sen1``.
        fallback: forwarded unchanged as the ``fallback`` keyword of
            ``word_similarity``.

    Returns:
        Whatever ``average`` returns for the per-word ``my_max`` results.
    """
    def reduced_score(word1):
        # Lazy on purpose: my_max receives a generator, and the 'token'
        # lookups happen per pair, exactly when a pair is scored.
        # The two -1 positional arguments are handed through as-is; their
        # meaning is defined by word_similarity, which is not visible here.
        pair_scores = (
            self.word_sim.word_similarity(
                word1['token'], word2['token'], -1, -1, fallback=fallback)
            for word2 in sen2)
        return my_max(pair_scores)

    return average(reduced_score(word1) for word1 in sen1)
def _all_pairs_similarity(self, machine1, machine2):
    """Score two machines from the pairwise similarities of their nodes.

    The nodes of each machine are collected with
    ``MachineTraverser.get_nodes`` (excluding ``self.stopwords``). Every
    cross-machine pair is scored with ``self.word_similarity`` using
    ``sim_type="strict_links_and_nodes"``, each word keeps the ``my_max``
    of its pair scores, and the result is the ``average`` of the two
    per-machine averages of those values.

    Args:
        machine1: first machine; must be accepted by
            ``MachineTraverser.get_nodes`` and provide ``printname()``.
        machine2: second machine, same requirements.

    Returns:
        The combined score as produced by ``average``. A truthy score is
        also reported through ``self.log``.
    """
    # NOTE(review): a method with this same name and the same logic follows
    # directly after this one in the visible source. If both belong to the
    # same class the later definition wins - confirm and deduplicate.
    words1 = set(
        MachineTraverser.get_nodes(machine1, exclude_words=self.stopwords))
    words2 = set(
        MachineTraverser.get_nodes(machine2, exclude_words=self.stopwords))

    # Symmetric table: pair_sims_by_word[a][b] == pair_sims_by_word[b][a].
    pair_sims_by_word = defaultdict(dict)
    for word1 in words1:
        for word2 in words2:
            raw_sim = self.word_similarity(
                word1, word2, -1, -1, sim_type="strict_links_and_nodes")
            # Any falsy result is recorded as 0.0.
            pair_sim = raw_sim if raw_sim else 0.0
            pair_sims_by_word[word1][word2] = pair_sim
            pair_sims_by_word[word2][word1] = pair_sim

    # values() instead of the Python-2-only itervalues(): it exists on both
    # Python 2 and Python 3. Indexing the defaultdict (rather than .get)
    # is deliberate - a word without any pair gets an empty dict, as before,
    # which also keeps the logged table unchanged.
    max_sims_by_word = dict(
        (word, my_max(pair_sims_by_word[word].values()))
        for word in words1 | words2)

    # Average of the two per-machine averages. (An earlier variant, left
    # commented out in the original, took the max of the per-machine maxima.)
    avg1 = average(max_sims_by_word[w] for w in words1)
    avg2 = average(max_sims_by_word[w] for w in words2)
    sim = average((avg1, avg2))

    if sim:
        self.log(
            "{0} - {1} all_pairs similarity: {2} based on: {3}".format(
                machine1.printname(), machine2.printname(), sim,
                pair_sims_by_word))

    return sim
def _all_pairs_similarity(self, machine1, machine2):
    """Score two machines from the pairwise similarities of their nodes.

    The nodes of each machine are collected with
    ``MachineTraverser.get_nodes`` (excluding ``self.stopwords``). Every
    cross-machine pair is scored with ``self.word_similarity`` using
    ``sim_type="strict_links_and_nodes"``, each word keeps the ``my_max``
    of its pair scores, and the result is the ``average`` of the two
    per-machine averages of those values.

    Args:
        machine1: first machine; must be accepted by
            ``MachineTraverser.get_nodes`` and provide ``printname()``.
        machine2: second machine, same requirements.

    Returns:
        The combined score as produced by ``average``. A truthy score is
        also reported through ``self.log``.
    """
    # NOTE(review): a method with this same name and the same logic appears
    # directly before this one in the visible source. If both belong to the
    # same class this later definition is the effective one - confirm and
    # deduplicate.
    words1 = set(
        MachineTraverser.get_nodes(machine1, exclude_words=self.stopwords))
    words2 = set(
        MachineTraverser.get_nodes(machine2, exclude_words=self.stopwords))

    # Symmetric table: pair_sims_by_word[a][b] == pair_sims_by_word[b][a].
    pair_sims_by_word = defaultdict(dict)
    for word1 in words1:
        for word2 in words2:
            raw_sim = self.word_similarity(
                word1, word2, -1, -1, sim_type="strict_links_and_nodes")
            # Any falsy result is recorded as 0.0.
            pair_sim = raw_sim if raw_sim else 0.0
            pair_sims_by_word[word1][word2] = pair_sim
            pair_sims_by_word[word2][word1] = pair_sim

    # values() instead of the Python-2-only itervalues(): it exists on both
    # Python 2 and Python 3. Indexing the defaultdict (rather than .get)
    # is deliberate - a word without any pair gets an empty dict, as before,
    # which also keeps the logged table unchanged.
    max_sims_by_word = dict(
        (word, my_max(pair_sims_by_word[word].values()))
        for word in words1 | words2)

    # Average of the two per-machine averages. (An earlier variant, left
    # commented out in the original, took the max of the per-machine maxima.)
    avg1 = average(max_sims_by_word[w] for w in words1)
    avg2 = average(max_sims_by_word[w] for w in words2)
    sim = average((avg1, avg2))

    if sim:
        self.log(
            "{0} - {1} all_pairs similarity: {2} based on: {3}".format(
                machine1.printname(), machine2.printname(), sim,
                pair_sims_by_word))

    return sim