def synset_distance(statement, other_statement):
    """
    Calculate the similarity of two statements.

    The score is the single highest synset path similarity found between
    any word of one statement and any word of the other, divided by the
    number of whitespace-separated words in the longer statement.

    :param statement: Object whose ``text`` attribute holds the first sentence.
    :param other_statement: Object whose ``text`` attribute holds the
        second sentence.
    :return: The best path similarity scaled by the longer statement's
        word count, or ``0.0`` when neither statement contains a word.
    :rtype: float
    """
    # Imported lazily, as in the original, so the wordnet/tokenizer helpers
    # are only loaded when this comparison is actually used.
    from chatterbot.utils.wordnet import Wordnet
    from chatterbot.utils.tokenizer import Tokenizer
    import itertools

    wordnet = Wordnet()
    tokenizer = Tokenizer()

    tokens1 = tokenizer.get_tokens(statement.text)
    tokens2 = tokenizer.get_tokens(other_statement.text)

    # The maximum possible similarity is an exact match.
    # Because path_similarity returns a value between 0 and 1,
    # max_possible_similarity is the number of words in the longer
    # of the two input statements.
    max_possible_similarity = max(
        len(statement.text.split()),
        len(other_statement.text.split())
    )

    # Nothing to compare (and nothing to divide by): answer with a float
    # so the documented return type holds on this path too.
    if max_possible_similarity == 0:
        return 0.0

    # Look up the synsets once per word rather than once per word pair,
    # dropping words for which no synsets were found.
    synset_groups1 = [
        group for group in (wordnet.synsets(token) for token in tokens1)
        if group
    ]
    synset_groups2 = [
        group for group in (wordnet.synsets(token) for token in tokens2)
        if group
    ]

    max_similarity = 0.0

    # Get the highest matching value for each possible combination of words
    for group1, group2 in itertools.product(synset_groups1, synset_groups2):

        # Get the highest similarity for each combination of synsets
        for synset1, synset2 in itertools.product(group1, group2):
            similarity = synset1.path_similarity(synset2)

            # The truthiness check skips pairs that yield no usable score.
            if similarity and similarity > max_similarity:
                max_similarity = similarity

    return max_similarity / max_possible_similarity
def synset_distance(statement, other_statement):
    """
    Calculate the similarity of two statements.

    Every word of the first statement is paired with every word of the
    second. For each pair in which both words have synsets, the highest
    path similarity between their synsets is added to a running total.

    :param statement: Object whose ``text`` attribute holds the first sentence.
    :param other_statement: Object whose ``text`` attribute holds the
        second sentence.
    :return: The sum of the best path similarity of each word pair
        (``0`` when no pair contributes).
    :rtype: float
    """
    from chatterbot.utils.wordnet import Wordnet
    from chatterbot.utils.tokenizer import Tokenizer
    import itertools

    wordnet = Wordnet()
    tokenizer = Tokenizer()

    first_words = tokenizer.get_tokens(statement.text)
    second_words = tokenizer.get_tokens(other_statement.text)

    total_similarity = 0

    # Walk every (word, other word) pairing across the two statements
    for first_word, second_word in itertools.product(first_words, second_words):
        first_synsets = wordnet.synsets(first_word)
        second_synsets = wordnet.synsets(second_word)

        # A pair only contributes when both words have synsets
        if not (first_synsets and second_synsets):
            continue

        best_for_pair = 0

        # Keep the strongest score among all synset pairings of the two words
        for left, right in itertools.product(first_synsets, second_synsets):
            score = left.path_similarity(right)

            if score and score > best_for_pair:
                best_for_pair = score

        # Add the most similar path value to the total
        total_similarity += best_for_pair

    return total_similarity
def setUp(self):
    """
    Run the parent class's ``setUp`` and then create the ``Wordnet``
    instance the tests use, stored as ``self.wordnet``.
    """
    # Parent fixture first, so whatever it prepares is in place
    super(WordnetTestCase, self).setUp()

    # Imported here rather than at module level, as in the original
    from chatterbot.utils.wordnet import Wordnet

    wordnet_helper = Wordnet()
    self.wordnet = wordnet_helper