def synset_distance(statement, other_statement):
    """
    Calculate the similarity of two statements.
    This is based on the total maximum synset similarity
    between each word in each sentence.

    :return: The percent of similarity between the closest synset distance.
    :rtype: float
    """
    from chatterbot.utils.wordnet import Wordnet
    from chatterbot.utils.tokenizer import Tokenizer
    import itertools

    wordnet = Wordnet()
    tokenizer = Tokenizer()

    tokens1 = tokenizer.get_tokens(statement.text)
    tokens2 = tokenizer.get_tokens(other_statement.text)

    # The maximum possible similarity is an exact match.
    # Because path_similarity returns a value between 0 and 1 for each
    # word pairing, max_possible_similarity is the number of words in
    # the longer of the two input statements.
    max_possible_similarity = max(
        len(statement.text.split()),
        len(other_statement.text.split())
    )

    # Guard against empty statements before dividing.
    if max_possible_similarity == 0:
        return 0

    total_similarity = 0.0

    # For each possible pairing of words across the two statements,
    # find the closest pair of synsets and accumulate that best score.
    for word1, word2 in itertools.product(tokens1, tokens2):
        synsets1 = wordnet.synsets(word1)
        synsets2 = wordnet.synsets(word2)

        if synsets1 and synsets2:
            best_similarity = 0.0

            for synset1, synset2 in itertools.product(synsets1, synsets2):
                similarity = synset1.path_similarity(synset2)
                # path_similarity returns None when the synsets share no path
                if similarity and similarity > best_similarity:
                    best_similarity = similarity

            # BUG FIX: accumulate the best score for every word pairing
            # (matching the docstring's "total maximum synset similarity")
            # instead of keeping only one global maximum, which capped the
            # result at 1 / max_possible_similarity.
            total_similarity += best_similarity

    return total_similarity / max_possible_similarity
class TokenizerTestCase(TestCase):
    """Tests for the Tokenizer utility's token extraction."""

    def setUp(self):
        """Create a fresh Tokenizer instance for each test."""
        super(TokenizerTestCase, self).setUp()
        from chatterbot.utils.tokenizer import Tokenizer
        self.tokenizer = Tokenizer()

    def test_get_tokens(self):
        """Every word is kept when stop words are not excluded."""
        result = self.tokenizer.get_tokens(
            'what time is it', exclude_stop_words=False
        )
        self.assertEqual(result, ['what', 'time', 'is', 'it'])

    def test_get_tokens_exclude_stop_words(self):
        """Only the non-stop word remains when stop words are excluded."""
        result = self.tokenizer.get_tokens(
            'what time is it', exclude_stop_words=True
        )
        self.assertEqual(result, {'time'})
def synset_distance(statement, other_statement):
    """
    Calculate the similarity of two statements.
    This is based on the total maximum synset similarity
    between each word in each sentence.

    :return: The percent of similarity between the closest synset distance.
    :rtype: float
    """
    from chatterbot.utils.wordnet import Wordnet
    from chatterbot.utils.tokenizer import Tokenizer
    import itertools

    wordnet = Wordnet()
    tokenizer = Tokenizer()

    tokens1 = tokenizer.get_tokens(statement.text)
    tokens2 = tokenizer.get_tokens(other_statement.text)

    # The maximum possible similarity is an exact match.
    # Because path_similarity returns a value between 0 and 1 for each
    # word pairing, max_possible_similarity is the number of words in
    # the longer of the two input statements.
    max_possible_similarity = max(
        len(statement.text.split()),
        len(other_statement.text.split())
    )

    # Guard against empty statements before dividing.
    if max_possible_similarity == 0:
        return 0

    total_similarity = 0.0

    # For each possible pairing of words across the two statements,
    # find the closest pair of synsets and accumulate that best score.
    for word1, word2 in itertools.product(tokens1, tokens2):
        synsets1 = wordnet.synsets(word1)
        synsets2 = wordnet.synsets(word2)

        if synsets1 and synsets2:
            best_similarity = 0.0

            for synset1, synset2 in itertools.product(synsets1, synsets2):
                similarity = synset1.path_similarity(synset2)
                # path_similarity returns None when the synsets share no path
                if similarity and similarity > best_similarity:
                    best_similarity = similarity

            # BUG FIX: accumulate the best score for every word pairing
            # (matching the docstring's "total maximum synset similarity")
            # instead of keeping only one global maximum, which capped the
            # result at 1 / max_possible_similarity.
            total_similarity += best_similarity

    return total_similarity / max_possible_similarity
def synset_distance(statement, other_statement):
    """
    Calculate the similarity of two statements.
    This is based on the total maximum synset similarity
    between each word in each sentence.

    :return: The ratio of difference between the synset distance of both statements.
    :rtype: float
    """
    from chatterbot.utils.wordnet import Wordnet
    from chatterbot.utils.tokenizer import Tokenizer
    import itertools

    wordnet = Wordnet()
    tokenizer = Tokenizer()

    words_a = tokenizer.get_tokens(statement.text)
    words_b = tokenizer.get_tokens(other_statement.text)

    total_similarity = 0

    # For every pairing of words across the two statements, locate the
    # single closest pair of synsets and add its score to the total.
    for word_a, word_b in itertools.product(words_a, words_b):
        synsets_a = wordnet.synsets(word_a)
        synsets_b = wordnet.synsets(word_b)

        # Skip pairings where either word has no known synsets
        if not (synsets_a and synsets_b):
            continue

        best_score = 0
        for synset_a, synset_b in itertools.product(synsets_a, synsets_b):
            score = synset_a.path_similarity(synset_b)
            # path_similarity yields None when no path connects the synsets
            if score and score > best_score:
                best_score = score

        # Add the most similar path value to the total
        total_similarity += best_score

    return total_similarity
def synset_distance(statement, other_statement):
    """
    Calculate the similarity of two statements.
    This is based on the total maximum synset similarity
    between each word in each sentence.
    """
    from chatterbot.utils.wordnet import Wordnet
    from chatterbot.utils.tokenizer import Tokenizer
    import itertools

    wordnet = Wordnet()
    tokenizer = Tokenizer()

    words_one = tokenizer.get_tokens(statement.text)
    words_two = tokenizer.get_tokens(other_statement.text)

    total_similarity = 0

    # For every pairing of words across the two statements, locate the
    # single closest pair of synsets and add its score to the total.
    for word_one, word_two in itertools.product(words_one, words_two):
        synsets_one = wordnet.synsets(word_one)
        synsets_two = wordnet.synsets(word_two)

        # Skip pairings where either word has no known synsets
        if not (synsets_one and synsets_two):
            continue

        closest = 0
        for first, second in itertools.product(synsets_one, synsets_two):
            similarity = first.path_similarity(second)
            # path_similarity yields None when no path connects the synsets
            if similarity and similarity > closest:
                closest = similarity

        # Add the most similar path value to the total
        total_similarity += closest

    return total_similarity
def setUp(self):
    """Initialize the shared Tokenizer instance used by the tests."""
    from chatterbot.utils.tokenizer import Tokenizer

    super(TokenizerTestCase, self).setUp()
    self.tokenizer = Tokenizer()