class WordnetTestCase(TestCase):
    """
    Tests for the ``Wordnet`` helper in ``chatterbot.utils.wordnet``.
    """

    def setUp(self):
        """
        Give every test its own ``Wordnet`` instance on ``self.wordnet``.
        """
        super(WordnetTestCase, self).setUp()

        # Imported here rather than at module level, so the import only
        # happens when a test is actually being set up.
        from chatterbot.utils.wordnet import Wordnet

        self.wordnet = Wordnet()

    def test_wordnet(self):
        """
        The first two synsets returned for the word 'test' must have the
        expected path similarity to each other.
        """
        expected_similarity = 0.06666666666666667

        matches = self.wordnet.synsets('test')
        actual_similarity = matches[0].path_similarity(matches[1])

        self.assertEqual(expected_similarity, actual_similarity)
def synset_distance(statement, other_statement):
    """
    Calculate the similarity of two statements.

    Every token of one statement is paired with every token of the other.
    For each pair in which both tokens have synsets, every pairing of
    those synsets is scored with ``path_similarity``. The single highest
    score found is then divided by the word count of the longer statement.

    :param statement: Object whose ``text`` attribute holds the first
        sentence.
    :param other_statement: Object whose ``text`` attribute holds the
        second sentence.

    :return: The percent of similarity between the closest synset distance.
        ``0.0`` when neither statement contains any words.
    :rtype: float
    """
    # The maximum possible similarity is an exact match.
    # Because path_similarity returns a value between 0 and 1,
    # max_possible_similarity is the number of words in the longer
    # of the two input statements.
    max_possible_similarity = max(
        len(statement.text.split()),
        len(other_statement.text.split())
    )

    # With no words on either side the result is always zero, so answer
    # before importing and constructing the WordNet and tokenizer helpers.
    # A float is returned so the type matches the documented return type.
    if max_possible_similarity == 0:
        return 0.0

    from chatterbot.utils.wordnet import Wordnet
    from chatterbot.utils.tokenizer import Tokenizer
    import itertools

    wordnet = Wordnet()
    tokenizer = Tokenizer()

    tokens1 = tokenizer.get_tokens(statement.text)
    tokens2 = tokenizer.get_tokens(other_statement.text)

    max_similarity = 0.0

    # Get the highest matching value for each possible combination of words
    for word1, word2 in itertools.product(tokens1, tokens2):
        synsets1 = wordnet.synsets(word1)
        synsets2 = wordnet.synsets(word2)

        # A word without synsets has nothing to compare against.
        if not (synsets1 and synsets2):
            continue

        # Get the highest similarity for each combination of synsets
        for candidate1, candidate2 in itertools.product(synsets1, synsets2):
            similarity = candidate1.path_similarity(candidate2)

            # Falsy scores (for example None) are ignored.
            if similarity and (similarity > max_similarity):
                max_similarity = similarity

    return max_similarity / max_possible_similarity
class WordnetTestCase(TestCase):
    """
    Tests for the ``Wordnet`` helper in ``chatterbot.utils.wordnet``.
    """

    def setUp(self):
        """
        Give every test its own ``Wordnet`` instance on ``self.wordnet``.
        """
        super(WordnetTestCase, self).setUp()

        # Imported here rather than at module level, so the import only
        # happens when a test is actually being set up.
        from chatterbot.utils.wordnet import Wordnet

        self.wordnet = Wordnet()

    def test_wordnet(self):
        """
        The first two synsets returned for the word 'test' must have the
        expected path similarity to each other.
        """
        expected_similarity = 0.06666666666666667

        matches = self.wordnet.synsets('test')
        actual_similarity = matches[0].path_similarity(matches[1])

        self.assertEqual(expected_similarity, actual_similarity)
def synset_distance(statement, other_statement):
    """
    Calculate the similarity of two statements.

    Every token of one statement is paired with every token of the other.
    For each pair in which both tokens have synsets, every pairing of
    those synsets is scored with ``path_similarity``. The single highest
    score found is then divided by the word count of the longer statement.

    :param statement: Object whose ``text`` attribute holds the first
        sentence.
    :param other_statement: Object whose ``text`` attribute holds the
        second sentence.

    :return: The percent of similarity between the closest synset distance.
        ``0.0`` when neither statement contains any words.
    :rtype: float
    """
    # The maximum possible similarity is an exact match.
    # Because path_similarity returns a value between 0 and 1,
    # max_possible_similarity is the number of words in the longer
    # of the two input statements.
    max_possible_similarity = max(
        len(statement.text.split()),
        len(other_statement.text.split())
    )

    # With no words on either side the result is always zero, so answer
    # before importing and constructing the WordNet and tokenizer helpers.
    # A float is returned so the type matches the documented return type.
    if max_possible_similarity == 0:
        return 0.0

    from chatterbot.utils.wordnet import Wordnet
    from chatterbot.utils.tokenizer import Tokenizer
    import itertools

    wordnet = Wordnet()
    tokenizer = Tokenizer()

    tokens1 = tokenizer.get_tokens(statement.text)
    tokens2 = tokenizer.get_tokens(other_statement.text)

    max_similarity = 0.0

    # Get the highest matching value for each possible combination of words
    for word1, word2 in itertools.product(tokens1, tokens2):
        synsets1 = wordnet.synsets(word1)
        synsets2 = wordnet.synsets(word2)

        # A word without synsets has nothing to compare against.
        if not (synsets1 and synsets2):
            continue

        # Get the highest similarity for each combination of synsets
        for candidate1, candidate2 in itertools.product(synsets1, synsets2):
            similarity = candidate1.path_similarity(candidate2)

            # Falsy scores (for example None) are ignored.
            if similarity and (similarity > max_similarity):
                max_similarity = similarity

    return max_similarity / max_possible_similarity
def synset_distance(statement, other_statement):
    """
    Score how similar two statements are, based on their synsets.

    Each token of ``statement`` is paired with each token of
    ``other_statement``. For every pair in which both tokens have
    synsets, the best ``path_similarity`` over all pairings of those
    synsets is added to a running total.

    :return: The sum of the best synset similarity of every token pair
        (``0`` when no token pair contributes anything).
    :rtype: float
    """
    from chatterbot.utils.wordnet import Wordnet
    from chatterbot.utils.tokenizer import Tokenizer
    import itertools

    wordnet = Wordnet()
    tokenizer = Tokenizer()

    first_tokens = tokenizer.get_tokens(statement.text)
    second_tokens = tokenizer.get_tokens(other_statement.text)

    running_total = 0

    # Walk every (word from first statement, word from second) pairing.
    word_pairs = itertools.product(first_tokens, second_tokens)
    for first_word, second_word in word_pairs:
        first_synsets = wordnet.synsets(first_word)
        second_synsets = wordnet.synsets(second_word)

        # Pairs where either word has no synsets contribute nothing.
        if not (first_synsets and second_synsets):
            continue

        # Keep only the strongest similarity among all synset pairings;
        # falsy scores are skipped.
        best_score = 0
        synset_pairs = itertools.product(first_synsets, second_synsets)
        for left, right in synset_pairs:
            score = left.path_similarity(right)
            if score and (score > best_score):
                best_score = score

        # Accumulated one pair at a time, in iteration order.
        running_total += best_score

    return running_total
def synset_distance(statement, other_statement):
    """
    Score how similar two statements are, based on their synsets.

    Each token of ``statement`` is paired with each token of
    ``other_statement``. For every pair in which both tokens have
    synsets, the best ``path_similarity`` over all pairings of those
    synsets is added to a running total, which is returned.
    """
    from chatterbot.utils.wordnet import Wordnet
    from chatterbot.utils.tokenizer import Tokenizer
    import itertools

    wordnet = Wordnet()
    tokenizer = Tokenizer()

    first_tokens = tokenizer.get_tokens(statement.text)
    second_tokens = tokenizer.get_tokens(other_statement.text)

    running_total = 0

    # Walk every (word from first statement, word from second) pairing.
    word_pairs = itertools.product(first_tokens, second_tokens)
    for first_word, second_word in word_pairs:
        first_synsets = wordnet.synsets(first_word)
        second_synsets = wordnet.synsets(second_word)

        # Pairs where either word has no synsets contribute nothing.
        if not (first_synsets and second_synsets):
            continue

        # Keep only the strongest similarity among all synset pairings;
        # falsy scores are skipped.
        best_score = 0
        synset_pairs = itertools.product(first_synsets, second_synsets)
        for left, right in synset_pairs:
            score = left.path_similarity(right)
            if score and (score > best_score):
                best_score = score

        # Accumulated one pair at a time, in iteration order.
        running_total += best_score

    return running_total