Example #1
0
def synset_distance(statement, other_statement):
    """
    Calculate the similarity of two statements.

    Every token of ``statement`` is paired with every token of
    ``other_statement``, and every synset of the first token is compared
    with every synset of the second using ``path_similarity``. The single
    highest score found across all pairs is divided by the word count of
    the longer statement.

    :param statement: Object whose ``text`` attribute holds the first sentence.
    :param other_statement: Object whose ``text`` attribute holds the
        second sentence.
    :return: The highest synset path similarity found, divided by the number
        of whitespace-separated words in the longer statement, or ``0.0``
        when neither statement contains a word.
    :rtype: float
    """
    from chatterbot.utils.wordnet import Wordnet
    from chatterbot.utils.tokenizer import Tokenizer
    import itertools

    wordnet = Wordnet()
    tokenizer = Tokenizer()

    tokens1 = tokenizer.get_tokens(statement.text)
    tokens2 = tokenizer.get_tokens(other_statement.text)

    # Because path_similarity returns a value between 0 and 1, the score is
    # normalized by the number of words in the longer of the two statements.
    max_possible_similarity = max(
        len(statement.text.split()),
        len(other_statement.text.split())
    )

    # Neither statement contains a word: nothing to compare, and dividing
    # by the word count would raise ZeroDivisionError.
    if max_possible_similarity == 0:
        return 0.0

    # Look up the synsets of every token once instead of once per token
    # pair. Tokens without synsets are dropped because they can never
    # contribute a similarity value.
    synset_groups1 = [
        group for group in (wordnet.synsets(token) for token in tokens1)
        if group
    ]
    synset_groups2 = [
        group for group in (wordnet.synsets(token) for token in tokens2)
        if group
    ]

    # NOTE(review): only the single best pair is kept, so two identical
    # multi-word statements score 1/word_count rather than 1 - confirm
    # this is the intended scale before relying on it as a percentage.
    max_similarity = 0.0

    # Get the highest matching value for each possible combination of words
    for group1, group2 in itertools.product(synset_groups1, synset_groups2):

        # Get the highest similarity for each combination of synsets
        for synset1, synset2 in itertools.product(group1, group2):
            similarity = synset1.path_similarity(synset2)

            # Falsy results are ignored (presumably None when the two
            # synsets have no connecting path - see the NLTK docs).
            if similarity and (similarity > max_similarity):
                max_similarity = similarity

    return max_similarity / max_possible_similarity
Example #2
0
class TokenizerTestCase(TestCase):
    """
    Checks ``Tokenizer.get_tokens`` with stop words kept and removed.
    """

    def setUp(self):
        """
        Run the parent setup and give each test its own tokenizer.
        """
        super(TokenizerTestCase, self).setUp()
        from chatterbot.utils.tokenizer import Tokenizer

        self.tokenizer = Tokenizer()

    def test_get_tokens(self):
        # With stop words kept, every word comes back in sentence order.
        sentence = 'what time is it'
        expected = ['what', 'time', 'is', 'it']

        result = self.tokenizer.get_tokens(sentence, exclude_stop_words=False)

        self.assertEqual(result, expected)

    def test_get_tokens_exclude_stop_words(self):
        # With stop words removed, only 'time' is expected to remain.
        sentence = 'what time is it'
        expected = {'time'}

        result = self.tokenizer.get_tokens(sentence, exclude_stop_words=True)

        self.assertEqual(result, expected)
Example #3
0
class TokenizerTestCase(TestCase):
    """
    Exercises the tokenizer's ``get_tokens`` method in both of its
    ``exclude_stop_words`` modes.
    """

    def setUp(self):
        """
        Call the parent setup, then attach a new Tokenizer instance.
        """
        super(TokenizerTestCase, self).setUp()
        from chatterbot.utils.tokenizer import Tokenizer

        self.tokenizer = Tokenizer()

    def test_get_tokens(self):
        # Stop words are retained, so all four words are expected.
        self.assertEqual(
            self.tokenizer.get_tokens(
                'what time is it',
                exclude_stop_words=False
            ),
            ['what', 'time', 'is', 'it']
        )

    def test_get_tokens_exclude_stop_words(self):
        # Stop words are stripped, leaving a set holding only 'time'.
        self.assertEqual(
            self.tokenizer.get_tokens(
                'what time is it',
                exclude_stop_words=True
            ),
            {'time'}
        )
Example #4
0
def synset_distance(statement, other_statement):
    """
    Calculate the similarity of two statements.

    Each token of one statement is compared against each token of the
    other by taking ``path_similarity`` over every pairing of their
    synsets. The best score seen anywhere is then divided by the number
    of words in the longer statement.

    :param statement: Object exposing the first sentence as ``text``.
    :param other_statement: Object exposing the second sentence as ``text``.
    :return: The best synset path similarity divided by the
        whitespace-separated word count of the longer statement, or
        ``0.0`` if both statements are empty of words.
    :rtype: float
    """
    from chatterbot.utils.wordnet import Wordnet
    from chatterbot.utils.tokenizer import Tokenizer
    import itertools

    wordnet = Wordnet()
    tokenizer = Tokenizer()

    tokens1 = tokenizer.get_tokens(statement.text)
    tokens2 = tokenizer.get_tokens(other_statement.text)

    # The maximum possible similarity is an exact match.
    # Because path_similarity returns a value between 0 and 1,
    # max_possible_similarity is the number of words in the longer
    # of the two input statements.
    max_possible_similarity = max(len(statement.text.split()),
                                  len(other_statement.text.split()))

    # Guard the division below; with no words there is nothing to compare.
    if max_possible_similarity == 0:
        return 0.0

    # Resolve each token's synsets a single time up front. Doing the lookup
    # inside the pair loop would repeat it once for every token on the
    # other side.
    first_synsets = [wordnet.synsets(token) for token in tokens1]
    second_synsets = [wordnet.synsets(token) for token in tokens2]

    # NOTE(review): a single best pair score is divided by a word count,
    # so identical multi-word statements do not reach 1.0 - confirm intent.
    max_similarity = 0.0

    # Get the highest matching value for each possible combination of words
    for synsets1, synsets2 in itertools.product(first_synsets, second_synsets):

        # A token without any synsets cannot be compared with anything.
        if not (synsets1 and synsets2):
            continue

        # Get the highest similarity for each combination of synsets
        for synset1, synset2 in itertools.product(synsets1, synsets2):
            similarity = synset1.path_similarity(synset2)

            # Falsy scores are skipped (presumably None means the synsets
            # are not connected - see the NLTK docs).
            if similarity and (similarity > max_similarity):
                max_similarity = similarity

    return max_similarity / max_possible_similarity
Example #5
0
def synset_distance(statement, other_statement):
    """
    Calculate the similarity of two statements.
    This is based on the total maximum synset similarity
    between each word in each sentence.

    For every pairing of a token from ``statement`` with a token from
    ``other_statement`` where both tokens have synsets, the highest
    ``path_similarity`` between their synsets is found, and those per-pair
    maxima are added together.

    :param statement: Object whose ``text`` attribute holds the first sentence.
    :param other_statement: Object whose ``text`` attribute holds the
        second sentence.
    :return: The sum of the best synset path similarity of every token
        pair. The value is not normalized, so it grows with the number
        of token pairs.
    :rtype: float
    """
    from chatterbot.utils.wordnet import Wordnet
    from chatterbot.utils.tokenizer import Tokenizer
    import itertools

    wordnet = Wordnet()
    tokenizer = Tokenizer()

    tokens1 = tokenizer.get_tokens(statement.text)
    tokens2 = tokenizer.get_tokens(other_statement.text)

    # Look up the synsets of every token once instead of once per token
    # pair. Tokens without synsets are dropped: the pair loop would have
    # skipped them anyway, so they never add to the total.
    synset_groups1 = [
        group for group in (wordnet.synsets(token) for token in tokens1)
        if group
    ]
    synset_groups2 = [
        group for group in (wordnet.synsets(token) for token in tokens2)
        if group
    ]

    # Start from a float so the return type matches the documented one
    # even when no pair of tokens can be compared.
    total_similarity = 0.0

    # Get the highest matching value for each possible combination of words
    for group1, group2 in itertools.product(synset_groups1, synset_groups2):

        max_similarity = 0

        # Get the highest similarity for each combination of synsets
        for synset1, synset2 in itertools.product(group1, group2):
            similarity = synset1.path_similarity(synset2)

            # Falsy results are ignored (presumably None when the two
            # synsets have no connecting path - see the NLTK docs).
            if similarity and (similarity > max_similarity):
                max_similarity = similarity

        # Add the most similar path value to the total
        total_similarity += max_similarity

    return total_similarity
def synset_distance(statement, other_statement):
    """
    Calculate the similarity of two statements.
    This is based on the total maximum synset similarity
    between each word in each sentence.

    Tokens of the two statements are paired exhaustively. For each pair
    in which both tokens have synsets, the largest ``path_similarity``
    over all synset pairings is added to a running total.

    :param statement: Object exposing the first sentence as ``text``.
    :param other_statement: Object exposing the second sentence as ``text``.
    :return: The running total of per-token-pair best similarities. It is
        not normalized by statement length.
    :rtype: float
    """
    from chatterbot.utils.wordnet import Wordnet
    from chatterbot.utils.tokenizer import Tokenizer
    import itertools

    wordnet = Wordnet()
    tokenizer = Tokenizer()

    tokens1 = tokenizer.get_tokens(statement.text)
    tokens2 = tokenizer.get_tokens(other_statement.text)

    # Resolve each token's synsets a single time up front. Doing the lookup
    # inside the pair loop would repeat it once for every token on the
    # other side.
    first_synsets = [wordnet.synsets(token) for token in tokens1]
    second_synsets = [wordnet.synsets(token) for token in tokens2]

    # A float start value keeps the return type the same whether or not
    # any token pair could be compared.
    total_similarity = 0.0

    # Get the highest matching value for each possible combination of words
    for synsets1, synsets2 in itertools.product(first_synsets, second_synsets):

        # A token without any synsets cannot be compared with anything.
        if not (synsets1 and synsets2):
            continue

        max_similarity = 0

        # Get the highest similarity for each combination of synsets
        for synset1, synset2 in itertools.product(synsets1, synsets2):
            similarity = synset1.path_similarity(synset2)

            # Falsy scores are skipped (presumably None means the synsets
            # are not connected - see the NLTK docs).
            if similarity and (similarity > max_similarity):
                max_similarity = similarity

        # Add the most similar path value to the total
        total_similarity += max_similarity

    return total_similarity
Example #7
0
    def setUp(self):
        """
        Run the parent class setup, then attach a new Tokenizer to the
        test case as ``self.tokenizer``.
        """
        super(TokenizerTestCase, self).setUp()

        # Imported inside the method, after the parent setup has run.
        from chatterbot.utils.tokenizer import Tokenizer as TokenizerClass

        self.tokenizer = TokenizerClass()
Example #8
0
    def setUp(self):
        """
        Prepare the fixture: parent setup first, then a fresh tokenizer
        stored on the instance for the test methods to use.
        """
        super(TokenizerTestCase, self).setUp()
        from chatterbot.utils.tokenizer import Tokenizer

        fresh_tokenizer = Tokenizer()
        self.tokenizer = fresh_tokenizer