def synset_distance(statement, other_statement):
    """
    Score how closely two statements are related using WordNet synsets.

    Every token of ``statement`` is paired with every token of
    ``other_statement``. For each pair, all combinations of their synsets
    are compared with ``path_similarity`` and the single highest value
    seen across all pairs is kept. That value is then divided by the
    number of whitespace-separated words in the longer statement.

    :param statement: Object whose ``text`` attribute holds the first sentence.
    :param other_statement: Object whose ``text`` attribute holds the
        second sentence.
    :return: The best synset path similarity divided by the word count of
        the longer statement, or ``0`` when neither statement has any words.
    :rtype: float
    """
    from chatterbot.utils.wordnet import Wordnet
    from chatterbot.utils.tokenizer import Tokenizer
    from itertools import product

    lexicon = Wordnet()
    splitter = Tokenizer()

    first_words = splitter.get_tokens(statement.text)
    second_words = splitter.get_tokens(other_statement.text)

    # Normalisation factor: the word count of the longer statement.
    first_length = len(statement.text.split())
    second_length = len(other_statement.text.split())
    longest_length = max(first_length, second_length)

    best_score = 0.0

    # Compare every word of one statement with every word of the other
    for first_word, second_word in product(first_words, second_words):
        first_senses = lexicon.synsets(first_word)
        second_senses = lexicon.synsets(second_word)

        # A pair is only comparable when both words have synsets
        if not (first_senses and second_senses):
            continue

        for first_sense, second_sense in product(first_senses, second_senses):
            score = first_sense.path_similarity(second_sense)

            # Falsy results carry no usable similarity and are skipped
            if not score:
                continue

            if score > best_score:
                best_score = score

    # Guard against dividing by zero when both statements are empty
    if longest_length == 0:
        return 0

    return best_score / longest_length
Beispiel #2
0
def synset_distance(statement, other_statement):
    """
    Compare two statements through the WordNet synsets of their words.

    The highest ``path_similarity`` found between any synset of any token
    in ``statement`` and any synset of any token in ``other_statement`` is
    divided by the number of whitespace-separated words in the longer of
    the two statements.

    :param statement: Object exposing the first sentence as ``text``.
    :param other_statement: Object exposing the second sentence as ``text``.
    :return: The highest synset path similarity scaled by the longer
        statement's word count; ``0`` if both statements contain no words.
    :rtype: float
    """
    from chatterbot.utils.wordnet import Wordnet
    from chatterbot.utils.tokenizer import Tokenizer
    import itertools

    wn = Wordnet()
    tok = Tokenizer()

    left_tokens = tok.get_tokens(statement.text)
    right_tokens = tok.get_tokens(other_statement.text)

    # The divisor is the number of words in the longer input statement.
    denominator = max(
        len(statement.text.split()),
        len(other_statement.text.split())
    )

    def pair_scores(left_word, right_word):
        # Yield every truthy path similarity between the synsets of two words.
        left_synsets = wn.synsets(left_word)
        right_synsets = wn.synsets(right_word)

        if left_synsets and right_synsets:
            for left, right in itertools.product(left_synsets, right_synsets):
                value = left.path_similarity(right)

                if value:
                    yield value

    highest = 0.0

    # Seeding max() with the running best keeps the strict "greater than"
    # update rule: an equal score never replaces the current best.
    for left_word, right_word in itertools.product(left_tokens, right_tokens):
        highest = max(
            itertools.chain([highest], pair_scores(left_word, right_word))
        )

    if not denominator:
        return 0

    return highest / denominator
Beispiel #3
0
def synset_distance(statement, other_statement):
    """
    Accumulate the synset similarity between the words of two statements.

    Each token of ``statement`` is paired with each token of
    ``other_statement``. For every pair in which both words have synsets,
    the highest ``path_similarity`` over all synset combinations is added
    to a running total.

    :param statement: Object exposing the first sentence as ``text``.
    :param other_statement: Object exposing the second sentence as ``text``.
    :return: The sum of the best path similarity of every comparable word
        pair. This is an unnormalised total, and it stays ``0`` when no
        pair produces a positive similarity.
    :rtype: float
    """
    from chatterbot.utils.wordnet import Wordnet
    from chatterbot.utils.tokenizer import Tokenizer
    import itertools

    wordnet = Wordnet()
    tokenizer = Tokenizer()

    words_a = tokenizer.get_tokens(statement.text)
    words_b = tokenizer.get_tokens(other_statement.text)

    def best_pair_score(senses_a, senses_b):
        # Highest truthy path similarity between two synset lists, or 0.
        scores = (
            sense_a.path_similarity(sense_b)
            for sense_a, sense_b in itertools.product(senses_a, senses_b)
        )
        usable_scores = (score for score in scores if score)

        # The leading 0 is the floor a score must exceed to be selected
        return max(itertools.chain([0], usable_scores))

    running_total = 0

    # Visit every possible pairing of words across the two statements
    for word_a, word_b in itertools.product(words_a, words_b):
        senses_a = wordnet.synsets(word_a)
        senses_b = wordnet.synsets(word_b)

        if senses_a and senses_b:
            # Only the closest synset pairing of the two words counts
            running_total += best_pair_score(senses_a, senses_b)

    return running_total
Beispiel #4
0
class WordnetTestCase(TestCase):
    """
    Exercise the ``Wordnet`` utility through its ``synsets`` lookup.
    """

    def setUp(self):
        """
        Run the parent setup, then create the ``Wordnet`` instance under test.
        """
        super(WordnetTestCase, self).setUp()
        from chatterbot.utils.wordnet import Wordnet as WordnetUtility

        self.wordnet = WordnetUtility()

    def test_wordnet(self):
        # The first two synsets returned for 'test' must have exactly
        # this path similarity.
        expected_similarity = 0.06666666666666667

        senses = self.wordnet.synsets('test')
        first_sense = senses[0]
        second_sense = senses[1]

        self.assertEqual(expected_similarity,
                         first_sense.path_similarity(second_sense))
def synset_distance(statement, other_statement):
    """
    Add up the closest synset similarity for each pair of words.

    All tokens of ``statement`` are paired with all tokens of
    ``other_statement``. When both words of a pair have synsets, the
    largest ``path_similarity`` among their synset combinations is added
    to the result.

    :param statement: Object whose ``text`` attribute is the first sentence.
    :param other_statement: Object whose ``text`` attribute is the
        second sentence.
    :return: The sum of the largest path similarity of each comparable
        word pair; ``0`` if no pair produces a positive similarity.
    """
    from chatterbot.utils.wordnet import Wordnet
    from chatterbot.utils.tokenizer import Tokenizer
    from itertools import product

    lexicon = Wordnet()
    splitter = Tokenizer()

    first_words = splitter.get_tokens(statement.text)
    second_words = splitter.get_tokens(other_statement.text)

    similarity_sum = 0

    # Walk through every cross-statement word pairing
    for first_word, second_word in product(first_words, second_words):
        first_senses = lexicon.synsets(first_word)
        second_senses = lexicon.synsets(second_word)

        # Pairs with a word that has no synsets contribute nothing
        if not first_senses or not second_senses:
            continue

        pair_best = 0

        # Keep only the strongest synset pairing for this pair of words
        for first_sense, second_sense in product(first_senses, second_senses):
            score = first_sense.path_similarity(second_sense)

            if not score:
                continue

            if score > pair_best:
                pair_best = score

        similarity_sum += pair_best

    return similarity_sum
class WordnetTestCase(TestCase):
    """
    Tests for the synset lookup provided by the ``Wordnet`` utility.
    """

    def setUp(self):
        """
        Call the parent setup and store a new ``Wordnet`` on the test case.
        """
        super(WordnetTestCase, self).setUp()
        import chatterbot.utils.wordnet as wordnet_module

        self.wordnet = wordnet_module.Wordnet()

    def test_wordnet(self):
        # Compare the first two synsets found for the word 'test'
        found = self.wordnet.synsets('test')
        leading = found[0]
        following = found[1]

        measured = leading.path_similarity(following)

        self.assertEqual(0.06666666666666667, measured)
Beispiel #7
0
    def setUp(self):
        """
        Run the parent class setup and attach a ``Wordnet`` instance.
        """
        super(WordnetTestCase, self).setUp()
        from chatterbot.utils.wordnet import Wordnet as WordnetUtility

        # Stored on the test case as ``self.wordnet``
        self.wordnet = WordnetUtility()
    def setUp(self):
        """
        Perform the inherited setup, then build the ``Wordnet`` helper.
        """
        super(WordnetTestCase, self).setUp()
        import chatterbot.utils.wordnet as wordnet_module

        # A new instance is created on every call to this method
        self.wordnet = wordnet_module.Wordnet()