def synset_distance(statement, other_statement):
    """
    Calculate the similarity of two statements.
    This is based on the total maximum synset similarity
    between each word in each sentence.

    :return: The percent of similarity between the closest synset distance.
    :rtype: float
    """
    from chatterbot.utils.wordnet import Wordnet
    from chatterbot.utils.tokenizer import Tokenizer
    import itertools

    wordnet = Wordnet()
    tokenizer = Tokenizer()

    tokens1 = tokenizer.get_tokens(statement.text)
    tokens2 = tokenizer.get_tokens(other_statement.text)

    # The maximum possible similarity is an exact match.
    # Because path_similarity returns a value between 0 and 1 for each
    # word pairing, max_possible_similarity is the number of words in
    # the longer of the two input statements.
    max_possible_similarity = max(
        len(statement.text.split()),
        len(other_statement.text.split())
    )

    # Guard against empty statements before dividing.
    if max_possible_similarity == 0:
        return 0

    total_similarity = 0.0

    # For each possible pairing of words across the two statements,
    # find the closest pair of synsets and accumulate that best score.
    for word1, word2 in itertools.product(tokens1, tokens2):
        synsets1 = wordnet.synsets(word1)
        synsets2 = wordnet.synsets(word2)

        if synsets1 and synsets2:
            best_similarity = 0.0

            for synset1, synset2 in itertools.product(synsets1, synsets2):
                similarity = synset1.path_similarity(synset2)
                # path_similarity returns None when the synsets share no path
                if similarity and similarity > best_similarity:
                    best_similarity = similarity

            # BUG FIX: accumulate the best score for every word pairing
            # (matching the docstring's "total maximum synset similarity")
            # instead of keeping only one global maximum, which capped the
            # result at 1 / max_possible_similarity.
            total_similarity += best_similarity

    return total_similarity / max_possible_similarity
class TokenizerTestCase(TestCase):
    """Tests for the Tokenizer utility's token extraction."""

    def setUp(self):
        """Create a fresh Tokenizer instance for each test."""
        super(TokenizerTestCase, self).setUp()
        from chatterbot.utils.tokenizer import Tokenizer
        self.tokenizer = Tokenizer()

    def test_get_tokens(self):
        """Every word is kept when stop words are not excluded."""
        result = self.tokenizer.get_tokens(
            'what time is it', exclude_stop_words=False
        )
        self.assertEqual(result, ['what', 'time', 'is', 'it'])

    def test_get_tokens_exclude_stop_words(self):
        """Only the non-stop word remains when stop words are excluded."""
        result = self.tokenizer.get_tokens(
            'what time is it', exclude_stop_words=True
        )
        self.assertEqual(result, {'time'})
def synset_distance(statement, other_statement):
    """
    Calculate the similarity of two statements.
    This is based on the total maximum synset similarity
    between each word in each sentence.

    :return: The percent of similarity between the closest synset distance.
    :rtype: float
    """
    from chatterbot.utils.wordnet import Wordnet
    from chatterbot.utils.tokenizer import Tokenizer
    import itertools

    wordnet = Wordnet()
    tokenizer = Tokenizer()

    tokens1 = tokenizer.get_tokens(statement.text)
    tokens2 = tokenizer.get_tokens(other_statement.text)

    # The maximum possible similarity is an exact match.
    # Because path_similarity returns a value between 0 and 1 for each
    # word pairing, max_possible_similarity is the number of words in
    # the longer of the two input statements.
    max_possible_similarity = max(
        len(statement.text.split()),
        len(other_statement.text.split())
    )

    # Guard against empty statements before dividing.
    if max_possible_similarity == 0:
        return 0

    total_similarity = 0.0

    # For each possible pairing of words across the two statements,
    # find the closest pair of synsets and accumulate that best score.
    for word1, word2 in itertools.product(tokens1, tokens2):
        synsets1 = wordnet.synsets(word1)
        synsets2 = wordnet.synsets(word2)

        if synsets1 and synsets2:
            best_similarity = 0.0

            for synset1, synset2 in itertools.product(synsets1, synsets2):
                similarity = synset1.path_similarity(synset2)
                # path_similarity returns None when the synsets share no path
                if similarity and similarity > best_similarity:
                    best_similarity = similarity

            # BUG FIX: accumulate the best score for every word pairing
            # (matching the docstring's "total maximum synset similarity")
            # instead of keeping only one global maximum, which capped the
            # result at 1 / max_possible_similarity.
            total_similarity += best_similarity

    return total_similarity / max_possible_similarity
def synset_distance(statement, other_statement):
    """
    Calculate the similarity of two statements.
    This is based on the total maximum synset similarity
    between each word in each sentence.

    :return: The ratio of difference between the synset distance of both statements.
    :rtype: float
    """
    from chatterbot.utils.wordnet import Wordnet
    from chatterbot.utils.tokenizer import Tokenizer
    import itertools

    wordnet = Wordnet()
    tokenizer = Tokenizer()

    words_a = tokenizer.get_tokens(statement.text)
    words_b = tokenizer.get_tokens(other_statement.text)

    total_similarity = 0

    # For every pairing of words across the two statements, locate the
    # single closest pair of synsets and add its score to the total.
    for word_a, word_b in itertools.product(words_a, words_b):
        synsets_a = wordnet.synsets(word_a)
        synsets_b = wordnet.synsets(word_b)

        # Skip pairings where either word has no known synsets
        if not (synsets_a and synsets_b):
            continue

        best_score = 0
        for synset_a, synset_b in itertools.product(synsets_a, synsets_b):
            score = synset_a.path_similarity(synset_b)
            # path_similarity yields None when no path connects the synsets
            if score and score > best_score:
                best_score = score

        # Add the most similar path value to the total
        total_similarity += best_score

    return total_similarity
def synset_distance(statement, other_statement):
    """
    Calculate the similarity of two statements.
    This is based on the total maximum synset similarity
    between each word in each sentence.
    """
    from chatterbot.utils.wordnet import Wordnet
    from chatterbot.utils.tokenizer import Tokenizer
    import itertools

    wordnet = Wordnet()
    tokenizer = Tokenizer()

    words_one = tokenizer.get_tokens(statement.text)
    words_two = tokenizer.get_tokens(other_statement.text)

    total_similarity = 0

    # For every pairing of words across the two statements, locate the
    # single closest pair of synsets and add its score to the total.
    for word_one, word_two in itertools.product(words_one, words_two):
        synsets_one = wordnet.synsets(word_one)
        synsets_two = wordnet.synsets(word_two)

        # Skip pairings where either word has no known synsets
        if not (synsets_one and synsets_two):
            continue

        closest = 0
        for first, second in itertools.product(synsets_one, synsets_two):
            similarity = first.path_similarity(second)
            # path_similarity yields None when no path connects the synsets
            if similarity and similarity > closest:
                closest = similarity

        # Add the most similar path value to the total
        total_similarity += closest

    return total_similarity
def setUp(self):
    """Initialize the shared Tokenizer instance used by the tests."""
    from chatterbot.utils.tokenizer import Tokenizer

    super(TokenizerTestCase, self).setUp()
    self.tokenizer = Tokenizer()