def compare(self, first_statement, second_statement):
    """
    Compare two strings without regard to letter case.

    :param first_statement: The first string to compare.
    :param second_statement: The second string to compare.
    :return: The ``SequenceMatcher`` ratio of the lowercased strings,
        rounded to 10 decimal places.
    :rtype: float
    """
    lowered_pair = (first_statement.lower(), second_statement.lower())
    matcher = SequenceMatcher(None, *lowered_pair)
    return round(matcher.ratio(), 10)
Ejemplo n.º 2
0
def levenshtein_distance(statement, other_statement):
    """
    Score how similar the text of two statements is.

    Both texts are lowercased before they are compared. The matcher from
    python-Levenshtein is used when that package can be imported;
    otherwise ``difflib.SequenceMatcher`` is used instead.

    :param statement: An object exposing the text to compare as ``.text``.
    :param other_statement: A second object exposing ``.text``.
    :return: The matcher's ratio rounded to two decimal places, or 0 when
        either statement's text is falsy.
    :rtype: float
    """
    import sys

    # Prefer the python-Levenshtein matcher; fall back to the stdlib one
    try:
        from Levenshtein.StringMatcher import StringMatcher as matcher_cls
    except ImportError:
        from difflib import SequenceMatcher as matcher_cls

    # Nothing to compare when either text is empty or missing
    if not (statement.text and other_statement.text):
        return 0

    # Python 2 coerces through ``unicode``; Python 3 coerces through ``str``
    to_text = unicode if sys.version_info[0] < 3 else str  # NOQA

    lowered = to_text(statement.text.lower())
    other_lowered = to_text(other_statement.text.lower())

    ratio = matcher_cls(None, lowered, other_lowered).ratio()

    # Scale to whole percentage points, then back to a two-decimal fraction
    return int(round(100 * ratio)) / 100.0
Ejemplo n.º 3
0
    def compare(self, statement, other_statement):
        """
        Measure how closely the text of two statements matches.

        Both texts are lowercased before comparison. python-Levenshtein's
        ``StringMatcher`` is used when it can be imported; otherwise
        ``difflib.SequenceMatcher`` is used.

        :param statement: An object exposing the text to compare as ``.text``.
        :param other_statement: A second object exposing ``.text``.
        :return: The matcher's ratio rounded to two decimal places, or 0
            when either statement's text is falsy.
        :rtype: float
        """
        import sys

        # Optional accelerator first, standard library as the fallback
        try:
            from Levenshtein.StringMatcher import StringMatcher as matcher_type
        except ImportError:
            from difflib import SequenceMatcher as matcher_type

        # An empty or missing text on either side scores zero
        if not (statement.text and other_statement.text):
            return 0

        # Pick the text type that matches the running interpreter
        major_version = sys.version_info[0]
        coerce = str if major_version >= 3 else unicode  # NOQA

        left = coerce(statement.text.lower())
        right = coerce(other_statement.text.lower())

        return round(matcher_type(None, left, right).ratio(), 2)
Ejemplo n.º 4
0
    def compare(self, statement, other_statement):
        """
        Compare the two input statements.

        :param statement: An object exposing the text to compare as ``.text``.
        :param other_statement: A second object exposing ``.text``.
        :return: The similarity between the lowercased text of the two
            statements, rounded to two decimal places, or 0 when either
            text is falsy.
        :rtype: float
        """
        # An empty or missing text on either side scores zero
        if not (statement.text and other_statement.text):
            return 0

        lowered_texts = [
            str(item.text.lower()) for item in (statement, other_statement)
        ]

        matcher = SequenceMatcher(None, *lowered_texts)
        return round(matcher.ratio(), 2)
    def compare(self, statement, other_statement):
        """
        Compare the two input statements.

        A character-level ratio of the lowercased texts is computed first
        (python-Levenshtein's ``StringMatcher`` when importable, otherwise
        ``difflib.SequenceMatcher``) and rounded to two decimals. That
        ratio is then adjusted by ``model.wmdistance`` over the
        stop-word-filtered tokens of both texts.

        NOTE(review): ``model``, ``clean_sent``, ``infinity``, ``counter``
        and ``logging`` are module-level names defined outside this method;
        ``model`` is presumably a gensim word-embedding model and
        ``infinity`` presumably ``float('inf')`` -- confirm against the
        module.

        :param statement: A statement object exposing ``.text``; a plain
            string is accepted as well.
        :param other_statement: A second statement object or plain string.
        :return: The adjusted similarity score. The score is not limited
            to the range 0..1: one branch adds 1.0 to the ratio. Returns 0
            when either statement or its text is falsy.
        :rtype: float
        """
        global counter

        import sys
        from chatterbot import utils
        logging.basicConfig(format='%(asctime)s : %(levelname)s : %(message)s')

        # Use python-Levenshtein if available
        try:
            from Levenshtein.StringMatcher import StringMatcher as SequenceMatcher
        except ImportError:
            from difflib import SequenceMatcher

        if not statement or not other_statement:
            return 0

        # Accept statement objects (``.text``) and plain strings alike; the
        # two interpreter branches used to disagree on which one they got.
        raw_text = getattr(statement, 'text', statement)
        other_raw_text = getattr(other_statement, 'text', other_statement)

        # Return 0 if either statement has a falsy text value
        if not raw_text or not other_raw_text:
            return 0

        # Get the lowercase version of both strings
        text_type = unicode if sys.version_info[0] < 3 else str  # NOQA
        statement_text = text_type(raw_text.lower())
        other_statement_text = text_type(other_raw_text.lower())

        similarity = SequenceMatcher(
            None,
            statement_text,
            other_statement_text
        )
        counter += 1

        # Calculate a decimal percent of the similarity
        percent = int(round(100 * similarity.ratio())) / 100.0

        tokens1 = clean_sent(statement_text).lower().split()
        tokens2 = clean_sent(other_statement_text).lower().split()

        # Remove all stop words from the list of word tokens
        s1 = utils.remove_stopwords(tokens1, language='english')
        s2 = utils.remove_stopwords(tokens2, language='english')

        # Computed once: the call was previously made twice and the second
        # result was never used.
        distance = model.wmdistance(s1, s2)
        if distance == infinity:
            # No usable distance; fall back to the character-level ratio
            return percent

        adjustment = 0.15 * abs(1 - distance)

        if percent > distance:
            if percent - distance < 0.25:
                # Decent match
                return percent + 0.08 + adjustment
            # Close match
            # NOTE(review): +1.0 pushes the score above 1 -- confirm intended
            return percent + 1.0 + adjustment

        if percent > 0.4:
            if distance - percent < 0.15:
                return percent + 0.06 + adjustment
            return (percent - 0.04) - adjustment

        # Low ratio that does not exceed the distance: this path used to
        # fall off the end and return None; return the plain ratio instead.
        return percent
Ejemplo n.º 6
-1
    def compare(self, statement, other_statement):
        """
        Rate the similarity of two statements' text.

        The texts are lowercased and compared with ``SequenceMatcher``.

        :param statement: An object exposing the text to compare as ``.text``.
        :param other_statement: A second object exposing ``.text``.
        :return: The matcher's ratio rounded to two decimal places, or 0
            when either statement's text is falsy.
        :rtype: float
        """
        running_python2 = sys.version_info[0] < 3

        # An empty or missing text on either side scores zero
        if not (statement.text and other_statement.text):
            return 0

        # Choose the text type that matches the running interpreter
        if running_python2:
            normalize = unicode  # NOQA
        else:
            normalize = str

        first_text = normalize(statement.text.lower())
        second_text = normalize(other_statement.text.lower())

        matcher = SequenceMatcher(None, first_text, second_text)
        return round(matcher.ratio(), 2)
Ejemplo n.º 7
-1
    def compare(self, statement, other_statement):
        """
        Compare two statements given as plain strings.

        The strings are lowercased and compared with ``SequenceMatcher``.

        :param statement: The first string to compare.
        :param other_statement: The second string to compare.
        :return: The matcher's ratio rounded to four decimal places, or 0
            when either string is falsy.
        :rtype: float
        """
        # An empty or missing string on either side scores zero
        if not (statement and other_statement):
            return 0

        matcher = SequenceMatcher(
            None,
            str(statement.lower()),
            str(other_statement.lower()),
        )

        return round(matcher.ratio(), 4)