Esempio n. 1
0
    def __init__(self, **kwargs):
        """
        Create the adapter with an empty interaction state and load
        the stored program data.
        """
        super(DeveloperAssistant, self).__init__(**kwargs)

        # What is known so far about the program the user wants to run
        self.program_data = {"name": "", "path": ""}

        # How far the exchange with the user has progressed
        self.stage = ""
        self.conversation = []

        # The data directory is assigned before the program file is read
        self.data_dir = ""
        self.data = self.read_program_file()

        # Helpers for processing the user's text
        self.stopwords = StopWordsManager()
        self.tagger = POSTagger()
Esempio n. 2
0
def synset_distance(statement, other_statement):
    """
    Calculate the similarity of two statements.
    This is based on the total similarity between
    each word in each sentence.

    :param statement: Object whose ``text`` attribute is compared.
    :param other_statement: Object whose ``text`` attribute is
        compared against the first one.
    :returns: The sum, over every pair of words (one from each
        statement) for which both words have synsets, of the highest
        path similarity found between their synsets. 0 is returned
        when no pair of words qualifies.
    """
    from chatterbot.utils.pos_tagger import POSTagger
    from chatterbot.utils.stop_words import StopWordsManager
    from chatterbot.utils.word_net import Wordnet
    import itertools

    wordnet = Wordnet()
    tagger = POSTagger()
    stopwords = StopWordsManager()

    def get_tokens(text, exclude_stop_words=True):
        """
        Takes a string and converts it to a collection
        of each word. Skips common stop words such
        as ("is, the, a, ...") if 'exclude_stop_words'
        is True.
        """
        tokens = tagger.tokenize(text.lower())

        # Remove any stop words from the string
        if exclude_stop_words:
            tokens = set(tokens) - set(stopwords.words('english'))

        return tokens

    def get_synsets(text):
        """
        Look up the synsets of each token of the text exactly once
        and drop the tokens for which nothing was found.
        """
        found = (wordnet.synsets(token) for token in get_tokens(text))
        return [synsets for synsets in found if synsets]

    # The lookups are done up front so that a word is not looked up
    # again for every word of the other statement
    synsets1 = get_synsets(statement.text)
    synsets2 = get_synsets(other_statement.text)

    total_similarity = 0

    # Get the highest matching value for each possible combination of words
    for word_synsets1, word_synsets2 in itertools.product(synsets1, synsets2):

        max_similarity = 0

        # Get the highest similarity for each combination of synsets
        for synset1, synset2 in itertools.product(word_synsets1, word_synsets2):
            similarity = synset1.path_similarity(synset2)

            # path_similarity may give a falsy result, which is ignored
            if similarity and (similarity > max_similarity):
                max_similarity = similarity

        # Add the most similar path value to the total
        total_similarity += max_similarity

    return total_similarity
Esempio n. 3
0
    def __init__(self, **kwargs):
        super(DeveloperAssistant, self).__init__(**kwargs)

        # Nothing is known about the requested program yet
        self.stage = ""
        self.program_data = {"name": "", "path": ""}

        # Stored program data; data_dir is assigned first and
        # read_program_file() is called afterwards
        self.data_dir = ""
        self.data = self.read_program_file()

        # Language helpers and the conversation seen so far
        self.stopwords = StopWordsManager()
        self.tagger = POSTagger()
        self.conversation = []
Esempio n. 4
0
class WeatherLogicAdapter(LogicAdapter):
    """
    A logic adapter that returns information regarding the weather and
    the forecast for a specific location. Currently, only basic information
    is returned, but additional features are planned in the future.

    The location is taken from tokens of the input text that contain
    "latitude=" and "longitude=".
    """

    def __init__(self, **kwargs):
        """
        :param kwargs: Passed on to the parent class. The value of
            "forecastio_api_key" (None when absent) is kept for the
            forecast request.
        """
        super(WeatherLogicAdapter, self).__init__(**kwargs)

        self.tagger = POSTagger()
        self.forecastio_api_key = kwargs.get("forecastio_api_key")

    def process(self, statement):
        """
        Returns the forecast for a location (using latitude and longitude).

        :param statement: The input statement; only its text is used.
        :returns: A (confidence, Statement) tuple. The confidence is 1
            when the text contains "weather" and both coordinates were
            found, otherwise it is 0 and the Statement is empty.
        """
        user_input = statement.text.lower()
        if "weather" not in user_input:
            return 0, Statement("")

        latitude = self.get_latitude(user_input)
        longitude = self.get_longitude(user_input)

        # Test the values themselves. A check such as `is not ""` compares
        # object identity, which only gives the expected answer when the
        # interpreter happens to reuse one empty string object.
        if latitude and longitude:
            # @TODO: Add more options for getting weather. This could include
            #   the current temperature, the current cloud cover, etc. This
            #   might require removing the forecastio library (which is
            #   probably a good idea).
            return 1, Statement("The forecast for tomorrow is: " + self.get_weather(latitude, longitude))

        return 0, Statement("")

    def _get_coordinate(self, user_input, label):
        """
        Returns the first token containing "<label>=" with that marker
        removed, or an empty string when there is no such token.
        """
        marker = label + "="

        for token in self.tagger.tokenize(user_input):
            if marker in token:
                return token.replace(marker, "")

        return ""

    def get_latitude(self, user_input):
        """
        Returns the latitude extracted from the input.
        An empty string is returned when no latitude is present.
        """
        return self._get_coordinate(user_input, "latitude")

    def get_longitude(self, user_input):
        """
        Returns the longitude extracted from the input.
        An empty string is returned when no longitude is present.
        """
        return self._get_coordinate(user_input, "longitude")

    def get_weather(self, latitude, longitude):
        """
        Returns the weather for a given latitude and longitude.

        The coordinates are handed to forecastio as they were extracted
        and the summary of the hourly forecast is returned.
        """
        # @TODO: Find some way to suppress the warnings generated by this.
        forecast = forecastio.load_forecast(self.forecastio_api_key, latitude, longitude)

        return forecast.hourly().summary
Esempio n. 5
0
    def __init__(self, **kwargs):
        """
        Keep the forecast API key given in the keyword arguments
        and create the tagger.
        """
        super(WeatherLogicAdapter, self).__init__(**kwargs)

        api_key = kwargs.get("forecastio_api_key")

        self.forecastio_api_key = api_key
        self.tagger = POSTagger()
Esempio n. 6
0
class DeveloperAssistant(LogicAdapter):
    """
    The DeveloperAssistant logic adapter provides a set of tools
    that can help a developer program. Currently, only the following
    features are supported:
    1) Running Python programs

    The progress of a request is tracked in ``self.stage``, a string
    that is built up from the markers "name", " path" and
    " previously_used" as information is gathered.
    """
    def __init__(self, **kwargs):
        super(DeveloperAssistant, self).__init__(**kwargs)

        # Initializing variables
        self.program_data = {"name": "", "path": ""}
        self.stage = ""
        self.data_dir = ""
        self.data = self.read_program_file()

        self.stopwords = StopWordsManager()
        self.tagger = POSTagger()
        self.conversation = []

    def process(self, statement):
        """
        Assuming the user inputed statement is a
        request for the developer assistant, parse
        the request and determine the appropriate
        action to be used.

        :param statement: The user's input statement.
        :returns: A (confidence, Statement) tuple. The response asks for
            the program's path, offers a previously used path, or
            reports that the program is being started. 0 and an empty
            Statement are returned when no request was recognized.
        """
        confidence = 0

        # Getting the conversation. The previous value is kept when the
        # context (or its conversation) is not available.
        try:
            self.conversation = self.context.conversation
        except AttributeError:
            pass

        # Getting the stage of interaction with the user (assuming a command has not been executed)
        if self.stage != "name path":
            self.data = self.read_program_file()
            confidence = self.determine_stage_of_interaction(statement)

        if self.stage == "name":
            return confidence, Statement("What is the absolute path to " +
                                         self.program_data["name"] + "?")
        elif "previously_used" in self.stage:
            return confidence, Statement("Would you like to use the path " +
                                         self.program_data["suggested_path"] +
                                         "?")
        elif "name path" in self.stage:
            # Run program. The arguments are passed as a list, without a
            # shell, so that characters in the user supplied text cannot
            # be interpreted as shell syntax.
            subprocess.Popen([
                "python",
                self.program_data["path"] + self.program_data["name"]
            ])
            return_statement = Statement("Running " +
                                         self.program_data["name"] + "...")
            self.update_data()

            # Resetting global variables
            self.program_data = {"name": "", "path": ""}
            self.stage = ""

            # Return a response
            return confidence, return_statement

        return 0, Statement("")

    def read_program_file(self):
        """
        Read in the programs that have been run previously.

        :returns: The content of "programs_run.json" in the data
            directory. An empty record is returned when the file does
            not exist or cannot be read as JSON.
        """
        path = self.data_dir + "programs_run.json"
        if os.path.exists(path):
            with open(path, 'r') as data_file:
                try:
                    return json.load(data_file)
                except (IOError, ValueError):
                    # Unreadable or invalid content: start from an
                    # empty record instead
                    pass

        empty_data = {"programs_run": {}}

        return empty_data

    def write_program_file(self):
        """
        Write the programs that have been previously run.
        """
        path = self.data_dir + "programs_run.json"
        with open(path, 'w') as data_file:
            json.dump(self.data,
                      data_file,
                      sort_keys=True,
                      indent=4,
                      ensure_ascii=False)

    def update_data(self):
        """
        Update the data for the programs run.

        The path of the current program is recorded under its name
        and the record is written back to the program file.
        """
        most_recent_data = {
            self.program_data["name"]: self.program_data["path"]
        }
        self.data["programs_run"].update(most_recent_data)
        self.write_program_file()

    def determine_stage_of_interaction(self, input_statement):
        """
        Determines at which point in the interaction with
        the user chatterbot is.

        The first item of every conversation entry, followed by the
        text of the current input, is searched for a program name and
        a path. ``self.stage`` and ``self.program_data`` are updated
        with what was found.

        :param input_statement: The statement currently being processed.
        :returns: 1 when a stage has been identified, otherwise 0.
        """
        confidence = 0

        # Parsing through the conversation with chatterbot looking for
        # information; the current input is examined last
        past_inputs = [exchange[0] for exchange in self.conversation]

        for user_input in past_inputs + [input_statement.text]:

            # Determining whether suggested path was asked
            if "previously_used" in self.stage:
                # @TODO: Replace the hardcoded "yes" with a call to a utility
                #   function that determines if any word similar to (in this
                #   case) "yes" is the text
                if input_statement.text.lower() == "yes":
                    self.stage = "name path"
                    self.program_data["path"] = self.program_data[
                        "suggested_path"]

                    return 1

            # Getting name of program (if available). Strings are compared
            # by value; identity checks depend on how the interpreter
            # happens to share string objects.
            extracted_name = self.extract_name(user_input)
            if extracted_name and extracted_name != self.program_data["name"]:
                self.program_data["name"] = extracted_name
                self.stage = "name"

            # Getting path of program (if available)
            extracted_path = self.extract_path(user_input)
            if extracted_path and extracted_path != self.program_data["path"]:
                self.program_data["path"] = extracted_path
                self.stage += " path"

        if self.stage != "":
            confidence = 1

        # A previously used path is only suggested while the name and
        # the path have not both been supplied
        if self.stage != "name path":
            programs_run = self.data["programs_run"]

            if self.program_data["name"] in programs_run:
                # Use a suggested path if the program has been used before.
                # The marker is added once; it is only ever tested with `in`.
                if "previously_used" not in self.stage:
                    self.stage += " previously_used"
                self.program_data["suggested_path"] = programs_run[
                    self.program_data["name"]]

        return confidence

    def extract_name(self, user_input):
        """
        Return the program's name if it is included somewhere in the
        conversation.

        The name is the token following the first token that contains
        "run", without any leading directories. An empty string is
        returned when there is no such token.
        """
        # The following assumes that the user_input is simply: "run program_x"
        # @TODO: Change this to a more advanced parsing of the user_input. It
        #   requires additional functions within the chatterbot.utils module
        #   and some more thought on how to implement a better system
        # @TODO: Implement more ways a user can communicate the name for
        #   a program
        has_asked_run = False
        for token in self.tagger.tokenize(user_input):
            if has_asked_run:
                # Keep what follows the last "/" (the whole token when
                # it does not contain one)
                return token.rsplit("/", 1)[-1]

            if "run" in token:
                has_asked_run = True

        return ""

    def extract_path(self, user_input):
        """
        Return the program's path if it is included somewhere in the
        conversation.

        The path is taken from the first token that contains a "/" and
        reaches up to and including the last "/" of that token. An empty
        string is returned when there is no such token.
        """
        # Identifies the path if one is in user_input
        # @TODO: Rewrite to remove false positives (which can be created
        #   easily with the current implementation)
        # @TODO: Implement more ways a user can communicate the path for
        #   a program
        for word in self.tagger.tokenize(user_input):
            if "/" in word:
                directory, separator, _ = word.rpartition("/")
                return directory + separator

        return ""
Esempio n. 7
0
class WeatherLogicAdapter(LogicAdapter):
    """
    A logic adapter that returns information regarding the weather and
    the forecast for a specific location. Currently, only basic information
    is returned, but additional features are planned in the future.

    The location is taken from tokens of the input text that contain
    "latitude=" and "longitude=".
    """

    def __init__(self, **kwargs):
        """
        :param kwargs: Passed on to the parent class. The value of
            "forecastio_api_key" (None when absent) is kept for the
            forecast request.
        """
        super(WeatherLogicAdapter, self).__init__(**kwargs)

        self.tagger = POSTagger()
        self.forecastio_api_key = kwargs.get("forecastio_api_key")

    def process(self, statement):
        """
        Returns the forecast for a location (using latitude and longitude).

        :param statement: The input statement; only its text is used.
        :returns: A (confidence, Statement) tuple. The confidence is 1
            when the text contains "weather" and both coordinates were
            found, otherwise it is 0 and the Statement is empty.
        """
        user_input = statement.text.lower()
        if "weather" not in user_input:
            return 0, Statement("")

        latitude = self.get_latitude(user_input)
        longitude = self.get_longitude(user_input)

        # Test the values themselves. A check such as `is not ""` compares
        # object identity, which only gives the expected answer when the
        # interpreter happens to reuse one empty string object.
        if latitude and longitude:
            # @TODO: Add more options for getting weather. This could include
            #   the current temperature, the current cloud cover, etc. This
            #   might require removing the forecastio library (which is
            #   probably a good idea).
            return 1, Statement("The forecast for tomorrow is: " + self.get_weather(latitude, longitude))

        return 0, Statement("")

    def _get_coordinate(self, user_input, label):
        """
        Returns the first token containing "<label>=" with that marker
        removed, or an empty string when there is no such token.
        """
        marker = label + "="

        for token in self.tagger.tokenize(user_input):
            if marker in token:
                return token.replace(marker, "")

        return ""

    def get_latitude(self, user_input):
        """
        Returns the latitude extracted from the input.
        An empty string is returned when no latitude is present.
        """
        return self._get_coordinate(user_input, "latitude")

    def get_longitude(self, user_input):
        """
        Returns the longitude extracted from the input.
        An empty string is returned when no longitude is present.
        """
        return self._get_coordinate(user_input, "longitude")

    def get_weather(self, latitude, longitude):
        """
        Returns the weather for a given latitude and longitude.

        The coordinates are handed to forecastio as they were extracted
        and the summary of the hourly forecast is returned.
        """
        # @TODO: Find some way to suppress the warnings generated by this.
        forecast = forecastio.load_forecast(self.forecastio_api_key, latitude, longitude)

        return forecast.hourly().summary
Esempio n. 8
0
    def __init__(self, **kwargs):
        super(WeatherLogicAdapter, self).__init__(**kwargs)

        # The key is None when it was not supplied
        self.forecastio_api_key = kwargs.get("forecastio_api_key", None)

        # Helper object stored for later use by the adapter
        self.tagger = POSTagger()
Esempio n. 9
0
class DeveloperAssistant(LogicAdapter):
    """
    The DeveloperAssistant logic adapter provides a set of tools
    that can help a developer program. Currently, only the following
    features are supported:
    1) Running Python programs

    The progress of a request is tracked in ``self.stage``, a string
    that is built up from the markers "name", " path" and
    " previously_used" as information is gathered.
    """

    def __init__(self, **kwargs):
        super(DeveloperAssistant, self).__init__(**kwargs)

        # Initializing variables
        self.program_data = { "name" : "", "path" : "" }
        self.stage = ""
        self.data_dir = ""
        self.data = self.read_program_file()

        self.stopwords = StopWordsManager()
        self.tagger = POSTagger()
        self.conversation = []

    def process(self, statement):
        """
        Assuming the user inputed statement is a
        request for the developer assistant, parse
        the request and determine the appropriate
        action to be used.

        :param statement: The user's input statement.
        :returns: A (confidence, Statement) tuple. The response asks for
            the program's path, offers a previously used path, or
            reports that the program is being started. 0 and an empty
            Statement are returned when no request was recognized.
        """
        confidence = 0

        # Getting the conversation. The previous value is kept when the
        # context (or its conversation) is not available.
        try:
            self.conversation = self.context.conversation
        except AttributeError:
            pass

        # Getting the stage of interaction with the user (assuming a command has not been executed)
        if self.stage != "name path":
            self.data = self.read_program_file()
            confidence = self.determine_stage_of_interaction(statement)

        if self.stage == "name":
            return confidence, Statement("What is the absolute path to " + self.program_data["name"] + "?")
        elif "previously_used" in self.stage:
            return confidence, Statement("Would you like to use the path " + self.program_data["suggested_path"] + "?")
        elif "name path" in self.stage:
            # Run program. The arguments are passed as a list, without a
            # shell, so that characters in the user supplied text cannot
            # be interpreted as shell syntax.
            subprocess.Popen(["python", self.program_data["path"] + self.program_data["name"]])
            return_statement = Statement("Running " + self.program_data["name"] + "...")
            self.update_data()

            # Resetting global variables
            self.program_data = { "name" : "", "path" : "" }
            self.stage = ""

            # Return a response
            return confidence, return_statement

        return 0, Statement("")

    def read_program_file(self):
        """
        Read in the programs that have been run previously.

        :returns: The content of "programs_run.json" in the data
            directory. An empty record is returned when the file does
            not exist or cannot be read as JSON.
        """
        path = self.data_dir + "programs_run.json"
        if os.path.exists(path):
            with open(path, 'r') as data_file:
                try:
                    return json.load(data_file)
                except (IOError, ValueError):
                    # Unreadable or invalid content: start from an
                    # empty record instead
                    pass

        empty_data = {
            "programs_run": {
            }
        }

        return empty_data

    def write_program_file(self):
        """
        Write the programs that have been previously run.
        """
        path = self.data_dir + "programs_run.json"
        with open(path, 'w') as data_file:
            json.dump(self.data, data_file, sort_keys = True, indent = 4, ensure_ascii=False)

    def update_data(self):
        """
        Update the data for the programs run.

        The path of the current program is recorded under its name
        and the record is written back to the program file.
        """
        most_recent_data = { self.program_data["name"] : self.program_data["path"] }
        self.data["programs_run"].update(most_recent_data)
        self.write_program_file()

    def determine_stage_of_interaction(self, input_statement):
        """
        Determines at which point in the interaction with
        the user chatterbot is.

        The first item of every conversation entry, followed by the
        text of the current input, is searched for a program name and
        a path. ``self.stage`` and ``self.program_data`` are updated
        with what was found.

        :param input_statement: The statement currently being processed.
        :returns: 1 when a stage has been identified, otherwise 0.
        """
        confidence = 0

        # Parsing through the conversation with chatterbot looking for
        # information; the current input is examined last
        past_inputs = [exchange[0] for exchange in self.conversation]

        for user_input in past_inputs + [input_statement.text]:

            # Determining whether suggested path was asked
            if "previously_used" in self.stage:
                # @TODO: Replace the hardcoded "yes" with a call to a utility
                #   function that determines if any word similar to (in this
                #   case) "yes" is the text
                if input_statement.text.lower() == "yes":
                    self.stage = "name path"
                    self.program_data["path"] = self.program_data["suggested_path"]

                    return 1

            # Getting name of program (if available). Strings are compared
            # by value; identity checks depend on how the interpreter
            # happens to share string objects.
            extracted_name = self.extract_name(user_input)
            if extracted_name and extracted_name != self.program_data["name"]:
                self.program_data["name"] = extracted_name
                self.stage = "name"

            # Getting path of program (if available)
            extracted_path = self.extract_path(user_input)
            if extracted_path and extracted_path != self.program_data["path"]:
                self.program_data["path"] = extracted_path
                self.stage += " path"

        if self.stage != "":
            confidence = 1

        # A previously used path is only suggested while the name and
        # the path have not both been supplied
        if self.stage != "name path":
            programs_run = self.data["programs_run"]

            if self.program_data["name"] in programs_run:
                # Use a suggested path if the program has been used before.
                # The marker is added once; it is only ever tested with `in`.
                if "previously_used" not in self.stage:
                    self.stage += " previously_used"
                self.program_data["suggested_path"] = programs_run[self.program_data["name"]]

        return confidence

    def extract_name(self, user_input):
        """
        Return the program's name if it is included somewhere in the
        conversation.

        The name is the token following the first token that contains
        "run", without any leading directories. An empty string is
        returned when there is no such token.
        """
        # The following assumes that the user_input is simply: "run program_x"
        # @TODO: Change this to a more advanced parsing of the user_input. It
        #   requires additional functions within the chatterbot.utils module
        #   and some more thought on how to implement a better system
        # @TODO: Implement more ways a user can communicate the name for
        #   a program
        has_asked_run = False
        for token in self.tagger.tokenize(user_input):
            if has_asked_run:
                # Keep what follows the last "/" (the whole token when
                # it does not contain one)
                return token.rsplit("/", 1)[-1]

            if "run" in token:
                has_asked_run = True

        return ""

    def extract_path(self, user_input):
        """
        Return the program's path if it is included somewhere in the
        conversation.

        The path is taken from the first token that contains a "/" and
        reaches up to and including the last "/" of that token. An empty
        string is returned when there is no such token.
        """
        # Identifies the path if one is in user_input
        # @TODO: Rewrite to remove false positives (which can be created
        #   easily with the current implementation)
        # @TODO: Implement more ways a user can communicate the path for
        #   a program
        for word in self.tagger.tokenize(user_input):
            if "/" in word:
                directory, separator, _ = word.rpartition("/")
                return directory + separator

        return ""
Esempio n. 10
0
    def __init__(self, **kwargs):
        """
        Create the helper objects kept on the adapter.
        """
        super(ClosestMeaningAdapter, self).__init__(**kwargs)

        # Built in one statement; the helpers are still created in
        # the order wordnet, tagger, stop words
        self.wordnet, self.tagger, self.stopwords = (
            Wordnet(), POSTagger(), StopWordsManager()
        )
Esempio n. 11
0
    def test_pos_tagger(self):
        """
        Tokenizing a short question should yield its words in order.
        """
        expected_tokens = ['what', 'time', 'is', 'it']

        tokens = POSTagger().tokenize("what time is it")

        self.assertEqual(tokens, expected_tokens)
Esempio n. 12
0
    def test_pos_tagger_tokenize(self):
        # Each word of the sentence is expected as a separate token
        sentence = "what time is it"
        tagger = POSTagger()

        self.assertEqual(
            tagger.tokenize(sentence),
            ['what', 'time', 'is', 'it']
        )
Esempio n. 13
0
class ClosestMeaningAdapter(BaseMatchAdapter):
    """
    This adapter selects a response by comparing the tokenized form of the
    input statement's text, with the tokenized form of possible matching
    statements. For each possible match, the sum of the Cartesian product of
    the path similarity of each statement is compared. This process simulates
    an evaluation of the closeness of synonyms. The known statement with the
    greatest path similarity is then returned.
    """

    def __init__(self, **kwargs):
        super(ClosestMeaningAdapter, self).__init__(**kwargs)

        self.wordnet = Wordnet()
        self.tagger = POSTagger()
        self.stopwords = StopWordsManager()

    def get_tokens(self, text, exclude_stop_words=True):
        """
        Takes a string and converts it to a collection
        of each word. Skips common stop words such
        as ("is, the, a, ...") if 'exclude_stop_words'
        is True.

        :returns: The tokens of the lower-cased text as given by the
            tagger, or a set without the english stop words when
            'exclude_stop_words' is True.
        """
        tokens = self.tagger.tokenize(text.lower())

        # Remove any stop words from the string
        if exclude_stop_words:
            tokens = set(tokens) - set(self.stopwords.words("english"))

        return tokens

    def _get_synsets(self, text):
        """
        Look up the synsets of each token of the text exactly once
        and drop the tokens for which nothing was found.
        """
        found = (self.wordnet.synsets(token) for token in self.get_tokens(text))
        return [synsets for synsets in found if synsets]

    def get_similarity(self, string1, string2):
        """
        Calculate the similarity of two statements.
        This is based on the total similarity between
        each word in each sentence.

        :returns: The sum, over every pair of words (one from each
            string) for which both words have synsets, of the highest
            path similarity found between their synsets.
        """
        import itertools

        # The lookups are done up front so that a word is not looked up
        # again for every word of the other string
        synsets1 = self._get_synsets(string1)
        synsets2 = self._get_synsets(string2)

        total_similarity = 0

        # Get the highest matching value for each possible combination of words
        for word_synsets1, word_synsets2 in itertools.product(synsets1, synsets2):

            max_similarity = 0

            # Get the highest similarity for each combination of synsets
            for synset1, synset2 in itertools.product(word_synsets1, word_synsets2):
                similarity = synset1.path_similarity(synset2)

                # path_similarity may give a falsy result, which is ignored
                if similarity and (similarity > max_similarity):
                    max_similarity = similarity

            # Add the most similar path value to the total
            total_similarity += max_similarity

        return total_similarity

    def get(self, input_statement):
        """
        Takes an input statement and returns the closest matching
        statement among the response statements in storage.

        :returns: A (confidence, statement) tuple. An exact text match
            gives a confidence of 1 and the input statement itself.
            Otherwise the confidence is the similarity of the closest
            statement divided by the sum of all similarities, or 0 when
            that sum is zero.
        :raises EmptyDatasetException: When there are no response
            statements and no storage context.
        """
        statement_list = self.context.storage.get_response_statements()

        if not statement_list:
            if self.has_storage_context:
                # Use a randomly picked statement
                return 0, self.context.storage.get_random()
            else:
                raise self.EmptyDatasetException()

        # Check if an exact match exists
        for statement in statement_list:
            if statement.text == input_statement.text:
                return 1, input_statement

        closest_statement = None
        closest_similarity = -1
        total_similarity = 0

        # For each option in the list of options. The statement itself is
        # remembered so that no second search by text is needed afterwards.
        for statement in statement_list:
            similarity = self.get_similarity(input_statement.text, statement.text)

            total_similarity += similarity

            if similarity > closest_similarity:
                closest_similarity = similarity
                closest_statement = statement

        # The total is zero when no statement had any similarity
        if total_similarity:
            confidence = closest_similarity / total_similarity
        else:
            confidence = 0

        return confidence, closest_statement
Esempio n. 14
0
    def __init__(self, **kwargs):
        super(ClosestMeaningAdapter, self).__init__(**kwargs)

        # Attribute name and the class that provides its value
        helpers = (
            ("wordnet", Wordnet),
            ("tagger", POSTagger),
            ("stopwords", StopWordsManager),
        )

        for attribute, helper_class in helpers:
            setattr(self, attribute, helper_class())
Esempio n. 15
0
class ClosestMeaningAdapter(BaseMatchAdapter):
    """
    This adapter selects a response by comparing the tokenized form of the
    input statement's text, with the tokenized form of possible matching
    statements. For each possible match, the sum of the Cartesian product of
    the path similarity of each statement is compared. This process simulates
    an evaluation of the closeness of synonyms. The known statement with the
    greatest path similarity is then returned.
    """
    def __init__(self, **kwargs):
        super(ClosestMeaningAdapter, self).__init__(**kwargs)

        self.wordnet = Wordnet()
        self.tagger = POSTagger()
        self.stopwords = StopWordsManager()

    def get_tokens(self, text, exclude_stop_words=True):
        """
        Takes a string and converts it to a collection
        of each word. Skips common stop words such
        as ("is, the, a, ...") if 'exclude_stop_words'
        is True.

        :returns: The tokens of the lower-cased text as given by the
            tagger, or a set without the english stop words when
            'exclude_stop_words' is True.
        """
        tokens = self.tagger.tokenize(text.lower())

        # Remove any stop words from the string
        if exclude_stop_words:
            tokens = set(tokens) - set(self.stopwords.words("english"))

        return tokens

    def _get_synsets(self, text):
        """
        Look up the synsets of each token of the text exactly once
        and drop the tokens for which nothing was found.
        """
        found = (self.wordnet.synsets(token) for token in self.get_tokens(text))
        return [synsets for synsets in found if synsets]

    def get_similarity(self, string1, string2):
        """
        Calculate the similarity of two statements.
        This is based on the total similarity between
        each word in each sentence.

        :returns: The sum, over every pair of words (one from each
            string) for which both words have synsets, of the highest
            path similarity found between their synsets.
        """
        import itertools

        # The lookups are done up front so that a word is not looked up
        # again for every word of the other string
        synsets1 = self._get_synsets(string1)
        synsets2 = self._get_synsets(string2)

        total_similarity = 0

        # Get the highest matching value for each possible combination of words
        for word_synsets1, word_synsets2 in itertools.product(synsets1, synsets2):

            max_similarity = 0

            # Get the highest similarity for each combination of synsets
            for synset1, synset2 in itertools.product(word_synsets1, word_synsets2):
                similarity = synset1.path_similarity(synset2)

                # path_similarity may give a falsy result, which is ignored
                if similarity and (similarity > max_similarity):
                    max_similarity = similarity

            # Add the most similar path value to the total
            total_similarity += max_similarity

        return total_similarity

    def get(self, input_statement):
        """
        Takes an input statement and returns the closest matching
        statement among the response statements in storage.

        :returns: A (confidence, statement) tuple. An exact text match
            gives a confidence of 1 and the input statement itself.
            Otherwise the confidence is the similarity of the closest
            statement divided by the sum of all similarities, or 0 when
            that sum is zero.
        :raises EmptyDatasetException: When there are no response
            statements and no storage context.
        """
        statement_list = self.context.storage.get_response_statements()

        if not statement_list:
            if self.has_storage_context:
                # Use a randomly picked statement
                return 0, self.context.storage.get_random()
            else:
                raise self.EmptyDatasetException()

        # Check if an exact match exists
        for statement in statement_list:
            if statement.text == input_statement.text:
                return 1, input_statement

        closest_statement = None
        closest_similarity = -1
        total_similarity = 0

        # For each option in the list of options. The statement itself is
        # remembered so that no second search by text is needed afterwards.
        for statement in statement_list:
            similarity = self.get_similarity(input_statement.text, statement.text)

            total_similarity += similarity

            if similarity > closest_similarity:
                closest_similarity = similarity
                closest_statement = statement

        # The total is zero when no statement had any similarity
        if total_similarity:
            confidence = closest_similarity / total_similarity
        else:
            confidence = 0

        return confidence, closest_statement
Esempio n. 16
0
class ClosestMeaningAdapter(BaseMatchAdapter):
    """
    Match adapter that selects the known statement whose words are
    closest in meaning to the words of the input statement, judged by
    the path similarity between synsets looked up through self.wordnet.
    """

    def __init__(self, **kwargs):
        super(ClosestMeaningAdapter, self).__init__(**kwargs)

        self.wordnet = Wordnet()
        self.tagger = POSTagger()
        self.stopwords = StopWordsManager()

    def get_tokens(self, text, exclude_stop_words=True):
        """
        Lower-case a string and split it into word tokens.

        If 'exclude_stop_words' is True, common English stop words
        such as ("is, the, a, ...") are removed and the remaining
        tokens are returned as a set, so duplicates and word order
        are lost. Otherwise the tokenizer output is returned unchanged.
        """
        tokens = self.tagger.tokenize(text.lower())

        # Remove any stop words from the string
        if exclude_stop_words:
            excluded_words = self.stopwords.words("english")

            tokens = set(tokens) - set(excluded_words)

        return tokens

    def get_similarity(self, string1, string2):
        """
        Calculate the similarity of two strings.

        Every token of the first string is paired with every token of
        the second (stop words excluded). For each pair in which both
        words have at least one synset, the path similarity between
        their first synsets is added to the total.

        Returns the accumulated total, which is 0 when no pair could
        be compared. The value is a sum, not a normalized score.
        """
        import itertools

        def first_synsets(tokens):
            # Look every word up once here rather than once per word
            # pair inside the product loop below
            found = []
            for token in tokens:
                synsets = self.wordnet.synsets(token)
                if synsets:
                    found.append(synsets[0])
            return found

        tokens1 = self.get_tokens(string1)
        tokens2 = self.get_tokens(string2)

        # Words without any synset cannot be compared and are dropped
        senses1 = first_synsets(tokens1)
        senses2 = first_synsets(tokens2)

        total_similarity = 0

        # Compare the first synset of each possible combination of words
        for sense1, sense2 in itertools.product(senses1, senses2):
            similarity = sense1.path_similarity(sense2)

            # Only truthy results contribute; a missing (falsy) value
            # means the pair adds nothing to the total
            if similarity:
                total_similarity += similarity

        return total_similarity

    def get(self, input_statement, statement_list=None):
        """
        Return the statement from the available statements that is
        closest in meaning to the input statement.

        'statement_list' is passed through get_available_statements()
        to obtain the candidates that are actually compared.

        Returns a (confidence, statement) tuple:

        * (0, random statement) when there are no candidates and a
          storage context is available.
        * (1, input_statement) when a candidate has exactly the same
          text as the input.
        * Otherwise the best scoring candidate, with a confidence equal
          to its similarity divided by the summed similarity of all
          candidates (0 when that sum is zero).

        Raises EmptyDatasetException when there are no candidates and
        no storage context to fall back on.
        """
        statement_list = self.get_available_statements(statement_list)

        if not statement_list:
            if self.has_storage_context:
                # Use a randomly picked statement
                return 0, self.context.storage.get_random()
            else:
                raise EmptyDatasetException

        # Get the text of each statement
        text_of_all_statements = [
            statement.text for statement in statement_list
        ]

        # Check if an exact match exists
        if input_statement.text in text_of_all_statements:
            return 1, input_statement

        closest_statement = None
        closest_similarity = -1
        total_similarity = 0

        # Keep a reference to the best statement itself, so the list does
        # not have to be searched again by text once the loop is done
        for statement, text in zip(statement_list, text_of_all_statements):
            similarity = self.get_similarity(input_statement.text, text)

            total_similarity += similarity

            # The strict comparison keeps the earliest statement on ties
            if similarity > closest_similarity:
                closest_similarity = similarity
                closest_statement = statement

        try:
            confidence = closest_similarity / total_similarity
        except ZeroDivisionError:
            # No candidate had any similarity to the input
            confidence = 0

        return confidence, closest_statement
Esempio n. 17
0
class ClosestMeaningAdapter(BaseMatchAdapter):
    """
    Match adapter that compares statements by word meaning: the known
    statement whose words have the highest summed path similarity to
    the words of the input (via synsets from self.wordnet) is selected.
    """

    def __init__(self, **kwargs):
        super(ClosestMeaningAdapter, self).__init__(**kwargs)

        self.wordnet = Wordnet()
        self.tagger = POSTagger()
        self.stopwords = StopWordsManager()

    def get_tokens(self, text, exclude_stop_words=True):
        """
        Takes a string, lower-cases it and splits it into words.

        Skips common English stop words such as ("is, the, a, ...")
        if 'exclude_stop_words' is True; in that case the result is a
        set of the remaining words. If it is False, the output of the
        tokenizer is returned as is.
        """
        lower = text.lower()
        tokens = self.tagger.tokenize(lower)

        # Remove any stop words from the string
        if exclude_stop_words:
            excluded_words = self.stopwords.words("english")

            tokens = set(tokens) - set(excluded_words)

        return tokens

    def get_similarity(self, string1, string2):
        """
        Calculate the similarity of two strings.

        This is the sum, over each possible pairing of a word from
        'string1' with a word from 'string2' (stop words excluded), of
        the path similarity between the first synset of each word.
        Pairs in which either word has no synset are ignored.

        Returns 0 when nothing could be compared. The result is an
        unnormalized total, so it grows with the number of word pairs.
        """
        import itertools

        tokens1 = self.get_tokens(string1)
        tokens2 = self.get_tokens(string2)

        # Resolve each word to its synsets a single time up front; doing
        # this inside the pair loop would repeat every lookup per pair
        synsets1 = [self.wordnet.synsets(token) for token in tokens1]
        synsets2 = [self.wordnet.synsets(token) for token in tokens2]

        # Keep only the first synset of the words that have one
        first1 = [synsets[0] for synsets in synsets1 if synsets]
        first2 = [synsets[0] for synsets in synsets2 if synsets]

        total_similarity = 0

        # Add up the similarity for each possible combination of words
        for left, right in itertools.product(first1, first2):
            similarity = left.path_similarity(right)

            # A falsy result contributes nothing to the total
            if similarity:
                total_similarity += similarity

        return total_similarity

    def get(self, input_statement, statement_list=None):
        """
        Takes an input statement and an optional list of statements.
        Returns the closest matching statement together with a
        confidence value, as a (confidence, statement) tuple.

        The candidates are whatever get_available_statements() yields
        for 'statement_list'. The outcome is one of:

        * (0, random statement) if there are no candidates but a
          storage context exists.
        * (1, input_statement) if a candidate's text equals the text
          of the input exactly.
        * (similarity of best candidate / sum of all similarities,
          best candidate) otherwise; the confidence is 0 when the sum
          of all similarities is zero.

        Raises EmptyDatasetException if there are no candidates and no
        storage context.
        """
        statement_list = self.get_available_statements(statement_list)

        if not statement_list:
            if self.has_storage_context:
                # Use a randomly picked statement
                return 0, self.context.storage.get_random()
            else:
                raise EmptyDatasetException

        # Get the text of each statement
        text_of_all_statements = [
            statement.text for statement in statement_list
        ]

        # Check if an exact match exists
        if input_statement.text in text_of_all_statements:
            return 1, input_statement

        closest_statement = None
        closest_similarity = -1
        total_similarity = 0

        # Remember the winning statement object directly instead of its
        # text, which avoids a second scan of the list afterwards
        for statement, text in zip(statement_list, text_of_all_statements):
            similarity = self.get_similarity(input_statement.text, text)

            total_similarity += similarity

            # Strictly greater: on equal scores the first statement wins
            if similarity > closest_similarity:
                closest_similarity = similarity
                closest_statement = statement

        try:
            confidence = closest_similarity / total_similarity
        except ZeroDivisionError:
            # Every candidate scored zero against the input
            confidence = 0

        return confidence, closest_statement