def __init__(self, **kwargs):
    """
    Set up the assistant's conversation state and its text utilities.

    All keyword arguments are handed to the parent initializer unchanged.
    """
    super(DeveloperAssistant, self).__init__(**kwargs)

    # Nothing has been requested from the assistant yet
    self.conversation = []
    self.stage = ""
    self.program_data = dict(name="", path="")

    # The data directory has to be assigned before the history of
    # previously run programs is loaded
    self.data_dir = ""
    self.data = self.read_program_file()

    # Text-processing helpers
    self.stopwords = StopWordsManager()
    self.tagger = POSTagger()
def synset_distance(statement, other_statement):
    """
    Score how similar two statements are.

    Each word of one statement is paired with each word of the other. For
    every pair in which both words have WordNet synsets, the highest path
    similarity found between their synsets is added to the score. The
    total over all pairs is returned.
    """
    from chatterbot.utils.pos_tagger import POSTagger
    from chatterbot.utils.stop_words import StopWordsManager
    from chatterbot.utils.word_net import Wordnet
    import itertools

    wordnet = Wordnet()
    tagger = POSTagger()
    stopwords = StopWordsManager()

    def get_tokens(text, exclude_stop_words=True):
        """
        Tokenize the lowercased text. When 'exclude_stop_words' is True
        the result is a set with the English stop words removed;
        otherwise the tokenizer's output is returned as is.
        """
        words = tagger.tokenize(text.lower())

        if not exclude_stop_words:
            return words

        return set(words) - set(stopwords.words('english'))

    def best_path_similarity(first_synsets, second_synsets):
        """
        Return the largest truthy path similarity between any synset of
        the first list and any synset of the second, or 0 if none exists.
        """
        best = 0

        for left, right in itertools.product(first_synsets, second_synsets):
            score = left.path_similarity(right)

            if score and score > best:
                best = score

        return best

    first_tokens = get_tokens(statement.text)
    second_tokens = get_tokens(other_statement.text)

    score_sum = 0

    # Visit every possible pairing of a word from each statement
    for first_word, second_word in itertools.product(first_tokens, second_tokens):
        first_synsets = wordnet.synsets(first_word)
        second_synsets = wordnet.synsets(second_word)

        # Pairs in which either word has no synsets contribute nothing
        if not (first_synsets and second_synsets):
            continue

        score_sum += best_path_similarity(first_synsets, second_synsets)

    return score_sum
def __init__(self, **kwargs):
    """
    Set up the assistant's conversation state and its text utilities.

    All keyword arguments are handed to the parent initializer unchanged.
    """
    super(DeveloperAssistant, self).__init__(**kwargs)

    # Nothing has been requested from the assistant yet
    self.conversation = []
    self.stage = ""
    self.program_data = dict(name="", path="")

    # The data directory has to be assigned before the history of
    # previously run programs is loaded
    self.data_dir = ""
    self.data = self.read_program_file()

    # Text-processing helpers
    self.stopwords = StopWordsManager()
    self.tagger = POSTagger()
class WeatherLogicAdapter(LogicAdapter):
    """
    A logic adapter that returns information regarding the weather and
    the forecast for a specific location. Currently, only basic
    information is returned, but additional features are planned in the
    future.
    """

    def __init__(self, **kwargs):
        """
        Create the adapter. The Forecast.io API key is read from the
        optional 'forecastio_api_key' keyword argument.
        """
        super(WeatherLogicAdapter, self).__init__(**kwargs)

        self.tagger = POSTagger()
        self.forecastio_api_key = kwargs.get("forecastio_api_key")

    def process(self, statement):
        """
        Returns the forecast for a location (using latitude and longitude).

        The result is a (confidence, Statement) tuple. The confidence is 1
        only when the input mentions "weather" and contains both a
        "latitude=" and a "longitude=" token with a value; otherwise it is
        0 and the statement is empty.
        """
        user_input = statement.text.lower()
        if "weather" not in user_input:
            return 0, Statement("")

        latitude = self.get_latitude(user_input)
        longitude = self.get_longitude(user_input)

        # Compare by value. The previous identity test (`is not ""`) only
        # worked when the interpreter happened to share the empty string.
        if latitude == "" or longitude == "":
            return 0, Statement("")

        # @TODO: Add more options for getting weather. This could include
        #   the current temperature, the current cloud cover, etc. This
        #   might require removing the forecastio library (which is
        #   probably a good idea).
        return 1, Statement("The forecast for tomorrow is: " + self.get_weather(latitude, longitude))

    def _extract_value(self, user_input, marker):
        """
        Returns the first token containing 'marker' with the marker
        removed, or an empty string if no token contains it.
        """
        for token in self.tagger.tokenize(user_input):
            if marker in token:
                return token.replace(marker, "")

        return ""

    def get_latitude(self, user_input):
        """
        Returns the latitude extracted from the input.
        """
        return self._extract_value(user_input, "latitude=")

    def get_longitude(self, user_input):
        """
        Returns the longitude extracted from the input.
        """
        return self._extract_value(user_input, "longitude=")

    def get_weather(self, latitude, longitude):
        """
        Returns the weather for a given latitude and longitude.
        """
        # @TODO: Find some way to suppress the warnings generated by this.
        forecast = forecastio.load_forecast(self.forecastio_api_key, latitude, longitude)

        return forecast.hourly().summary
def __init__(self, **kwargs):
    """
    Create the adapter, keeping a tokenizer and the Forecast.io API key.

    The key comes from the optional 'forecastio_api_key' keyword argument
    and is None when that argument is not supplied.
    """
    super(WeatherLogicAdapter, self).__init__(**kwargs)

    api_key = kwargs.get("forecastio_api_key", None)

    self.tagger = POSTagger()
    self.forecastio_api_key = api_key
class DeveloperAssistant(LogicAdapter):
    """
    The DeveloperAssistant logic adapter provides a set of tools that can
    help a developer program. Currently, only the following features are
    supported:
    1) Running Python programs
    """

    def __init__(self, **kwargs):
        super(DeveloperAssistant, self).__init__(**kwargs)

        # Initializing variables
        self.program_data = {"name": "", "path": ""}
        self.stage = ""
        self.data_dir = ""
        self.data = self.read_program_file()
        self.stopwords = StopWordsManager()
        self.tagger = POSTagger()
        self.conversation = []

    def process(self, statement):
        """
        Assuming the user inputted statement is a request for the developer
        assistant, parse the request and determine the appropriate action
        to be used.

        Returns a (confidence, Statement) tuple: a question asking for the
        program's path, a question offering a previously used path, a
        notice that the program is being run, or an empty statement with a
        confidence of 0.
        """
        confidence = 0

        # Getting the conversation (kept as is when no context is attached)
        try:
            self.conversation = self.context.conversation
        except AttributeError:
            pass

        # Getting the stage of interaction with the user (assuming a
        # command has not been executed)
        if self.stage != "name path":
            self.data = self.read_program_file()
            confidence = self.determine_stage_of_interaction(statement)

        if self.stage == "name":
            return confidence, Statement("What is the absolute path to " + self.program_data["name"] + "?")

        if "previously_used" in self.stage:
            return confidence, Statement("Would you like to use the path " + self.program_data["suggested_path"] + "?")

        if "name path" in self.stage:
            # Run program. The name and path come from chat input, so they
            # are passed as a single argument without going through a
            # shell. "~" and environment variables are expanded explicitly
            # because no shell is involved to do it.
            program = os.path.expandvars(os.path.expanduser(
                self.program_data["path"] + self.program_data["name"]
            ))
            subprocess.Popen(["python", program])
            return_statement = Statement("Running " + self.program_data["name"] + "...")

            self.update_data()

            # Resetting the state for the next request
            self.program_data = {"name": "", "path": ""}
            self.stage = ""

            # Return a response
            return confidence, return_statement

        return 0, Statement("")

    def read_program_file(self):
        """
        Read in the programs that have been run previously.

        Returns the parsed contents of "programs_run.json" in the data
        directory, or an empty history if the file does not exist or cannot
        be read as JSON.
        """
        path = self.data_dir + "programs_run.json"

        if os.path.exists(path):
            with open(path, 'r') as data_file:
                try:
                    return json.load(data_file)
                except (IOError, ValueError):
                    # An unreadable or malformed file is treated as empty
                    pass

        return {"programs_run": {}}

    def write_program_file(self):
        """
        Write the programs that have been previously run.
        """
        path = self.data_dir + "programs_run.json"

        with open(path, 'w') as data_file:
            json.dump(self.data, data_file, sort_keys=True, indent=4, ensure_ascii=False)

    def update_data(self):
        """
        Update the data for the programs run.
        """
        most_recent_data = {
            self.program_data["name"]: self.program_data["path"]
        }

        self.data["programs_run"].update(most_recent_data)
        self.write_program_file()

    def determine_stage_of_interaction(self, input_statement):
        """
        Determines at which point in the interaction with the user
        chatterbot is.

        Every earlier user input and then the new input are scanned for a
        program name and a path, updating 'self.stage' and
        'self.program_data' along the way. Returns 1 if a stage was
        reached and 0 otherwise.
        """
        confidence = 0

        # Earlier inputs first, the new input last
        # (assumes the first element of each conversation entry is the
        # user's input text -- TODO confirm)
        user_inputs = [exchange[0] for exchange in self.conversation]
        user_inputs.append(input_statement.text)

        # Parsing through the conversation looking for information
        for user_input in user_inputs:

            # Determining whether suggested path was asked
            if "previously_used" in self.stage:
                # @TODO: Replace the hardcoded "yes" with a call to a utility
                #   function that determines if any word similar to (in this
                #   case) "yes" is the text
                if input_statement.text.lower() == "yes":
                    self.stage = "name path"
                    self.program_data["path"] = self.program_data["suggested_path"]
                    return 1

            # Getting name of program (if available).
            # NOTE(review): the stored and extracted names used to be
            # compared by identity, which in practice treated every
            # extracted name as new. That behavior is kept: naming a
            # program always restarts the stage at "name".
            extracted_name = self.extract_name(user_input)
            if extracted_name != "":
                self.program_data["name"] = extracted_name
                self.stage = "name"

            # Getting path of program (if available); same note as above
            extracted_path = self.extract_path(user_input)
            if extracted_path != "":
                self.program_data["path"] = extracted_path
                self.stage += " path"

        if self.stage != "":
            confidence = 1

        # Use a suggested path if the program has been used before.
        # NOTE(review): this lookup used to be guarded by
        # `self.stage is not "name path"`, an identity test that was
        # effectively always true for the strings built above. The lookup
        # is kept unconditional so the conversation flow is unchanged.
        # Confirm whether a path supplied by the user was meant to skip
        # the suggestion.
        programs_run = self.data["programs_run"]
        program_name = self.program_data["name"]
        if program_name in programs_run:
            self.stage += " previously_used"
            self.program_data["suggested_path"] = programs_run[program_name]

        return confidence

    def extract_name(self, user_input):
        """
        Return the program's name if it is included somewhere in the
        conversation.

        The name is the token following the first token that contains
        "run", with any leading directories removed. An empty string is
        returned when there is no such token.
        """
        name = ""

        # The following assumes that the user_input is simply: "run program_x"
        # @TODO: Change this to a more advanced parsing of the user_input. It
        #   requires additional functions within the chatterbot.utils module
        #   and some more thought on how to implement a better system
        # @TODO: Implement more ways a user can communicate the name for
        #   a program
        has_asked_run = False
        for token in self.tagger.tokenize(user_input):
            if has_asked_run:
                # Keep only what follows the last "/" (the whole token if
                # it contains none)
                name = token.rsplit("/", 1)[-1]
                break

            if "run" in token:
                has_asked_run = True

        return name

    def extract_path(self, user_input):
        """
        Return the program's path if it is included somewhere in the
        conversation.

        The path is taken from the first token containing "/" and always
        ends with "/". An empty string is returned when no token contains
        a "/".
        """
        path = ""

        # Identifies the path if one is in user_input
        # @TODO: Rewrite to remove false positives (which can be created
        #   easily with the current implementation)
        # @TODO: Implement more ways a user can communicate the path for
        #   a program
        for word in self.tagger.tokenize(user_input):
            if "/" in word:
                # Drop whatever follows the last "/" (nothing, if the word
                # already ends with one)
                path = word.rsplit("/", 1)[0] + "/"
                break

        return path
class DeveloperAssistant(LogicAdapter):
    """
    The DeveloperAssistant logic adapter provides a set of tools that can
    help a developer program. Currently, only the following features are
    supported:
    1) Running Python programs
    """

    def __init__(self, **kwargs):
        super(DeveloperAssistant, self).__init__(**kwargs)

        # Initializing variables
        self.program_data = {"name": "", "path": ""}
        self.stage = ""
        self.data_dir = ""
        self.data = self.read_program_file()
        self.stopwords = StopWordsManager()
        self.tagger = POSTagger()
        self.conversation = []

    def process(self, statement):
        """
        Assuming the user inputted statement is a request for the developer
        assistant, parse the request and determine the appropriate action
        to be used.

        Returns a (confidence, Statement) tuple: a question asking for the
        program's path, a question offering a previously used path, a
        notice that the program is being run, or an empty statement with a
        confidence of 0.
        """
        confidence = 0

        # Getting the conversation (kept as is when no context is attached)
        try:
            self.conversation = self.context.conversation
        except AttributeError:
            pass

        # Getting the stage of interaction with the user (assuming a
        # command has not been executed)
        if self.stage != "name path":
            self.data = self.read_program_file()
            confidence = self.determine_stage_of_interaction(statement)

        if self.stage == "name":
            return confidence, Statement("What is the absolute path to " + self.program_data["name"] + "?")

        if "previously_used" in self.stage:
            return confidence, Statement("Would you like to use the path " + self.program_data["suggested_path"] + "?")

        if "name path" in self.stage:
            # Run program. The name and path come from chat input, so they
            # are passed as a single argument without going through a
            # shell. "~" and environment variables are expanded explicitly
            # because no shell is involved to do it.
            program = os.path.expandvars(os.path.expanduser(
                self.program_data["path"] + self.program_data["name"]
            ))
            subprocess.Popen(["python", program])
            return_statement = Statement("Running " + self.program_data["name"] + "...")

            self.update_data()

            # Resetting the state for the next request
            self.program_data = {"name": "", "path": ""}
            self.stage = ""

            # Return a response
            return confidence, return_statement

        return 0, Statement("")

    def read_program_file(self):
        """
        Read in the programs that have been run previously.

        Returns the parsed contents of "programs_run.json" in the data
        directory, or an empty history if the file does not exist or cannot
        be read as JSON.
        """
        path = self.data_dir + "programs_run.json"

        if os.path.exists(path):
            with open(path, 'r') as data_file:
                try:
                    return json.load(data_file)
                except (IOError, ValueError):
                    # An unreadable or malformed file is treated as empty
                    pass

        return {"programs_run": {}}

    def write_program_file(self):
        """
        Write the programs that have been previously run.
        """
        path = self.data_dir + "programs_run.json"

        with open(path, 'w') as data_file:
            json.dump(self.data, data_file, sort_keys=True, indent=4, ensure_ascii=False)

    def update_data(self):
        """
        Update the data for the programs run.
        """
        most_recent_data = {
            self.program_data["name"]: self.program_data["path"]
        }

        self.data["programs_run"].update(most_recent_data)
        self.write_program_file()

    def determine_stage_of_interaction(self, input_statement):
        """
        Determines at which point in the interaction with the user
        chatterbot is.

        Every earlier user input and then the new input are scanned for a
        program name and a path, updating 'self.stage' and
        'self.program_data' along the way. Returns 1 if a stage was
        reached and 0 otherwise.
        """
        confidence = 0

        # Earlier inputs first, the new input last
        # (assumes the first element of each conversation entry is the
        # user's input text -- TODO confirm)
        user_inputs = [exchange[0] for exchange in self.conversation]
        user_inputs.append(input_statement.text)

        # Parsing through the conversation looking for information
        for user_input in user_inputs:

            # Determining whether suggested path was asked
            if "previously_used" in self.stage:
                # @TODO: Replace the hardcoded "yes" with a call to a utility
                #   function that determines if any word similar to (in this
                #   case) "yes" is the text
                if input_statement.text.lower() == "yes":
                    self.stage = "name path"
                    self.program_data["path"] = self.program_data["suggested_path"]
                    return 1

            # Getting name of program (if available).
            # NOTE(review): the stored and extracted names used to be
            # compared by identity, which in practice treated every
            # extracted name as new. That behavior is kept: naming a
            # program always restarts the stage at "name".
            extracted_name = self.extract_name(user_input)
            if extracted_name != "":
                self.program_data["name"] = extracted_name
                self.stage = "name"

            # Getting path of program (if available); same note as above
            extracted_path = self.extract_path(user_input)
            if extracted_path != "":
                self.program_data["path"] = extracted_path
                self.stage += " path"

        if self.stage != "":
            confidence = 1

        # Use a suggested path if the program has been used before.
        # NOTE(review): this lookup used to be guarded by
        # `self.stage is not "name path"`, an identity test that was
        # effectively always true for the strings built above. The lookup
        # is kept unconditional so the conversation flow is unchanged.
        # Confirm whether a path supplied by the user was meant to skip
        # the suggestion.
        programs_run = self.data["programs_run"]
        program_name = self.program_data["name"]
        if program_name in programs_run:
            self.stage += " previously_used"
            self.program_data["suggested_path"] = programs_run[program_name]

        return confidence

    def extract_name(self, user_input):
        """
        Return the program's name if it is included somewhere in the
        conversation.

        The name is the token following the first token that contains
        "run", with any leading directories removed. An empty string is
        returned when there is no such token.
        """
        name = ""

        # The following assumes that the user_input is simply: "run program_x"
        # @TODO: Change this to a more advanced parsing of the user_input. It
        #   requires additional functions within the chatterbot.utils module
        #   and some more thought on how to implement a better system
        # @TODO: Implement more ways a user can communicate the name for
        #   a program
        has_asked_run = False
        for token in self.tagger.tokenize(user_input):
            if has_asked_run:
                # Keep only what follows the last "/" (the whole token if
                # it contains none)
                name = token.rsplit("/", 1)[-1]
                break

            if "run" in token:
                has_asked_run = True

        return name

    def extract_path(self, user_input):
        """
        Return the program's path if it is included somewhere in the
        conversation.

        The path is taken from the first token containing "/" and always
        ends with "/". An empty string is returned when no token contains
        a "/".
        """
        path = ""

        # Identifies the path if one is in user_input
        # @TODO: Rewrite to remove false positives (which can be created
        #   easily with the current implementation)
        # @TODO: Implement more ways a user can communicate the path for
        #   a program
        for word in self.tagger.tokenize(user_input):
            if "/" in word:
                # Drop whatever follows the last "/" (nothing, if the word
                # already ends with one)
                path = word.rsplit("/", 1)[0] + "/"
                break

        return path
def __init__(self, **kwargs):
    """
    Create the adapter along with the WordNet interface, tokenizer and
    stop word list it uses to compare statements.

    All keyword arguments are handed to the parent initializer unchanged.
    """
    super(ClosestMeaningAdapter, self).__init__(**kwargs)

    # Built in this order: WordNet, tokenizer, stop words
    helpers = (Wordnet(), POSTagger(), StopWordsManager())
    self.wordnet, self.tagger, self.stopwords = helpers
def test_pos_tagger(self):
    """
    Tokenizing a short question yields its words in their original order.
    """
    expected_tokens = ['what', 'time', 'is', 'it']

    actual_tokens = POSTagger().tokenize("what time is it")

    self.assertEqual(actual_tokens, expected_tokens)
def test_pos_tagger_tokenize(self):
    """
    Tokenizing a short question yields its words in their original order.
    """
    expected_tokens = ['what', 'time', 'is', 'it']

    actual_tokens = POSTagger().tokenize("what time is it")

    self.assertEqual(actual_tokens, expected_tokens)
class ClosestMeaningAdapter(BaseMatchAdapter):
    """
    This adapter selects a response by comparing the tokenized form of the
    input statement's text, with the tokenized form of possible matching
    statements. For each possible match, the sum of the Cartesian product
    of the path similarity of each statement is compared. This process
    simulates an evaluation of the closeness of synonyms. The known
    statement with the greatest path similarity is then returned.
    """

    def __init__(self, **kwargs):
        super(ClosestMeaningAdapter, self).__init__(**kwargs)

        self.wordnet = Wordnet()
        self.tagger = POSTagger()
        self.stopwords = StopWordsManager()

    def get_tokens(self, text, exclude_stop_words=True):
        """
        Takes a string and converts it to a collection of each word.
        Skips common stop words such as ("is, the, a, ...")
        if 'exclude_stop_words' is True, in which case a set is returned.
        """
        lower = text.lower()
        tokens = self.tagger.tokenize(lower)

        # Remove any stop words from the string
        if exclude_stop_words:
            excluded_words = self.stopwords.words("english")
            tokens = set(tokens) - set(excluded_words)

        return tokens

    def get_similarity(self, string1, string2):
        """
        Calculate the similarity of two statements.
        This is based on the total similarity between each word in each
        sentence: for every pair of words, the highest path similarity
        between any of their synsets is added to the total.
        """
        import itertools

        tokens1 = self.get_tokens(string1)
        tokens2 = self.get_tokens(string2)

        total_similarity = 0

        # Get the highest matching value for each possible combination of words
        for word1, word2 in itertools.product(tokens1, tokens2):

            synset1 = self.wordnet.synsets(word1)
            synset2 = self.wordnet.synsets(word2)

            if synset1 and synset2:

                max_similarity = 0

                # Get the highest similarity for each combination of synsets
                for first, second in itertools.product(synset1, synset2):
                    similarity = first.path_similarity(second)

                    # path_similarity may yield no value for a pair
                    if similarity and (similarity > max_similarity):
                        max_similarity = similarity

                # Add the most similar path value to the total
                total_similarity += max_similarity

        return total_similarity

    def get(self, input_statement):
        """
        Takes a statement and returns a (confidence, statement) tuple
        holding the closest matching statement known to the storage.

        An exact text match returns the input statement with a confidence
        of 1. Otherwise the confidence is the best similarity divided by
        the sum of all similarities, or 0 when that sum is zero.
        """
        statement_list = self.context.storage.get_response_statements()

        if not statement_list:
            if self.has_storage_context:
                # Use a randomly picked statement
                return 0, self.context.storage.get_random()
            else:
                raise self.EmptyDatasetException()

        # Get the text of each statement
        text_of_all_statements = [statement.text for statement in statement_list]

        # Check if an exact match exists
        if input_statement.text in text_of_all_statements:
            return 1, input_statement

        closest_statement = None
        closest_similarity = -1
        total_similarity = 0

        # For each option in the list of options
        for statement in text_of_all_statements:
            similarity = self.get_similarity(input_statement.text, statement)

            total_similarity += similarity

            if similarity > closest_similarity:
                closest_similarity = similarity
                closest_statement = statement

        # Nothing was similar at all when the total is zero; only that
        # case is handled so that other errors are not hidden
        try:
            confidence = closest_similarity / total_similarity
        except ZeroDivisionError:
            confidence = 0

        return confidence, next(
            (s for s in statement_list if s.text == closest_statement), None
        )
class ClosestMeaningAdapter(BaseMatchAdapter):
    """
    This adapter selects a response by comparing the tokenized form of the
    input statement's text, with the tokenized form of possible matching
    statements. For each possible match, the sum of the Cartesian product
    of the path similarity of each statement is compared. This process
    simulates an evaluation of the closeness of synonyms. The known
    statement with the greatest path similarity is then returned.
    """

    def __init__(self, **kwargs):
        super(ClosestMeaningAdapter, self).__init__(**kwargs)

        self.wordnet = Wordnet()
        self.tagger = POSTagger()
        self.stopwords = StopWordsManager()

    def get_tokens(self, text, exclude_stop_words=True):
        """
        Takes a string and converts it to a collection of each word.
        Skips common stop words such as ("is, the, a, ...")
        if 'exclude_stop_words' is True, in which case a set is returned.
        """
        lower = text.lower()
        tokens = self.tagger.tokenize(lower)

        # Remove any stop words from the string
        if exclude_stop_words:
            excluded_words = self.stopwords.words("english")
            tokens = set(tokens) - set(excluded_words)

        return tokens

    def get_similarity(self, string1, string2):
        """
        Calculate the similarity of two statements.
        This is based on the total similarity between each word in each
        sentence: for every pair of words, the highest path similarity
        between any of their synsets is added to the total.
        """
        import itertools

        tokens1 = self.get_tokens(string1)
        tokens2 = self.get_tokens(string2)

        total_similarity = 0

        # Get the highest matching value for each possible combination of words
        for word1, word2 in itertools.product(tokens1, tokens2):

            synset1 = self.wordnet.synsets(word1)
            synset2 = self.wordnet.synsets(word2)

            if synset1 and synset2:

                max_similarity = 0

                # Get the highest similarity for each combination of synsets
                for first, second in itertools.product(synset1, synset2):
                    similarity = first.path_similarity(second)

                    # path_similarity may yield no value for a pair
                    if similarity and (similarity > max_similarity):
                        max_similarity = similarity

                # Add the most similar path value to the total
                total_similarity += max_similarity

        return total_similarity

    def get(self, input_statement):
        """
        Takes a statement and returns a (confidence, statement) tuple
        holding the closest matching statement known to the storage.

        An exact text match returns the input statement with a confidence
        of 1. Otherwise the confidence is the best similarity divided by
        the sum of all similarities, or 0 when that sum is zero.
        """
        statement_list = self.context.storage.get_response_statements()

        if not statement_list:
            if self.has_storage_context:
                # Use a randomly picked statement
                return 0, self.context.storage.get_random()
            else:
                raise self.EmptyDatasetException()

        # Get the text of each statement
        text_of_all_statements = [statement.text for statement in statement_list]

        # Check if an exact match exists
        if input_statement.text in text_of_all_statements:
            return 1, input_statement

        closest_statement = None
        closest_similarity = -1
        total_similarity = 0

        # For each option in the list of options
        for statement in text_of_all_statements:
            similarity = self.get_similarity(input_statement.text, statement)

            total_similarity += similarity

            if similarity > closest_similarity:
                closest_similarity = similarity
                closest_statement = statement

        # Nothing was similar at all when the total is zero; only that
        # case is handled so that other errors are not hidden
        try:
            confidence = closest_similarity / total_similarity
        except ZeroDivisionError:
            confidence = 0

        return confidence, next(
            (s for s in statement_list if s.text == closest_statement), None
        )
class ClosestMeaningAdapter(BaseMatchAdapter):
    """
    Selects the known statement whose words are closest in meaning to the
    words of the input statement, using the WordNet path similarity
    between the first synset of each pair of words.
    """

    def __init__(self, **kwargs):
        super(ClosestMeaningAdapter, self).__init__(**kwargs)

        self.wordnet = Wordnet()
        self.tagger = POSTagger()
        self.stopwords = StopWordsManager()

    def get_tokens(self, text, exclude_stop_words=True):
        """
        Takes a string and converts it to a collection of each word.
        Skips common stop words such as ("is, the, a, ...")
        if 'exclude_stop_words' is True, in which case a set is returned.
        """
        lower = text.lower()
        tokens = self.tagger.tokenize(lower)

        # Remove any stop words from the string
        if exclude_stop_words:
            excluded_words = self.stopwords.words("english")
            tokens = set(tokens) - set(excluded_words)

        return tokens

    def get_similarity(self, string1, string2):
        """
        Calculate the similarity of two statements.
        This is based on the total similarity between each word in each
        sentence, comparing only the first synset found for each word.
        """
        import itertools

        tokens1 = self.get_tokens(string1)
        tokens2 = self.get_tokens(string2)

        total_similarity = 0

        # Add up the similarity of each possible combination of words
        for word1, word2 in itertools.product(tokens1, tokens2):

            synset1 = self.wordnet.synsets(word1)
            synset2 = self.wordnet.synsets(word2)

            if synset1 and synset2:

                # Compare the first synset in each list of synsets
                similarity = synset1[0].path_similarity(synset2[0])

                # path_similarity may yield no value for a pair
                if similarity:
                    total_similarity += similarity

        return total_similarity

    def get(self, input_statement, statement_list=None):
        """
        Takes a statement and an optional list of statements.
        Returns a (confidence, statement) tuple holding the closest
        matching statement from the available statements.

        An exact text match returns the input statement with a confidence
        of 1. Otherwise the confidence is the best similarity divided by
        the sum of all similarities, or 0 when that sum is zero.
        """
        statement_list = self.get_available_statements(statement_list)

        if not statement_list:
            if self.has_storage_context:
                # Use a randomly picked statement
                return 0, self.context.storage.get_random()
            else:
                raise EmptyDatasetException

        # Get the text of each statement
        text_of_all_statements = [statement.text for statement in statement_list]

        # Check if an exact match exists
        if input_statement.text in text_of_all_statements:
            return 1, input_statement

        closest_statement = None
        closest_similarity = -1
        total_similarity = 0

        # For each option in the list of options
        for statement in text_of_all_statements:
            similarity = self.get_similarity(input_statement.text, statement)

            total_similarity += similarity

            if similarity > closest_similarity:
                closest_similarity = similarity
                closest_statement = statement

        # Nothing was similar at all when the total is zero; only that
        # case is handled so that other errors are not hidden
        try:
            confidence = closest_similarity / total_similarity
        except ZeroDivisionError:
            confidence = 0

        return confidence, next(
            (s for s in statement_list if s.text == closest_statement), None
        )
class ClosestMeaningAdapter(BaseMatchAdapter):
    """
    Selects the known statement whose words are closest in meaning to the
    words of the input statement, using the WordNet path similarity
    between the first synset of each pair of words.
    """

    def __init__(self, **kwargs):
        super(ClosestMeaningAdapter, self).__init__(**kwargs)

        self.wordnet = Wordnet()
        self.tagger = POSTagger()
        self.stopwords = StopWordsManager()

    def get_tokens(self, text, exclude_stop_words=True):
        """
        Takes a string and converts it to a collection of each word.
        Skips common stop words such as ("is, the, a, ...")
        if 'exclude_stop_words' is True, in which case a set is returned.
        """
        lower = text.lower()
        tokens = self.tagger.tokenize(lower)

        # Remove any stop words from the string
        if exclude_stop_words:
            excluded_words = self.stopwords.words("english")
            tokens = set(tokens) - set(excluded_words)

        return tokens

    def get_similarity(self, string1, string2):
        """
        Calculate the similarity of two statements.
        This is based on the total similarity between each word in each
        sentence, comparing only the first synset found for each word.
        """
        import itertools

        tokens1 = self.get_tokens(string1)
        tokens2 = self.get_tokens(string2)

        total_similarity = 0

        # Add up the similarity of each possible combination of words
        for word1, word2 in itertools.product(tokens1, tokens2):

            synset1 = self.wordnet.synsets(word1)
            synset2 = self.wordnet.synsets(word2)

            if synset1 and synset2:

                # Compare the first synset in each list of synsets
                similarity = synset1[0].path_similarity(synset2[0])

                # path_similarity may yield no value for a pair
                if similarity:
                    total_similarity += similarity

        return total_similarity

    def get(self, input_statement, statement_list=None):
        """
        Takes a statement and an optional list of statements.
        Returns a (confidence, statement) tuple holding the closest
        matching statement from the available statements.

        An exact text match returns the input statement with a confidence
        of 1. Otherwise the confidence is the best similarity divided by
        the sum of all similarities, or 0 when that sum is zero.
        """
        statement_list = self.get_available_statements(statement_list)

        if not statement_list:
            if self.has_storage_context:
                # Use a randomly picked statement
                return 0, self.context.storage.get_random()
            else:
                raise EmptyDatasetException

        # Get the text of each statement
        text_of_all_statements = [statement.text for statement in statement_list]

        # Check if an exact match exists
        if input_statement.text in text_of_all_statements:
            return 1, input_statement

        closest_statement = None
        closest_similarity = -1
        total_similarity = 0

        # For each option in the list of options
        for statement in text_of_all_statements:
            similarity = self.get_similarity(input_statement.text, statement)

            total_similarity += similarity

            if similarity > closest_similarity:
                closest_similarity = similarity
                closest_statement = statement

        # Nothing was similar at all when the total is zero; only that
        # case is handled so that other errors are not hidden
        try:
            confidence = closest_similarity / total_similarity
        except ZeroDivisionError:
            confidence = 0

        return confidence, next(
            (s for s in statement_list if s.text == closest_statement), None
        )