def train(self, conversation):
    """
    Train the chat bot based on the provided list of statements
    that represents a single conversation.
    """
    previous_statement_text = None
    previous_statement_search_text = ''

    statements_to_create = []

    for conversation_count, text in enumerate(conversation):
        if self.show_training_progress:
            utils.print_progress_bar(
                'List Trainer',
                conversation_count + 1, len(conversation)
            )

        statement_search_text = self.chatbot.storage.tagger.get_text_index_string(text)

        statement = self.get_preprocessed_statement(
            Statement(
                text=text,
                search_text=statement_search_text,
                in_response_to=previous_statement_text,
                search_in_response_to=previous_statement_search_text,
                conversation='training'
            )
        )

        previous_statement_text = statement.text
        previous_statement_search_text = statement_search_text

        statements_to_create.append(statement)

    self.chatbot.storage.create_many(statements_to_create)

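# Hedged usage sketch for the list-based variants above. It assumes the
# upstream ChatterBot API (ChatBot and ListTrainer); the snippets here may
# come from forks whose class names differ, so treat the names as assumptions.
# Each string in the list is stored as a response to the string before it.
from chatterbot import ChatBot
from chatterbot.trainers import ListTrainer

bot = ChatBot('ListExample')
trainer = ListTrainer(bot)
trainer.train([
    'Hi there!',
    'Hello.',
    'How are you doing?',
    'I am doing well, thanks.',
])
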
def train(self, conversation):
    """
    Train the chat bot based on the provided list of statements
    that represents a single conversation.
    """
    previous_statement_text = None

    for conversation_count, text in enumerate(conversation):
        if self.show_training_progress:
            utils.print_progress_bar(
                'List Trainer',
                conversation_count + 1, len(conversation)
            )

        statement = self.get_preprocessed_statement(
            Statement(
                text=text,
                in_response_to=previous_statement_text,
                conversation='training'
            )
        )

        previous_statement_text = statement.text

        self.chatbot.storage.create(
            text=statement.text,
            in_response_to=statement.in_response_to,
            conversation=statement.conversation,
            tags=statement.tags
        )

def train(self, conversation):
    """
    Train the chat bot based on the provided list of statements
    that represents a single conversation.
    """
    previous_statement_text = None
    previous_statement_search_text = ''

    statements_to_create = []

    for conversation_count, text in enumerate(conversation):
        if self.show_training_progress:
            utils.print_progress_bar(
                'List Trainer',
                conversation_count + 1,
                len(conversation)
            )

        statement_search_text = self.stemmer.get_bigram_pair_string(text)

        statement = self.get_preprocessed_statement(
            Statement(
                text=text,
                search_text=statement_search_text,
                in_response_to=previous_statement_text,
                search_in_response_to=previous_statement_search_text,
                conversation='training'
            )
        )

        previous_statement_text = statement.text
        previous_statement_search_text = statement_search_text

        statements_to_create.append(statement)

    self.chatbot.storage.create_many(statements_to_create)

def train(self, *corpus_paths):
    # corpus_paths is a list of corpus paths, or a single corpus path.
    for corpus in corpus_paths:
        print(corpus)

        # Read the corpus file and strip whitespace from every line.
        with open(corpus) as file:
            l_line = [line.strip() for line in file.readlines()]
        N = len(l_line)

        previous_statement_text = None

        for conversation_count, text in enumerate(l_line):
            print_progress_bar("Subtitle Trainer", conversation_count + 1, N)

            statement = self.get_or_create(text)

            if previous_statement_text:
                statement.add_response(Response(previous_statement_text))

            previous_statement_text = statement.text
            self.storage.update(statement)

def train(self, NN=0, N=None, *corpus_paths):
    # corpus_paths is a list of corpus paths, or a single corpus path.
    # Each corpus is read from line NN to line N; by default the whole
    # file is read.
    print('N=', N)
    for corpus in corpus_paths:
        print(corpus)

        with open(corpus) as file:
            if N:
                # Skip the first NN lines, then read up to line N.
                l_line = [
                    line.strip()
                    for i, line in enumerate(file)
                    if NN <= i < N
                ]
            else:
                l_line = [line.strip() for line in file.readlines()]
                N = len(l_line)

        previous_statement_text = None

        for conversation_count, text in enumerate(l_line):
            print_progress_bar("Subtitle Trainer", conversation_count + 1, N - NN)

            statement = self.get_or_create(text)

            if previous_statement_text:
                statement.add_response(Response(previous_statement_text))

            previous_statement_text = statement.text
            self.storage.update(statement)

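# Hedged note on the signature of the line-range variant above: because NN and
# N are declared before *corpus_paths, file paths must be passed after both of
# them, otherwise the first path binds to NN and no files are iterated. The
# stub below only demonstrates how Python binds the arguments; it is not the
# trainer itself, and the file name is illustrative.
def _train_signature_demo(NN=0, N=None, *corpus_paths):
    return NN, N, corpus_paths

print(_train_signature_demo('subtitles_en.txt'))            # ('subtitles_en.txt', None, ())
print(_train_signature_demo(0, None, 'subtitles_en.txt'))   # (0, None, ('subtitles_en.txt',))
print(_train_signature_demo(100, 600, 'subtitles_en.txt'))  # restricts training to the requested line range
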
def train(self, *corpus_paths):
    # Allow a list of corpora to be passed instead of arguments
    if len(corpus_paths) == 1:
        if isinstance(corpus_paths[0], list):
            corpus_paths = corpus_paths[0]

    # Train the chat bot with each statement and response pair
    for corpus_path in corpus_paths:
        corpora = self.corpus.load_corpus(corpus_path)
        corpus_files = self.corpus.list_corpus_files(corpus_path)

        for corpus_count, corpus in enumerate(corpora):
            for conversation_count, conversation in enumerate(corpus):
                if self.show_training_progress:
                    utils.print_progress_bar(
                        str(os.path.basename(corpus_files[corpus_count])) + ' Training',
                        conversation_count + 1,
                        len(corpus)
                    )

                previous_statement_text = None

                for text in conversation:
                    statement = self.get_or_create(text)
                    statement.add_tags(corpus.categories)

                    if previous_statement_text:
                        statement.add_response(
                            Response(previous_statement_text)
                        )

                    previous_statement_text = statement.text
                    self.chatbot.storage.update(statement)

def train(self, *corpus_paths):
    from chatterbot.corpus import load_corpus, list_corpus_files

    data_file_paths = []

    # Get the paths to each file the bot will be trained with
    for corpus_path in corpus_paths:
        data_file_paths.extend(list_corpus_files(corpus_path))

    for corpus, categories, file_path in load_corpus(*data_file_paths):
        statements_to_create = []

        # Train the chat bot with each statement and response pair
        for conversation_count, conversation in enumerate(corpus):
            if self.show_training_progress:
                utils.print_progress_bar(
                    'Training ' + str(os.path.basename(file_path)),
                    conversation_count + 1,
                    len(corpus)
                )

            previous_statement_text = None
            previous_statement_search_text = ''

            for text in conversation:
                statement_search_text = self.stemmer.stem(text)

                _statement = Statement(
                    text=text,
                    search_text=statement_search_text,
                    in_response_to=previous_statement_text,
                    search_in_response_to=previous_statement_search_text,
                    conversation='training'
                )

                _statement.add_tags(*categories)

                statement = self.get_preprocessed_statement(_statement)

                previous_statement_text = statement.text
                previous_statement_search_text = statement_search_text

                statements_to_create.append({
                    'text': statement.text,
                    'in_response_to': statement.in_response_to,
                    'conversation': statement.conversation,
                    'tags': statement.tags
                })

        self.chatbot.storage.create_many(statements_to_create)

def train(self, *corpus_paths):
    from chatterbot.corpus import load_corpus, list_corpus_files

    data_file_paths = []

    # Get the paths to each file the bot will be trained with
    for corpus_path in corpus_paths:
        data_file_paths.extend(list_corpus_files(corpus_path))

    for corpus, categories, file_path in load_corpus(*data_file_paths):
        statements_to_create = []

        # Train the chat bot with each statement and response pair
        for conversation_count, conversation in enumerate(corpus):
            if self.show_training_progress:
                utils.print_progress_bar(
                    'Training ' + str(os.path.basename(file_path)),
                    conversation_count + 1,
                    len(corpus)
                )

            previous_statement_text = None
            previous_statement_search_text = ''

            for text in conversation:
                statement_search_text = self.stemmer.get_bigram_pair_string(text)

                statement = Statement(
                    text=text,
                    search_text=statement_search_text,
                    in_response_to=previous_statement_text,
                    search_in_response_to=previous_statement_search_text,
                    conversation='training'
                )

                statement.add_tags(*categories)

                statement = self.get_preprocessed_statement(statement)

                previous_statement_text = statement.text
                previous_statement_search_text = statement_search_text

                statements_to_create.append(statement)

        self.chatbot.storage.create_many(statements_to_create)

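# Hedged usage sketch for the corpus-based variants above. It assumes the
# upstream ChatterBot API (ChatBot, ChatterBotCorpusTrainer) and the bundled
# English corpus data; the variants shown here may come from forks with
# slightly different class names, so treat these names as assumptions.
from chatterbot import ChatBot
from chatterbot.trainers import ChatterBotCorpusTrainer

bot = ChatBot('CorpusExample')
trainer = ChatterBotCorpusTrainer(bot)

# A dotted corpus path resolves to the YAML files shipped with the
# chatterbot-corpus package; a filesystem path to a YAML file also works.
trainer.train('chatterbot.corpus.english.greetings')
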
def train(self, data, mimic_name):
    """
    Train the chat bot on an exported Facebook conversation, storing the
    messages sent by ``mimic_name`` as responses to the preceding message.
    """
    if 'participants' not in data:
        return

    m = self._merge_messages(data)
    print('Total messages: ' + str(len(m)))

    def clean(txt):
        return txt.lower().replace('\'', '')

    statements = []

    for i in range(1, len(m)):
        if self.show_training_progress:
            utils.print_progress_bar('Fb Trainer', i + 1, len(m))

        if m[i]['sender_name'] == mimic_name:
            # Truncate both sides of the pair to keep statements short.
            text = clean(m[i]['content'])[:75]
            prev_text = clean(m[i - 1]['content'])[-75:]

            if len(text) == 0 or len(prev_text) == 0:
                continue

            search_text = self.chatbot.storage.tagger.get_bigram_pair_string(text)
            prev_search_text = self.chatbot.storage.tagger.get_bigram_pair_string(prev_text)

            statement = Statement(
                text=text,
                search_text=search_text,
                in_response_to=prev_text,
                search_in_response_to=prev_search_text,
                conversation='training'
            )

            statements.append(statement)

    if len(statements):
        self.chatbot.storage.create_many(statements)

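# Hedged sketch of the input the Facebook-style trainer above appears to
# expect: a dict decoded from a Messenger JSON export, with a 'participants'
# key and messages carrying 'sender_name' and 'content'. The exact structure
# consumed by _merge_messages() is not shown in the snippet, so the 'messages'
# key and this layout are assumptions based on the loop body.
example_data = {
    'participants': [{'name': 'Alice'}, {'name': 'Bob'}],
    'messages': [
        {'sender_name': 'Alice', 'content': 'Are you free tomorrow?'},
        {'sender_name': 'Bob', 'content': 'Yes, after lunch works for me.'},
    ],
}
# trainer.train(example_data, mimic_name='Bob')  # Bob's lines become responses
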
def train(self):
    # Note: enumerating a string yields one character at a time, so this
    # trains on single characters of the placeholder text below.
    conversation = "hello"
    previous_statement_text = None

    results = self.data.get_all_generic_ingredient()
    print(results)

    for conversation_count, text in enumerate(conversation):
        if self.show_training_progress:
            utils.print_progress_bar(
                'Recipe Trainer',
                conversation_count + 1, len(conversation)
            )

        statement = self.get_or_create(text)

        if previous_statement_text:
            statement.add_response(Response(previous_statement_text))

        previous_statement_text = statement.text
        self.chatbot.storage.update(statement)

def train(self, *corpus_paths):
    # Allow a list of corpora to be passed instead of arguments
    if len(corpus_paths) == 1:
        if isinstance(corpus_paths[0], list):
            corpus_paths = corpus_paths[0]

    # Train the chat bot with each statement and response pair
    for corpus_path in corpus_paths:
        corpora = self.corpus.load_corpus(corpus_path)
        corpus_files = self.corpus.list_corpus_files(corpus_path)

        for corpus_count, corpus in enumerate(corpora):
            for conversation_count, conversation in enumerate(corpus):
                print_progress_bar(
                    str(os.path.basename(corpus_files[corpus_count])) + " Training",
                    conversation_count + 1,
                    len(corpus)
                )

                previous_statement_line = []
                statement_line = []

                for line in conversation:
                    for text in line:
                        statement = self.get_or_create(text)
                        statement.add_tags(corpus.categories)
                        statement_line.append(statement.text)

                        # Link every statement in this line to every
                        # statement in the previous line.
                        if previous_statement_line != []:
                            for previous_statement_text in previous_statement_line:
                                statement.add_response(
                                    Response(previous_statement_text)
                                )

                        self.storage.update(statement)

                    previous_statement_line = statement_line
                    statement_line = []

def train(self, *corpus_paths):
    from chatterbot.corpus import load_corpus, list_corpus_files

    data_file_paths = []

    # Get the paths to each file the bot will be trained with
    for corpus_path in corpus_paths:
        data_file_paths.extend(list_corpus_files(corpus_path))

    for corpus, categories, file_path in load_corpus(*data_file_paths):
        statements_to_create = []

        # Train the chat bot with each statement and response pair
        for conversation_count, conversations in enumerate(corpus):
            if self.show_training_progress:
                utils.print_progress_bar(
                    'Training ' + str(os.path.basename(file_path)),
                    conversation_count + 1,
                    len(corpus)
                )

            previous_statements_texts = [None]
            previous_statements_search_texts = ['']

            for conversation in conversations:
                # A turn may be a single string or a list of alternatives.
                if isinstance(conversation, str):
                    conversation = [conversation]

                statements_texts = []
                statements_search_texts = []

                # Link every statement in this turn to every statement
                # in the previous turn.
                for previous_statement_text, previous_statement_search_text in zip(
                        previous_statements_texts, previous_statements_search_texts):
                    for text in conversation:
                        statement_search_text = self.chatbot.storage.tagger.get_bigram_pair_string(text)

                        statement = Statement(
                            text=text,
                            search_text=statement_search_text,
                            in_response_to=previous_statement_text,
                            search_in_response_to=previous_statement_search_text,
                            conversation='training'
                        )

                        statement.add_tags(*categories)

                        statement = self.get_preprocessed_statement(statement)

                        statements_texts.append(statement.text)
                        statements_search_texts.append(statement_search_text)

                        statements_to_create.append(statement)

                previous_statements_texts = statements_texts
                previous_statements_search_texts = statements_search_texts

        if statements_to_create:
            self.chatbot.storage.create_many(statements_to_create)

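# Hedged sketch of the conversation shape the multi-response variant above
# appears to handle: each turn may be a single string or a list of alternative
# statements, and every statement in a turn is linked to every statement in
# the previous turn. The data below is illustrative, not taken from a real
# corpus file.
conversation_with_alternatives = [
    'Good morning!',
    ['Good morning.', 'Morning!'],           # two alternative replies
    'How is the weather today?',
    ['It is sunny.', 'Looks like rain.'],
]
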
def train(self, *corpus_paths):
    from chatterbot.corpus import load_corpus, list_corpus_files

    data_file_paths = []

    # Get the paths to each file the bot will be trained with
    for corpus_path in corpus_paths:
        data_file_paths.extend(list_corpus_files(corpus_path))

    for corpus, categories, file_path in load_corpus(*data_file_paths):
        statements_to_create = []

        # Train the chat bot with each statement and response pair
        for conversation_count, conversation in enumerate(corpus):
            if self.show_training_progress:
                utils.print_progress_bar(
                    'Training ' + str(os.path.basename(file_path)),
                    conversation_count + 1,
                    len(corpus)
                )

            previous_statement_text = None
            previous_statement_search_text = ''

            for text in conversation:
                suggestion_tags = []

                # Normalize yes/no answers and extract ^suggestion prefixes.
                if text.strip('.?!/;:\'\"') in constants.AFFIRMATIVES:
                    text = 'AFF'
                elif text.strip('.?!/;:\'\"') in constants.NEGATIVES:
                    text = 'NEG'
                elif text[0] == '^':
                    (suggestion, text) = text.split(maxsplit=1)
                    suggestion = suggestion[1:]
                    if '/' not in suggestion:
                        suggestion_tags.append(suggestion)
                    else:
                        for part in suggestion.split('/'):
                            suggestion_tags.append(part)

                statement_search_text = self.chatbot.storage.tagger.get_bigram_pair_string(text)

                statement = Statement(
                    text=text,
                    search_text=statement_search_text,
                    in_response_to=previous_statement_text,
                    search_in_response_to=previous_statement_search_text,
                    conversation='training'
                )

                # YesNoLogicAdapter deals with responses to AFF/NEG via statement
                # tags. Keeping statements in_response_to AFF/NEG caused erroneous
                # responses, so those links are cleared here.
                if statement.in_response_to in ['AFF', 'NEG']:
                    statement.in_response_to = None
                    statement.search_in_response_to = None

                statement.add_tags(*categories)

                if suggestion_tags:
                    for suggestion in suggestion_tags:
                        statement.add_tags('SUGGESTION:' + suggestion)

                if previous_statement_text:
                    if previous_statement_text == 'AFF':
                        statements_to_create[-2].add_tags('AFF:' + statement.text)
                    elif previous_statement_text == 'NEG':
                        statements_to_create[-2].add_tags('NEG:' + statement.text)

                statement = self.get_preprocessed_statement(statement)

                previous_statement_text = statement.text
                previous_statement_search_text = statement_search_text

                statements_to_create.append(statement)

        # Using update() because create_many() makes duplicate statements,
        # and AFF/NEG tag data was lost on some of them.
        for stmnts in statements_to_create:
            self.chatbot.storage.update(stmnts)

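# Hedged sketch of the constants module the AFF/NEG variant above relies on.
# The actual word lists are not shown in the snippet, so these values are
# illustrative assumptions; only the names AFFIRMATIVES and NEGATIVES are
# taken from the code.
AFFIRMATIVES = {'yes', 'yeah', 'yep', 'sure', 'ok', 'okay'}
NEGATIVES = {'no', 'nope', 'nah', 'not really'}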