def __init__(self, session, corpus_dir, knbase_dir, result_dir, result_file):
    """
    Build the inference graph, load the knowledge base and restore the trained weights.

    Args:
        session: The TensorFlow session.
        corpus_dir: Name of the folder storing corpus files and vocab information.
        knbase_dir: Name of the folder storing data files for the knowledge base.
        result_dir: The folder containing the trained result files.
        result_file: The file name of the trained model.
    """
    self.session = session

    # Data pipeline and hyper parameters
    print("# Prepare dataset placeholder and hyper parameters ...")
    data = TokenizedData(corpus_dir=corpus_dir, training=False)

    knbase = KnowledgeBase()
    knbase.load_knbase(knbase_dir)
    self.knowledge_base = knbase

    self.session_data = SessionData()
    self.hparams = data.hparams

    # Sentences to answer are fed through this 1-D string placeholder.
    self.src_placeholder = tf.placeholder(shape=[None], dtype=tf.string)
    self.infer_batch = data.get_inference_batch(
        tf.data.Dataset.from_tensor_slices(self.src_placeholder))

    # Inference model
    print("# Creating inference model ...")
    self.model = ModelCreator(training=False,
                              tokenized_data=data,
                              batch_input=self.infer_batch)

    # Trained weights
    print("# Restoring model weights ...")
    checkpoint_path = os.path.join(result_dir, result_file)
    self.model.saver.restore(session, checkpoint_path)

    self.session.run(tf.tables_initializer())
def __init__(self, session, corpus_dir, knbase_dir, result_dir, result_file):
    """
    Build the inference graph and restore the trained weights into `session`.

    Args:
        session: The TensorFlow session.
        corpus_dir: Folder handed to TokenizedData as `corpus_dir`.
        knbase_dir: Folder handed to TokenizedData as `knbase_dir`.
        result_dir: The folder containing the trained result files.
        result_file: The file name of the trained model inside `result_dir`.
    """
    self.session = session

    # Data pipeline and hyper parameters
    print("# Prepare dataset placeholder and hyper parameters ...")
    data = TokenizedData(corpus_dir=corpus_dir, knbase_dir=knbase_dir, training=False)
    self.tokenized_data = data
    self.hparams = data.hparams

    # Sentences to answer are fed through this 1-D string placeholder.
    self.src_placeholder = tf.placeholder(shape=[None], dtype=tf.string)
    self.infer_batch = data.get_inference_batch(
        tf.data.Dataset.from_tensor_slices(self.src_placeholder))

    # Inference model
    print("# Creating inference model ...")
    self.model = ModelCreator(training=False,
                              tokenized_data=data,
                              batch_input=self.infer_batch)

    # Trained weights
    print("# Restoring model weights ...")
    checkpoint_path = os.path.join(result_dir, result_file)
    self.model.saver.restore(session, checkpoint_path)

    self.session.run(tf.tables_initializer())
def __init__(self, session, corpus_dir, knbase_dir, result_dir, hparams_dir=None):
    """
    Build the inference graph and restore the latest checkpoint found in `result_dir`.

    Args:
        session: The TensorFlow session.
        corpus_dir: Folder handed to TokenizedData as `corpus_dir`.
        knbase_dir: Folder handed to TokenizedData as `knbase_dir`.
        result_dir: The folder searched for the most recent checkpoint.
        hparams_dir: Optional folder handed to HParams; when falsy, TokenizedData
            receives `hparams=None`.

    Raises:
        ValueError: If no checkpoint can be found in `result_dir`.
    """
    self.session = session

    hparams = HParams(hparams_dir).hparams if hparams_dir else None

    # Prepare data and hyper parameters
    print("# Prepare dataset placeholder and hyper parameters ...")
    self.tokenized_data = TokenizedData(corpus_dir=corpus_dir, hparams=hparams,
                                        knbase_dir=knbase_dir, training=False)

    self.hparams = self.tokenized_data.hparams
    self.src_placeholder = tf.placeholder(shape=[None], dtype=tf.string)
    src_dataset = tf.contrib.data.Dataset.from_tensor_slices(self.src_placeholder)
    self.infer_batch = self.tokenized_data.get_inference_batch(src_dataset)

    # Create model
    print("# Creating inference model ...")
    self.model = ModelCreator(training=False, tokenized_data=self.tokenized_data,
                              batch_input=self.infer_batch)

    latest_ckpt = tf.train.latest_checkpoint(result_dir)
    if latest_ckpt is None:
        # latest_checkpoint() yields None when nothing was found; fail here with the
        # folder name instead of passing None on to saver.restore().
        raise ValueError("No checkpoint found in result_dir: {}".format(result_dir))

    print("# Restoring model weights ...")
    self.model.saver.restore(session, latest_ckpt)

    self.session.run(tf.tables_initializer())
def __init__(self, corpus_dir):
    """
    Create a dedicated graph holding the training batch pipeline and the model.

    Args:
        corpus_dir: Folder handed to TokenizedData as `corpus_dir`.
    """
    graph = tf.Graph()
    self.graph = graph

    with graph.as_default():
        data = TokenizedData(corpus_dir=corpus_dir)
        self.hparams = data.hparams

        # Return BatchInput namedtuple from .chatbot/tokenizedata.py
        batch = data.get_training_batch()
        self.train_batch = batch

        self.model = ModelCreator(training=True,
                                  tokenized_data=data,
                                  batch_input=batch)
def __init__(self, corpus_dir):
    """
    Set up the training graph: data pipeline, hyper parameters and model.

    Args:
        corpus_dir: Folder handed to TokenizedData as `corpus_dir`.
    """
    self.graph = tf.Graph()

    # Everything below is created inside self.graph rather than the default graph.
    with self.graph.as_default():
        corpus = TokenizedData(corpus_dir=corpus_dir)
        training_batch = corpus.get_training_batch()

        self.hparams = corpus.hparams
        self.train_batch = training_batch
        self.model = ModelCreator(
            training=True,
            tokenized_data=corpus,
            batch_input=training_batch,
        )
def __init__(self, session, corpus_dir, knbase_dir, result_dir, aiml_dir, result_file):
    """
    Set up the AIML retrieval kernel and the generative inference model.

    Args:
        session: The TensorFlow session.
        corpus_dir: Name of the folder storing corpus files and vocab information.
        knbase_dir: Name of the folder storing data files for the knowledge base.
        result_dir: The folder containing the trained result files.
        aiml_dir: The folder in which BRAIN_FILE and AIMLS_FILE are looked up.
        result_file: The file name of the trained model.
    """
    self.session = session

    # Data pipeline and hyper parameters
    print("# Prepare dataset placeholder and hyper parameters ...")
    data = TokenizedData(corpus_dir=corpus_dir, training=False)

    knbase = KnowledgeBase()
    knbase.load_knbase(knbase_dir)
    self.knowledge_base = knbase

    self.session_data = SessionData()
    self.hparams = data.hparams

    self.src_placeholder = tf.placeholder(shape=[None], dtype=tf.string)
    self.infer_batch = data.get_inference_batch(
        tf.data.Dataset.from_tensor_slices(self.src_placeholder))

    # Retrieval model
    self.kmodel = aiml.Kernel()
    brain_path = os.path.join(aiml_dir, BRAIN_FILE)
    print(aiml_dir)

    # Reuse the saved brain file when present; otherwise learn from the AIML
    # file and save the brain file for the next start.
    if not os.path.exists(brain_path):
        print("# Parsing aiml files ...")
        learn_path = os.path.abspath(os.path.join(aiml_dir, AIMLS_FILE))
        self.kmodel.bootstrap(learnFiles=learn_path, commands="load aiml b")
        print("# Saving brain file: " + BRAIN_FILE)
        self.kmodel.saveBrain(brain_path)
    else:
        print("# Loading from brain file ... ")
        self.kmodel.loadBrain(brain_path)

    # Generative model
    print("# Creating inference model ...")
    self.model = ModelCreator(training=False,
                              tokenized_data=data,
                              batch_input=self.infer_batch)

    # Trained weights
    print("# Restoring model weights ...")
    checkpoint_path = os.path.join(result_dir, result_file)
    self.model.saver.restore(session, checkpoint_path)

    self.session.run(tf.tables_initializer())
def __init__(self, corpus_dir):
    """
    Constructor of the BotTrainer.

    Builds a fresh graph and, inside it, the tokenized data, the training
    batch and the training model.

    Args:
        corpus_dir: The folder to save all the training related data.
    """
    trainer_graph = tf.Graph()
    self.graph = trainer_graph

    with trainer_graph.as_default():
        dataset = TokenizedData(corpus_dir=corpus_dir)
        self.hparams = dataset.hparams
        self.train_batch = dataset.get_training_batch()

        model_args = dict(training=True,
                          tokenized_data=dataset,
                          batch_input=self.train_batch)
        self.model = ModelCreator(**model_args)
class BotTrainer(object):
    """Builds the training graph of the seq2seq model and runs the training loop."""

    def __init__(self, corpus_dir):
        """
        Args:
            corpus_dir: Folder handed to TokenizedData as `corpus_dir`.
        """
        self.graph = tf.Graph()
        with self.graph.as_default():
            tokenized_data = TokenizedData(corpus_dir=corpus_dir)

            self.hparams = tokenized_data.hparams
            self.train_batch = tokenized_data.get_training_batch()
            self.model = ModelCreator(training=True, tokenized_data=tokenized_data,
                                      batch_input=self.train_batch)

    def train(self, result_dir, target=""):
        """Train a seq2seq model.

        Runs `hparams.num_epochs` passes over the training batch iterator. Step
        summaries and a per-epoch "train_perp" summary go to
        `<result_dir>/train_log`; checkpoints are saved as `<result_dir>/basic-<step>`.

        Args:
            result_dir: Folder receiving the summary log and the checkpoints.
            target: Passed to tf.Session as `target`.
        """
        # Summary writer
        summary_name = "train_log"
        summary_writer = tf.summary.FileWriter(os.path.join(result_dir, summary_name), self.graph)

        # The writer owns an open event file; close it even when training fails.
        try:
            log_device_placement = self.hparams.log_device_placement
            num_epochs = self.hparams.num_epochs

            config_proto = tf.ConfigProto(log_device_placement=log_device_placement,
                                          allow_soft_placement=True)
            config_proto.gpu_options.allow_growth = True

            with tf.Session(target=target, config=config_proto, graph=self.graph) as sess:
                sess.run(tf.global_variables_initializer())
                sess.run(tf.tables_initializer())
                global_step = self.model.global_step.eval(session=sess)

                # Initialize all of the iterators
                sess.run(self.train_batch.initializer)

                # Initialize the statistic variables
                ckpt_loss, ckpt_predict_count = 0.0, 0.0
                train_perp, last_record_perp = 2000.0, 2.0
                train_epoch = 0

                print("# Training loop started @ {}".format(time.strftime("%Y-%m-%d %H:%M:%S")))
                epoch_start_time = time.time()
                while train_epoch < num_epochs:
                    # Each run of this while loop is a training step, multiple time/steps
                    # will trigger the train_epoch to be increased.
                    learning_rate = self._get_learning_rate(train_perp)
                    try:
                        step_result = self.model.train_step(sess, learning_rate=learning_rate)
                        (_, step_loss, step_predict_count, step_summary, global_step,
                         step_word_count, batch_size) = step_result

                        # Write step summary.
                        summary_writer.add_summary(step_summary, global_step)

                        # update statistics
                        ckpt_loss += (step_loss * batch_size)
                        ckpt_predict_count += step_predict_count
                    except tf.errors.OutOfRangeError:
                        # Finished going through the training dataset. Go to next epoch.
                        train_epoch += 1

                        # An epoch that produced no predictions (e.g. an empty dataset)
                        # would divide 0.0 by 0.0; report it as infinite loss instead.
                        if ckpt_predict_count:
                            mean_loss = ckpt_loss / ckpt_predict_count
                        else:
                            mean_loss = math.inf
                        # math.exp overflows for large inputs, so cap at infinity.
                        train_perp = math.exp(float(mean_loss)) if mean_loss < 300 else math.inf

                        epoch_dur = time.time() - epoch_start_time
                        print("# Finished epoch {:2d} @ step {:5d} @ {}. In the epoch, learning rate = {:.6f}, "
                              "mean loss = {:.4f}, perplexity = {:8.4f}, and {:.2f} seconds elapsed."
                              .format(train_epoch, global_step, time.strftime("%Y-%m-%d %H:%M:%S"),
                                      learning_rate, mean_loss, train_perp, round(epoch_dur, 2)))
                        epoch_start_time = time.time()  # The start time of the next epoch

                        summary = tf.Summary(value=[tf.Summary.Value(tag="train_perp",
                                                                     simple_value=train_perp)])
                        summary_writer.add_summary(summary, global_step)

                        # Save checkpoint only on a new best perplexity below 1.6
                        if train_perp < 1.6 and train_perp < last_record_perp:
                            self.model.saver.save(sess, os.path.join(result_dir, "basic"),
                                                  global_step=global_step)
                            last_record_perp = train_perp

                        ckpt_loss, ckpt_predict_count = 0.0, 0.0

                        # Rewind the input iterator for the next epoch.
                        sess.run(self.model.batch_input.initializer)

                # Done training
                self.model.saver.save(sess, os.path.join(result_dir, "basic"),
                                      global_step=global_step)
        finally:
            summary_writer.close()

    @staticmethod
    def _get_learning_rate(perplexity):
        """Return the learning rate for the given training perplexity.

        The rate is the one attached to the first upper bound that `perplexity`
        does not exceed; anything above the largest bound (including inf and
        NaN, which compare false against every bound) gets 8e-4.
        """
        schedule = (
            (1.48, 9.6e-5),
            (1.64, 1e-4),
            (2.0, 1.2e-4),
            (2.4, 1.6e-4),
            (3.2, 2e-4),
            (4.8, 2.4e-4),
            (8.0, 3.2e-4),
            (16.0, 4e-4),
            (32.0, 6e-4),
        )
        for upper_bound, rate in schedule:
            if perplexity <= upper_bound:
                return rate
        return 8e-4
class BotPredictor(object):
    """Answers questions with a restored seq2seq inference model and a knowledge base."""

    def __init__(self, session, corpus_dir, knbase_dir, result_dir, result_file):
        """
        Args:
            session: The TensorFlow session.
            corpus_dir: Name of the folder storing corpus files and vocab information.
            knbase_dir: Name of the folder storing data files for the knowledge base.
            result_dir: The folder containing the trained result files.
            result_file: The file name of the trained model.
        """
        self.session = session

        # Prepare data and hyper parameters
        print("# Prepare dataset placeholder and hyper parameters ...")
        tokenized_data = TokenizedData(corpus_dir=corpus_dir, training=False)

        self.knowledge_base = KnowledgeBase()
        self.knowledge_base.load_knbase(knbase_dir)

        self.session_data = SessionData()

        self.hparams = tokenized_data.hparams
        self.src_placeholder = tf.placeholder(shape=[None], dtype=tf.string)
        src_dataset = tf.data.Dataset.from_tensor_slices(self.src_placeholder)
        self.infer_batch = tokenized_data.get_inference_batch(src_dataset)

        # Create model
        print("# Creating inference model ...")
        self.model = ModelCreator(training=False, tokenized_data=tokenized_data,
                                  batch_input=self.infer_batch)
        # Restore model weights
        print("# Restoring model weights ...")
        self.model.saver.restore(session, os.path.join(result_dir, result_file))

        self.session.run(tf.tables_initializer())

    def predict(self, session_id, question, html_format=False):
        """Return the bot's answer to `question` within the chat session `session_id`.

        The question is first passed through check_patterns_and_replace. When a
        pattern matched, the rewritten sentence is tried first and its answer is
        used only if it contained a function value; otherwise the original
        question is run through the model in a second pass.

        Args:
            session_id: Key used to look up the chat session in `self.session_data`.
            question: The user's input sentence.
            html_format: Forwarded to call_function via `_get_final_output`.

        Returns:
            The answer sentence as a string.
        """
        chat_session = self.session_data.get_session(session_id)
        chat_session.before_prediction()  # Reset before each prediction

        if question.strip() == '':
            answer = "Don't you want to say something to me?"
            chat_session.after_prediction(question, answer)
            return answer

        pat_matched, new_sentence, para_list = check_patterns_and_replace(question)

        # At most two passes: pass 0 uses the pattern-replaced sentence, pass 1
        # falls back to the original question.
        for pre_time in range(2):
            tokens = nltk.word_tokenize(new_sentence.lower())
            tmp_sentence = [' '.join(tokens).strip()]

            self.session.run(self.infer_batch.initializer,
                             feed_dict={self.src_placeholder: tmp_sentence})

            outputs, _ = self.model.infer(self.session)

            if self.hparams.beam_width > 0:
                outputs = outputs[0]

            eos_token = self.hparams.eos_token.encode("utf-8")
            outputs = outputs.tolist()[0]

            # Cut the output at the first end-of-sentence token.
            if eos_token in outputs:
                outputs = outputs[:outputs.index(eos_token)]

            if pat_matched and pre_time == 0:
                out_sentence, if_func_val = self._get_final_output(outputs, chat_session,
                                                                   para_list=para_list,
                                                                   html_format=html_format)
                if if_func_val:
                    chat_session.after_prediction(question, out_sentence)
                    return out_sentence
                else:
                    new_sentence = question
            else:
                out_sentence, _ = self._get_final_output(outputs, chat_session,
                                                         html_format=html_format)
                chat_session.after_prediction(question, out_sentence)
                return out_sentence

    def _get_final_output(self, sentence, chat_session, para_list=None, html_format=False):
        """Turn the model's output tokens into a readable sentence.

        Args:
            sentence: List of UTF-8 encoded byte-string tokens.
            chat_session: Forwarded to call_function for `_func_val_` tokens.
            para_list: Forwarded to call_function for `_func_val_` tokens.
            html_format: Forwarded to call_function for `_func_val_` tokens.

        Returns:
            A tuple `(text, if_func_val)`; `if_func_val` is True when at least one
            `_func_val_` token was encountered.
        """
        sentence = b' '.join(sentence).decode('utf-8')
        if sentence == '':
            return "I don't know what to say.", False

        # Tokens after which the next word is attached without a space.
        openers = ['(', '[', '{', '``', '$']

        if_func_val = False
        last_word = None
        word_list = []
        for word in sentence.split(' '):
            word = word.strip()
            if not word:
                continue

            if word.startswith('_func_val_'):
                if_func_val = True
                word = call_function(word[10:], knowledge_base=self.knowledge_base,
                                     chat_session=chat_session, para_list=para_list,
                                     html_format=html_format)
                if word is None or word == '':
                    continue
            else:
                if word in self.knowledge_base.upper_words:
                    word = self.knowledge_base.upper_words[word]

                # Capitalize the first word of the output and of each new sentence.
                if (last_word is None or last_word in ['.', '!', '?']) and not word[0].isupper():
                    word = word.capitalize()

            # Keep the token as it was BEFORE the separating space is glued on, so
            # the comparisons against '.', '(' etc. on the next round can match.
            token = word

            if not word.startswith('\'') and word != 'n\'t' \
                    and (word[0] not in string.punctuation or word in openers) \
                    and last_word not in openers:
                word = ' ' + word

            word_list.append(word)
            last_word = token

        return ''.join(word_list).strip(), if_func_val
class BotPredictor(object):
    """Answers sentences with a restored seq2seq inference model."""

    def __init__(self, session, corpus_dir, knbase_dir, result_dir, result_file):
        """
        Args:
            session: The TensorFlow session.
            corpus_dir: Folder handed to TokenizedData as `corpus_dir`.
            knbase_dir: Folder handed to TokenizedData as `knbase_dir`.
            result_dir: The folder containing the trained result files.
            result_file: The file name of the trained model inside `result_dir`.
        """
        self.session = session

        # Prepare data and hyper parameters
        print("# Prepare dataset placeholder and hyper parameters ...")
        self.tokenized_data = TokenizedData(corpus_dir=corpus_dir, knbase_dir=knbase_dir,
                                            training=False)

        self.hparams = self.tokenized_data.hparams
        self.src_placeholder = tf.placeholder(shape=[None], dtype=tf.string)
        src_dataset = tf.data.Dataset.from_tensor_slices(self.src_placeholder)
        self.infer_batch = self.tokenized_data.get_inference_batch(src_dataset)

        # Create model
        print("# Creating inference model ...")
        self.model = ModelCreator(training=False, tokenized_data=self.tokenized_data,
                                  batch_input=self.infer_batch)
        # Restore model weights
        print("# Restoring model weights ...")
        self.model.saver.restore(session, os.path.join(result_dir, result_file))

        self.session.run(tf.tables_initializer())

    def predict(self, sentence, html_format=False):
        """Return the bot's answer to `sentence`.

        The sentence is first passed through
        FunctionData.check_arithmetic_pattern_and_replace. When a pattern matched,
        the rewritten sentence is tried first and its answer is used only if it
        contained a function value; otherwise the original sentence is run
        through the model in a second pass.

        Args:
            sentence: The user's input sentence.
            html_format: Forwarded to call_function via `_get_final_output`.

        Returns:
            The answer sentence as a string.
        """
        if sentence.strip() == '':
            return "Don't you want to say something to me?"

        pat_matched, new_sentence, num_list = \
            FunctionData.check_arithmetic_pattern_and_replace(sentence)

        # At most two passes: pass 0 uses the pattern-replaced sentence, pass 1
        # falls back to the original sentence.
        for pre_time in range(2):
            tokens = nltk.word_tokenize(new_sentence.lower())
            tmp_sentence = [' '.join(tokens).strip()]

            self.session.run(self.infer_batch.initializer,
                             feed_dict={self.src_placeholder: tmp_sentence})

            outputs, _ = self.model.infer(self.session)

            if self.hparams.beam_width > 0:
                outputs = outputs[0]

            eos_token = self.hparams.eos_token.encode("utf-8")
            outputs = outputs.tolist()[0]

            # Cut the output at the first end-of-sentence token.
            if eos_token in outputs:
                outputs = outputs[:outputs.index(eos_token)]

            if pat_matched and pre_time == 0:
                out_sentence, if_func_val = self._get_final_output(
                    outputs, para_list=num_list, html_format=html_format)
                if if_func_val:
                    return out_sentence
                else:
                    new_sentence = sentence
            else:
                out_sentence, _ = self._get_final_output(outputs, html_format=html_format)
                return out_sentence

    def _get_final_output(self, sentence, para_list=None, html_format=False):
        """Turn the model's output tokens into a readable sentence.

        Args:
            sentence: List of UTF-8 encoded byte-string tokens.
            para_list: Forwarded to call_function for `_func_val_` tokens.
            html_format: Forwarded to call_function for `_func_val_` tokens.

        Returns:
            A tuple `(text, if_func_val)`; `if_func_val` is True when at least one
            `_func_val_` token was encountered.
        """
        sentence = b' '.join(sentence).decode('utf-8')
        if sentence == '':
            return "I don't know what to say.", False

        # Tokens after which the next word is attached without a space.
        openers = ['(', '[', '{', '``', '$']

        if_func_val = False
        last_word = None
        word_list = []
        for word in sentence.split(' '):
            word = word.strip()
            if not word:
                continue

            if word.startswith('_func_val_'):
                if_func_val = True
                word = call_function(word[10:], tokenized_data=self.tokenized_data,
                                     para_list=para_list, html_format=html_format)
                # A function that yields nothing contributes no word; without this
                # guard a None result would crash on word.startswith below.
                if word is None or word == '':
                    continue
            else:
                if word in self.tokenized_data.upper_words:
                    word = self.tokenized_data.upper_words[word]

                # Capitalize the first word of the output and of each new sentence.
                if (last_word is None or last_word in ['.', '!', '?']) and not word[0].isupper():
                    word = word.capitalize()

            # Keep the token as it was BEFORE the separating space is glued on, so
            # the comparisons against '.', '(' etc. on the next round can match.
            token = word

            if not word.startswith('\'') and word != 'n\'t' \
                    and (word not in string.punctuation or word in openers) \
                    and last_word not in openers:
                word = ' ' + word

            word_list.append(word)
            last_word = token

        return ''.join(word_list).strip(), if_func_val
class BotTrainer(object):
    """Builds the training graph of the seq2seq model and runs a resumable training loop."""

    def __init__(self, corpus_dir):
        """
        Constructor of the BotTrainer.

        Args:
            corpus_dir: The folder to save all the training related data.
        """
        self.graph = tf.Graph()
        with self.graph.as_default():
            tokenized_data = TokenizedData(corpus_dir=corpus_dir)

            self.hparams = tokenized_data.hparams
            self.train_batch = tokenized_data.get_training_batch()
            self.model = ModelCreator(training=True, tokenized_data=tokenized_data,
                                      batch_input=self.train_batch)

    def train(self, result_dir, target="", last_end_file=None, last_end_epoch=0, last_end_lr=8e-4):
        """Train a seq2seq model.

        Step summaries and a per-epoch "train_perp" summary go to
        `<result_dir>/train_log`; checkpoints are saved as
        `<result_dir>/basic-<epoch>` whenever the epoch perplexity improves, and
        once more when training is done.

        Args:
            result_dir: Folder receiving the summary log and the checkpoints.
            target: Passed to tf.Session as `target`.
            last_end_file: Optional checkpoint file name inside `result_dir` to
                restore before training continues.
            last_end_epoch: Initial value of the epoch counter.
            last_end_lr: Learning rate used until the first epoch finishes.
        """
        # Summary writer
        summary_name = "train_log"
        summary_writer = tf.summary.FileWriter(os.path.join(result_dir, summary_name), self.graph)

        # The writer owns an open event file; close it even when training fails.
        try:
            log_device_placement = self.hparams.log_device_placement
            num_epochs = self.hparams.num_epochs

            config_proto = tf.ConfigProto(log_device_placement=log_device_placement,
                                          allow_soft_placement=True)
            config_proto.gpu_options.allow_growth = True

            with tf.Session(target=target, config=config_proto, graph=self.graph) as sess:
                # This initialization is useful even when the model is restored from the
                # last time because not all variables used in the model training may be saved.
                sess.run(tf.global_variables_initializer())
                if last_end_file:
                    # Continue training from last time
                    self.model.saver.restore(sess, os.path.join(result_dir, last_end_file))

                sess.run(tf.tables_initializer())
                global_step = self.model.global_step.eval(session=sess)

                # Initialize all of the iterators
                sess.run(self.train_batch.initializer)

                # Initialize the statistic variables
                ckpt_loss, ckpt_predict_count = 0.0, 0.0
                train_perp, last_record_perp = 2000.0, 200.0
                train_epoch = last_end_epoch
                learning_rate = pre_lr = last_end_lr

                while train_epoch < num_epochs:
                    # Each run of this while loop is a training step, multiple time/steps
                    # will trigger the train_epoch to be increased.
                    try:
                        step_result = self.model.train_step(sess, learning_rate=learning_rate)
                        (_, step_loss, step_predict_count, step_summary, global_step,
                         step_word_count, batch_size) = step_result

                        # Write step summary.
                        summary_writer.add_summary(step_summary, global_step)

                        # update statistics
                        ckpt_loss += (step_loss * batch_size)
                        ckpt_predict_count += step_predict_count
                    except tf.errors.OutOfRangeError:
                        # Finished going through the training dataset. Go to next epoch.
                        train_epoch += 1

                        # An epoch that produced no predictions (e.g. an empty dataset)
                        # would divide 0.0 by 0.0; report it as infinite loss instead.
                        if ckpt_predict_count:
                            mean_loss = ckpt_loss / ckpt_predict_count
                        else:
                            mean_loss = math.inf
                        # math.exp overflows for large inputs, so cap at infinity.
                        train_perp = math.exp(float(mean_loss)) if mean_loss < 300 else math.inf

                        summary = tf.Summary(value=[tf.Summary.Value(tag="train_perp",
                                                                     simple_value=train_perp)])
                        summary_writer.add_summary(summary, global_step)

                        # Save checkpoint whenever the perplexity improves
                        if train_perp < last_record_perp:
                            self.model.saver.save(sess, os.path.join(result_dir, "basic"),
                                                  global_step=train_epoch)
                            last_record_perp = train_perp

                        ckpt_loss, ckpt_predict_count = 0.0, 0.0

                        # _get_learning_rate is not defined in the part of the class
                        # shown here; it is called exactly as before.
                        learning_rate = self._get_learning_rate(train_perp, pre_lr, train_epoch)
                        pre_lr = learning_rate

                        # Rewind the input iterator for the next epoch.
                        sess.run(self.model.batch_input.initializer)

                # Done training
                self.model.saver.save(sess, os.path.join(result_dir, "basic"),
                                      global_step=train_epoch)
        finally:
            summary_writer.close()