def get_argument(self, value, slot):
    if value in self.value2argument:
        return self.value2argument[value]
    else:
        # the value is not mapped yet: add a new argument and then return it
        if self.n_arguments_per_slot > 1:
            # for i in range(0, self.n_arguments_per_slot * 3):
            #     n = randint(0, self.n_arguments_per_slot - 1)
            for i in range(0, self.n_arguments_per_slot):
                ARGUMENT = slot + '_' + str(i)
                if ARGUMENT in self.argument2value:
                    continue
                else:
                    self.add(value, ARGUMENT)
                    return ARGUMENT

            # all argument slots are taken: overwrite an existing argument !!!
            m = LogMessage()
            m.add('-' * 120)
            m.add('WARNING: overwriting an existing argument ARGUMENT = {a}, VALUE = {v}'.format(a=ARGUMENT, v=value))
            m.add('ARGUMENTS: {a}'.format(a=self.argument2value))
            m.add('')
            m.log()

            self.add(value, ARGUMENT)
        else:
            # there is only one argument per slot
            ARGUMENT = slot + '_X'
            self.add(value, ARGUMENT)

        return ARGUMENT
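# A minimal, self-contained sketch of the value-to-argument (delexicalization) bookkeeping
# that get_argument() above relies on. The class name SlotArgumentMap and this add() are
# hypothetical stand-ins for the real mapping object defined elsewhere in the repository;
# only the value2argument / argument2value dictionaries are taken from the code above.
class SlotArgumentMap(object):
    def __init__(self, n_arguments_per_slot=2):
        self.n_arguments_per_slot = n_arguments_per_slot
        self.value2argument = {}  # e.g. 'chinese' -> 'food_0'
        self.argument2value = {}  # e.g. 'food_0'  -> 'chinese'

    def add(self, value, argument):
        # register the mapping in both directions
        self.value2argument[value] = argument
        self.argument2value[argument] = value

# Usage sketch:
#   args = SlotArgumentMap(n_arguments_per_slot=2)
#   args.add('chinese', 'food_0')
#   args.add('north', 'area_0')
#   args.value2argument['chinese']  -> 'food_0'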
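# LogMessage is defined elsewhere in this repository. The sketch below is a hypothetical
# minimal reimplementation inferred only from the calls made in this section (add(), log(),
# the write() classmethod, and the log_fn/msg/time constructor arguments); names, defaults,
# and behaviour are assumptions, not the project's actual implementation.
import datetime
import sys


class LogMessageSketch(object):
    def __init__(self, log_fn='log.txt', msg=None, time=False):
        self.log_fn = log_fn
        self.lines = []
        if time:
            self.lines.append(str(datetime.datetime.now()))
        if msg is not None:
            self.lines.append(str(msg))

    def add(self, line=''):
        # buffer one line of the message; add() with no argument adds a blank line
        self.lines.append(str(line))

    def log(self, print_console=True, append=True):
        # flush the buffered message to the console and/or the log file
        text = '\n'.join(self.lines) + '\n'
        if print_console:
            sys.stdout.write(text)
        with open(self.log_fn, 'a' if append else 'w') as f:
            f.write(text)

    @classmethod
    def write(cls, text):
        # immediate, unbuffered console output used for per-batch progress reporting
        sys.stdout.write(str(text))
        sys.stdout.flush()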
def log_predictions_dataset(self, log_fn, actions_template, actions_arguments, batch_indexes):
    m = LogMessage(log_fn=log_fn)
    m.add('Shape of action template predictions: {s}'.format(s=actions_template.shape))
    m.add('Shape of action arguments predictions: {s}'.format(s=actions_arguments.shape))
    m.add()
    m.add('Predictions')
    m.add()

    # print(self.data.batch_histories.shape)
    # print(self.data.batch_actions_template.shape)
    # print(self.data.batch_actions_arguments.shape)
    # print(actions_template.shape)
    # print(actions_arguments.shape)
    # print(len(batch_indexes))

    for prediction_batch_idx, batch_idx in enumerate(batch_indexes):
        for history in range(0, self.data.batch_histories.shape[1]):
            m.add('History {h}'.format(h=prediction_batch_idx * self.FLAGS.batch_size + history))

            for j in range(self.data.batch_histories.shape[2]):
                utterance = []
                for k in range(self.data.batch_histories.shape[3]):
                    w = self.data.idx2word_history[self.data.batch_histories[batch_idx, history, j, k]]
                    if w not in ['_SOS_', '_EOS_']:
                        utterance.append(w)
                if utterance:
                    m.add('U {j:2} : {c:80}'.format(j=j, c=' '.join(utterance)))

            w_histories_arguments = []
            for j in range(self.data.batch_histories_arguments.shape[2]):
                w = self.data.idx2word_history_arguments[self.data.batch_histories_arguments[batch_idx, history, j]]
                w_histories_arguments.append(w)
            m.add('ArgsH: {t:80}'.format(t=', '.join(w_histories_arguments)))

            m.add('P    : {t:80}'.format(
                t=self.data.idx2word_action_template[actions_template[prediction_batch_idx, history]]))

            w_actions_arguments = []
            for j in range(actions_arguments.shape[2]):
                w = self.data.idx2word_action_arguments[actions_arguments[prediction_batch_idx, history, j]]
                w_actions_arguments.append(w)
            m.add('ArgsP: {t:80}'.format(t=', '.join(w_actions_arguments)))

            m.add('T    : {t:80}'.format(
                t=self.data.idx2word_action_template[self.data.batch_actions_template[batch_idx, history]]))

            w_actions_arguments = []
            for j in range(self.data.batch_actions_arguments.shape[2]):
                w = self.data.idx2word_action_arguments[self.data.batch_actions_arguments[batch_idx, history, j]]
                w_actions_arguments.append(w)
            m.add('ArgsT: {t:80}'.format(t=', '.join(w_actions_arguments)))

            m.add()

    # m.log(print_console=True, append=False)
    m.log(print_console=False, append=False)
def evaluate(self, epoch, learning_rate, sess):
    m = LogMessage()
    m.add('')
    m.add('Epoch: {epoch}'.format(epoch=epoch))
    m.add('  - learning rate = {lr:e}'.format(lr=learning_rate.eval()))

    def batch_eval(batch_idx):
        return sess.run(
            [self.predictions, self.loss, self.accuracy],
            feed_dict={
                self.batch_idx: batch_idx,
                self.use_inputs_prob: 1.0,
                self.dropout_keep_prob: 1.0,
                self.phase_train: False,
            })

    self.train_predictions_action, train_loss, train_accuracy_action = self.batch_evaluate(
        batch_eval, self.data.train_batch_indexes)
    self.train_predictions_action_argmax = np.argmax(self.train_predictions_action, axis=2)

    m.add('  Train data')
    m.add('    - loss     = {lss:f}'.format(lss=train_loss))
    m.add('    - accuracy = {acc:f}'.format(acc=train_accuracy_action))

    self.dev_predictions_action, dev_loss, dev_accuracy_action = self.batch_evaluate(
        batch_eval, self.data.dev_batch_indexes)
    self.dev_predictions_action_argmax = np.argmax(self.dev_predictions_action, axis=2)

    m.add('  Dev data')
    m.add('    - loss     = {lss:f}'.format(lss=dev_loss))
    m.add('    - accuracy = {acc:f}'.format(acc=dev_accuracy_action))

    self.test_predictions_action, test_loss, test_accuracy_action = self.batch_evaluate(
        batch_eval, self.data.test_batch_indexes)
    self.test_predictions_action_argmax = np.argmax(self.test_predictions_action, axis=2)

    m.add('  Test data')
    m.add('    - loss     = {lss:f}'.format(lss=test_loss))
    m.add('    - accuracy = {acc:f}'.format(acc=test_accuracy_action))
    m.add()
    m.log()

    return train_accuracy_action, train_loss, \
           dev_accuracy_action, dev_loss, \
           test_accuracy_action, test_loss
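# batch_evaluate() is defined elsewhere in the repository. Purely to illustrate what
# evaluate() above expects from it, here is a hypothetical minimal version that runs
# batch_eval() on every batch, concatenates the predictions, and averages loss and
# accuracy; the real implementation may differ.
import numpy as np


def batch_evaluate_sketch(batch_eval, batch_indexes):
    # batch_eval(batch_idx) is assumed to return (predictions, loss, accuracy)
    # for a single batch, as the closure defined in evaluate() does.
    all_predictions, losses, accuracies = [], [], []
    for batch_idx in batch_indexes:
        predictions, loss, accuracy = batch_eval(batch_idx)
        all_predictions.append(predictions)
        losses.append(loss)
        accuracies.append(accuracy)
    return (np.concatenate(all_predictions, axis=0),
            float(np.mean(losses)),
            float(np.mean(accuracies)))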
def log_predictions_dataset(self, log_fn, actions, batch_indexes):
    m = LogMessage(log_fn=log_fn)
    m.add('Shape of action predictions: {s}'.format(s=actions.shape))
    m.add('Argmax predictions')
    m.add()

    for prediction_batch_idx, batch_idx in enumerate(batch_indexes):
        for history in range(0, self.data.batch_histories.shape[1]):
            m.add('History {h}'.format(h=prediction_batch_idx * self.FLAGS.batch_size + history))

            for j in range(self.data.batch_histories.shape[2]):
                utterance = []
                for k in range(self.data.batch_histories.shape[3]):
                    w = self.data.idx2word_history[self.data.batch_histories[batch_idx, history, j, k]]
                    if w not in ['_SOS_', '_EOS_']:
                        utterance.append(w)
                if utterance:
                    m.add('U {j}: {c:80}'.format(j=j, c=' '.join(utterance)))

            prediction = []
            for j in range(actions.shape[2]):
                w = self.data.idx2word_action[actions[prediction_batch_idx, history, j]]
                if w not in ['_SOS_', '_EOS_']:
                    prediction.append(w)
            m.add('P : {t:80}'.format(t=' '.join(prediction)))

            target = []
            for j in range(self.data.batch_actions.shape[2]):
                w = self.data.idx2word_action[self.data.batch_actions[batch_idx, history, j]]
                if w not in ['_SOS_', '_EOS_']:
                    target.append(w)
            m.add('T : {t:80}'.format(t=' '.join(target)))

            m.add()

    # m.log(print_console=True, append=False)
    m.log(print_console=False, append=False)
def log_predictions_dataset(self, log_fn, actions_template, batch_indexes):
    m = LogMessage(log_fn=log_fn)
    m.add('Shape of action template predictions: {s}'.format(s=actions_template.shape))
    m.add()
    m.add('Predictions')
    m.add()

    # print(self.data.batch_histories.shape)
    # print(self.data.batch_actions_template.shape)
    # print(actions_template.shape)
    # print(len(batch_indexes))

    for prediction_batch_idx, batch_idx in enumerate(batch_indexes):
        for history in range(0, self.data.batch_histories.shape[1]):
            m.add('History {h}'.format(h=prediction_batch_idx * self.FLAGS.batch_size + history))

            for j in range(self.data.batch_histories.shape[2]):
                utterance = []
                for k in range(self.data.batch_histories.shape[3]):
                    w = self.data.idx2word_history[self.data.batch_histories[batch_idx, history, j, k]]
                    if w not in ['_SOS_', '_EOS_']:
                        utterance.append(w)
                if utterance:
                    m.add('U {j}: {c:80}'.format(j=j, c=' '.join(utterance)))

            m.add('P : {t:80}'.format(
                t=self.data.idx2word_action_template[actions_template[prediction_batch_idx, history]]))
            m.add('T : {t:80}'.format(
                t=self.data.idx2word_action_template[self.data.batch_actions_template[batch_idx, history]]))

            m.add()

    # m.log()
    m.log(print_console=False, append=False)
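# train() below calls model.log_predictions(), which is not shown in this section. A
# hypothetical sketch of such a wrapper is given here, assuming it simply dispatches the
# argmax predictions of each data split to log_predictions_dataset(); the file names and
# the *_predictions_action_argmax attribute names are assumptions based on evaluate() above.
def log_predictions_sketch(self):
    self.log_predictions_dataset(
        'predictions_train_set.txt',
        actions=self.train_predictions_action_argmax,
        batch_indexes=self.data.train_batch_indexes)
    self.log_predictions_dataset(
        'predictions_dev_set.txt',
        actions=self.dev_predictions_action_argmax,
        batch_indexes=self.data.dev_batch_indexes)
    self.log_predictions_dataset(
        'predictions_test_set.txt',
        actions=self.test_predictions_action_argmax,
        batch_indexes=self.data.test_batch_indexes)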
def train(data, model):
    with tf.Session(config=tf.ConfigProto(allow_soft_placement=True,
                                          inter_op_parallelism_threads=FLAGS.threads,
                                          intra_op_parallelism_threads=FLAGS.threads,
                                          use_per_session_threads=True)) as sess:
        # Merge all the summaries and write them out to ./log
        merged_summaries = tf.merge_all_summaries()
        writer = tf.train.SummaryWriter(logging.exp_dir, sess.graph_def)
        saver = tf.train.Saver()

        # training
        t_vars = tf.trainable_variables()
        # t_vars = [v for v in t_vars if 'embedding_table' not in v.name]  # all variables except embeddings
        learning_rate = tf.Variable(float(FLAGS.learning_rate), trainable=False)

        # train_op = tf.train.AdagradOptimizer(
        #     learning_rate=learning_rate,
        # )
        # train_op = AdamPlusCovOptimizer(
        train_op = AdamPlusOptimizer(
            learning_rate=learning_rate,
            beta1=FLAGS.beta1,
            beta2=FLAGS.beta2,
            epsilon=FLAGS.epsilon,
            pow=FLAGS.pow,
            dense_regularization=FLAGS.dense_regularization,
            sparse_regularization=FLAGS.sparse_regularization,
            use_locking=False,
            name='trainer')

        learning_rate_decay_op = learning_rate.assign(learning_rate * FLAGS.decay)
        global_step = tf.Variable(0, trainable=False)
        gradients = tf.gradients(model.loss, t_vars)

        clipped_gradients, _ = tf.clip_by_global_norm(gradients, FLAGS.max_gradient_norm)
        train_op = train_op.apply_gradients(zip(clipped_gradients, t_vars), global_step=global_step)

        tf.initialize_all_variables().run()

        # prepare batch indexes
        m = LogMessage()
        train_set_size = model.data.train_set_size
        m.add('Train set size: {d}'.format(d=train_set_size))
        batch_size = FLAGS.batch_size
        m.add('Batch size: {d}'.format(d=batch_size))
        m.add('#Batches: {d}'.format(d=len(model.data.train_batch_indexes)))
        m.log()

        train_accuracies, train_losses = [], []
        dev_accuracies, dev_losses = [], []
        test_accuracies, test_losses = [], []
        max_accuracy_epoch = 0
        use_inputs_prob = 1.0
        for epoch in range(FLAGS.max_epochs):
            # update the model
            LogMessage.write('Batch: ')
            for b, batch_idx in enumerate(model.data.train_batch_indexes):
                LogMessage.write(b)
                LogMessage.write(' ')

                sess.run(
                    [train_op],
                    feed_dict={
                        model.batch_idx: batch_idx,
                        model.use_inputs_prob: use_inputs_prob,
                        model.dropout_keep_prob: FLAGS.dropout_keep_prob,
                        model.phase_train: True,
                    }
                )
            shuffle(model.data.train_batch_indexes)

            LogMessage.write('\n\n')
            LogMessage.write('Used inputs prob = {uip:f}'.format(uip=use_inputs_prob))
            LogMessage.write('\n')

            # evaluate the model
            train_acc, train_lss, \
            dev_acc, dev_lss, \
            test_acc, test_lss = \
                model.evaluate(epoch, learning_rate, sess)

            if epoch == 0 or dev_acc > max(dev_accuracies):
                max_accuracy_epoch = epoch

                model_fn = saver.save(sess, os.path.join(logging.exp_dir, "model.ckpt"))
                m = LogMessage()
                m.add('New max accuracy achieved on the dev data.')
                m.add("Model saved in file: {s}".format(s=model_fn))
                m.log()

                # save predictions on train, dev, and test sets
                model.log_predictions()

            m = LogMessage()
            m.add()
            m.add("Epoch with max accuracy on dev data: {d}".format(d=max_accuracy_epoch))
            m.add()
            m.log()

            # decrease the learning rate if no improvement was seen over the last 4 epochs
            if len(train_losses) > 6 and train_lss >= max(train_losses[-4:]) + 1e-10:
                sess.run(learning_rate_decay_op)

            train_losses.append(train_lss)
            train_accuracies.append(train_acc)
            dev_losses.append(dev_lss)
            dev_accuracies.append(dev_acc)
            test_losses.append(test_lss)
            test_accuracies.append(test_acc)

            # stop when a threshold accuracy is reached or when there was no improvement
            # in dev accuracy over the last 100 epochs
            if train_acc > .999 or epoch > max_accuracy_epoch + 100:
                break

            use_inputs_prob *= FLAGS.use_inputs_prob_decay

        # save the results
        results = {
            'epoch': epoch,
            'max_accuracy_epoch_on_dev_data': max_accuracy_epoch,
            'train_loss': str(train_losses[max_accuracy_epoch]),
            'train_accuracy': str(train_accuracies[max_accuracy_epoch]),
            'dev_loss': str(dev_losses[max_accuracy_epoch]),
            'dev_accuracy': str(dev_accuracies[max_accuracy_epoch]),
            'test_loss': str(test_losses[max_accuracy_epoch]),
            'test_accuracy': str(test_accuracies[max_accuracy_epoch]),
        }
        LogExperiment(results)

        LogMessage(log_fn='.done', msg='done', time=True).log()
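# The two schedule checks inside the epoch loop above can be isolated into small helpers;
# the sketch below mirrors the conditions used in train() (function names are hypothetical).
def should_decay_learning_rate(train_losses, current_loss, window=4, min_history=6, eps=1e-10):
    # decay once enough history exists and the current training loss is no better than
    # the worst loss in the recent window
    return len(train_losses) > min_history and current_loss >= max(train_losses[-window:]) + eps


def should_stop(train_accuracy, epoch, max_accuracy_epoch, patience=100, accuracy_threshold=0.999):
    # stop on near-perfect training accuracy or after `patience` epochs without a new
    # dev-accuracy maximum
    return train_accuracy > accuracy_threshold or epoch > max_accuracy_epoch + patience


# Examples:
#   should_decay_learning_rate([1.0] * 7, current_loss=1.2)   -> True
#   should_stop(0.95, epoch=250, max_accuracy_epoch=100)      -> True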
def main(run):
    start_experiment(run)

    if FLAGS.runs == 1:
        # set the seed to a constant
        seed(0)
        tf.set_random_seed(1)

    graph = tf.Graph()

    with graph.as_default():
        with graph.device(device_for_node_gpu if FLAGS.gpu else device_for_node_cpu):
            if 'w2t' in FLAGS.model:
                FLAGS.task = 'w2t'
            if 'w2w' in FLAGS.model:
                FLAGS.task = 'w2w'
            if 'w2targs' in FLAGS.model:
                FLAGS.task = 'w2targs'

            m = LogMessage(time=True)
            m.add('-' * 120)
            m.add('End to End Neural Dialogue Manager')
            m.add('  runs = {runs}'.format(runs=FLAGS.runs))
            m.add('  threads = {threads}'.format(threads=FLAGS.threads))
            m.add('  gpu = {gpu}'.format(gpu=FLAGS.gpu))
            m.add('  model = {model}'.format(model=FLAGS.model))
            m.add('  input = {i}'.format(i=FLAGS.input))
            m.add('  data_fraction = {data_fraction}'.format(data_fraction=FLAGS.data_fraction))
            m.add('  train_data = {train_data}'.format(train_data=FLAGS.train_data))
            m.add('  dev_data = {dev_data}'.format(dev_data=FLAGS.dev_data))
            m.add('  test_data = {test_data}'.format(test_data=FLAGS.test_data))
            m.add('  ontology = {ontology}'.format(ontology=FLAGS.ontology))
            m.add('  database = {database}'.format(database=FLAGS.database))
            m.add('  max_epochs = {max_epochs}'.format(max_epochs=FLAGS.max_epochs))
            m.add('  batch_size = {batch_size}'.format(batch_size=FLAGS.batch_size))
            m.add('  learning_rate = {learning_rate:2e}'.format(learning_rate=FLAGS.learning_rate))
            m.add('  decay = {decay}'.format(decay=FLAGS.decay))
            m.add('  beta1 = {beta1}'.format(beta1=FLAGS.beta1))
            m.add('  beta2 = {beta2}'.format(beta2=FLAGS.beta2))
            m.add('  epsilon = {epsilon}'.format(epsilon=FLAGS.epsilon))
            m.add('  pow = {pow}'.format(pow=FLAGS.pow))
            m.add('  dense_regularization = {regularization}'.format(regularization=FLAGS.dense_regularization))
            m.add('  sparse_regularization = {regularization}'.format(regularization=FLAGS.sparse_regularization))
            m.add('  max_gradient_norm = {max_gradient_norm}'.format(max_gradient_norm=FLAGS.max_gradient_norm))
            m.add('  use_inputs_prob_decay = {use_inputs_prob_decay}'.format(use_inputs_prob_decay=FLAGS.use_inputs_prob_decay))
            m.add('  dropout_keep_prob = {dropout_keep_prob}'.format(dropout_keep_prob=FLAGS.dropout_keep_prob))
            m.add('-' * 120)
            m.log()

            data = dataset.DSTC2(
                input=FLAGS.input,
                data_fraction=FLAGS.data_fraction,
                train_data_fn=FLAGS.train_data,
                dev_data_fn=FLAGS.dev_data,
                test_data_fn=FLAGS.test_data,
                ontology_fn=FLAGS.ontology,
                database_fn=FLAGS.database,
                batch_size=FLAGS.batch_size
            )

            m = LogMessage()
            m.add('Database # rows: {d}'.format(d=len(data.database)))
            m.add('Database # columns: {d}'.format(d=len(data.database_word2idx.keys())))
            m.add('History vocabulary size: {d}'.format(d=len(data.idx2word_history)))
            m.add('History args. vocabulary size: {d}'.format(d=len(data.idx2word_history_arguments)))
            m.add('State vocabulary size: {d}'.format(d=len(data.idx2word_state)))
            m.add('Action vocabulary size: {d}'.format(d=len(data.idx2word_action)))
            m.add('Action args. vocabulary size: {d}'.format(d=len(data.idx2word_action_arguments)))
            m.add('Action tmpl. vocabulary size: {d}'.format(d=len(data.idx2word_action_template)))
            m.add('-' * 120)
            m.log()

            if FLAGS.model == 'cnn-w2w':
                model = cnn_w2w.Model(data, FLAGS)
            elif FLAGS.model == 'rnn-w2w':
                model = rnn_w2w.Model(data, FLAGS)
            elif FLAGS.model == 'cnn12-w2t':
                model = cnn12_w2t.Model(data, FLAGS)
            elif FLAGS.model == 'cnn12-bn-w2targs':
                model = cnn12_bn_w2targs.Model(data, FLAGS)
            elif FLAGS.model == 'cnn12-bn-w2t':
                model = cnn12_bn_w2t.Model(data, FLAGS)
            elif FLAGS.model == 'cnn12-mp-bn-w2t':
                model = cnn12_mp_bn_w2t.Model(data, FLAGS)
            elif FLAGS.model == 'cnn13-bn-w2t':
                model = cnn13_bn_w2t.Model(data, FLAGS)
            elif FLAGS.model == 'cnn13-mp-bn-w2t':
                model = cnn13_mp_bn_w2t.Model(data, FLAGS)
            elif FLAGS.model == 'cnn12-att-a-w2t':
                model = cnn12_att_a_w2t.Model(data, FLAGS)
            elif FLAGS.model == 'cnn12-bn-att-a-w2targs':
                model = cnn12_bn_att_a_w2targs.Model(data, FLAGS)
            elif FLAGS.model == 'cnn12-bn-att-a-w2t':
                model = cnn12_bn_att_a_w2t.Model(data, FLAGS)
            elif FLAGS.model == 'cnn12-bn-att-a-bn-w2t':
                model = cnn12_bn_att_a_bn_w2t.Model(data, FLAGS)
            elif FLAGS.model == 'cnn12-mp-bn-att-a-w2t':
                model = cnn12_mp_bn_att_a_w2t.Model(data, FLAGS)
            elif FLAGS.model == 'cnn12-att-b-w2t':
                model = cnn12_att_b_w2t.Model(data, FLAGS)
            elif FLAGS.model == 'cnn23-mp-bn-w2t':
                model = cnn23_mp_bn_w2t.Model(data, FLAGS)
            elif FLAGS.model == 'rnn1-w2t':
                model = rnn1_w2t.Model(data, FLAGS)
            elif FLAGS.model == 'rnn2-w2t':
                model = rnn2_w2t.Model(data, FLAGS)
            else:
                raise Exception('Error: Unsupported model - {m}'.format(m=FLAGS.model))

            train(data, model)
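# The if/elif chain in main() above could equivalently be driven by a dictionary of model
# constructors. This is only a sketch of that alternative, reusing the module names already
# imported by this script; it is a suggestion, not how the repository is organised.
MODELS = {
    'cnn-w2w': cnn_w2w.Model,
    'rnn-w2w': rnn_w2w.Model,
    'cnn12-w2t': cnn12_w2t.Model,
    'cnn12-bn-w2targs': cnn12_bn_w2targs.Model,
    'cnn12-bn-w2t': cnn12_bn_w2t.Model,
    'cnn12-mp-bn-w2t': cnn12_mp_bn_w2t.Model,
    'cnn13-bn-w2t': cnn13_bn_w2t.Model,
    'cnn13-mp-bn-w2t': cnn13_mp_bn_w2t.Model,
    'cnn12-att-a-w2t': cnn12_att_a_w2t.Model,
    'cnn12-bn-att-a-w2targs': cnn12_bn_att_a_w2targs.Model,
    'cnn12-bn-att-a-w2t': cnn12_bn_att_a_w2t.Model,
    'cnn12-bn-att-a-bn-w2t': cnn12_bn_att_a_bn_w2t.Model,
    'cnn12-mp-bn-att-a-w2t': cnn12_mp_bn_att_a_w2t.Model,
    'cnn12-att-b-w2t': cnn12_att_b_w2t.Model,
    'cnn23-mp-bn-w2t': cnn23_mp_bn_w2t.Model,
    'rnn1-w2t': rnn1_w2t.Model,
    'rnn2-w2t': rnn2_w2t.Model,
}


def build_model(data, flags):
    # hypothetical helper equivalent to the if/elif chain in main()
    try:
        return MODELS[flags.model](data, flags)
    except KeyError:
        raise Exception('Error: Unsupported model - {m}'.format(m=flags.model))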
        summary_hash = new_summary_hash

        # run only if we have some stats
        m = LogMessage(time=True)
        m.add('-' * 80)
        m.add('Experiment summary')
        m.add('  runs = {runs}'.format(runs=FLAGS.runs))
        m.add()
        m.add('  epoch              min = {d}'.format(d=min(epoch)))
        m.add('                     max = {d}'.format(d=max(epoch)))
        m.add('  max_accuracy_epoch min = {d}'.format(d=min(max_accuracy_epoch_on_dev_data)))
        m.add('                     max = {d}'.format(d=max(max_accuracy_epoch_on_dev_data)))
        m.add()
        m.add('  dev acc max   = {f:6f}'.format(f=max(dev_accuracy)))
        m.add('          mean  = {f:6f}'.format(f=mean(dev_accuracy)))
        if FLAGS.runs > 1:
            m.add('          stdev = {f:6f}'.format(f=stdev(dev_accuracy)))
        m.add('          min   = {f:6f}'.format(f=min(dev_accuracy)))
        m.add()
        m.log()

        if done_runs >= len(ps):
            # stop this loop when all runs are finished
            break

    # for i, p in enumerate(ps):
    #     p.join()
    #     print('Joining process {d}'.format(d=i))

    print('All done')