def predict(self, sess, X, y=None):
    """Make predictions from the provided model."""
    losses = []
    results = []
    if np.any(y):
        data = data_utils.data_iterator(X, y, batch_size=self.config.batch_size,
                                        label_size=self.config.label_size, shuffle=False)
    else:
        data = data_utils.data_iterator(X, batch_size=self.config.batch_size,
                                        label_size=self.config.label_size, shuffle=False)
    for step, (x, y) in enumerate(data):
        feed = self.create_feed_dict(input_batch=x)
        if np.any(y):
            feed[self.labels_placeholder] = y
            loss, preds = sess.run([self.loss, self.predictions], feed_dict=feed)
            losses.append(loss)
        else:
            preds = sess.run(self.predictions, feed_dict=feed)
        predicted_indices = preds.argmax(axis=1)
        results.extend(predicted_indices)
    return np.mean(losses), results
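# predict() above (and the classifier-style run_epoch further below) consume mini-batches
# from data_utils.data_iterator, whose implementation is not shown in these snippets.
# A minimal sketch of the interface assumed here -- features plus optional integer class
# labels converted to one-hot rows of width label_size -- might look like the following.
# This helper is illustrative only, not the project's actual code.
import numpy as np

def data_iterator(orig_X, orig_y=None, batch_size=32, label_size=2, shuffle=False):
    # Optionally shuffle the examples before batching.
    if shuffle:
        indices = np.random.permutation(len(orig_X))
        data_X = orig_X[indices]
        data_y = orig_y[indices] if np.any(orig_y) else None
    else:
        data_X = orig_X
        data_y = orig_y
    total_steps = int(np.ceil(len(data_X) / float(batch_size)))
    for step in range(total_steps):
        batch_start = step * batch_size
        x = data_X[batch_start:batch_start + batch_size]
        y = None
        if np.any(data_y):
            # Convert integer class indices to one-hot rows of width label_size.
            y_indices = data_y[batch_start:batch_start + batch_size]
            y = np.zeros((len(x), label_size), dtype=np.int32)
            y[np.arange(len(x)), y_indices] = 1
        yield x, y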
def run_epoch(session, m, data, eval_op, verbose=False):
    """Runs the model on the given data."""
    epoch_size = len(data) // m.batch_size
    start_time = time.time()
    costs = 0.0
    iters = 0
    state = m.initial_state.eval()
    for step, (x, y) in enumerate(data_utils.data_iterator(data, m.batch_size, m.num_steps)):
        cost, state, _ = session.run([m.cost, m.final_state, eval_op],
                                     {m.input_data: x,
                                      m.targets: y,
                                      m.initial_state: state})
        costs += cost
        iters += m.num_steps
        if verbose and step % 1 == 0:
            print("current step: %s, %.3f perplexity: %.3f speed: %.0f wps" %
                  (m.global_step.eval(), step * 1.0 / epoch_size,
                   np.exp(costs / iters),
                   iters * m.batch_size / (time.time() - start_time)))
    if m.is_training:
        print('dumping model..')
        m.saver.save(session, os.path.join(FLAGS.train_dir, "word_seg.ckpt"),
                     global_step=m.global_step)
    return np.exp(costs / iters)
def run_epoch(char_model, session, train_op, loss, raw_data, id_word_map,
              char_id_map, batch_size, learning_rate):
    epoch_size = (len(raw_data) // batch_size - 1) // char_model.seq_len
    total_cost = 0.0
    total_len = 0.0
    for i, (X_batch, y_batch) in enumerate(
            data_utils.data_iterator(raw_data, batch_size, char_model.seq_len)):
        X_char_batch = data_utils.word_ids_to_char_ids(
            X_batch, id_word_map, char_id_map, FLAGS.max_word_len)
        if char_model.is_training:
            _, cost = session.run(
                [train_op, loss],
                feed_dict={
                    char_model.input_X: X_char_batch,
                    char_model.input_y: y_batch,
                    char_model.learning_rate: learning_rate})
        else:
            cost = session.run(
                loss,
                feed_dict={
                    char_model.input_X: X_char_batch,
                    char_model.input_y: y_batch})
        if i % (epoch_size // 10) == 10:
            print("Step %d, cost: %f" % (i, cost))
        total_cost += cost
        total_len += char_model.seq_len
    ppl = calc_ppl(total_cost, total_len)
    return ppl
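# calc_ppl is referenced above but not shown in this snippet. Assuming total_cost is a
# sum of per-step average cross-entropy losses and total_len counts the steps they were
# summed over, a minimal sketch of the perplexity computation would be:
import numpy as np

def calc_ppl(total_cost, total_len):
    # Perplexity is the exponential of the average per-step cross-entropy.
    return np.exp(total_cost / total_len)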
def run_epoch(self, session, input_data, input_labels, shuffle=True, verbose=True, train_op=None):
    """Runs one epoch of training.

    Args:
        session: tf.Session() object
        input_data: np.ndarray of shape (n_samples, n_features)
        input_labels: np.ndarray of shape (n_samples, n_classes)
    Returns:
        average_loss: scalar. Average minibatch loss of model on epoch.
    """
    # And then after everything is built, start the training loop.
    dp = self.config.dropout
    state = self.initial_state.eval()
    total_loss = []
    total_correct_examples = 0
    total_processed_examples = 0
    total_steps = len(input_data) / self.config.batch_size
    tempData = data_utils.data_iterator(input_data, input_labels,
                                        batch_size=self.config.batch_size,
                                        label_size=self.config.n_classes,
                                        shuffle=shuffle)
    for step, (input_batch, label_batch) in enumerate(tempData):
        feed_dict = self.create_feed_dict(input_batch, label_batch, state, dropout=dp)
        loss, _ = session.run([self.loss, self.train_op], feed_dict=feed_dict)
        total_loss.append(loss)
    return np.mean(total_loss)
def run_eval(model, session, data, batch_size=1, num_steps=120, valid=True):
    """Runs the model over validation or test data and reports perplexity.

    Parameters
    ----------
    model : language model to evaluate
    session : tf.Session() object
    data : evaluation data consumed by data_utils.data_iterator
    batch_size : mini-batch size used for evaluation
    num_steps : number of unrolled steps per batch
    valid : if True, report results as validation; otherwise as test

    Returns
    -------
    eval : average loss over the evaluated batches
    ppxs : perplexity, i.e. exp(average loss)
    total_words : number of target words evaluated
    """
    if valid:
        eval_type = "Valid"
        print("\nValidating:\n")
    else:
        eval_type = "Test"
        print("\nTesting:\n")
    costs = 0.0
    iters = 0
    total_words = 0
    for step, (x, y, w, words) in enumerate(data_utils.data_iterator(data, batch_size, num_steps)):
        state = model.initial_state_train.eval()
        cost, _ = model.valid_step(session=session, lm_inputs=x, lm_targets=y,
                                   mask=w, state=state, dropout_rate=0.0)
        total_words += words
        costs += cost
        iters = step + 1
    eval = costs / iters
    ppxs = numpy.exp(eval)
    if ppxs > 10000.0:
        print("%s PPX after epoch #%d: > 10000.0 - # words %d\n" %
              (eval_type, model.epoch.eval(), total_words))
    else:
        print("%s PPX after epoch #%d: %f - # words %d\n" %
              (eval_type, model.epoch.eval(), ppxs, total_words))
    return eval, ppxs, total_words
def run_epoch(session, m, data, eval_op, verbose=False):
    """Runs the model on the given data."""
    epoch_size = ((len(data) // m.batch_size) - 1) // m.num_steps
    start_time = time.time()
    costs = 0.0
    iters = 0
    state = m.initial_state.eval()
    for step, (x, y) in enumerate(
            data_utils.data_iterator(data, m.batch_size, m.num_steps)):
        cost, state, _ = session.run([m.cost, m.final_state, eval_op], {
            m.input_data: x,
            m.targets: y,
            m.initial_state: state
        })
        costs += cost
        iters += m.num_steps
        if verbose and step % (epoch_size // 10) == 10:
            print("%.3f perplexity: %.3f speed: %.0f wps" %
                  (step * 1.0 / epoch_size, np.exp(costs / iters),
                   iters * m.batch_size / (time.time() - start_time)))
    return np.exp(costs / iters)
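# The two language-model run_epoch variants above consume a sequence-style iterator that
# yields (x, y) arrays of shape [batch_size, num_steps], with y shifted one token ahead
# of x. That data_utils.data_iterator is not shown in these snippets; a minimal sketch of
# the assumed contract, modeled on the classic PTB iterator, could look like the helper
# below (illustrative only -- it also does not model the richer (x, y, mask, word-count)
# tuples consumed by run_eval and train_lm further down).
import numpy as np

def sequence_iterator(raw_data, batch_size, num_steps):
    raw_data = np.array(raw_data, dtype=np.int32)
    batch_len = len(raw_data) // batch_size
    # Reshape the flat token stream into batch_size parallel sequences.
    data = raw_data[:batch_size * batch_len].reshape(batch_size, batch_len)
    epoch_size = (batch_len - 1) // num_steps
    for i in range(epoch_size):
        x = data[:, i * num_steps:(i + 1) * num_steps]
        # Targets are the inputs shifted by one token.
        y = data[:, i * num_steps + 1:(i + 1) * num_steps + 1]
        yield x, y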
def train_lm(FLAGS=None):
    assert FLAGS is not None
    if not os.path.exists(FLAGS.train_dir):
        os.makedirs(FLAGS.train_dir)
    if not os.path.exists(FLAGS.best_models_dir):
        os.makedirs(FLAGS.best_models_dir)
    print('Preparing data in %s' % FLAGS.data_dir)
    src_train, src_dev, src_test = data_utils.prepare_lm_data(FLAGS)
    with tf.Session(config=tf.ConfigProto(allow_soft_placement=True,
                                          log_device_placement=False)) as sess:
        nan_detected = False
        print('Creating layers.')
        initializer = tf.random_uniform_initializer(-FLAGS.init_scale, FLAGS.init_scale)
        model = build_ops.create_lm_model(sess, is_training=True, FLAGS=FLAGS,
                                          initializer=initializer)
        print('Reading development and training data (limit: %d).' % FLAGS.max_train_data_size)
        train_data = data_utils.read_lm_data(src_train,
                                             max_size=FLAGS.max_train_data_size,
                                             FLAGS=FLAGS)
        train_total_size = len(train_data)
        m = max([len(s) for s in train_data])
        a = float(sum([len(s) for s in train_data])) / len(train_data)
        print("Train max length : %d - (Avg. %.2f)" % (m, a))
        valid_data = data_utils.read_lm_data(src_dev, FLAGS=FLAGS)
        m = max([len(s) for s in valid_data])
        print("Valid max length : %d" % m)
        test_data = data_utils.read_lm_data(src_test, FLAGS=FLAGS)
        m = max([len(s) for s in test_data])
        print("Test max length : %d" % m)
        epoch_size = train_total_size / FLAGS.batch_size
        print("Total number of updates per epoch: %d" % epoch_size)
        print("Optimization started...")
        total_loss = model.current_loss.eval()
        while model.epoch.eval() < FLAGS.max_epochs:
            saved = False
            n_target_words = 0
            state_ = model.initial_state_train.eval()
            for step, (x, y, w, words) in enumerate(
                    data_utils.data_iterator(train_data, model.batch_size, model.num_steps)):
                start_time = time.time()
                if FLAGS.reset_state:
                    state = model.initial_state_train.eval()
                else:
                    state = state_
                n_target_words += words
                loss, state_ = model.train_step(session=sess, lm_inputs=x, lm_targets=y,
                                                mask=w, state=state,
                                                dropout_rate=FLAGS.dropout_rate)
                if numpy.isnan(loss) or numpy.isinf(loss):
                    print('NaN detected')
                    nan_detected = True
                    break
                total_loss += loss
                current_global_step = model.global_step.eval()
                if current_global_step % FLAGS.steps_verbosity == 0:
                    end_time = time.time()
                    total_time = end_time - start_time
                    target_words_speed = n_target_words / total_time
                    n_target_words = 0
                    avg_step_time = total_time / FLAGS.steps_verbosity
                    avg_loss = total_loss / current_global_step
                    ppx = numpy.exp(avg_loss)
                    sess.run(model.current_loss.assign(total_loss))
                    sess.run(model.current_ppx.assign(ppx))
                    if ppx > 1000.0:
                        print('epoch %d global step %d lr.rate %.4f avg.loss %.4f '
                              'avg. ppx > 1000.0 avg. step time %.2f - avg. %.2f words/sec' %
                              (model.epoch.eval(), current_global_step,
                               model.learning_rate.eval(), avg_loss, avg_step_time,
                               target_words_speed))
                    else:
                        print('epoch %d global step %d lr.rate %.4f avg.loss %.4f '
                              'avg. ppx %.4f avg. step time %.2f - avg. %.2f words/sec' %
                              (model.epoch.eval(), current_global_step,
                               model.learning_rate.eval(), avg_loss, ppx, avg_step_time,
                               target_words_speed))
                if FLAGS.steps_per_checkpoint > 0:
                    if current_global_step % FLAGS.steps_per_checkpoint == 0:
                        # Save checkpoint
                        checkpoint_path = os.path.join(FLAGS.train_dir, FLAGS.model_name)
                        model.saver.save(sess, checkpoint_path, global_step=model.global_step)
                        saved = True
                if FLAGS.steps_per_validation > 0:
                    if current_global_step % FLAGS.steps_per_validation == 0:
                        valid_loss, valid_ppx, n_words = run_eval(
                            model=model, session=sess, data=valid_data,
                            batch_size=FLAGS.batch_size, num_steps=FLAGS.num_valid_steps)
                        test_loss, test_ppx, n_words = run_eval(
                            model=model, session=sess, data=test_data,
                            batch_size=FLAGS.batch_size, num_steps=FLAGS.num_valid_steps,
                            valid=False)
                        should_stop = check_early_stop(model=model, session=sess,
                                                       ppx=valid_ppx, flags=FLAGS)
                        if should_stop:
                            break
            ep = model.epoch.eval()
            print("Epoch %d finished... " % ep)
            should_stop = False
            # updating epoch number
            sess.run(model.epoch_update_op)
            ep_new = model.epoch.eval()
            if FLAGS.save_each_epoch:
                # Save checkpoint
                print("Saving current model...")
                checkpoint_path = os.path.join(FLAGS.train_dir, FLAGS.model_name)
                model.saver.save(sess, checkpoint_path, global_step=model.global_step)
            if FLAGS.eval_after_each_epoch:
                valid_loss, valid_ppx, n_words = run_eval(
                    model=model, session=sess, data=valid_data,
                    batch_size=FLAGS.batch_size, num_steps=FLAGS.num_valid_steps)
                best_ppx = model.best_eval_ppx.eval()
                with codecs.open(FLAGS.best_models_dir + FLAGS.model_name + ".txt",
                                 "a", encoding="utf-8") as f:
                    f.write("PPX after epoch #%d: %f (Current best PPX: %f)\n" %
                            (ep - 1, valid_ppx, best_ppx))
                if FLAGS.test_after_each_epoch:
                    test_loss, test_ppx, n_words = run_eval(
                        model=model, session=sess, data=test_data,
                        batch_size=FLAGS.batch_size, num_steps=FLAGS.num_valid_steps,
                        valid=False)
                    with codecs.open(FLAGS.best_models_dir + FLAGS.model_name + ".txt",
                                     "a", encoding="utf-8") as f:
                        f.write("PPX after epoch #%d: %f \n" % (ep - 1, test_ppx))
                if FLAGS.steps_per_validation == 0:
                    # if we are not validating after some steps, we validate after each epoch,
                    # therefore we must check the early stop here
                    should_stop = check_early_stop(model=model, session=sess,
                                                   ppx=valid_ppx, flags=FLAGS)
            if ep + 1 >= FLAGS.max_epochs:
                if not saved:
                    # Save checkpoint
                    checkpoint_path = os.path.join(FLAGS.train_dir, FLAGS.model_name)
                    model.saver.save(sess, checkpoint_path, global_step=model.global_step)
                break
            if FLAGS.start_decay > 0:
                if FLAGS.stop_decay > 0:
                    if FLAGS.start_decay <= model.epoch.eval() <= FLAGS.stop_decay:
                        sess.run(model.learning_rate_decay_op)
                else:
                    if FLAGS.start_decay <= model.epoch.eval():
                        sess.run(model.learning_rate_decay_op)
            if should_stop:
                break
            print("Epoch %d started..." % ep_new)
            sess.run(model.samples_seen_reset_op)
        # when we ran the right number of epochs or we reached early stop we finish training
        print("\nTraining finished!!\n")
        if not nan_detected:
            # Save checkpoint
            checkpoint_path = os.path.join(FLAGS.train_dir, FLAGS.model_name)
            model.saver.save(sess, checkpoint_path, global_step=model.global_step)
            avg_eval_loss, avg_ppx, total_words = run_eval(model=model, session=sess,
                                                           data=valid_data,
                                                           batch_size=FLAGS.batch_size,
                                                           num_steps=FLAGS.num_valid_steps)
            print(' eval: averaged valid. loss %.8f\n' % avg_eval_loss)
            print("\n##### Test Results: #####\n")
            avg_test_loss, test_ppx, total_words = run_eval(model=model, session=sess,
                                                            data=test_data,
                                                            batch_size=FLAGS.batch_size,
                                                            num_steps=FLAGS.num_valid_steps,
                                                            valid=False)
            print(' eval: averaged test loss %.8f\n' % avg_test_loss)
            sys.stdout.flush()
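# check_early_stop is called by train_lm above but not shown in these snippets. A minimal
# sketch of what it might do, assuming the model exposes a best_eval_ppx variable (used
# above) plus a hypothetical estop_counter variable with an estop_counter_update_op, and
# that FLAGS defines a hypothetical early_stop_patience threshold:
def check_early_stop(model, session, ppx, flags):
    # Improvement: remember the new best perplexity and reset the patience counter.
    if ppx < model.best_eval_ppx.eval():
        session.run(model.best_eval_ppx.assign(ppx))
        session.run(model.estop_counter.assign(0))
        return False
    # No improvement: bump the counter and stop once patience is exhausted.
    session.run(model.estop_counter_update_op)
    return model.estop_counter.eval() >= flags.early_stop_patience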
    val_summary_writer = tf.summary.FileWriter(SUMMARIESDIR + '/val')
else:
    train_summary_writer = None
    val_summary_writer = None

best_val_score = np.inf
best_val_loss = np.inf
best_epoch = 0
wait = 0

print('Training...')
for epoch in range(NB_EPOCH):
    batch_iter = 0
    data_iter = data_iterator([train_u_indices, train_v_indices, train_labels],
                              batch_size=BATCHSIZE)
    try:
        while True:
            t = time.time()
            train_u_indices_batch, train_v_indices_batch, train_labels_batch = data_iter.next()
            # Collect all user and item nodes for train set
            train_u = list(set(train_u_indices_batch))
            train_v = list(set(train_v_indices_batch))
            train_u_dict = {n: i for i, n in enumerate(train_u)}
            train_v_dict = {n: i for i, n in enumerate(train_v)}
            train_u_indices_batch = np.array(