def create_model(session, forward_only, dropout, model_path=None):
  """Create translation model and initialize or load parameters in session."""
  model = many2one_model.manySeq2SeqModel(
      FLAGS.input_vocab_size, FLAGS.output_vocab_size, _buckets,
      FLAGS.hidden_size, FLAGS.num_layers, FLAGS.embedding_size,
      FLAGS.max_gradient_norm, FLAGS.batch_size, FLAGS.learning_rate,
      FLAGS.learning_rate_decay_factor,
      forward_only=forward_only, dropout=dropout)
  ckpt = tf.train.get_checkpoint_state(FLAGS.train_dir)
  if ckpt and tf.gfile.Exists(ckpt.model_checkpoint_path) and not model_path:
    print("Reading model parameters from %s" % ckpt.model_checkpoint_path)
    model.saver.restore(session, ckpt.model_checkpoint_path)
    steps_done = int(ckpt.model_checkpoint_path.split('-')[-1])
    print("loaded from %d done steps" % (steps_done))
  elif (ckpt and tf.gfile.Exists(ckpt.model_checkpoint_path)
        and model_path is not None):
    model.saver.restore(session, model_path)
    steps_done = int(model_path.split('-')[-1])
    print("Reading model parameters from %s" % model_path)
    print("loaded from %d done steps" % (steps_done))
  else:
    print("Created model with fresh parameters.")
    session.run(tf.initialize_all_variables())
    steps_done = 0
  return model, steps_done
def get_model_graph(session, forward_only):
  """Build the many-to-one seq2seq model graph from FLAGS (variant with
  convolutional speech-feature parameters)."""
  filter_sizes = [int(x) for x in FLAGS.filter_sizes.strip().split('-')]
  model = many2one_model.manySeq2SeqModel(
      FLAGS.input_vocab_size, FLAGS.output_vocab_size, _buckets,
      FLAGS.text_hidden_size, FLAGS.speech_hidden_size, FLAGS.parse_hidden_size,
      FLAGS.text_num_layers, FLAGS.speech_num_layers, FLAGS.parse_num_layers,
      filter_sizes, FLAGS.num_filters, feat_dim, FLAGS.fixed_word_length,
      FLAGS.embedding_size, FLAGS.max_gradient_norm, FLAGS.batch_size,
      FLAGS.attention_vector_size, FLAGS.speech_bucket_scale,
      FLAGS.learning_rate, FLAGS.learning_rate_decay_factor, FLAGS.optimizer,
      use_lstm=FLAGS.lstm, output_keep_prob=FLAGS.output_keep_prob,
      forward_only=forward_only)
  return model
def get_model_graph(session, forward_only):
  """Build the many-to-one seq2seq model graph from FLAGS (variant without
  the convolutional speech-feature parameters)."""
  model = many2one_model.manySeq2SeqModel(
      FLAGS.input_vocab_size, FLAGS.output_vocab_size, _buckets,
      FLAGS.text_hidden_size, FLAGS.speech_hidden_size, FLAGS.parse_hidden_size,
      FLAGS.text_num_layers, FLAGS.speech_num_layers, FLAGS.parse_num_layers,
      FLAGS.embedding_size, FLAGS.max_gradient_norm, FLAGS.batch_size,
      FLAGS.learning_rate, FLAGS.learning_rate_decay_factor, FLAGS.optimizer,
      use_lstm=FLAGS.lstm, output_keep_prob=FLAGS.output_keep_prob,
      forward_only=forward_only)
  return model
def get_model_graph(session, forward_only):
  """Build the many-to-one seq2seq model graph (variant that reads
  hyperparameters from module-level names rather than FLAGS, with
  hard-coded convolution filter sizes)."""
  filter_sizes = [10, 25, 50]
  model = many2one_model.manySeq2SeqModel(
      input_vocab_size, output_vocab_size, _buckets,
      text_hidden_size, speech_hidden_size, parse_hidden_size,
      text_num_layers, speech_num_layers, parse_num_layers,
      filter_sizes, num_filters, feat_dim, fixed_word_length,
      embedding_size, max_gradient_norm, batch_size,
      attention_vector_size, speech_bucket_scale,
      learning_rate, learning_rate_decay_factor, optimizer,
      use_lstm=lstm, output_keep_prob=output_keep_prob,
      forward_only=forward_only)
  return model
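# Usage sketch for get_model_graph: the helper is typically called twice,
# once to build the training graph and once, under variable reuse, to build
# a forward-only graph that shares the same parameters. The wrapper name
# below is an illustrative assumption; train() further down uses the same
# pattern with create_model.
def build_train_and_eval_graphs(session):
  with tf.variable_scope("model", reuse=None):
    model_train = get_model_graph(session, forward_only=False)
  with tf.variable_scope("model", reuse=True):
    model_eval = get_model_graph(session, forward_only=True)
  return model_train, model_eval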
def create_model(session, forward_only, dropout, model_path=None):
  """Create translation model and initialize or load parameters in session."""
  model = many2one_model.manySeq2SeqModel(
      FLAGS.input_vocab_size, FLAGS.output_vocab_size, _buckets,
      FLAGS.hidden_size, FLAGS.num_layers, FLAGS.embedding_size,
      FLAGS.max_gradient_norm, FLAGS.batch_size, FLAGS.learning_rate,
      FLAGS.learning_rate_decay_factor,
      forward_only=forward_only, dropout=dropout)
  ckpt = tf.train.get_checkpoint_state(FLAGS.train_dir)
  if ckpt and tf.gfile.Exists(ckpt.model_checkpoint_path) and not model_path:
    # Resume from the latest checkpoint in the training directory.
    print("Reading model parameters from %s" % ckpt.model_checkpoint_path)
    model.saver.restore(session, ckpt.model_checkpoint_path)
    steps_done = int(ckpt.model_checkpoint_path.split('-')[-1])
    print("loaded from %d done steps" % (steps_done))
  elif (ckpt and tf.gfile.Exists(ckpt.model_checkpoint_path)
        and model_path is not None):
    # Restore from an explicitly supplied checkpoint path instead.
    model.saver.restore(session, model_path)
    steps_done = int(model_path.split('-')[-1])
    print("Reading model parameters from %s" % model_path)
    print("loaded from %d done steps" % (steps_done))
  else:
    print("Created model with fresh parameters.")
    session.run(tf.initialize_all_variables())
    steps_done = 0
    if FLAGS.warm_start:
      # Initialize selected variables from a pickled dict of saved weights.
      print("Warm start")
      saved_variables = pickle.load(open(FLAGS.warm_path, 'rb'))  # binary mode for pickle
      my_variables = [v for v in tf.trainable_variables()]
      for v in my_variables:
        v_warm = map_var_names(v.name)
        print(v.name)
        print(v_warm)
        print(v_warm in saved_variables)
        if v_warm in saved_variables:
          old_v = saved_variables[v_warm]
          if old_v.shape != v.get_shape():
            continue
          if "AttnOutputProjection" in v.name:
            continue  # don't want to reuse this
          print("Initializing variable with warm start:", v.name)
          session.run(v.assign(old_v))
  return model, steps_done
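# The warm-start file read in create_model is assumed to be a pickled dict
# mapping variable names (the names map_var_names resolves to) to numpy
# arrays. A minimal sketch for producing such a file from a live session;
# the function and file names here are illustrative assumptions.
def dump_warm_start_weights(session, out_path="warm_start.pkl"):
  saved_variables = {v.name: session.run(v) for v in tf.trainable_variables()}
  with open(out_path, "wb") as f:
    pickle.dump(saved_variables, f)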
def train(): """Train a sequence to sequence parser.""" with tf.Session(config=tf.ConfigProto( intra_op_parallelism_threads=NUM_THREADS)) as sess: # Create model. print("Creating %d layers of %d units." % (FLAGS.num_layers, FLAGS.hidden_size)) with tf.variable_scope("model", reuse=None): model, steps_done = create_model(sess, forward_only=False, dropout=True) print("Now create model_dev") with tf.variable_scope("model", reuse=True): model_dev = many2one_model.manySeq2SeqModel( FLAGS.input_vocab_size, FLAGS.output_vocab_size, _buckets, FLAGS.hidden_size, FLAGS.num_layers, FLAGS.embedding_size, FLAGS.max_gradient_norm, FLAGS.batch_size, FLAGS.learning_rate, FLAGS.learning_rate_decay_factor, forward_only=True, dropout=False) num_remaining_steps = FLAGS.max_steps - steps_done print("Num remaining steps: ", num_remaining_steps) step_time, loss = 0.0, 0.0 current_step = 0 previous_losses = [] epoch = 0 while current_step <= num_remaining_steps: epoch += 1 print("Doing epoch: ", epoch) np.random.shuffle(train_set) for bucket_id, bucket_offset in train_set: this_sample = train_sw[bucket_id][bucket_offset:bucket_offset + FLAGS.batch_size] this_batch_size = len(this_sample) # Fix bug: added EOS_ID for s in range(this_batch_size): this_sample[s][1].append(data_utils.EOS_ID) text_encoder_inputs, speech_encoder_inputs, decoder_inputs, target_weights, seq_len = model.get_batch( {bucket_id: this_sample}, bucket_id) encoder_inputs_list = [ text_encoder_inputs, speech_encoder_inputs ] start_time = time.time() _, step_loss, _ = model.step(sess, encoder_inputs_list, decoder_inputs, target_weights, seq_len, bucket_id, False) step_time += (time.time() - start_time) / FLAGS.steps_per_checkpoint loss += step_loss / FLAGS.steps_per_checkpoint current_step += 1 # Once in a while, we save checkpoint, print statistics, and run evals. #if current_step % FLAGS.steps_per_checkpoint == 0: if model.global_step.eval() % FLAGS.steps_per_checkpoint == 0: # Print statistics for the previous epoch. perplexity = math.exp(loss) if loss < 300 else float('inf') print( "global step %d learning rate %.4f step-time %.2f perplexity " "%.2f" % (model.global_step.eval(), model.learning_rate.eval(), step_time, perplexity)) # Decrease learning rate if no improvement was seen over last 3 times. if len(previous_losses) > 2 and loss > max( previous_losses[-3:]): sess.run(model.learning_rate_decay_op) previous_losses.append(loss) # Save checkpoint and zero timer and loss. save_time = time.time() checkpoint_path = os.path.join(FLAGS.train_dir, "many2one_parse.ckpt") model.saver.save(sess, checkpoint_path, global_step=model.global_step, write_meta_graph=False) step_time, loss = 0.0, 0.0 if current_step > num_remaining_steps: break # end of one epoch, do write decodes to do evalb print("Current step: ", current_step) globstep = model.global_step.eval() eval_batch_size = FLAGS.batch_size write_time = time.time() write_decode(model_dev, sess, dev_set, eval_batch_size, globstep) time_elapsed = time.time() - write_time print("decode writing time: ", time_elapsed) sys.stdout.flush()