Example #1
def create_model(session, forward_only, dropout, model_path=None):
    """Create translation model and initialize or load parameters in session."""
    model = many2one_model.manySeq2SeqModel(FLAGS.input_vocab_size,
                                            FLAGS.output_vocab_size,
                                            _buckets,
                                            FLAGS.hidden_size,
                                            FLAGS.num_layers,
                                            FLAGS.embedding_size,
                                            FLAGS.max_gradient_norm,
                                            FLAGS.batch_size,
                                            FLAGS.learning_rate,
                                            FLAGS.learning_rate_decay_factor,
                                            forward_only=forward_only,
                                            dropout=dropout)
    ckpt = tf.train.get_checkpoint_state(FLAGS.train_dir)
    if ckpt and tf.gfile.Exists(ckpt.model_checkpoint_path) and not model_path:
        print("Reading model parameters from %s" % ckpt.model_checkpoint_path)
        model.saver.restore(session, ckpt.model_checkpoint_path)
        steps_done = int(ckpt.model_checkpoint_path.split('-')[-1])
        print("loaded from %d done steps" % (steps_done))
    elif ckpt and tf.gfile.Exists(
            ckpt.model_checkpoint_path) and model_path is not None:
        print("Reading model parameters from %s" % model_path)
        model.saver.restore(session, model_path)
        steps_done = int(model_path.split('-')[-1])
        print("Loaded model trained for %d steps." % steps_done)
    else:
        print("Created model with fresh parameters.")
        session.run(tf.global_variables_initializer())
        steps_done = 0
    return model, steps_done
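For context, a minimal usage sketch, not taken from the original source: create_model expects an already-open TensorFlow session, and the call below is an illustrative assumption about how the surrounding script might use it.

# Hypothetical usage sketch: build the model inside a session, resuming
# from the latest checkpoint in FLAGS.train_dir if one exists.
with tf.Session() as sess:
    model, steps_done = create_model(sess, forward_only=False, dropout=True)
    print("Resuming training from step %d" % steps_done)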
Example #2
def get_model_graph(session, forward_only):
    filter_sizes = [int(x) for x in FLAGS.filter_sizes.strip().split('-')]
    model = many2one_model.manySeq2SeqModel(
        FLAGS.input_vocab_size,
        FLAGS.output_vocab_size,
        _buckets,
        FLAGS.text_hidden_size,
        FLAGS.speech_hidden_size,
        FLAGS.parse_hidden_size,
        FLAGS.text_num_layers,
        FLAGS.speech_num_layers,
        FLAGS.parse_num_layers,
        filter_sizes,
        FLAGS.num_filters,
        feat_dim,
        FLAGS.fixed_word_length,
        FLAGS.embedding_size,
        FLAGS.max_gradient_norm,
        FLAGS.batch_size,
        FLAGS.attention_vector_size,
        FLAGS.speech_bucket_scale,
        FLAGS.learning_rate,
        FLAGS.learning_rate_decay_factor,
        FLAGS.optimizer,
        use_lstm=FLAGS.lstm,
        output_keep_prob=FLAGS.output_keep_prob,
        forward_only=forward_only)
    return model
Example #3
def get_model_graph(session, forward_only):
  model = many2one_model.manySeq2SeqModel(
      FLAGS.input_vocab_size, FLAGS.output_vocab_size, _buckets,
      FLAGS.text_hidden_size, FLAGS.speech_hidden_size, FLAGS.parse_hidden_size, 
      FLAGS.text_num_layers, FLAGS.speech_num_layers, FLAGS.parse_num_layers,
      FLAGS.embedding_size, FLAGS.max_gradient_norm, FLAGS.batch_size,
      FLAGS.learning_rate, FLAGS.learning_rate_decay_factor,
      FLAGS.optimizer, use_lstm=FLAGS.lstm, 
      output_keep_prob=FLAGS.output_keep_prob, forward_only=forward_only)
  return model
Example #4
def get_model_graph(session, forward_only):
  filter_sizes = [10,25,50]
  model = many2one_model.manySeq2SeqModel(
      input_vocab_size, output_vocab_size, _buckets,
      text_hidden_size, speech_hidden_size, parse_hidden_size, 
      text_num_layers, speech_num_layers, parse_num_layers,
      filter_sizes, num_filters, feat_dim, fixed_word_length,  
      embedding_size, max_gradient_norm, batch_size,
      attention_vector_size, speech_bucket_scale, 
      learning_rate, learning_rate_decay_factor,
      optimizer, use_lstm=lstm, 
      output_keep_prob=output_keep_prob, forward_only=forward_only)
  return model
Example #5
def create_model(session, forward_only, dropout, model_path=None):
    """Create translation model and initialize or load parameters in session."""
    model = many2one_model.manySeq2SeqModel(FLAGS.input_vocab_size,
                                            FLAGS.output_vocab_size,
                                            _buckets,
                                            FLAGS.hidden_size,
                                            FLAGS.num_layers,
                                            FLAGS.embedding_size,
                                            FLAGS.max_gradient_norm,
                                            FLAGS.batch_size,
                                            FLAGS.learning_rate,
                                            FLAGS.learning_rate_decay_factor,
                                            forward_only=forward_only,
                                            dropout=dropout)
    ckpt = tf.train.get_checkpoint_state(FLAGS.train_dir)
    if ckpt and tf.gfile.Exists(ckpt.model_checkpoint_path) and not model_path:
        print("Reading model parameters from %s" % ckpt.model_checkpoint_path)
        model.saver.restore(session, ckpt.model_checkpoint_path)
        steps_done = int(ckpt.model_checkpoint_path.split('-')[-1])
        print("loaded from %d done steps" % (steps_done))
    elif ckpt and tf.gfile.Exists(
            ckpt.model_checkpoint_path) and model_path is not None:
        print("Reading model parameters from %s" % model_path)
        model.saver.restore(session, model_path)
        steps_done = int(model_path.split('-')[-1])
        print("Loaded model trained for %d steps." % steps_done)
    else:
        print("Created model with fresh parameters.")
        session.run(tf.global_variables_initializer())
        steps_done = 0

        if FLAGS.warm_start:
            print("Warm start")
            saved_variables = pickle.load(open(FLAGS.warm_path, 'rb'))
            my_variables = tf.trainable_variables()
            for v in my_variables:
                v_warm = map_var_names(v.name)
                print(v.name)
                print(v_warm)
                print(v_warm in saved_variables)
                if v_warm in saved_variables:
                    old_v = saved_variables[v_warm]
                    if old_v.shape != v.get_shape(): continue
                    if "AttnOutputProjection" in v.name:
                        continue  # don't want to reuse this
                    print("Initializing variable with warm start:", v.name)
                    session.run(v.assign(old_v))

    return model, steps_done
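Example #5 calls map_var_names, a helper defined elsewhere in the source file and not shown on this page. As a hedged sketch only: one plausible implementation maps a variable's current scoped name onto the key stored in the warm-start pickle, for instance by stripping the enclosing "model/" scope and the ":0" tensor suffix. The original mapping may well differ.

# Hypothetical helper (the original implementation is not shown here):
# translate a name like "model/embedding/weights:0" into the key used
# in the warm-start pickle, e.g. "embedding/weights".
def map_var_names(name):
    if name.startswith("model/"):
        name = name[len("model/"):]
    return name.split(":")[0]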
Example #6
def train():
    """Train a sequence to sequence parser."""

    with tf.Session(config=tf.ConfigProto(
            intra_op_parallelism_threads=NUM_THREADS)) as sess:
        # Create model.
        print("Creating %d layers of %d units." %
              (FLAGS.num_layers, FLAGS.hidden_size))
        with tf.variable_scope("model", reuse=None):
            model, steps_done = create_model(sess,
                                             forward_only=False,
                                             dropout=True)
        print("Now create model_dev")
        with tf.variable_scope("model", reuse=True):
            model_dev = many2one_model.manySeq2SeqModel(
                FLAGS.input_vocab_size,
                FLAGS.output_vocab_size,
                _buckets,
                FLAGS.hidden_size,
                FLAGS.num_layers,
                FLAGS.embedding_size,
                FLAGS.max_gradient_norm,
                FLAGS.batch_size,
                FLAGS.learning_rate,
                FLAGS.learning_rate_decay_factor,
                forward_only=True,
                dropout=False)

        num_remaining_steps = FLAGS.max_steps - steps_done
        print("Num remaining steps: ", num_remaining_steps)
        step_time, loss = 0.0, 0.0
        current_step = 0
        previous_losses = []
        epoch = 0

        while current_step <= num_remaining_steps:
            epoch += 1
            print("Doing epoch: ", epoch)
            np.random.shuffle(train_set)

            for bucket_id, bucket_offset in train_set:
                this_sample = train_sw[bucket_id][bucket_offset:bucket_offset +
                                                  FLAGS.batch_size]
                this_batch_size = len(this_sample)
                # Bug fix: append EOS_ID to each target sequence.
                for s in range(this_batch_size):
                    this_sample[s][1].append(data_utils.EOS_ID)

                text_encoder_inputs, speech_encoder_inputs, decoder_inputs, target_weights, seq_len = model.get_batch(
                    {bucket_id: this_sample}, bucket_id)
                encoder_inputs_list = [
                    text_encoder_inputs, speech_encoder_inputs
                ]
                start_time = time.time()
                _, step_loss, _ = model.step(sess, encoder_inputs_list,
                                             decoder_inputs, target_weights,
                                             seq_len, bucket_id, False)
                step_time += (time.time() -
                              start_time) / FLAGS.steps_per_checkpoint
                loss += step_loss / FLAGS.steps_per_checkpoint
                current_step += 1

                # Once in a while, we save checkpoint, print statistics, and run evals.
                if model.global_step.eval() % FLAGS.steps_per_checkpoint == 0:
                    # Print statistics for the last checkpoint interval.
                    perplexity = math.exp(loss) if loss < 300 else float('inf')
                    print(
                        "global step %d learning rate %.4f step-time %.2f perplexity "
                        "%.2f" %
                        (model.global_step.eval(), model.learning_rate.eval(),
                         step_time, perplexity))
                    # Decrease learning rate if no improvement was seen over last 3 times.
                    if len(previous_losses) > 2 and loss > max(
                            previous_losses[-3:]):
                        sess.run(model.learning_rate_decay_op)
                    previous_losses.append(loss)
                    # Save checkpoint and zero timer and loss.
                    save_time = time.time()
                    checkpoint_path = os.path.join(FLAGS.train_dir,
                                                   "many2one_parse.ckpt")
                    model.saver.save(sess,
                                     checkpoint_path,
                                     global_step=model.global_step,
                                     write_meta_graph=False)
                    step_time, loss = 0.0, 0.0

                if current_step > num_remaining_steps: break

            # End of epoch: write decoded parses so they can be scored with evalb.
            print("Current step: ", current_step)
            globstep = model.global_step.eval()
            eval_batch_size = FLAGS.batch_size
            write_time = time.time()
            write_decode(model_dev, sess, dev_set, eval_batch_size, globstep)
            time_elapsed = time.time() - write_time
            print("decode writing time: ", time_elapsed)
            sys.stdout.flush()
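The examples above reference module-level names such as FLAGS, _buckets, train_set, train_sw, and dev_set that are defined elsewhere in the source files. A minimal sketch of how train() would typically be launched, assuming the standard TensorFlow 1.x entry-point convention rather than anything shown in the original:

# Hypothetical entry point in the usual TF 1.x style: tf.app.run parses
# flags and then invokes main with the remaining argv.
def main(_):
    train()

if __name__ == "__main__":
    tf.app.run()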