Example no. 1
def create_model(session, forward_only):
    """Create translation model and initialize or load parameters in session."""
    dtype = tf.float16 if FLAGS.use_fp16 else tf.float32

    model = seq2seq_model.Seq2SeqModel(FLAGS.set1_vocab_size,
                                       FLAGS.set2_vocab_size,
                                       _buckets,
                                       FLAGS.size,
                                       FLAGS.num_layers,
                                       FLAGS.max_gradient_norm,
                                       FLAGS.batch_size,
                                       FLAGS.learning_rate,
                                       FLAGS.learning_rate_decay_factor,
                                       forward_only=forward_only)
    #dtype=dtype)

    # This should handle padding internally
    ckpt = tf.train.get_checkpoint_state(FLAGS.train_dir)

    if ckpt and tf.gfile.Exists(ckpt.model_checkpoint_path):
        print("Reading model parameters from %s" % ckpt.model_checkpoint_path)
        model.saver.restore(session, ckpt.model_checkpoint_path)
    else:
        print("Created model with fresh parameters.")
        #session.run(tf.initialize_all_variables())
        print("Also doing custom embedding tasks")
        embeddings = tf.Variable(tf.random_uniform(w2v_X.shape,
                                                   minval=-0.1,
                                                   maxval=0.1),
                                 trainable=False)
        session.run(tf.initialize_all_variables())
        session.run(embeddings.assign(w2v_X))
    return model
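Example 1 copies a pretrained embedding matrix w2v_X into a frozen variable, but the matrix itself is built elsewhere. A minimal sketch of that preparation, assuming the pretrained vectors are available as a plain dict of NumPy arrays (all names, words, and sizes below are hypothetical):

import numpy as np

# Hypothetical preparation of w2v_X: one row of pretrained weights per
# vocabulary id, stacked into the (vocab_size, embedding_dim) matrix that
# Example 1 assigns to the frozen `embeddings` variable.
vocab = ["_PAD", "_GO", "_EOS", "_UNK", "hello", "world"]
pretrained = {w: np.random.rand(128).astype(np.float32) for w in vocab}
w2v_X = np.stack([pretrained[w] for w in vocab])  # shape (6, 128)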
Example no. 2
    def create_model(self, forward_only):
        """Create translation model and initialize or load parameters in session."""
        dtype = tf.float16 if self.use_fp16 else tf.float32
        print("Creating %d layers of %d units." % (self.num_layers, self.size))

        model = seq2seq_model.Seq2SeqModel(
            self.input_vocab_size,
            self.output_vocab_size,
            self.buckets,
            self.size,
            self.num_layers,
            self.max_gradient_norm,
            self.batch_size,
            self.lr,
            self.lr_decay,
            forward_only=forward_only,
            dtype=dtype)
        ckpt = tf.train.get_checkpoint_state(self.train_dir)
        if ckpt and tf.train.checkpoint_exists(ckpt.model_checkpoint_path):
            print("Reading model parameters from %s" % ckpt.model_checkpoint_path)
            model.saver.restore(self.session, ckpt.model_checkpoint_path)
        else:
            print("Created model with fresh parameters.")
            self.session.run(tf.initialize_all_variables())
        self.model = model
        return model
Example no. 3
def self_test():
    """Test the translation model."""
    with tf.Session() as sess:
        print("Self-test for neural translation model.")
        # Create model with vocabularies of 10, 2 small buckets, 2 layers of 32.
        model = seq2seq_model.Seq2SeqModel(10,
                                           10, [(3, 3), (6, 6)],
                                           32,
                                           2,
                                           5.0,
                                           32,
                                           0.3,
                                           0.99,
                                           num_samples=8)
        sess.run(tf.global_variables_initializer())

        # Fake data set for both the (3, 3) and (6, 6) bucket.
        data_set = ([([1, 1], [2, 2]), ([3, 3], [4]),
                     ([5], [6])], [([1, 1, 1, 1, 1], [2, 2, 2, 2, 2]),
                                   ([3, 3, 3], [5, 6])])
        for _ in xrange(5):  # Train the fake model for 5 steps.
            bucket_id = random.choice([0, 1])
            encoder_inputs, decoder_inputs, target_weights = model.get_batch(
                data_set, bucket_id)
            model.step(sess, encoder_inputs, decoder_inputs, target_weights,
                       bucket_id, False)
Example no. 4
def self_test():

    with tf.Session() as sess:
        print("Self-test for neural translation model.")

        model = seq2seq_model.Seq2SeqModel(10,
                                           10, [(3, 3), (6, 6)],
                                           32,
                                           2,
                                           5.0,
                                           32,
                                           0.3,
                                           0.99,
                                           num_samples=8)
        sess.run(tf.initialize_all_variables())

        data_set = ([([1, 1], [2, 2]), ([3, 3], [4]),
                     ([5], [6])], [([1, 1, 1, 1, 1], [2, 2, 2, 2, 2]),
                                   ([3, 3, 3], [5, 6])])
        for _ in xrange(5):
            bucket_id = random.choice([0, 1])
            encoder_inputs, decoder_inputs, target_weights = model.get_batch(
                data_set, bucket_id)
            model.step(sess, encoder_inputs, decoder_inputs, target_weights,
                       bucket_id, False)
Example no. 5
    def create_model(self, session, forward_only):
        self.model = seq2seq_model.Seq2SeqModel(
            FLAGS.in_vocab_size,
            FLAGS.out_vocab_size,
            _buckets,
            FLAGS.size,
            FLAGS.num_layers,
            FLAGS.max_gradient_norm,
            FLAGS.batch_size,
            FLAGS.learning_rate,
            FLAGS.learning_rate_decay_factor,
            forward_only=forward_only)

        ckpt = tf.train.get_checkpoint_state(FLAGS.train_dir)
        if ckpt:
            # Make a relative checkpoint path absolute before restoring.
            if not os.path.isabs(ckpt.model_checkpoint_path):
                ckpt.model_checkpoint_path = os.path.abspath(
                    os.path.join(os.getcwd(), ckpt.model_checkpoint_path))
            print("Reading model parameters from %s" %
                  ckpt.model_checkpoint_path)
            self.model.saver.restore(session, ckpt.model_checkpoint_path)
        else:
            print("Created model with fresh parameters.")
            session.run(tf.initialize_all_variables())
        return self.model
Example no. 6
    def load_decode_model(self):
        """Load G2P model and initialize or load parameters in session."""
        self.batch_size = 1  # We decode one word at a time.
        #Load model parameters.
        num_layers, size = data_utils.load_params(self.model_dir)
        # Load vocabularies
        print("Loading vocabularies from %s" % self.model_dir)
        self.gr_vocab = data_utils.load_vocabulary(
            os.path.join(self.model_dir, "vocab.grapheme"))
        self.ph_vocab = data_utils.load_vocabulary(
            os.path.join(self.model_dir, "vocab.phoneme"))

        self.rev_ph_vocab =\
          data_utils.load_vocabulary(os.path.join(self.model_dir, "vocab.phoneme"),
                                     reverse=True)

        self.session = tf.Session()

        # Restore model.
        print("Creating %d layers of %d units." % (num_layers, size))
        self.model = seq2seq_model.Seq2SeqModel(len(self.gr_vocab),
                                                len(self.ph_vocab),
                                                self._BUCKETS,
                                                size,
                                                num_layers,
                                                0,
                                                self.batch_size,
                                                0,
                                                0,
                                                forward_only=True)
        self.model.saver = tf.train.Saver(tf.all_variables(), max_to_keep=1)
        # Check for saved models and restore them.
        print("Reading model parameters from %s" % self.model_dir)
        self.model.saver.restore(self.session,
                                 os.path.join(self.model_dir, "model"))
Example no. 7
def create_model(session,
                 source_vocab_size,
                 target_vocab_size,
                 forward_only=True):
    """Create translation model and initialize or load parameters"""
    dtype = tf.float16 if FLAGS.use_fp16 else tf.float32
    model = seq2seq_model.Seq2SeqModel(source_vocab_size,
                                       target_vocab_size,
                                       _buckets,
                                       FLAGS.size,
                                       FLAGS.num_layers,
                                       FLAGS.max_gradient_norm,
                                       FLAGS.batch_size,
                                       FLAGS.learning_rate,
                                       FLAGS.learning_rate_decay_factor,
                                       forward_only=forward_only,
                                       dtype=dtype)
    ckpt = tf.train.get_checkpoint_state(FLAGS.train_dir)
    if ckpt and tf.train.checkpoint_exists(ckpt.model_checkpoint_path):
        print("Reading model parameters from %s" % ckpt.model_checkpoint_path)
        model.saver.restore(session, ckpt.model_checkpoint_path)
    else:
        print("Created model with fresh parameters.")
        session.run(tf.global_variables_initializer())
    return model
Example no. 8
def create_model(session, train_dir, forward_only):
	"""Create translation model and initialize or load parameters in session."""
	model = seq2seq_model.Seq2SeqModel(PARAM["que_vocab_size"], PARAM["ans_vocab_size"], _buckets,
	PARAM["size"], PARAM["num_layers"], PARAM["max_gradient_norm"], PARAM["batch_size"],
	PARAM["learning_rate"], PARAM["learning_rate_decay_factor"],
	forward_only=forward_only)
	ckpt = tf.train.get_checkpoint_state(train_dir)
	print("Reading model parameters from %s" % ckpt.model_checkpoint_path)
	model.saver.restore(session, ckpt.model_checkpoint_path)

	return model
Example no. 9
def create_model(sess, forward_only):
    model = seq2seq_model.Seq2SeqModel(source_vocab_size=vocabulary_size,
                                       target_vocab_size=vocabulary_size,
                                       buckets=[(20, 21)],
                                       size=128,
                                       num_layers=2,
                                       max_gradient_norm=2.0,
                                       batch_size=batch_size,
                                       learning_rate=0.1,
                                       learning_rate_decay_factor=0.9,
                                       forward_only=forward_only)
    return model
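Unlike most of the other snippets, Example 9 only builds the graph: it neither initializes fresh variables nor restores a checkpoint. A minimal sketch of a caller, assuming vocabulary_size and batch_size are defined by the surrounding script:

# Assumed driver code, not part of the original example: the factory above only
# constructs the graph, so variables must still be initialized before training.
with tf.Session() as sess:
    model = create_model(sess, forward_only=False)
    sess.run(tf.global_variables_initializer())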
Example no. 10
def create_model(forward_only):
    model = seq2seq_model.Seq2SeqModel(source_vocab_size=vocabulary_size,
                                       target_vocab_size=vocabulary_size,
                                       buckets=[(20, 20)],
                                       size=256,
                                       num_layers=4,
                                       max_gradient_norm=5.0,
                                       batch_size=batch_size,
                                       learning_rate=1.0,
                                       learning_rate_decay_factor=0.9,
                                       use_lstm=True,
                                       forward_only=forward_only)
    return model
Example no. 11
def create_model2(session, forward_only):
    """Create translation model and initialize or load parameters in session."""
    model = seq2seq_model.Seq2SeqModel(FLAGS.en_vocab_size,
                                       FLAGS.fr_vocab_size,
                                       _buckets,
                                       FLAGS.size,
                                       FLAGS.num_layers,
                                       FLAGS.max_gradient_norm,
                                       FLAGS.batch_size,
                                       FLAGS.learning_rate,
                                       FLAGS.learning_rate_decay_factor,
                                       forward_only=forward_only)
    return model
Example no. 12
def create_model(session, forward_only):
  """Create translation model and initialize or load parameters in session."""
  model = seq2seq_model.Seq2SeqModel(
      FLAGS.en_vocab_size, FLAGS.fr_vocab_size, _buckets,
      FLAGS.size, FLAGS.num_layers, FLAGS.max_gradient_norm, FLAGS.batch_size,
      FLAGS.learning_rate, FLAGS.learning_rate_decay_factor,
      forward_only=forward_only)
  ckpt = tf.train.get_checkpoint_state(FLAGS.train_dir)
  if ckpt and gfile.Exists(ckpt.model_checkpoint_path):
    print("Reading model parameters from %s" % ckpt.model_checkpoint_path)
    model.saver.restore(session, ckpt.model_checkpoint_path)
  else:
    print("Created model with fresh parameters.")
    session.run(tf.initialize_all_variables())
  return model
Example no. 13
def create_model(session, forward_only):
  model = seq2seq_model.Seq2SeqModel(
      FLAGS.vocab_size, FLAGS.vocab_size, _buckets,
      FLAGS.size, FLAGS.num_layers, gradients_clip, FLAGS.batch_size,
      learning_rate, learning_rate_decay, use_lstm=FLAGS.use_lstm,
      forward_only=forward_only)
  ckpt = tf.train.get_checkpoint_state(train_dir)
  if ckpt and tf.gfile.Exists(ckpt.model_checkpoint_path):
    print("Reading model parameters from %s" % ckpt.model_checkpoint_path)
    model.saver.restore(session, ckpt.model_checkpoint_path)
  else:
    print("Created model with fresh parameters.")
    prepros.make_files(num_movie_scripts, FLAGS.vocab_size)
    session.run(tf.initialize_all_variables())
  return model
Example no. 14
def translation_model(sess, input_vocab_size, output_vocab_size, buckets,
                      rnn_size, num_layers, max_gradient, learning_rate,
                      lr_decay_rate, forward_only):
    model = seq2seq_model.Seq2SeqModel(input_vocab_size,
                                       output_vocab_size,
                                       buckets,
                                       rnn_size,
                                       num_layers,
                                       max_gradient,
                                       batch_size,
                                       learning_rate,
                                       lr_decay_rate,
                                       forward_only=forward_only,
                                       dtype=tf.float32)
    return model
Example no. 15
def translation_model(input_vocab_size, output_vocab_size, buckets, rnn_size,
                      num_layers, max_gradient, learning_rate, lr_decay_rate,
                      forward_only):
    model = seq2seq_model.Seq2SeqModel(
        source_vocab_size=input_vocab_size,
        target_vocab_size=output_vocab_size,
        buckets=buckets,
        size=rnn_size,
        num_layers=num_layers,
        max_gradient_norm=max_gradient,
        batch_size=batch_size,
        learning_rate=learning_rate,
        learning_rate_decay_factor=lr_decay_rate,
        forward_only=forward_only,
        dtype=tf.float32)
    return model
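Both translation_model variants (Examples 14 and 15) pass a batch_size that is not among their parameters, so it must already exist in the enclosing module. A minimal sketch of a call against Example 15's signature, with hypothetical values:

# Assumed context, not from the original examples: batch_size comes from the
# enclosing module scope, so define it before calling translation_model.
batch_size = 64
model = translation_model(input_vocab_size=40000, output_vocab_size=40000,
                          buckets=[(5, 10), (10, 15)], rnn_size=256,
                          num_layers=2, max_gradient=5.0, learning_rate=0.5,
                          lr_decay_rate=0.99, forward_only=False)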
Example no. 16
  def __init__(self, train_file=None, valid_file=None, test_file=None):
    """Create G2P model and initialize or load parameters in session."""
    self.test_file = test_file

    # Preliminary actions before model creation.
    if FLAGS.train:
      # Save model parameters.
      num_layers, size = data_utils.save_params(FLAGS.num_layers, FLAGS.size,
                                                FLAGS.model)
      batch_size = FLAGS.batch_size
      # Prepare G2P data.
      print("Preparing G2P data")
      train_gr_ids, train_ph_ids, valid_gr_ids, valid_ph_ids, self.gr_vocab,\
      self.ph_vocab = data_utils.prepare_g2p_data(FLAGS.model, train_file,
                                                  valid_file)
      # Read data into buckets and compute their sizes.
      print ("Reading development and training data.")
      self.valid_set = self.__put_into_buckets(valid_gr_ids, valid_ph_ids)
      self.train_set = self.__put_into_buckets(train_gr_ids, train_ph_ids)
    else:
      #Load model parameters.
      num_layers, size = data_utils.load_params(FLAGS.num_layers, FLAGS.size,
                                                FLAGS.model)
      batch_size = 1 # We decode one word at a time.
      # Load vocabularies
      self.gr_vocab = data_utils.load_vocabulary(os.path.join(FLAGS.model,
                                                              "vocab.grapheme"))
      self.ph_vocab = data_utils.load_vocabulary(os.path.join(FLAGS.model,
                                                              "vocab.phoneme"))

    self.rev_ph_vocab =\
        data_utils.load_vocabulary(os.path.join(FLAGS.model, "vocab.phoneme"),
                                   reverse=True)

    self.session = tf.Session()

    # Create model.
    print("Creating %d layers of %d units." % (num_layers, size))
    self.model = seq2seq_model.Seq2SeqModel(len(self.gr_vocab),
                                            len(self.ph_vocab), self._BUCKETS,
                                            size, num_layers,
                                            FLAGS.max_gradient_norm, batch_size,
                                            FLAGS.learning_rate,
                                            FLAGS.learning_rate_decay_factor,
                                            forward_only=not FLAGS.train)
    self.model.saver = tf.train.Saver(tf.all_variables(), max_to_keep=1)
    self.__create_model()
Example no. 17
    def __train_init(self,
                     params,
                     train_path,
                     valid_path=None,
                     test_path=None):
        """Create G2P model and initialize or load parameters in session."""

        # Preliminary actions before model creation.
        # Load model parameters.

        if self.model_dir:
            data_utils.save_params(params.num_layers, params.size,
                                   self.model_dir)

        # Prepare G2P data.
        print("Preparing G2P data")
        train_gr_ids, train_ph_ids, valid_gr_ids, valid_ph_ids, self.gr_vocab,\
        self.ph_vocab, self.test_lines =\
        data_utils.prepare_g2p_data(self.model_dir, train_path, valid_path,
                                    test_path)
        # Read data into buckets and compute their sizes.
        print("Reading development and training data.")
        self.valid_set = self.__put_into_buckets(valid_gr_ids, valid_ph_ids)
        self.train_set = self.__put_into_buckets(train_gr_ids, train_ph_ids)

        self.rev_ph_vocab = dict([(x, y)
                                  for (y, x) in enumerate(self.ph_vocab)])

        self.session = tf.Session()

        # Create model.
        print("Creating %d layers of %d units." %
              (params.num_layers, params.size))
        self.model = seq2seq_model.Seq2SeqModel(len(self.gr_vocab),
                                                len(self.ph_vocab),
                                                self._BUCKETS,
                                                params.size,
                                                params.num_layers,
                                                params.max_gradient_norm,
                                                params.batch_size,
                                                params.learning_rate,
                                                params.lr_decay_factor,
                                                forward_only=False)
        self.model.saver = tf.train.Saver(tf.all_variables(), max_to_keep=1)
        print("Created model with fresh parameters.")
        self.session.run(tf.initialize_all_variables())
Example no. 18
def create_model(session, forward_only):
    model = seq2seq_model.Seq2SeqModel(FLAGS.in_vocab_size,
                                       FLAGS.out_vocab_size,
                                       _buckets,
                                       FLAGS.size,
                                       FLAGS.num_layers,
                                       FLAGS.max_gradient_norm,
                                       FLAGS.batch_size,
                                       FLAGS.learning_rate,
                                       FLAGS.learning_rate_decay_factor,
                                       forward_only=forward_only)
    ckpt = tf.train.get_checkpoint_state(FLAGS.train_dir)
    ckpt.model_checkpoint_path = "/home/mnortham/projects/LSTM/data/translate.ckpt-19800"
    if ckpt and tf.gfile.Exists(ckpt.model_checkpoint_path):
        print("Reading model parameters from %s" % ckpt.model_checkpoint_path)
        model.saver.restore(session, ckpt.model_checkpoint_path)
    else:
        print("Created model with fresh parameters.")
        session.run(tf.initialize_all_variables())
    return model
Example no. 19
def create_model(session, forward_only):
    model = seq2seq_model.Seq2SeqModel(config.CT_VOCAB_SIZE,
                                       config.OP_VOCAB_SIZE,
                                       _buckets,
                                       config.SIZE,
                                       config.NUM_LAYERS,
                                       config.MAX_GRADIENT_NORM,
                                       config.BATCH_SIZE,
                                       config.LEARNING_RATE,
                                       config.LEARNING_RATE_DECAY_FACTOR,
                                       forward_only=forward_only)

    ckpt = tf.train.get_checkpoint_state(config.TRAIN_DIR)

    if ckpt and gfile.Exists(ckpt.model_checkpoint_path):
        model.saver.restore(session, ckpt.model_checkpoint_path)
    else:
        session.run(tf.initialize_all_variables())

    return model
Example no. 20
    def create_model(self, session, forward_only, scope_name):
        """Create translation model and initialize or load parameters in session."""
        dtype = tf.float16 if FLAGS.use_fp16 else tf.float32
        model = seq2seq_model.Seq2SeqModel(FLAGS.src_vocab_size,
                                           FLAGS.tgt_vocab_size,
                                           _buckets,
                                           self.size,
                                           self.num_layers,
                                           FLAGS.max_gradient_norm,
                                           FLAGS.batch_size,
                                           FLAGS.learning_rate,
                                           FLAGS.learning_rate_decay_factor,
                                           forward_only=forward_only)

        ckpt = tf.train.get_checkpoint_state(self.train_dir)
        if ckpt and os.path.exists(ckpt.model_checkpoint_path):
            print "Reading model parameters from", ckpt.model_checkpoint_path
            model.saver.restore(session, ckpt.model_checkpoint_path)
            return model
        print "Checkpoint Directory not Found."
        return
Example no. 21
    def __prepare_model(self, params):
        """Prepare G2P model for training."""

        self.params = params

        self.session = tf.Session()

        # Prepare model.
        print("Creating %d layers of %d units." %
              (self.params.num_layers, self.params.size))
        self.model = seq2seq_model.Seq2SeqModel(len(self.gr_vocab),
                                                len(self.ph_vocab),
                                                self._BUCKETS,
                                                self.params.size,
                                                self.params.num_layers,
                                                self.params.max_gradient_norm,
                                                self.params.batch_size,
                                                self.params.learning_rate,
                                                self.params.lr_decay_factor,
                                                forward_only=False)
        self.model.saver = tf.train.Saver(tf.all_variables(), max_to_keep=1)
Example no. 22
    def create_tf_model(self,
                        tf_session,
                        use_lstm=USE_LSTM,
                        num_samples=NUM_SAMPLES,
                        forward_only=FORWARD_ONLY):

        layer_size = int(self.config.get('model', 'layer_size'))
        num_layers = int(self.config.get('model', 'num_layers'))
        max_gradient_norm = float(self.config.get('model',
                                                  'max_gradient_norm'))
        batch_size = int(self.config.get('model', 'batch_size'))
        learning_rate = float(self.config.get('model', 'learning_rate'))
        learning_rate_decay_factor = float(
            self.config.get('model', 'learning_rate_decay_factor'))

        logging.info("creating seq2seq model: %d layers of %d units." %
                     (num_layers, layer_size))

        print len(self.input_dict), len(self.output_dict), self.buckets, \
            layer_size, num_layers, max_gradient_norm, batch_size, \
            learning_rate, learning_rate_decay_factor, num_samples, forward_only

        # 20000 20000 [(5, 10), (10, 15), (20, 25), (40, 50)] 128 1 5.0 64 0.5 0.99 True
        #   103    59 [(7, 4), (14, 8)]                       128 1 5.0 64 0.5 0.99 32 True

        self.model = seq2seq_model.Seq2SeqModel(len(self.input_dict),
                                                len(self.output_dict),
                                                self.buckets,
                                                layer_size,
                                                num_layers,
                                                max_gradient_norm,
                                                batch_size,
                                                learning_rate,
                                                learning_rate_decay_factor,
                                                num_samples=num_samples,
                                                forward_only=forward_only)
        init = tf.global_variables_initializer()
        tf_session.run(init)

        return self.model
Example no. 23
def create_model(session, decode_flag, gr_vocab_size, ph_vocab_size):
    """Create translation model and initialize or load parameters in session."""
    num_layers = FLAGS.num_layers
    size = FLAGS.size
    # Checking model's architecture for decode processes.
    if decode_flag:
        params_path = os.path.join(FLAGS.model, "model.params")
        if gfile.Exists(params_path):
            params = open(params_path).readlines()
            for line in params:
                line_splitted = line.strip().split(":")
                if line_splitted[0] == "num_layers":
                    num_layers = int(line_splitted[1])
                if line_splitted[0] == "size":
                    size = int(line_splitted[1])

    model = seq2seq_model.Seq2SeqModel(gr_vocab_size,
                                       ph_vocab_size,
                                       _BUCKETS,
                                       size,
                                       num_layers,
                                       FLAGS.max_gradient_norm,
                                       FLAGS.batch_size,
                                       FLAGS.learning_rate,
                                       FLAGS.learning_rate_decay_factor,
                                       forward_only=decode_flag)
    ckpt = tf.train.get_checkpoint_state(FLAGS.model)
    if ckpt and tf.gfile.Exists(ckpt.model_checkpoint_path):
        print("Reading model parameters from %s" % ckpt.model_checkpoint_path)
        model.saver.restore(session, ckpt.model_checkpoint_path)
    elif tf.gfile.Exists(os.path.join(FLAGS.model, "model")):
        model.saver.restore(session, os.path.join(FLAGS.model, "model"))
    elif not decode_flag:
        print("Created model with fresh parameters.")
        session.run(tf.initialize_all_variables())
    else:
        raise ValueError("Model not found in %s" % FLAGS.model)
    return model
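The decode branch in Example 23 reads the network architecture back from a plain-text model.params file containing one colon-separated key:value pair per line. A sketch of how the training side might write that file, assuming the same two keys the parser looks for:

# Sketch (assumed format, matching the parser above): persist the architecture
# so that a later decode run can rebuild the same graph.
with open(os.path.join(FLAGS.model, "model.params"), "w") as params_file:
    params_file.write("num_layers:%d\n" % FLAGS.num_layers)
    params_file.write("size:%d\n" % FLAGS.size)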
Example no. 24
def create_model(session, forward_only):
  """Create translation model and initialize or load parameters in session."""
  # print(FLAGS.train_dir) # train
  if not os.path.exists(FLAGS.train_dir):
    os.mkdir(FLAGS.train_dir)
  print('Step 1 : Create transliteration model')
  model = seq2seq_model.Seq2SeqModel(
      FLAGS.en_vocab_size, FLAGS.fr_vocab_size, _buckets,
      FLAGS.size, FLAGS.num_layers, FLAGS.max_gradient_norm, FLAGS.batch_size,
      FLAGS.learning_rate, FLAGS.learning_rate_decay_factor,
      forward_only=forward_only, use_lstm=FLAGS.use_lstm)
  
  ckpt = tf.train.get_checkpoint_state(FLAGS.train_dir)
  print('Step 2 : Confirm checkpoint parameters')
  # print(ckpt) # checkpoint
  # print(ckpt.model_checkpoint_path) # check
  if ckpt and tf.gfile.Exists(ckpt.model_checkpoint_path + '.index'):
    print("Step 3 : Reading model parameters from %s" % ckpt.model_checkpoint_path)
    model.saver.restore(session, ckpt.model_checkpoint_path)
  else:
    print("Created model with fresh parameters.")
    session.run(tf.global_variables_initializer())
  return model
Example no. 25
    def create_tf_model(self,
                        tf_session,
                        layer_size=LAYER_SIZE,
                        num_layers=NUM_LAYERS,
                        max_gradient_norm=MAX_GRADIENT_NORM,
                        batch_size=BATCH_SIZE,
                        learning_rate=LEARNING_RATE,
                        learning_rate_decay_factor=LEARNING_RATE_DECAY_FACTOR,
                        use_lstm=USE_LSTM,
                        num_samples=NUM_SAMPLES,
                        forward_only=FORWARD_ONLY):

        logging.info("creating seq2seq model: %d layers of %d units." %
                     (num_layers, layer_size))

        print len(self.input_dict), len(self.output_dict), BUCKETS, \
            layer_size, num_layers, max_gradient_norm, batch_size, \
            learning_rate, learning_rate_decay_factor, num_samples, forward_only

        # 20000 20000 [(5, 10), (10, 15), (20, 25), (40, 50)] 128 1 5.0 64 0.5 0.99 True
        #   103    59 [(7, 4), (14, 8)]                       128 1 5.0 64 0.5 0.99 32 True

        self.model = seq2seq_model.Seq2SeqModel(len(self.input_dict),
                                                len(self.output_dict),
                                                BUCKETS,
                                                layer_size,
                                                num_layers,
                                                max_gradient_norm,
                                                batch_size,
                                                learning_rate,
                                                learning_rate_decay_factor,
                                                num_samples=num_samples,
                                                forward_only=forward_only)
        init = tf.global_variables_initializer()
        tf_session.run(init)

        return self.model
Example no. 26
def train():

    # word table 6000
    vocabulary_encode_size = 6000
    vocabulary_decode_size = 6000
    buckets = [(5, 10), (10, 15), (20, 25), (40, 50)]
    layer_size = 256  # size of each layer
    num_layers = 3  # number of layers
    batch_size = 64

    model = seq2seq_model.Seq2SeqModel(
        source_vocab_size=vocabulary_encode_size,
        target_vocab_size=vocabulary_decode_size,
        buckets=buckets,
        size=layer_size,
        num_layers=num_layers,
        max_gradient_norm=5.0,
        batch_size=batch_size,
        learning_rate=0.5,
        learning_rate_decay_factor=0.97,
        forward_only=False)

    config = tf.ConfigProto()
    config.gpu_options.allocator_type = 'BFC'  # guard against out-of-memory errors

    with tf.Session(config=config) as sess:
        # Restore the previous training run, if any
        ckpt = tf.train.get_checkpoint_state('.')
        if ckpt != None:
            print(ckpt.model_checkpoint_path)
            model.saver.restore(sess, ckpt.model_checkpoint_path)
        else:
            sess.run(tf.global_variables_initializer())

        train_set = read_data("train_encode.vec", "train_decode.vec")
        test_set = read_data("test_encode.vec", "test_decode.vec")

        train_bucket_sizes = [len(train_set[b]) for b in range(len(buckets))]
        train_total_size = float(sum(train_bucket_sizes))
        train_buckets_scale = [
            sum(train_bucket_sizes[:i + 1]) / train_total_size
            for i in range(len(train_bucket_sizes))
        ]

        loss = 0.0
        total_step = 0
        previous_losses = []
        # Train indefinitely, saving the model periodically
        while True:
            random_number_01 = np.random.random_sample()
            bucket_id = min([
                i for i in range(len(train_buckets_scale))
                if train_buckets_scale[i] > random_number_01
            ])

            encoder_inputs, decoder_inputs, target_weights = model.get_batch(
                train_set, bucket_id)
            _, step_loss, _ = model.step(sess, encoder_inputs, decoder_inputs,
                                         target_weights, bucket_id, False)

            loss += step_loss / 500
            total_step += 1

            print(total_step)
            if total_step % 500 == 0:
                print(model.global_step.eval(), model.learning_rate.eval(),
                      loss)

                # If the loss has not improved, decay the learning rate
                if len(previous_losses) > 2 and loss > max(
                        previous_losses[-3:]):
                    sess.run(model.learning_rate_decay_op)
                previous_losses.append(loss)
                # Save the model
                checkpoint_path = "chatbot_seq2seq.ckpt"
                model.saver.save(sess,
                                 checkpoint_path,
                                 global_step=model.global_step)
                loss = 0.0
                # Evaluate the model on the test data
                for bucket_id in range(len(buckets)):
                    if len(test_set[bucket_id]) == 0:
                        continue
                    encoder_inputs, decoder_inputs, target_weights = model.get_batch(
                        test_set, bucket_id)
                    _, eval_loss, _ = model.step(sess, encoder_inputs,
                                                 decoder_inputs,
                                                 target_weights, bucket_id,
                                                 True)
                    eval_ppx = math.exp(
                        eval_loss) if eval_loss < 300 else float('inf')
                    print(bucket_id, eval_ppx)
Example no. 27
def test():
    train_encode_vocabulary = 'train_encode_vocabulary'
    train_decode_vocabulary = 'train_decode_vocabulary'

    def read_vocabulary(input_file):
        tmp_vocab = []
        with open(input_file, "r") as f:
            tmp_vocab.extend(f.readlines())
        tmp_vocab = [line.strip() for line in tmp_vocab]
        vocab = dict([(x, y) for (y, x) in enumerate(tmp_vocab)])
        return vocab, tmp_vocab

    vocab_en, _, = read_vocabulary(train_encode_vocabulary)
    _, vocab_de, = read_vocabulary(train_decode_vocabulary)

    # vocabulary size is 6000
    vocabulary_encode_size = 6000
    vocabulary_decode_size = 6000

    buckets = [(5, 10), (10, 15), (20, 25), (40, 50)]
    layer_size = 256  # size of each layer
    num_layers = 3  # number of layers
    batch_size = 1

    model = seq2seq_model.Seq2SeqModel(
        source_vocab_size=vocabulary_encode_size,
        target_vocab_size=vocabulary_decode_size,
        buckets=buckets,
        size=layer_size,
        num_layers=num_layers,
        max_gradient_norm=5.0,
        batch_size=batch_size,
        learning_rate=0.5,
        learning_rate_decay_factor=0.99,
        forward_only=True)
    model.batch_size = 1
    with tf.Session() as sess:
        # Restore the previous training run, if any
        ckpt = tf.train.get_checkpoint_state('.')
        if ckpt != None:
            print(ckpt.model_checkpoint_path)
            model.saver.restore(sess, ckpt.model_checkpoint_path)
        else:
            print("没找到模型")

        while True:
            input_string = raw_input('me(Human) > ')
            # Exit on "quit"
            if input_string == 'quit':
                exit()

            input_string_vec = []
            for words in input_string.strip():
                input_string_vec.append(vocab_en.get(words, UNK_ID))
            bucket_id = min([
                b for b in range(len(buckets))
                if buckets[b][0] > len(input_string_vec)
            ])
            encoder_inputs, decoder_inputs, target_weights = model.get_batch(
                {bucket_id: [(input_string_vec, [])]}, bucket_id)
            _, _, output_logits = model.step(sess, encoder_inputs,
                                             decoder_inputs, target_weights,
                                             bucket_id, True)
            outputs = [
                int(np.argmax(logit, axis=1)) for logit in output_logits
            ]
            if EOS_ID in outputs:
                outputs = outputs[:outputs.index(EOS_ID)]

            response = "".join(
                [tf.compat.as_str(vocab_de[output]) for output in outputs])
            print('Robot > ' + response)
Example no. 28
def read_vocabulary(input_file):
    vocabs = []
    with open(input_file, "r") as f:
        for line in f:
            vocabs.append(line.strip())

    vocab_dict = dict([(x, y) for (y, x) in enumerate(vocabs)])
    return vocab_dict, vocabs


vocab_enc, _ = read_vocabulary("data/shooter/vocab.enc")
_, vocab_dec = read_vocabulary("data/shooter/vocab.dec")
batch_size = 1

model = seq2seq_model.Seq2SeqModel(VOCAB_SIZE,
                                   VOCAB_SIZE,
                                   buckets=buckets,
                                   size=layer_size,
                                   num_layers=num_layers,
                                   max_gradient_norm=5.0,
                                   batch_size=batch_size,
                                   learning_rate=0.5,
                                   learning_rate_decay_factor=0.99,
                                   forward_only=True)

sess = tf.Session()
ckpt = tf.train.get_checkpoint_state('data/shooter')
if ckpt != None:
    print(ckpt.model_checkpoint_path)
    model.saver.restore(sess, ckpt.model_checkpoint_path)
else:
    print("Failed to find model file")
    sys.exit(1)

while True:
Example no. 29
args = parser.parse_args()

_buckets = [(5, 10), (10, 15), (20, 25), (40, 50)]

if not os.path.exists(args.output_path):
    os.mkdir(args.output_path)

with tf.device('/cpu:0'):
    ckpt = tf.train.get_checkpoint_state(args.ckpt_path)
    sess = tf.Session(config=tf.ConfigProto(log_device_placement=True))
    model = seq2seq_model.Seq2SeqModel(40000,
                                       40000,
                                       _buckets,
                                       1024,
                                       3,
                                       5,
                                       64,
                                       0.5,
                                       0.99,
                                       forward_only=True,
                                       use_lstm=True)
    model.saver.restore(sess, ckpt.model_checkpoint_path)
    print('Restored model ...')
    for var in tf.trainable_variables():
        print(var.name, var.get_shape())
        if not var.name.startswith('embedding_attention_seq2seq'):
            continue
        if not var.name.split('/')[1] == 'RNN':
            continue
        if var.name.split('/')[2] == 'EmbeddingWrapper':
            print('Saving Embedding ...')
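Example 29 breaks off right after it locates the EmbeddingWrapper variable. One possible continuation (assumed, not part of the original snippet) is to evaluate that variable and dump it with NumPy under args.output_path:

import numpy as np

# Assumed continuation: fetch the embedding matrix located above and write it
# to disk as a NumPy array in the requested output directory.
embedding = sess.run(var)
np.save(os.path.join(args.output_path, 'embedding.npy'), embedding)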
Example no. 30
	return vocab, tmp_vocab
 
vocab_en, _, = read_vocabulary(train_encode_vocabulary)
_, vocab_de, = read_vocabulary(train_decode_vocabulary)
 
# vocabulary size 5000
vocabulary_encode_size = 5000
vocabulary_decode_size = 5000
 
buckets = [(5, 10), (10, 15), (20, 25), (40, 50)]
layer_size = 256  # size of each layer
num_layers = 3   # number of layers
batch_size =  1
 
model = seq2seq_model.Seq2SeqModel(source_vocab_size=vocabulary_encode_size, target_vocab_size=vocabulary_decode_size,
                                   buckets=buckets, size=layer_size, num_layers=num_layers, max_gradient_norm= 5.0,
                                   batch_size=batch_size, learning_rate=0.5, learning_rate_decay_factor=0.99, forward_only=True)
model.batch_size = 1
 
with tf.Session() as sess:
	# Restore the previous training run, if any
	ckpt = tf.train.get_checkpoint_state('.')
	if ckpt != None:
		print(ckpt.model_checkpoint_path)
		model.saver.restore(sess, ckpt.model_checkpoint_path)
	else:
		print("没找到模型")
 
	while True:
		input_string = input('me > ')
		# Exit on "quit"