def create_model(session, forward_only): """Create translation model and initialize or load parameters in session.""" dtype = tf.float16 if FLAGS.use_fp16 else tf.float32 model = seq2seq_model.Seq2SeqModel(FLAGS.set1_vocab_size, FLAGS.set2_vocab_size, _buckets, FLAGS.size, FLAGS.num_layers, FLAGS.max_gradient_norm, FLAGS.batch_size, FLAGS.learning_rate, FLAGS.learning_rate_decay_factor, forward_only=forward_only) #dtype=dtype) # This should handle padding internally ckpt = tf.train.get_checkpoint_state(FLAGS.train_dir) if ckpt and tf.gfile.Exists(ckpt.model_checkpoint_path): print("Reading model parameters from %s" % ckpt.model_checkpoint_path) model.saver.restore(session, ckpt.model_checkpoint_path) else: print("Created model with fresh parameters.") #session.run(tf.initialize_all_variables()) print("Also doing custom embedding tasks") embeddings = tf.Variable(tf.random_uniform(w2v_X.shape, minval=-0.1, maxval=0.1), trainable=False) session.run(tf.initialize_all_variables()) session.run(embeddings.assign(w2v_X)) return model
def create_model(self, forward_only):
    """Create translation model and initialize or load parameters in session."""
    dtype = tf.float16 if self.use_fp16 else tf.float32
    print("Creating %d layers of %d units." % (self.num_layers, self.size))
    model = seq2seq_model.Seq2SeqModel(
        self.input_vocab_size, self.output_vocab_size, self.buckets, self.size,
        self.num_layers, self.max_gradient_norm, self.batch_size, self.lr,
        self.lr_decay, forward_only=forward_only, dtype=dtype)
    ckpt = tf.train.get_checkpoint_state(self.train_dir)
    if ckpt and tf.train.checkpoint_exists(ckpt.model_checkpoint_path):
        print("Reading model parameters from %s" % ckpt.model_checkpoint_path)
        model.saver.restore(self.session, ckpt.model_checkpoint_path)
    else:
        print("Created model with fresh parameters.")
        self.session.run(tf.initialize_all_variables())
    self.model = model
    return model

def self_test(): """Test the translation model.""" with tf.Session() as sess: print("Self-test for neural translation model.") # Create model with vocabularies of 10, 2 small buckets, 2 layers of 32. model = seq2seq_model.Seq2SeqModel(10, 10, [(3, 3), (6, 6)], 32, 2, 5.0, 32, 0.3, 0.99, num_samples=8) sess.run(tf.global_variables_initializer()) # Fake data set for both the (3, 3) and (6, 6) bucket. data_set = ([([1, 1], [2, 2]), ([3, 3], [4]), ([5], [6])], [([1, 1, 1, 1, 1], [2, 2, 2, 2, 2]), ([3, 3, 3], [5, 6])]) for _ in xrange(5): # Train the fake model for 5 steps. bucket_id = random.choice([0, 1]) encoder_inputs, decoder_inputs, target_weights = model.get_batch( data_set, bucket_id) model.step(sess, encoder_inputs, decoder_inputs, target_weights, bucket_id, False)
def self_test():
    with tf.Session() as sess:
        print("Self-test for neural translation model.")
        model = seq2seq_model.Seq2SeqModel(10, 10, [(3, 3), (6, 6)], 32, 2,
                                           5.0, 32, 0.3, 0.99, num_samples=8)
        sess.run(tf.initialize_all_variables())
        data_set = ([([1, 1], [2, 2]), ([3, 3], [4]), ([5], [6])],
                    [([1, 1, 1, 1, 1], [2, 2, 2, 2, 2]), ([3, 3, 3], [5, 6])])
        for _ in xrange(5):
            bucket_id = random.choice([0, 1])
            encoder_inputs, decoder_inputs, target_weights = model.get_batch(
                data_set, bucket_id)
            model.step(sess, encoder_inputs, decoder_inputs, target_weights,
                       bucket_id, False)

def create_model(self, session, forward_only):
    self.model = seq2seq_model.Seq2SeqModel(
        FLAGS.in_vocab_size, FLAGS.out_vocab_size, _buckets, FLAGS.size,
        FLAGS.num_layers, FLAGS.max_gradient_norm, FLAGS.batch_size,
        FLAGS.learning_rate, FLAGS.learning_rate_decay_factor,
        forward_only=forward_only)
    ckpt = tf.train.get_checkpoint_state(FLAGS.train_dir)
    if ckpt:
        # Normalize a relative checkpoint path to an absolute one before restoring.
        if not os.path.isabs(ckpt.model_checkpoint_path):
            ckpt.model_checkpoint_path = os.path.abspath(
                os.path.join(os.getcwd(), ckpt.model_checkpoint_path))
        print("Reading model parameters from %s" % ckpt.model_checkpoint_path)
        self.model.saver.restore(session, ckpt.model_checkpoint_path)
    else:
        print("Created model with fresh parameters.")
        session.run(tf.initialize_all_variables())
    return self.model

def load_decode_model(self):
    """Load G2P model and initialize or load parameters in session."""
    self.batch_size = 1  # We decode one word at a time.
    # Load model parameters.
    num_layers, size = data_utils.load_params(self.model_dir)
    # Load vocabularies.
    print("Loading vocabularies from %s" % self.model_dir)
    self.gr_vocab = data_utils.load_vocabulary(
        os.path.join(self.model_dir, "vocab.grapheme"))
    self.ph_vocab = data_utils.load_vocabulary(
        os.path.join(self.model_dir, "vocab.phoneme"))
    self.rev_ph_vocab = data_utils.load_vocabulary(
        os.path.join(self.model_dir, "vocab.phoneme"), reverse=True)
    self.session = tf.Session()
    # Restore model.
    print("Creating %d layers of %d units." % (num_layers, size))
    self.model = seq2seq_model.Seq2SeqModel(
        len(self.gr_vocab), len(self.ph_vocab), self._BUCKETS, size,
        num_layers, 0, self.batch_size, 0, 0, forward_only=True)
    self.model.saver = tf.train.Saver(tf.all_variables(), max_to_keep=1)
    # Check for saved models and restore them.
    print("Reading model parameters from %s" % self.model_dir)
    self.model.saver.restore(self.session,
                             os.path.join(self.model_dir, "model"))

def create_model(session, source_vocab_size, target_vocab_size, forward_only=True):
    """Create translation model and initialize or load parameters."""
    dtype = tf.float16 if FLAGS.use_fp16 else tf.float32
    model = seq2seq_model.Seq2SeqModel(
        source_vocab_size, target_vocab_size, _buckets, FLAGS.size,
        FLAGS.num_layers, FLAGS.max_gradient_norm, FLAGS.batch_size,
        FLAGS.learning_rate, FLAGS.learning_rate_decay_factor,
        forward_only=forward_only, dtype=dtype)
    ckpt = tf.train.get_checkpoint_state(FLAGS.train_dir)
    if ckpt and tf.train.checkpoint_exists(ckpt.model_checkpoint_path):
        print("Reading model parameters from %s" % ckpt.model_checkpoint_path)
        model.saver.restore(session, ckpt.model_checkpoint_path)
    else:
        print("Created model with fresh parameters.")
        session.run(tf.global_variables_initializer())
    return model

def create_model(session, train_dir, forward_only):
    """Create translation model and initialize or load parameters in session."""
    model = seq2seq_model.Seq2SeqModel(
        PARAM["que_vocab_size"], PARAM["ans_vocab_size"], _buckets,
        PARAM["size"], PARAM["num_layers"], PARAM["max_gradient_norm"],
        PARAM["batch_size"], PARAM["learning_rate"],
        PARAM["learning_rate_decay_factor"], forward_only=forward_only)
    ckpt = tf.train.get_checkpoint_state(train_dir)
    print("Reading model parameters from %s" % ckpt.model_checkpoint_path)
    model.saver.restore(session, ckpt.model_checkpoint_path)
    return model

def create_model(sess, forward_only):
    model = seq2seq_model.Seq2SeqModel(source_vocab_size=vocabulary_size,
                                       target_vocab_size=vocabulary_size,
                                       buckets=[(20, 21)],
                                       size=128,
                                       num_layers=2,
                                       max_gradient_norm=2.0,
                                       batch_size=batch_size,
                                       learning_rate=0.1,
                                       learning_rate_decay_factor=0.9,
                                       forward_only=forward_only)
    return model

def create_model(forward_only):
    model = seq2seq_model.Seq2SeqModel(source_vocab_size=vocabulary_size,
                                       target_vocab_size=vocabulary_size,
                                       buckets=[(20, 20)],
                                       size=256,
                                       num_layers=4,
                                       max_gradient_norm=5.0,
                                       batch_size=batch_size,
                                       learning_rate=1.0,
                                       learning_rate_decay_factor=0.9,
                                       use_lstm=True,
                                       forward_only=forward_only)
    return model

def create_model2(session, forward_only):
    """Create translation model and initialize or load parameters in session."""
    model = seq2seq_model.Seq2SeqModel(
        FLAGS.en_vocab_size, FLAGS.fr_vocab_size, _buckets, FLAGS.size,
        FLAGS.num_layers, FLAGS.max_gradient_norm, FLAGS.batch_size,
        FLAGS.learning_rate, FLAGS.learning_rate_decay_factor,
        forward_only=forward_only)
    return model

def create_model(session, forward_only): """Create translation model and initialize or load parameters in session.""" model = seq2seq_model.Seq2SeqModel( FLAGS.en_vocab_size, FLAGS.fr_vocab_size, _buckets, FLAGS.size, FLAGS.num_layers, FLAGS.max_gradient_norm, FLAGS.batch_size, FLAGS.learning_rate, FLAGS.learning_rate_decay_factor, forward_only=forward_only) ckpt = tf.train.get_checkpoint_state(FLAGS.train_dir) if ckpt and gfile.Exists(ckpt.model_checkpoint_path): print("Reading model parameters from %s" % ckpt.model_checkpoint_path) model.saver.restore(session, ckpt.model_checkpoint_path) else: print("Created model with fresh parameters.") session.run(tf.variables.initialize_all_variables()) return model
def create_model(session, forward_only):
    model = seq2seq_model.Seq2SeqModel(
        FLAGS.vocab_size, FLAGS.vocab_size, _buckets, FLAGS.size,
        FLAGS.num_layers, gradients_clip, FLAGS.batch_size, learning_rate,
        learning_rate_decay, use_lstm=FLAGS.use_lstm,
        forward_only=forward_only)
    ckpt = tf.train.get_checkpoint_state(train_dir)
    if ckpt and tf.gfile.Exists(ckpt.model_checkpoint_path):
        print("Reading model parameters from %s" % ckpt.model_checkpoint_path)
        model.saver.restore(session, ckpt.model_checkpoint_path)
    else:
        print("Created model with fresh parameters.")
        prepros.make_files(num_movie_scripts, FLAGS.vocab_size)
        session.run(tf.initialize_all_variables())
    return model

def translation_model(sess, input_vocab_size, output_vocab_size, buckets,
                      rnn_size, num_layers, max_gradient, learning_rate,
                      lr_decay_rate, forward_only):
    model = seq2seq_model.Seq2SeqModel(input_vocab_size, output_vocab_size,
                                       buckets, rnn_size, num_layers,
                                       max_gradient, batch_size, learning_rate,
                                       lr_decay_rate,
                                       forward_only=forward_only,
                                       dtype=tf.float32)
    return model

def translation_model(input_vocab_size, output_vocab_size, buckets, rnn_size,
                      num_layers, max_gradient, learning_rate, lr_decay_rate,
                      forward_only):
    model = seq2seq_model.Seq2SeqModel(
        source_vocab_size=input_vocab_size,
        target_vocab_size=output_vocab_size,
        buckets=buckets,
        size=rnn_size,
        num_layers=num_layers,
        max_gradient_norm=max_gradient,
        batch_size=batch_size,
        learning_rate=learning_rate,
        learning_rate_decay_factor=lr_decay_rate,
        forward_only=forward_only,
        dtype=tf.float32)
    return model

def __init__(self, train_file=None, valid_file=None, test_file=None):
    """Create G2P model and initialize or load parameters in session."""
    self.test_file = test_file
    # Preliminary actions before model creation.
    if FLAGS.train:
        # Save model parameters.
        num_layers, size = data_utils.save_params(FLAGS.num_layers, FLAGS.size,
                                                  FLAGS.model)
        batch_size = FLAGS.batch_size
        # Prepare G2P data.
        print("Preparing G2P data")
        train_gr_ids, train_ph_ids, valid_gr_ids, valid_ph_ids, self.gr_vocab, \
            self.ph_vocab = data_utils.prepare_g2p_data(FLAGS.model, train_file,
                                                        valid_file)
        # Read data into buckets and compute their sizes.
        print("Reading development and training data.")
        self.valid_set = self.__put_into_buckets(valid_gr_ids, valid_ph_ids)
        self.train_set = self.__put_into_buckets(train_gr_ids, train_ph_ids)
    else:
        # Load model parameters.
        num_layers, size = data_utils.load_params(FLAGS.num_layers, FLAGS.size,
                                                  FLAGS.model)
        batch_size = 1  # We decode one word at a time.
        # Load vocabularies.
        self.gr_vocab = data_utils.load_vocabulary(
            os.path.join(FLAGS.model, "vocab.grapheme"))
        self.ph_vocab = data_utils.load_vocabulary(
            os.path.join(FLAGS.model, "vocab.phoneme"))
        self.rev_ph_vocab = data_utils.load_vocabulary(
            os.path.join(FLAGS.model, "vocab.phoneme"), reverse=True)
    self.session = tf.Session()
    # Create model.
    print("Creating %d layers of %d units." % (num_layers, size))
    self.model = seq2seq_model.Seq2SeqModel(
        len(self.gr_vocab), len(self.ph_vocab), self._BUCKETS, size,
        num_layers, FLAGS.max_gradient_norm, batch_size, FLAGS.learning_rate,
        FLAGS.learning_rate_decay_factor, forward_only=not FLAGS.train)
    self.model.saver = tf.train.Saver(tf.all_variables(), max_to_keep=1)
    self.__create_model()

def __train_init(self, params, train_path, valid_path=None, test_path=None):
    """Create G2P model and initialize or load parameters in session."""
    # Preliminary actions before model creation.
    # Save model parameters.
    if self.model_dir:
        data_utils.save_params(params.num_layers, params.size, self.model_dir)
    # Prepare G2P data.
    print("Preparing G2P data")
    train_gr_ids, train_ph_ids, valid_gr_ids, valid_ph_ids, self.gr_vocab, \
        self.ph_vocab, self.test_lines = data_utils.prepare_g2p_data(
            self.model_dir, train_path, valid_path, test_path)
    # Read data into buckets and compute their sizes.
    print("Reading development and training data.")
    self.valid_set = self.__put_into_buckets(valid_gr_ids, valid_ph_ids)
    self.train_set = self.__put_into_buckets(train_gr_ids, train_ph_ids)
    self.rev_ph_vocab = dict([(x, y) for (y, x) in enumerate(self.ph_vocab)])
    self.session = tf.Session()
    # Create model.
    print("Creating %d layers of %d units." % (params.num_layers, params.size))
    self.model = seq2seq_model.Seq2SeqModel(
        len(self.gr_vocab), len(self.ph_vocab), self._BUCKETS, params.size,
        params.num_layers, params.max_gradient_norm, params.batch_size,
        params.learning_rate, params.lr_decay_factor, forward_only=False)
    self.model.saver = tf.train.Saver(tf.all_variables(), max_to_keep=1)
    print("Created model with fresh parameters.")
    self.session.run(tf.initialize_all_variables())

def create_model(session, forward_only):
    model = seq2seq_model.Seq2SeqModel(
        FLAGS.in_vocab_size, FLAGS.out_vocab_size, _buckets, FLAGS.size,
        FLAGS.num_layers, FLAGS.max_gradient_norm, FLAGS.batch_size,
        FLAGS.learning_rate, FLAGS.learning_rate_decay_factor,
        forward_only=forward_only)
    ckpt = tf.train.get_checkpoint_state(FLAGS.train_dir)
    # The checkpoint path is hard-coded here; it overrides whatever
    # get_checkpoint_state found and fails if no checkpoint state exists.
    ckpt.model_checkpoint_path = "/home/mnortham/projects/LSTM/data/translate.ckpt-19800"
    if ckpt and tf.gfile.Exists(ckpt.model_checkpoint_path):
        print("Reading model parameters from %s" % ckpt.model_checkpoint_path)
        model.saver.restore(session, ckpt.model_checkpoint_path)
    else:
        print("Created model with fresh parameters.")
        session.run(tf.initialize_all_variables())
    return model

def create_model(session, forward_only):
    model = seq2seq_model.Seq2SeqModel(
        config.CT_VOCAB_SIZE, config.OP_VOCAB_SIZE, _buckets, config.SIZE,
        config.NUM_LAYERS, config.MAX_GRADIENT_NORM, config.BATCH_SIZE,
        config.LEARNING_RATE, config.LEARNING_RATE_DECAY_FACTOR,
        forward_only=forward_only)
    ckpt = tf.train.get_checkpoint_state(config.TRAIN_DIR)
    if ckpt and gfile.Exists(ckpt.model_checkpoint_path):
        model.saver.restore(session, ckpt.model_checkpoint_path)
    else:
        session.run(tf.initialize_all_variables())
    return model

def create_model(self, session, forward_only, scope_name):
    """Create translation model and initialize or load parameters in session."""
    dtype = tf.float16 if FLAGS.use_fp16 else tf.float32
    model = seq2seq_model.Seq2SeqModel(
        FLAGS.src_vocab_size, FLAGS.tgt_vocab_size, _buckets, self.size,
        self.num_layers, FLAGS.max_gradient_norm, FLAGS.batch_size,
        FLAGS.learning_rate, FLAGS.learning_rate_decay_factor,
        forward_only=forward_only)
    ckpt = tf.train.get_checkpoint_state(self.train_dir)
    if ckpt and os.path.exists(ckpt.model_checkpoint_path):
        print "Reading model parameters from", ckpt.model_checkpoint_path
        model.saver.restore(session, ckpt.model_checkpoint_path)
        return model
    print "Checkpoint Directory not Found."
    return

def __prepare_model(self, params):
    """Prepare G2P model for training."""
    self.params = params
    self.session = tf.Session()
    # Prepare model.
    print("Creating %d layers of %d units." % (self.params.num_layers,
                                               self.params.size))
    self.model = seq2seq_model.Seq2SeqModel(
        len(self.gr_vocab), len(self.ph_vocab), self._BUCKETS,
        self.params.size, self.params.num_layers,
        self.params.max_gradient_norm, self.params.batch_size,
        self.params.learning_rate, self.params.lr_decay_factor,
        forward_only=False)
    self.model.saver = tf.train.Saver(tf.all_variables(), max_to_keep=1)

def create_tf_model(self, tf_session, use_lstm=USE_LSTM,
                    num_samples=NUM_SAMPLES, forward_only=FORWARD_ONLY):
    layer_size = int(self.config.get('model', 'layer_size'))
    num_layers = int(self.config.get('model', 'num_layers'))
    max_gradient_norm = float(self.config.get('model', 'max_gradient_norm'))
    batch_size = int(self.config.get('model', 'batch_size'))
    learning_rate = float(self.config.get('model', 'learning_rate'))
    learning_rate_decay_factor = float(
        self.config.get('model', 'learning_rate_decay_factor'))
    logging.info("creating seq2seq model: %d layers of %d units." %
                 (num_layers, layer_size))
    print len(self.input_dict), len(self.output_dict), self.buckets, \
        layer_size, num_layers, max_gradient_norm, batch_size, \
        learning_rate, learning_rate_decay_factor, num_samples, forward_only
    # 20000 20000 [(5, 10), (10, 15), (20, 25), (40, 50)] 128 1 5.0 64 0.5 0.99 True
    # 103 59 [(7, 4), (14, 8)] 128 1 5.0 64 0.5 0.99 32 True
    self.model = seq2seq_model.Seq2SeqModel(
        len(self.input_dict), len(self.output_dict), self.buckets, layer_size,
        num_layers, max_gradient_norm, batch_size, learning_rate,
        learning_rate_decay_factor, num_samples=num_samples,
        forward_only=forward_only)
    init = tf.global_variables_initializer()
    tf_session.run(init)
    return self.model

def create_model(session, decode_flag, gr_vocab_size, ph_vocab_size):
    """Create translation model and initialize or load parameters in session."""
    num_layers = FLAGS.num_layers
    size = FLAGS.size
    # Checking model's architecture for decode processes.
    if decode_flag:
        params_path = os.path.join(FLAGS.model, "model.params")
        if gfile.Exists(params_path):
            params = open(params_path).readlines()
            for line in params:
                line_splitted = line.strip().split(":")
                if line_splitted[0] == "num_layers":
                    num_layers = int(line_splitted[1])
                if line_splitted[0] == "size":
                    size = int(line_splitted[1])
    model = seq2seq_model.Seq2SeqModel(
        gr_vocab_size, ph_vocab_size, _BUCKETS, size, num_layers,
        FLAGS.max_gradient_norm, FLAGS.batch_size, FLAGS.learning_rate,
        FLAGS.learning_rate_decay_factor, forward_only=decode_flag)
    ckpt = tf.train.get_checkpoint_state(FLAGS.model)
    if ckpt and tf.gfile.Exists(ckpt.model_checkpoint_path):
        print("Reading model parameters from %s" % ckpt.model_checkpoint_path)
        model.saver.restore(session, ckpt.model_checkpoint_path)
    elif tf.gfile.Exists(os.path.join(FLAGS.model, "model")):
        model.saver.restore(session, os.path.join(FLAGS.model, "model"))
    elif not decode_flag:
        print("Created model with fresh parameters.")
        session.run(tf.initialize_all_variables())
    else:
        raise ValueError("Model not found in %s" % FLAGS.model)
    return model

def create_model(session, forward_only): """Create translation model and initialize or load parameters in session.""" # print(FLAGS.train_dir) # train if not os.path.exists(FLAGS.train_dir): os.mkdir(FLAGS.train_dir) print('Step 1 : Create transliteration model') model = seq2seq_model.Seq2SeqModel( FLAGS.en_vocab_size, FLAGS.fr_vocab_size, _buckets, FLAGS.size, FLAGS.num_layers, FLAGS.max_gradient_norm, FLAGS.batch_size, FLAGS.learning_rate, FLAGS.learning_rate_decay_factor, forward_only=forward_only, use_lstm=FLAGS.use_lstm) ckpt = tf.train.get_checkpoint_state(FLAGS.train_dir) print('Step 2 : Confirm checkpoint parameters') # print(ckpt) # checkpoint # print(ckpt.model_checkpoint_path) # check if ckpt and tf.gfile.Exists(ckpt.model_checkpoint_path + '.index'): print("Step 3 : Reading model parameters from %s" % ckpt.model_checkpoint_path) model.saver.restore(session, ckpt.model_checkpoint_path) else: print("Created model with fresh parameters.") session.run(tf.global_variables_initializer()) return model
def create_tf_model(self, tf_session, layer_size=LAYER_SIZE,
                    num_layers=NUM_LAYERS,
                    max_gradient_norm=MAX_GRADIENT_NORM,
                    batch_size=BATCH_SIZE, learning_rate=LEARNING_RATE,
                    learning_rate_decay_factor=LEARNING_RATE_DECAY_FACTOR,
                    use_lstm=USE_LSTM, num_samples=NUM_SAMPLES,
                    forward_only=FORWARD_ONLY):
    logging.info("creating seq2seq model: %d layers of %d units." %
                 (num_layers, layer_size))
    print len(self.input_dict), len(self.output_dict), BUCKETS, layer_size, \
        num_layers, max_gradient_norm, batch_size, learning_rate, \
        learning_rate_decay_factor, num_samples, forward_only
    # 20000 20000 [(5, 10), (10, 15), (20, 25), (40, 50)] 128 1 5.0 64 0.5 0.99 True
    # 103 59 [(7, 4), (14, 8)] 128 1 5.0 64 0.5 0.99 32 True
    self.model = seq2seq_model.Seq2SeqModel(
        len(self.input_dict), len(self.output_dict), BUCKETS, layer_size,
        num_layers, max_gradient_norm, batch_size, learning_rate,
        learning_rate_decay_factor, num_samples=num_samples,
        forward_only=forward_only)
    init = tf.global_variables_initializer()
    tf_session.run(init)
    return self.model

def train():
    # word table 6000
    vocabulary_encode_size = 6000
    vocabulary_decode_size = 6000
    buckets = [(5, 10), (10, 15), (20, 25), (40, 50)]
    layer_size = 256  # size of each layer
    num_layers = 3    # number of layers
    batch_size = 64
    model = seq2seq_model.Seq2SeqModel(
        source_vocab_size=vocabulary_encode_size,
        target_vocab_size=vocabulary_decode_size,
        buckets=buckets,
        size=layer_size,
        num_layers=num_layers,
        max_gradient_norm=5.0,
        batch_size=batch_size,
        learning_rate=0.5,
        learning_rate_decay_factor=0.97,
        forward_only=False)
    config = tf.ConfigProto()
    config.gpu_options.allocator_type = 'BFC'  # guard against out-of-memory errors
    with tf.Session(config=config) as sess:
        # Restore the previous training run, if any.
        ckpt = tf.train.get_checkpoint_state('.')
        if ckpt is not None:
            print(ckpt.model_checkpoint_path)
            model.saver.restore(sess, ckpt.model_checkpoint_path)
        else:
            sess.run(tf.global_variables_initializer())
        train_set = read_data("train_encode.vec", "train_decode.vec")
        test_set = read_data("test_encode.vec", "test_decode.vec")
        train_bucket_sizes = [len(train_set[b]) for b in range(len(buckets))]
        train_total_size = float(sum(train_bucket_sizes))
        train_buckets_scale = [
            sum(train_bucket_sizes[:i + 1]) / train_total_size
            for i in range(len(train_bucket_sizes))
        ]
        loss = 0.0
        total_step = 0
        previous_losses = []
        # Train indefinitely, saving the model every so often.
        while True:
            random_number_01 = np.random.random_sample()
            bucket_id = min([
                i for i in range(len(train_buckets_scale))
                if train_buckets_scale[i] > random_number_01
            ])
            encoder_inputs, decoder_inputs, target_weights = model.get_batch(
                train_set, bucket_id)
            _, step_loss, _ = model.step(sess, encoder_inputs, decoder_inputs,
                                         target_weights, bucket_id, False)
            loss += step_loss / 500
            total_step += 1
            print(total_step)
            if total_step % 500 == 0:
                print(model.global_step.eval(), model.learning_rate.eval(), loss)
                # If the loss has not improved, decay the learning rate.
                if len(previous_losses) > 2 and loss > max(previous_losses[-3:]):
                    sess.run(model.learning_rate_decay_op)
                previous_losses.append(loss)
                # Save the model.
                checkpoint_path = "chatbot_seq2seq.ckpt"
                model.saver.save(sess, checkpoint_path,
                                 global_step=model.global_step)
                loss = 0.0
                # Evaluate the model on the test data.
                for bucket_id in range(len(buckets)):
                    if len(test_set[bucket_id]) == 0:
                        continue
                    encoder_inputs, decoder_inputs, target_weights = model.get_batch(
                        test_set, bucket_id)
                    _, eval_loss, _ = model.step(sess, encoder_inputs,
                                                 decoder_inputs, target_weights,
                                                 bucket_id, True)
                    eval_ppx = math.exp(eval_loss) if eval_loss < 300 else float('inf')
                    print(bucket_id, eval_ppx)

def test():
    train_encode_vocabulary = 'train_encode_vocabulary'
    train_decode_vocabulary = 'train_decode_vocabulary'

    def read_vocabulary(input_file):
        tmp_vocab = []
        with open(input_file, "r") as f:
            tmp_vocab.extend(f.readlines())
        tmp_vocab = [line.strip() for line in tmp_vocab]
        vocab = dict([(x, y) for (y, x) in enumerate(tmp_vocab)])
        return vocab, tmp_vocab

    vocab_en, _ = read_vocabulary(train_encode_vocabulary)
    _, vocab_de = read_vocabulary(train_decode_vocabulary)
    # vocabulary size
    vocabulary_encode_size = 6000
    vocabulary_decode_size = 6000
    buckets = [(5, 10), (10, 15), (20, 25), (40, 50)]
    layer_size = 256  # size of each layer
    num_layers = 3    # number of layers
    batch_size = 1
    model = seq2seq_model.Seq2SeqModel(
        source_vocab_size=vocabulary_encode_size,
        target_vocab_size=vocabulary_decode_size,
        buckets=buckets,
        size=layer_size,
        num_layers=num_layers,
        max_gradient_norm=5.0,
        batch_size=batch_size,
        learning_rate=0.5,
        learning_rate_decay_factor=0.99,
        forward_only=True)
    model.batch_size = 1
    with tf.Session() as sess:
        # Restore the previous training run.
        ckpt = tf.train.get_checkpoint_state('.')
        if ckpt is not None:
            print(ckpt.model_checkpoint_path)
            model.saver.restore(sess, ckpt.model_checkpoint_path)
        else:
            print("Model not found")
        while True:
            input_string = raw_input('me(Human) > ')
            # Quit.
            if input_string == 'quit':
                exit()
            input_string_vec = []
            for words in input_string.strip():
                input_string_vec.append(vocab_en.get(words, UNK_ID))
            bucket_id = min([
                b for b in range(len(buckets))
                if buckets[b][0] > len(input_string_vec)
            ])
            encoder_inputs, decoder_inputs, target_weights = model.get_batch(
                {bucket_id: [(input_string_vec, [])]}, bucket_id)
            _, _, output_logits = model.step(sess, encoder_inputs,
                                             decoder_inputs, target_weights,
                                             bucket_id, True)
            outputs = [int(np.argmax(logit, axis=1)) for logit in output_logits]
            if EOS_ID in outputs:
                outputs = outputs[:outputs.index(EOS_ID)]
            response = "".join(
                [tf.compat.as_str(vocab_de[output]) for output in outputs])
            print('Robot > ' + response)

        for line in f:
            vocabs.append(line.strip())
    vocab_dict = dict([(x, y) for (y, x) in enumerate(vocabs)])
    return vocab_dict, vocabs

vocab_enc, _ = read_vocabulary("data/shooter/vocab.enc")
_, vocab_dec = read_vocabulary("data/shooter/vocab.dec")
batch_size = 1
model = seq2seq_model.Seq2SeqModel(VOCAB_SIZE, VOCAB_SIZE,
                                   buckets=buckets,
                                   size=layer_size,
                                   num_layers=num_layers,
                                   max_gradient_norm=5.0,
                                   batch_size=batch_size,
                                   learning_rate=0.5,
                                   learning_rate_decay_factor=0.99,
                                   forward_only=True)
sess = tf.Session()
ckpt = tf.train.get_checkpoint_state('data/shooter')
if ckpt is not None:
    print(ckpt.model_checkpoint_path)
    model.saver.restore(sess, ckpt.model_checkpoint_path)
else:
    print("Failed to find model file")
    sys.exit(1)
while True:

args = parser.parse_args()
_buckets = [(5, 10), (10, 15), (20, 25), (40, 50)]
if not os.path.exists(args.output_path):
    os.mkdir(args.output_path)
with tf.device('/cpu:0'):
    ckpt = tf.train.get_checkpoint_state(args.ckpt_path)
    sess = tf.Session(config=tf.ConfigProto(log_device_placement=True))
    model = seq2seq_model.Seq2SeqModel(40000, 40000, _buckets, 1024, 3, 5, 64,
                                       0.5, 0.99, forward_only=True,
                                       use_lstm=True)
    model.saver.restore(sess, ckpt.model_checkpoint_path)
    print('Restored model ...')
    for var in tf.trainable_variables():
        print(var.name, var.get_shape())
        if not var.name.startswith('embedding_attention_seq2seq'):
            continue
        if not var.name.split('/')[1] == 'RNN':
            continue
        if var.name.split('/')[2] == 'EmbeddingWrapper':
            print('Saving Embedding ...')

    return vocab, tmp_vocab

vocab_en, _ = read_vocabulary(train_encode_vocabulary)
_, vocab_de = read_vocabulary(train_decode_vocabulary)
# vocabulary size 5000
vocabulary_encode_size = 5000
vocabulary_decode_size = 5000
buckets = [(5, 10), (10, 15), (20, 25), (40, 50)]
layer_size = 256  # size of each layer
num_layers = 3    # number of layers
batch_size = 1
model = seq2seq_model.Seq2SeqModel(source_vocab_size=vocabulary_encode_size,
                                   target_vocab_size=vocabulary_decode_size,
                                   buckets=buckets,
                                   size=layer_size,
                                   num_layers=num_layers,
                                   max_gradient_norm=5.0,
                                   batch_size=batch_size,
                                   learning_rate=0.5,
                                   learning_rate_decay_factor=0.99,
                                   forward_only=True)
model.batch_size = 1
with tf.Session() as sess:
    # Restore the previous training run.
    ckpt = tf.train.get_checkpoint_state('.')
    if ckpt is not None:
        print(ckpt.model_checkpoint_path)
        model.saver.restore(sess, ckpt.model_checkpoint_path)
    else:
        print("Model not found")
    while True:
        input_string = input('me > ')
        # Quit.