# Repo-local names assumed by this module: `seq2seq_model`, `data_utils`,
# the `FLAGS` object, and the `_buckets` list are defined elsewhere in it.
import numpy as np
import tensorflow as tf


# Baseline variant: on a fresh start, encoder/decoder word embeddings are
# initialized from a local word2vec.npy file.
def create_model(session, forward_only, beam_search):
    """Build the seq2seq model and restore its parameters from the training
    checkpoint, a pretraining checkpoint, or fresh initialization."""
    dtype = tf.float16 if FLAGS.use_fp16 else tf.float32
    model = seq2seq_model.Seq2SeqModel(FLAGS.post_vocab_size,
                                       FLAGS.response_vocab_size,
                                       _buckets,
                                       FLAGS.size,
                                       FLAGS.num_layers,
                                       FLAGS.max_gradient_norm,
                                       FLAGS.batch_size,
                                       FLAGS.learning_rate,
                                       FLAGS.learning_rate_decay_factor,
                                       embedding_size=FLAGS.embedding_size,
                                       forward_only=forward_only,
                                       beam_search=beam_search,
                                       beam_size=FLAGS.beam_size,
                                       category=FLAGS.category,
                                       use_emb=FLAGS.use_emb,
                                       use_imemory=FLAGS.use_imemory,
                                       use_ememory=FLAGS.use_ememory,
                                       emotion_size=FLAGS.emotion_size,
                                       imemory_size=FLAGS.imemory_size,
                                       dtype=dtype)

    # Optionally dump every variable name and shape for inspection.
    see_variable = True
    if see_variable:
        for v in tf.global_variables():
            print(v.name, v.get_shape())

    ckpt = tf.train.get_checkpoint_state(FLAGS.train_dir)
    pre_ckpt = tf.train.get_checkpoint_state(FLAGS.pretrain_dir)
    if ckpt:  # and tf.gfile.Exists(ckpt.model_checkpoint_path + ".index"):
        if FLAGS.load_model == 0:
            # Restore the most recent training checkpoint.
            print("Reading model parameters from %s" % ckpt.model_checkpoint_path)
            model.saver.restore(session, ckpt.model_checkpoint_path)
        else:
            # Restore a specific global step: splice the step number onto the
            # checkpoint prefix (everything up to and including the first '-').
            path = ckpt.model_checkpoint_path[:ckpt.model_checkpoint_path.find('-') + 1] \
                + str(FLAGS.load_model)
            print("Reading model parameters from %s" % path)
            model.saver.restore(session, path)
    elif pre_ckpt:
        # No training checkpoint: initialize the non-pretrained variables,
        # then restore the pretrained ones from the pretraining checkpoint.
        session.run(tf.variables_initializer(model.initial_var))
        if FLAGS.pretrain > -1:
            path = pre_ckpt.model_checkpoint_path[:pre_ckpt.model_checkpoint_path.find('-') + 1] \
                + str(FLAGS.pretrain)
            print("Reading pretrain model parameters from %s" % path)
            model.pretrain_saver.restore(session, path)
        else:
            print("Reading pretrain model parameters from %s" % pre_ckpt.model_checkpoint_path)
            model.pretrain_saver.restore(session, pre_ckpt.model_checkpoint_path)
    else:
        # Fresh start: initialize everything, then overwrite the embedding
        # variables with pretrained word vectors.
        print("Created model with fresh parameters.")
        session.run(tf.global_variables_initializer())
        with open('word2vec.npy', 'rb') as f:
            wordvec = np.load(f)
        # The same word vectors seed both the post (encoder) and response
        # (decoder) embeddings.
        vec_post = wordvec
        vec_response = wordvec
        initvec_post = tf.constant(vec_post, dtype=dtype,
                                   name='init_wordvector_post')
        initvec_response = tf.constant(vec_response, dtype=dtype,
                                       name='init_wordvector_response')
        embedding_post = [
            x for x in tf.trainable_variables() if x.name ==
            'embedding_attention_seq2seq/RNN/EmbeddingWrapper/embedding:0'
        ][0]
        embedding_response = [
            x for x in tf.trainable_variables() if x.name ==
            'embedding_attention_seq2seq/embedding_attention_decoder/embedding:0'
        ][0]
        session.run(embedding_post.assign(initvec_post))
        session.run(embedding_response.assign(initvec_response))
        if FLAGS.use_ememory:
            # Seed the external (emotion) memory with precomputed vectors.
            vec_ememory = data_utils.get_ememory(FLAGS.data_dir,
                                                 FLAGS.response_vocab_size)
            initvec_ememory = tf.constant(vec_ememory, dtype=dtype,
                                          name='init_ememory')
            ememory = [
                x for x in tf.global_variables() if x.name ==
                'embedding_attention_seq2seq/embedding_attention_decoder/external_memory:0'
            ][0]
            session.run(ememory.assign(initvec_ememory))
    return model
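
# Both variants restore a specific global step by splicing the step number
# onto the checkpoint prefix. Below is a standalone sketch of that string
# splice, using a hypothetical path and step (not taken from the original
# file). Note it assumes the first '-' in the path is the step separator; a
# train_dir whose name contains '-' would break the splice.
def _demo_checkpoint_splice():
    model_checkpoint_path = "train_dir/model.ckpt-57600"  # latest checkpoint
    load_model = 48000  # earlier step to restore instead
    # Keep everything up to and including the first '-', then append the step.
    path = model_checkpoint_path[:model_checkpoint_path.find('-') + 1] \
        + str(load_model)
    assert path == "train_dir/model.ckpt-48000"
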
# autoEM variant: identical restore logic, but on a fresh start the word
# vectors come through data_utils, and when FLAGS.use_autoEM is set the
# classifier's pretrained sentiment/grammar embeddings are seeded as well.
def create_model(session, forward_only, beam_search):
    """Build the seq2seq model (with optional autoEM classifier) and restore
    its parameters from the training checkpoint, a pretraining checkpoint, or
    fresh initialization."""
    dtype = tf.float16 if FLAGS.use_fp16 else tf.float32
    model = seq2seq_model.Seq2SeqModel(FLAGS.post_vocab_size,
                                       FLAGS.response_vocab_size,
                                       _buckets,
                                       FLAGS.hidden_size,
                                       FLAGS.num_layers,
                                       FLAGS.max_gradient_norm,
                                       FLAGS.batch_size,
                                       FLAGS.learning_rate,
                                       FLAGS.learning_rate_decay_factor,
                                       embedding_size=FLAGS.embedding_size,
                                       forward_only=forward_only,
                                       beam_search=beam_search,
                                       beam_size=FLAGS.beam_size,
                                       category=FLAGS.category,
                                       use_emb=FLAGS.use_emb,
                                       use_autoEM=FLAGS.use_autoEM,
                                       use_imemory=FLAGS.use_imemory,
                                       use_ememory=FLAGS.use_ememory,
                                       emotion_size=FLAGS.emotion_size,
                                       imemory_size=FLAGS.imemory_size,
                                       dtype=dtype)

    # Optionally dump every variable name and shape for inspection.
    see_variable = True
    if see_variable:
        for v in tf.global_variables():
            print(v.name, v.get_shape())

    ckpt = tf.train.get_checkpoint_state(FLAGS.train_dir)
    pre_ckpt = tf.train.get_checkpoint_state(FLAGS.pretrain_dir)
    if ckpt:
        if FLAGS.load_model == 0:
            # Restore the most recent training checkpoint.
            print("Reading model parameters from %s" % ckpt.model_checkpoint_path)
            model.saver.restore(session, ckpt.model_checkpoint_path)
        else:
            # Restore a specific global step via the same prefix splice.
            path = ckpt.model_checkpoint_path[:ckpt.model_checkpoint_path.find('-') + 1] \
                + str(FLAGS.load_model)
            print("Reading model parameters from %s" % path)
            model.saver.restore(session, path)
    elif pre_ckpt:
        # No training checkpoint: initialize the non-pretrained variables,
        # then restore the pretrained ones from the pretraining checkpoint.
        session.run(tf.variables_initializer(model.initial_var))
        if FLAGS.pretrain > -1:
            path = pre_ckpt.model_checkpoint_path[:pre_ckpt.model_checkpoint_path.find('-') + 1] \
                + str(FLAGS.pretrain)
            print("Reading pretrain model parameters from %s" % path)
            model.pretrain_saver.restore(session, path)
        else:
            print("Reading pretrain model parameters from %s" % pre_ckpt.model_checkpoint_path)
            model.pretrain_saver.restore(session, pre_ckpt.model_checkpoint_path)
    else:
        # Fresh start: initialize everything, then overwrite the embedding
        # variables with pretrained word vectors.
        print("Created model with fresh parameters.")
        session.run(tf.global_variables_initializer())
        vec_post, vec_response = data_utils.get_word_embedding(
            FLAGS.data_dir, FLAGS.post_vocab_size,
            FLAGS.response_vocab_size)  # each of shape 40000 x 100
        initvec_post = tf.constant(vec_post, dtype=dtype,
                                   name='init_wordvector_post')
        initvec_response = tf.constant(vec_response, dtype=dtype,
                                       name='init_wordvector_response')
        embedding_post = [
            x for x in tf.trainable_variables() if x.name ==
            'embedding_attention_seq2seq/RNN/EmbeddingWrapper/embedding:0'
        ][0]
        embedding_response = [
            x for x in tf.trainable_variables() if x.name ==
            'embedding_attention_seq2seq/embedding_attention_decoder/embedding:0'
        ][0]
        session.run(embedding_post.assign(initvec_post))
        session.run(embedding_response.assign(initvec_response))
        if FLAGS.use_ememory:
            # Seed the external (emotion) memory with precomputed vectors.
            vec_ememory = data_utils.get_ememory(
                FLAGS.data_dir, FLAGS.response_vocab_size)  # shape 6 x 40000
            initvec_ememory = tf.constant(vec_ememory, dtype=dtype,
                                          name='init_ememory')
            ememory = [
                x for x in tf.global_variables() if x.name ==
                'embedding_attention_seq2seq/embedding_attention_decoder/external_memory:0'
            ][0]
            session.run(ememory.assign(initvec_ememory))
        if FLAGS.use_autoEM:
            # Seed the classifier's sentiment and grammar embeddings with
            # pretrained vectors.
            senti_embedding, grammar_embedding = data_utils.get_pretrained_embedding(
                FLAGS.data_dir, FLAGS.post_vocab_size)
            initvec_senti = tf.constant(senti_embedding, dtype=dtype,
                                        name='initvec_senti')
            initvec_grammar = tf.constant(grammar_embedding, dtype=dtype,
                                          name='initvec_grammar')
            senti_tensor = [
                x for x in tf.trainable_variables()
                if x.name == 'classify_model_with_buckets/senti_embed:0'
            ][0]
            grammar_tensor = [
                x for x in tf.trainable_variables()
                if x.name == 'classify_model_with_buckets/grammar_embed:0'
            ][0]
            session.run(senti_tensor.assign(initvec_senti))
            session.run(grammar_tensor.assign(initvec_grammar))
    return model
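
# For context, a minimal sketch of how create_model is typically driven in a
# TF1-style script; the session setup below is an assumption for illustration,
# not part of the original training loop.
def _demo_create_model():
    with tf.Session() as sess:
        # Builds the graph, then restores from FLAGS.train_dir /
        # FLAGS.pretrain_dir, or falls back to fresh initialization.
        model = create_model(sess, forward_only=False, beam_search=False)
        return model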