Example #1
def create_model(session, forward_only, beam_search):
    # Excerpt from a larger module: assumes `import numpy as np`,
    # `import tensorflow as tf` (TF 0.x/1.x API), `data_utils`,
    # `seq2seq_model`, plus module-level `FLAGS` and `_buckets`.
    dtype = tf.float16 if FLAGS.use_fp16 else tf.float32
    model = seq2seq_model.Seq2SeqModel(FLAGS.post_vocab_size,
                                       FLAGS.response_vocab_size,
                                       _buckets,
                                       FLAGS.size,
                                       FLAGS.num_layers,
                                       FLAGS.max_gradient_norm,
                                       FLAGS.batch_size,
                                       FLAGS.learning_rate,
                                       FLAGS.learning_rate_decay_factor,
                                       embedding_size=FLAGS.embedding_size,
                                       forward_only=forward_only,
                                       beam_search=beam_search,
                                       beam_size=FLAGS.beam_size,
                                       category=FLAGS.category,
                                       use_emb=FLAGS.use_emb,
                                       use_imemory=FLAGS.use_imemory,
                                       use_ememory=FLAGS.use_ememory,
                                       emotion_size=FLAGS.emotion_size,
                                       imemory_size=FLAGS.imemory_size,
                                       dtype=dtype)
    see_variable = True  # debug switch: dump every variable and its shape
    if see_variable:
        # tf.all_variables() is deprecated in favor of tf.global_variables()
        # in TF >= 1.0.
        for v in tf.all_variables():
            print(v.name, v.get_shape())
    ckpt = tf.train.get_checkpoint_state(FLAGS.train_dir)
    pre_ckpt = tf.train.get_checkpoint_state(FLAGS.pretrain_dir)
    if ckpt:  # and tf.gfile.Exists(ckpt.model_checkpoint_path + ".index"):
        if FLAGS.load_model == 0:
            print("Reading model parameters from %s" %
                  ckpt.model_checkpoint_path)
            model.saver.restore(session, ckpt.model_checkpoint_path)
        else:
            # Keep everything up to and including the '-' in the checkpoint
            # name and append the requested step number.
            prefix = ckpt.model_checkpoint_path[:ckpt.model_checkpoint_path.find('-') + 1]
            path = prefix + str(FLAGS.load_model)
            print("Reading model parameters from %s" % path)
            model.saver.restore(session, path)
    else:
        if pre_ckpt:
            # Initialize only the variables listed in model.initial_var;
            # tf.initialize_variables() is deprecated in favor of
            # tf.variables_initializer() in TF >= 1.0.
            session.run(tf.initialize_variables(model.initial_var))
            if FLAGS.pretrain > -1:
                prefix = pre_ckpt.model_checkpoint_path[:pre_ckpt.model_checkpoint_path.find('-') + 1]
                path = prefix + str(FLAGS.pretrain)
                print("Reading pretrain model parameters from %s" % path)
                model.pretrain_saver.restore(session, path)
            else:
                print("Reading pretrain model parameters from %s" %
                      pre_ckpt.model_checkpoint_path)
                model.pretrain_saver.restore(session,
                                             pre_ckpt.model_checkpoint_path)
        else:
            print("Created model with fresh parameters.")
            # tf.initialize_all_variables() is deprecated in favor of
            # tf.global_variables_initializer() in TF >= 1.0.
            session.run(tf.initialize_all_variables())

            # Load pretrained word vectors and copy the same matrix into
            # both the encoder (post) and decoder (response) embeddings.
            with open('word2vec.npy', 'rb') as f:
                wordvec = np.load(f)
            vec_post = wordvec
            vec_response = wordvec
            initvec_post = tf.constant(vec_post,
                                       dtype=dtype,
                                       name='init_wordvector_post')
            initvec_response = tf.constant(vec_response,
                                           dtype=dtype,
                                           name='init_wordvector_response')
            # Look up the embedding variables by their graph names.
            embedding_post = [
                x for x in tf.trainable_variables() if x.name ==
                'embedding_attention_seq2seq/RNN/EmbeddingWrapper/embedding:0'
            ][0]
            embedding_response = [
                x for x in tf.trainable_variables() if x.name ==
                'embedding_attention_seq2seq/embedding_attention_decoder/embedding:0'
            ][0]
            session.run(embedding_post.assign(initvec_post))
            session.run(embedding_response.assign(initvec_response))
        if FLAGS.use_ememory:
            # Seed the decoder's external memory with precomputed vectors.
            vec_ememory = data_utils.get_ememory(FLAGS.data_dir,
                                                 FLAGS.response_vocab_size)
            initvec_ememory = tf.constant(vec_ememory,
                                          dtype=dtype,
                                          name='init_ememory')
            ememory = [
                x for x in tf.all_variables() if x.name ==
                'embedding_attention_seq2seq/embedding_attention_decoder/external_memory:0'
            ][0]
            session.run(ememory.assign(initvec_ememory))
    return model
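
Both examples select a specific checkpoint by slicing the checkpoint name at the first '-' and appending the requested global step. A standalone sketch of that string manipulation, with a made-up file name:

# Pure-string demo of the checkpoint selection above (hypothetical values).
ckpt_path = "train_dir/model.ckpt-38000"
load_model = 12000
path = ckpt_path[:ckpt_path.find('-') + 1] + str(load_model)
print(path)  # train_dir/model.ckpt-12000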
Example #2
def create_model(session, forward_only, beam_search):
    # Excerpt from a larger module: assumes `import tensorflow as tf`
    # (TF 0.x/1.x API), `data_utils`, `seq2seq_model`, plus module-level
    # `FLAGS` and `_buckets`.
    dtype = tf.float16 if FLAGS.use_fp16 else tf.float32
    model = seq2seq_model.Seq2SeqModel(FLAGS.post_vocab_size,
                                       FLAGS.response_vocab_size,
                                       _buckets,
                                       FLAGS.hidden_size,
                                       FLAGS.num_layers,
                                       FLAGS.max_gradient_norm,
                                       FLAGS.batch_size,
                                       FLAGS.learning_rate,
                                       FLAGS.learning_rate_decay_factor,
                                       embedding_size=FLAGS.embedding_size,
                                       forward_only=forward_only,
                                       beam_search=beam_search,
                                       beam_size=FLAGS.beam_size,
                                       category=FLAGS.category,
                                       use_emb=FLAGS.use_emb,
                                       use_autoEM=FLAGS.use_autoEM,
                                       use_imemory=FLAGS.use_imemory,
                                       use_ememory=FLAGS.use_ememory,
                                       emotion_size=FLAGS.emotion_size,
                                       imemory_size=FLAGS.imemory_size,
                                       dtype=dtype)
    see_variable = True  # debug switch: dump every variable and its shape
    if see_variable:
        # tf.all_variables() is deprecated in favor of tf.global_variables()
        # in TF >= 1.0.
        for v in tf.all_variables():
            print(v.name, v.get_shape())
    ckpt = tf.train.get_checkpoint_state(FLAGS.train_dir)
    pre_ckpt = tf.train.get_checkpoint_state(FLAGS.pretrain_dir)
    if ckpt:
        if FLAGS.load_model == 0:
            print("Reading model parameters from %s" %
                  ckpt.model_checkpoint_path)
            model.saver.restore(session, ckpt.model_checkpoint_path)
        else:
            # As in Example #1: keep everything up to and including the '-'
            # and append the requested step number.
            prefix = ckpt.model_checkpoint_path[:ckpt.model_checkpoint_path.find('-') + 1]
            path = prefix + str(FLAGS.load_model)
            print("Reading model parameters from %s" % path)
            model.saver.restore(session, path)
    else:
        if pre_ckpt:
            # Initialize only the variables listed in model.initial_var;
            # tf.initialize_variables() is deprecated in favor of
            # tf.variables_initializer() in TF >= 1.0.
            session.run(tf.initialize_variables(model.initial_var))
            if FLAGS.pretrain > -1:
                prefix = pre_ckpt.model_checkpoint_path[:pre_ckpt.model_checkpoint_path.find('-') + 1]
                path = prefix + str(FLAGS.pretrain)
                print("Reading pretrain model parameters from %s" % path)
                model.pretrain_saver.restore(session, path)
            else:
                print("Reading pretrain model parameters from %s" %
                      pre_ckpt.model_checkpoint_path)
                model.pretrain_saver.restore(session,
                                             pre_ckpt.model_checkpoint_path)
        else:
            print("Created model with fresh parameters.")
            # tf.initialize_all_variables() is deprecated in favor of
            # tf.global_variables_initializer() in TF >= 1.0.
            session.run(tf.initialize_all_variables())
            # Load pretrained embeddings for encoder and decoder; each
            # matrix is vocab_size x embedding_size (e.g. 40000 x 100).
            vec_post, vec_response = data_utils.get_word_embedding(
                FLAGS.data_dir, FLAGS.post_vocab_size,
                FLAGS.response_vocab_size)
            initvec_post = tf.constant(vec_post,
                                       dtype=dtype,
                                       name='init_wordvector_post')
            initvec_response = tf.constant(vec_response,
                                           dtype=dtype,
                                           name='init_wordvector_response')
            # Look up the embedding variables by their graph names.
            embedding_post = [
                x for x in tf.trainable_variables() if x.name ==
                'embedding_attention_seq2seq/RNN/EmbeddingWrapper/embedding:0'
            ][0]
            embedding_response = [
                x for x in tf.trainable_variables() if x.name ==
                'embedding_attention_seq2seq/embedding_attention_decoder/embedding:0'
            ][0]
            session.run(embedding_post.assign(initvec_post))
            session.run(embedding_response.assign(initvec_response))
        if FLAGS.use_ememory:
            # Seed the decoder's external memory with precomputed vectors;
            # the matrix is 6 x response_vocab_size (e.g. 6 x 40000).
            vec_ememory = data_utils.get_ememory(
                FLAGS.data_dir, FLAGS.response_vocab_size)
            initvec_ememory = tf.constant(vec_ememory,
                                          dtype=dtype,
                                          name='init_ememory')
            ememory = [
                x for x in tf.all_variables() if x.name ==
                'embedding_attention_seq2seq/embedding_attention_decoder/external_memory:0'
            ][0]
            session.run(ememory.assign(initvec_ememory))

        if FLAGS.use_autoEM:
            # Seed the classifier's sentiment and grammar embeddings from
            # pretrained vectors.
            senti_embedding, grammar_embedding = data_utils.get_pretrained_embedding(
                FLAGS.data_dir, FLAGS.post_vocab_size)
            initvec_senti = tf.constant(senti_embedding,
                                        dtype=dtype,
                                        name='initvec_senti')
            initvec_grammar = tf.constant(grammar_embedding,
                                          dtype=dtype,
                                          name='initvec_grammar')
            senti_tensor = [
                x for x in tf.trainable_variables()
                if x.name == 'classify_model_with_buckets/senti_embed:0'
            ][0]
            grammar_tensor = [
                x for x in tf.trainable_variables()
                if x.name == 'classify_model_with_buckets/grammar_embed:0'
            ][0]
            session.run(senti_tensor.assign(initvec_senti))
            session.run(grammar_tensor.assign(initvec_grammar))

    return model
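
For context, a minimal sketch of how create_model is typically driven in this style of TF 1.x script. The session setup is standard; the training-loop calls are illustrative assumptions (model.get_batch and model.step follow the classic translate.py seq2seq pattern and may differ in this codebase):

def train():
    with tf.Session() as sess:
        # Build the graph, then restore or initialize weights as shown above.
        model = create_model(sess, forward_only=False, beam_search=False)
        # Hypothetical training loop, per the classic seq2seq recipe:
        # batch = model.get_batch(training_data, bucket_id)
        # model.step(sess, *batch, bucket_id, forward_only=False)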