Example #1
import logging
import os

import tensorflow as tf

valdata = Dataset(val_df,
                  char_vocab,
                  user_vocab,
                  max_len=params.max_len,
                  batch_size=params.batch_size)
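
# Note: the training-split "dataset" used in the loop below is constructed
# earlier in the original script (not shown here), analogously to valdata.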

model = Model(params)
saver = tf.train.Saver(tf.global_variables())
config = tf.ConfigProto(inter_op_parallelism_threads=args.threads,
                        intra_op_parallelism_threads=args.threads)
session = tf.Session(config=config)
session.run(tf.global_variables_initializer())

avg_loss = MovingAvg(0.97)  # exponential moving average of the training loss
for idx in range(params.iters):
    feed_dict = dataset.GetFeedDict(model)
    feed_dict[model.dropout_keep_prob] = params.dropout

    c, _ = session.run([model.avg_loss, model.train_op], feed_dict)
    cc = avg_loss.Update(c)
    if idx % 50 == 0 and idx > 0:
        # test one batch from the validation set
        val_c = session.run(model.avg_loss, valdata.GetFeedDict(model))
        logging.info({'iter': idx, 'cost': cc, 'rawcost': c, 'valcost': val_c})
    if idx % 2000 == 0:  # save a model file every 2,000 minibatches
        saver.save(session,
                   os.path.join(expdir, 'model.bin'),
                   write_meta_graph=True,
                   global_step=idx)
        # Alternatively, export a frozen TF1 inference graph (note: uses the
        # `session` defined above; the output node name 'add' is graph-specific):
        # gd = tf.graph_util.convert_variables_to_constants(
        #     session, tf.get_default_graph().as_graph_def(), ['add'])
        # with tf.gfile.GFile('./tmodel/model.pb', 'wb') as f:
        #     f.write(gd.SerializeToString())
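
The MovingAvg helper used above is not defined in this snippet; a minimal sketch consistent with how it is used (constructed with a decay rate, Update returns the smoothed loss) might look like:

class MovingAvg(object):
    """Exponential moving average of a scalar; a sketch, assuming the
    interface used above (MovingAvg(0.97), Update(value) -> smoothed value)."""

    def __init__(self, rate):
        self.rate = rate  # decay rate, e.g. 0.97
        self.avg = None

    def Update(self, value):
        # The first call seeds the average; later calls blend in the new value.
        if self.avg is None:
            self.avg = value
        else:
            self.avg = self.rate * self.avg + (1.0 - self.rate) * value
        return self.avg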
Example #2
import argparse
import numpy as np

# The expdir and data argument definitions are assumed from their use below.
parser = argparse.ArgumentParser()
parser.add_argument('--expdir', help='directory of the saved model')
parser.add_argument('--data', help='evaluation data to score')
parser.add_argument('--threads',
                    type=int,
                    default=12,
                    help='how many threads to use in tensorflow')
args = parser.parse_args()
expdir = args.expdir

# Load the model
metamodel = MetaModel(expdir)
model = metamodel.model
metamodel.MakeSessionAndRestore(args.threads)
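# MetaModel is not shown here; from its use it presumably reloads params and
# vocabularies from expdir, rebuilds the Model graph, and restores the
# checkpointed variables into a new tf.Session.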
# Load the data
df = LoadData(args.data)
dataset = Dataset(df,
                  metamodel.char_vocab,
                  metamodel.user_vocab,
                  max_len=metamodel.params.max_len)

total_word_count = 0
total_log_prob = 0
print(len(dataset.df), dataset.batch_size)  # e.g. 20999 24
for idx in range(len(dataset.df) // dataset.batch_size):
    feed_dict = dataset.GetFeedDict(model)
    # This session holds the model restored from the saved checkpoint.
    c, words_in_batch = metamodel.session.run(
        [model.avg_loss, model.words_in_batch], feed_dict)
    # c is the mean per-word loss for the batch; words_in_batch is its word count.
    total_word_count += words_in_batch  # running total of scored words
    total_log_prob += float(c * words_in_batch)
    # exp(mean per-word loss over everything seen so far) = running perplexity
    print('overall perplexity: {0}\t{1:.3f}'.format(
        idx, np.exp(total_log_prob / total_word_count)))
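
The quantity printed each iteration is corpus-level perplexity: weighting each batch's mean loss by its word count recovers the total log-loss, and exponentiating the per-word mean converts it to perplexity. A toy sketch with made-up numbers, assuming avg_loss is the mean per-word cross-entropy in nats:

import numpy as np

# Two hypothetical batches: (mean per-word loss in nats, words in batch).
batches = [(4.2, 100), (3.8, 140)]
total_log_prob = sum(loss * n for loss, n in batches)  # total nats over corpus
total_words = sum(n for _, n in batches)
perplexity = np.exp(total_log_prob / total_words)      # exp(952 / 240) ~= 52.8
print(round(perplexity, 1))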