Example #1
def experiment1_test(
    output_folder,
    word_vectors,
    agent,
    episode_index,
    testset_path='./dataset/conll2003/en/eng.testb',
):
    # Initialize the environment
    env = Env(testset_path, word_vectors)
    step = 0
    s = env.reset()
    print('[' + util.now_time() + "] start testing...")
    while True:
        # Check whether the task has ended
        if env.end():
            print('[' + util.now_time() + "] testing...done")
            result_file = '%03d_episode_test.txt' % (episode_index + 1)
            env.save_all_newlines_to_file(output_folder, result_file)
            return evaluate.conlleval(output_folder, result_file)

        # Choose Action a
        a = agent.choose_action(s)

        # Execute action
        s_, r = env.step(a)

        # Next status
        step += 1
        s = s_
Example #2
def experiment1_train(
    output_folder,
    word_vectors,
    n_episodes=300,
    trainset_path='./dataset/conll2003/en/eng.train',
):
    # Initialize the environment
    print('[' + util.now_time() + "] init environment...")
    env = Env(trainset_path, word_vectors)
    print('[' + util.now_time() + "] 环境初始化完毕")

    # Initialize the DQN agent
    print('[' + util.now_time() + "] init agent...")
    agent = DQN(n_actions=env.n_actions,
                status_dim=env.status_dim,
                action_dim=env.action_dim,
                reward_dim=env.reward_dim)
    print('[' + util.now_time() + "] agent初始化完毕")

    # Iterate over training episodes
    for i in range(n_episodes):
        print('[' + util.now_time() + "] start episode %03d of learning..." %
              (i + 1))
        step = 0
        s = env.reset()

        while True:
            # Check whether the task has ended
            if env.end():
                print('[' + util.now_time() +
                      "] episode %03d of learning...done" % (i + 1))
                result_file = '%03d_episode_train.txt' % (i + 1)
                env.save_all_newlines_to_file(output_folder, result_file)
                train_eval = evaluate.conlleval(output_folder, result_file)
                test_eval = experiment1_test(output_folder, word_vectors,
                                             agent, i)
                break

            # Choose Action a
            a = agent.choose_action(s)

            # Execute action
            # print('step %d' % step)
            s_, r = env.step(a)

            agent.store_transition(s, a, r, s_)

            step += 1
            s = s_

            # Start learning after 200 warm-up transitions, then update every 5 steps
            if step > 200 and step % 5 == 0:
                agent.learn()

    # plot and compare train and test set TODO
    # plot(train_evals,test_evals)
    agent.eval_network.save(output_folder + os.path.sep + 'ex1_eval_model',
                            overwrite=True)
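A minimal driver sketch for the training function above. The excerpt does not show what kind of word_vectors object Env expects, so the gensim loader, the embedding path, and the output folder below are illustrative assumptions only, not part of the original code.

# Hypothetical usage sketch; paths, embedding format, and the gensim loader
# are assumptions rather than the original project's setup.
import os
from gensim.models import KeyedVectors

output_folder = './output/experiment1'
os.makedirs(output_folder, exist_ok=True)

# Assumes pre-trained embeddings stored in word2vec text format.
word_vectors = KeyedVectors.load_word2vec_format('./embeddings/vectors.txt')

experiment1_train(output_folder, word_vectors, n_episodes=300)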
Example #3
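            # After warm-up, and only in training mode, sample a minibatch
            # and update the online and target networks every 4 steps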
            if step > args.warmup_steps and args.mode == 'train' and ep_steps % 4 == 0:
                batch = memory.get_minibatch(args.batch_sz)
                agent.train(batch, step)
                agent.train_target()
            ep_rewards += r
        else:
            s2, r, term, info = env.step(0.0)
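        # Advance the step counters and carry the new state into the next iteration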
        ep_steps += 1
        step += 1
        s = s2

    episode += 1
    print('# %d, steps: %d, ep_steps: %d, ep_r: %.4f, eps: %.4f' % \
            (episode+1, step, ep_steps, ep_rewards, epsilon))

    # Log the episode reward as a TensorFlow summary for TensorBoard
    if args.mode == 'train':
        summ = sess.run(score_summ, feed_dict={score_holder: ep_rewards})
        writer.add_summary(summ, global_step=episode)
        writer.flush()

    # Periodically checkpoint the agent during training
    if step % args.save_freq == 0 and args.mode == 'train':
        path = saver_path + '/iter_' + str(step) + '.ckpt'
        agent.save(path)

# Save a final checkpoint when training finishes
if args.mode == 'train':
    path = saver_path + '/final_' + str(step) + '.ckpt'
    agent.save(path)

env.end()
print('Finish training')