import os

# Project-local modules (import paths assumed; the Env and DQN classes are
# defined elsewhere in this repository).
import evaluate
import util


def experiment1_test(
        output_folder,
        word_vectors,
        agent,
        episode_index,
        testset_path='./dataset/conll2003/en/eng.testb',
):
    # Initialize the environment
    env = Env(testset_path, word_vectors)
    step = 0
    s = env.reset()
    print('[' + util.now_time() + '] start testing...')
    while True:
        # Check whether the task has ended
        if env.end():
            print('[' + util.now_time() + '] testing...done')
            result_file = '%03d_episode_test.txt' % (episode_index + 1)
            env.save_all_newlines_to_file(output_folder, result_file)
            return evaluate.conlleval(output_folder, result_file)
        # Choose action a
        a = agent.choose_action(s)
        # Execute the action
        s_, r = env.step(a)
        # Move to the next state
        step += 1
        s = s_
def experiment1_train(
        output_folder,
        word_vectors,
        n_episodes=300,
        trainset_path='./dataset/conll2003/en/eng.train',
):
    # Initialize the environment
    print('[' + util.now_time() + '] init environment...')
    env = Env(trainset_path, word_vectors)
    print('[' + util.now_time() + '] environment initialized')

    # Initialize the DQN agent
    print('[' + util.now_time() + '] init agent...')
    agent = DQN(n_actions=env.n_actions,
                status_dim=env.status_dim,
                action_dim=env.action_dim,
                reward_dim=env.reward_dim)
    print('[' + util.now_time() + '] agent initialized')

    # Iterate over episodes, collecting per-episode evaluations for plotting
    train_evals = []
    test_evals = []
    for i in range(n_episodes):
        print('[' + util.now_time() + '] start episode %03d of learning...' % (i + 1))
        step = 0
        s = env.reset()
        while True:
            # Check whether the task has ended
            if env.end():
                print('[' + util.now_time() + '] episode %03d of learning...done' % (i + 1))
                result_file = '%03d_episode_train.txt' % (i + 1)
                env.save_all_newlines_to_file(output_folder, result_file)
                train_evals.append(evaluate.conlleval(output_folder, result_file))
                test_evals.append(experiment1_test(output_folder, word_vectors, agent, i))
                break
            # Choose action a
            a = agent.choose_action(s)
            # Execute the action and store the transition for replay
            s_, r = env.step(a)
            agent.store_transition(s, a, r, s_)
            step += 1
            s = s_
            # Start learning after 200 warm-up steps, then every 5 steps
            if step > 200 and step % 5 == 0:
                agent.learn()

    # TODO: plot and compare train and test set evaluations
    # plot(train_evals, test_evals)
    agent.eval_network.save(output_folder + os.path.sep + 'ex1_eval_model',
                            overwrite=True)
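# Example invocation: a minimal sketch, assuming the word vectors are loaded
# with gensim's KeyedVectors and that Env accepts such an object; the vector
# file path and output folder below are hypothetical placeholders, not part
# of the original script.
if __name__ == '__main__':
    from gensim.models import KeyedVectors

    wv = KeyedVectors.load_word2vec_format(
        './dataset/word_vectors.bin', binary=True)  # hypothetical path
    experiment1_train('./output/experiment1', wv, n_episodes=300)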
# ---------------------------------------------------------------------------
# Fragment of a second training script, continued from code above this
# section: it sits inside the episode/step loops, and the `else:` below pairs
# with an `if not term:` branch whose opening lines are not shown here. The
# original indentation was lost, so the nesting is reconstructed.
# ---------------------------------------------------------------------------
            # After the warm-up period, train every 4th environment step
            if step > args.warmup_steps and args.mode == 'train' and ep_steps % 4 == 0:
                batch = memory.get_minibatch(args.batch_sz)
                agent.train(batch, step)
                agent.train_target()
            ep_rewards += r
        else:
            # Episode has terminated: advance the environment with a neutral
            # (zero) action
            s2, r, term, info = env.step(0.0)
        ep_steps += 1
        step += 1
        s = s2

    # Per-episode bookkeeping (after the inner step loop)
    episode += 1
    print('# %d, steps: %d, ep_steps: %d, ep_r: %.4f, eps: %.4f'
          % (episode + 1, step, ep_steps, ep_rewards, epsilon))
    if args.mode == 'train':
        # Log the episode reward to TensorBoard
        summ = sess.run(score_summ, feed_dict={score_holder: ep_rewards})
        writer.add_summary(summ, global_step=episode)
        writer.flush()
    if step % args.save_freq == 0 and args.mode == 'train':
        # Periodic checkpoint
        path = saver_path + '/iter_' + str(step) + '.ckpt'
        agent.save(path)

# After all episodes: save a final checkpoint and shut the environment down
if args.mode == 'train':
    path = saver_path + '/final_' + str(step) + '.ckpt'
    agent.save(path)
env.end()
print('Finish training')
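# The loop above assumes a replay memory object exposing get_minibatch().
# Below is a minimal sketch of such a buffer, assuming transitions are stored
# as (s, a, r, term, s2) tuples and sampled uniformly at random; the repo's
# actual memory class and its sampling scheme may differ.
import random
from collections import deque


class SimpleReplayMemory(object):
    """Fixed-size FIFO buffer of (s, a, r, term, s2) transitions."""

    def __init__(self, capacity=100000):
        self.buffer = deque(maxlen=capacity)

    def add(self, s, a, r, term, s2):
        # Oldest transitions are evicted automatically once capacity is hit
        self.buffer.append((s, a, r, term, s2))

    def get_minibatch(self, batch_sz):
        # Sample uniformly without replacement; never ask for more items
        # than the buffer currently holds
        return random.sample(self.buffer, min(batch_sz, len(self.buffer)))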