agent = Agent('eval/prim.pth')  # load pretrained weights for evaluation

save_path = 'prim_result/'
utils.check_dirs([save_path])

# shape_infopack: shape name, coarse/fine voxelizations, input modality, reference image
shape_infopack = ['demo', 'eval/demo-16.binvox', 'eval/demo-64.binvox', 'rgb', 'demo.png']
s, box, step = env.reset(shape_infopack)

acm_r = 0           # accumulated reward over the episode
step_interval = 20  # dump intermediate results every 20 steps

while True:
    valid_mask = env.get_valid_action_mask(box)
    a = agent.choose_action(s, box, step, valid_mask, 1.0)  # pick an action restricted to the valid set
    s_, box_, step_, r, done = env.next(a)
    acm_r += r
    s, box, step = s_, box_, step_  # advance to the next state
    if env.step_count % step_interval == 0:
        log_info = 'demo_step_' + str(env.step_count)
        print(log_info)
        env.output_result(log_info, save_path)
    if done:
        log_info = 'demo_finish_reward_' + str(acm_r)
        print(log_info)
        env.output_result(log_info, save_path)
        env.save_edgeloop(save_path)
        break
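# How `valid_mask` is consumed inside `choose_action` is not shown in this
# snippet. Below is a minimal sketch of the usual masking pattern, assuming a
# PyTorch Q-network; `q_net` and `masked_greedy_action` are hypothetical names,
# not this repository's API.
import torch

def masked_greedy_action(q_net, state, valid_mask):
    """Pick the highest-Q action among valid ones; invalid actions get -inf."""
    with torch.no_grad():
        q = q_net(state)                         # Q-values, shape (num_actions,)
    mask = torch.as_tensor(valid_mask, dtype=torch.bool)
    q = q.masked_fill(~mask, float('-inf'))      # rule out invalid actions entirely
    return int(torch.argmax(q).item())

# Usage mirrors the loop above: a = masked_greedy_action(q_net, s, valid_mask)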
import msvcrt  # Windows-only keyboard input, used by the manual-play debug branch

n_total = 0     # cumulative move count across all games
score_max = 0
scores, avg_rewards = [], []

for n in range(n_games):
    if n != 0:
        # bookkeeping for the game that just finished
        scores.append(score)
        avg_rewards.append(total_reward / n_moves)
        agent.learn(n_total, n_total + n_moves)  # train on this game's transitions
        n_total += n_moves                       # advance the cumulative move counter
    score = 0
    game.score = 0
    total_reward = 0
    n_moves = 0
    done = False
    observation = game.get_states()
    while not done:
        action = agent.choose_action(observation)
        if n < -1:  # debug branch: flip the condition to play manually from the keyboard
            game.draw()
            action = int(msvcrt.getch().decode("utf-8"))
        observation_, reward, done, score = game.spin_once(action, n_moves)
        agent.store_data(observation, action, reward, observation_, done)
        observation = observation_
        total_reward += reward
        n_moves += 1
        # game.draw()
    if score > score_max:
        score_max = score
        print(f"max score: {score_max}")
    if n % 100 == 0:
        pass  # (body elided)
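# `store_data` plus `learn(n_total, n_total + n_moves)` suggests a flat,
# index-addressable transition store (one game's moves form a contiguous
# slice) rather than randomly sampled replay. A minimal sketch under that
# assumption; the class and field names here are hypothetical.
import numpy as np

class FlatTransitionStore:
    def __init__(self, capacity, obs_dim):
        self.obs = np.zeros((capacity, obs_dim), dtype=np.float32)
        self.next_obs = np.zeros((capacity, obs_dim), dtype=np.float32)
        self.actions = np.zeros(capacity, dtype=np.int64)
        self.rewards = np.zeros(capacity, dtype=np.float32)
        self.dones = np.zeros(capacity, dtype=np.bool_)
        self.ptr = 0  # next write position; advances monotonically

    def store(self, obs, action, reward, next_obs, done):
        i = self.ptr
        self.obs[i], self.actions[i], self.rewards[i] = obs, action, reward
        self.next_obs[i], self.dones[i] = next_obs, done
        self.ptr += 1

    def slice(self, start, end):
        """Return one game's transitions, matching the learn(start, end) indexing."""
        return (self.obs[start:end], self.actions[start:end],
                self.rewards[start:end], self.next_obs[start:end],
                self.dones[start:end])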
# hyperparameters for this run: alpha, beta, tau, batch_size
score_record = []
score_record_step = []
count_record = []
count_record_step = []
time_record = []
time_record_step = []

for i in range(800):
    done = False
    score = 0
    obs = env.get_init_state()  # tasks not yet fully allocated
    while not done:
        act = MECSnet.choose_action(obs)
        new_state, reward, done = env.step(act)
        MECSnet.remember(obs, act, reward, new_state, int(done))
        MECSnet.learn()
        score += reward
        obs = new_state
        # print('reward is: {}'.format(reward))
    # append this episode's return to the record
    score_record.append(score)
    # print('episode ', i, 'score %.2f' % score, '100 game average %.2f' % np.mean(score_record[-100:]))
    print('episode ', i, 'score %.2f' % score, " wrong: ", env.count_wrong)
    count_record.append(1 - env.count_wrong / num_task)  # fraction of correctly assigned tasks
    time_record.append(env.time)
    if i % 25 == 0:
        # UEnet.save_models()
        pass
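# MECSnet.learn() runs one Q-learning update per environment step. A minimal
# sketch of a standard DQN update consistent with that pattern, assuming a
# PyTorch online network `q_net`, a target network `q_target`, and a minibatch
# sampled from the replay memory; all names here are hypothetical.
import torch
import torch.nn.functional as F

def dqn_update(q_net, q_target, optimizer, batch, gamma=0.99):
    obs, act, rew, next_obs, done = batch  # tensors drawn from the replay memory
    q_pred = q_net(obs).gather(1, act.unsqueeze(1)).squeeze(1)   # Q(s, a)
    with torch.no_grad():
        q_next = q_target(next_obs).max(dim=1).values            # max_a' Q'(s', a')
        td_target = rew + gamma * q_next * (1.0 - done)          # no bootstrap at terminals
    loss = F.mse_loss(q_pred, td_target)
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    return loss.item()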