def AC_main():
    all_rw = []
    # Build the multi-agent controller for the requested mode.
    if mode == "social":
        multiPG = socialAgents([
            PGagent(env_dim["cleanup"][0], env_dim["cleanup"][1], add_para(i))
            for i in range(n_agents)
        ], agentParam)
    elif mode == "AC":
        # Independent actor-critic agents.
        multiPG = AC_Agents([IAC(8, 400) for i in range(n_agents)])
    else:
        multiPG = independentAgent([
            PGagent(env_dim["cleanup"][0], env_dim["cleanup"][1], add_para(i))
            for i in range(n_agents)
        ])

    for i_episode in range(n_episode):
        n_state, ep_reward = env.reset(), 0
        for t in range(n_steps):
            # Social agents mask out invalid actions; the others act directly.
            if mode == "social":
                actions = multiPG.select_mask_actions(n_state)
            else:
                actions = multiPG.select_actions(n_state)
            n_state_, n_reward, _, _ = env.step(actions)
            if render and i_episode == 1:
                env.render(impath, t)
            ep_reward += sum(n_reward)
            multiPG.update(n_state, n_reward, n_state_, actions)
            n_state = n_state_  # advance to the successor state before the next step
        running_reward = ep_reward
        all_rw.append(ep_reward)
        if i_episode % (args.log_interval * 2) == 0 and ifsave_data:
            np.save("data/" + model_name + ".npy", all_rw)
        if i_episode % args.log_interval == 0:
            print('Episode {}\tLast reward: {:.2f}\tAverage reward: {:.2f}'.format(
                i_episode, ep_reward, running_reward))
        if i_episode % save_eps == 0 and i_episode > 10 and ifsave_model:
            multiPG.save(file_name)
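# The AC_main variant below calls a process_state() helper that is defined
# elsewhere in this repo. The following is only a hedged sketch of what it
# plausibly does, inferred from how its outputs are consumed (per-agent entries
# treated as short plain lists); the real helper may differ, so the sketch is
# only defined when no real implementation is in scope.
if "process_state" not in globals():
    def process_state(n_x):
        # Convert each per-agent entry (e.g. a length-1 numpy array) to a plain
        # Python list, so [array([1]), array([0])] becomes [[1], [0]].
        return [list(np.asarray(x).flatten()) for x in n_x]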
def AC_main():
    # NOTE: this definition shadows the AC_main above when both live in one module.
    all_rw = []
    # Build the multi-agent controller for the requested mode.
    if mode == "social":
        multiPG = socialAgents([
            PGagent(env_dim["cleanup"][0], env_dim["cleanup"][1], add_para(i))
            for i in range(n_agents)
        ], agentParam)
    elif mode == "AC":
        # Centralised actor-critic variant.
        multiPG = Agents([Centralised_AC(4, 100) for i in range(n_agents)], 50)
    else:
        multiPG = independentAgent([
            PGagent(env_dim["cleanup"][0], env_dim["cleanup"][1], add_para(i))
            for i in range(n_agents)
        ])

    for i_episode in range(1000):
        n_state, ep_reward = env.reset(), 0
        n_state = n_state[0]  # unpack the observation from env.reset()'s return value
        test_reward_sum = 0
        for t in range(1000):
            if mode == "social":
                actions = multiPG.select_mask_actions(n_state)
            else:
                actions = multiPG.choose_action(process_state(n_state))
            # The env expects a one-element action list per agent, so keep only
            # the action index from each agent's output.
            a = [[act[0]] for act in actions]
            n_state_, n_reward, _, _, test_reward = env.step(a)
            test_reward_sum += test_reward
            if render and i_episode != 1:
                env.render()
            ep_reward += sum(n_reward)
            multiPG.update(process_state(n_state), process_state(n_reward),
                           process_state(n_state_), actions)
            n_state = n_state_  # advance to the successor state before the next step
        running_reward = ep_reward
        all_rw.append(ep_reward)
        if i_episode % (args.log_interval * 2) == 0 and ifsave_data:
            np.save("data/" + model_name + ".npy", all_rw)
        if i_episode % args.log_interval == 0:
            # n_reward entries are array-like, so ep_reward is indexed for a scalar.
            print('Episode {}\tLast reward: {:.2f}\tAverage reward: {:.2f}\ttest_reward: {:.2f}'
                  .format(i_episode, ep_reward[0], running_reward[0], test_reward_sum))
            logger.scalar_summary("ep_reward", ep_reward, i_episode)
            logger.scalar_summary("coin_eaten", test_reward_sum, i_episode)
        if i_episode % save_eps == 0 and i_episode > 10 and ifsave_model:
            multiPG.save(file_name)
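# Minimal entry-point sketch; an assumption, since the repo's actual launcher
# is not shown in this file. Because the second AC_main definition shadows the
# first, running the module this way starts the Centralised_AC variant.
if __name__ == "__main__":
    AC_main()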