# dqn agent
action = agent_dqn.act(state)
action = action.values()
reward = infrastructure.monkey(action)
if infrastructure.attempts < infrastructure.peers:
    agent_dqn.observe(reward=reward, terminal=False)
else:
    agent_dqn.observe(reward=reward, terminal=True)
rl_dqn.append(reward)

# trpo agent
action = agent_vpg.act(state)
action = action.values()
reward = infrastructure.monkey(action)
if infrastructure.attempts < infrastructure.peers:
    agent_vpg.observe(reward=reward, terminal=False)
else:
    agent_vpg.observe(reward=reward, terminal=True)
rl_vpg.append(reward)

# monkey
action = [
    randint(1, len(infrastructure.peers) - 1)
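# The snippet above assumes pre-built `agent_dqn` / `agent_vpg` agents, reward lists
# `rl_dqn` / `rl_vpg`, and an `infrastructure` object exposing `peers`, `attempts`, and
# `monkey()`. None of those definitions appear in this listing; the interface sketch
# below is a hypothetical stand-in that only makes the snippet's assumptions explicit.
# It is not the project's actual implementation.
class Infrastructure:
    """Hypothetical interface implied by the snippet above."""

    peers: list    # peers the monkey can act on; note the snippet also compares
                   # `attempts < peers` directly, so `peers` may instead encode a count
    attempts: int  # number of attempts made so far in the current episode

    def monkey(self, action):
        """Apply the chosen action to the infrastructure and return a scalar reward."""
        ...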
def get_agent(game, agentType):
    """Return an agent for the given game, restoring a saved checkpoint if one
    exists and otherwise training it from scratch via self-play."""
    count = 1
    base_path = '.'
    checkpointPath = base_path + "/games/agents/" + game + "/" + agentType + "/"

    # Build the requested agent type from the game's state/action specification.
    if agentType == "vpg":
        agent = VPGAgent(
            states=config[game]["states"],
            actions=config[game]["actions"],
            memory=1000,
            network="auto",
        )
    elif agentType == "ppo":
        agent = PPOAgent(
            states=config[game]["states"],
            actions=config[game]["actions"],
            memory=1000,
            network="auto",
        )
    elif agentType == "dqn":
        agent = DQNAgent(
            states=config[game]["states"],
            actions=config[game]["actions"],
            memory=1000,
            network="auto",
        )

    if game == "3pd":
        try:
            agent.restore(directory=checkpointPath, filename=None)
            print("restoration successful")
        except Exception:
            # No usable checkpoint: train the agent by self-play in the
            # three-player game and checkpoint it periodically.
            agent.initialize()
            for x in tqdm(range(1000001)):
                testState = np.full(config[game]["states"]["shape"], None)
                for i in range(10):
                    moveA = agent.act(testState)
                    moveB = agent.act(testState)
                    moveC = agent.act(testState)
                    rewards = payoffs(game, [moveA, moveB, moveC])
                    if i < 9:
                        agent.observe(reward=rewards[0], terminal=False)
                        agent.observe(reward=rewards[1], terminal=False)
                        agent.observe(reward=rewards[2], terminal=False)
                    else:
                        # Last round of the episode: mark it terminal.
                        agent.observe(reward=rewards[0], terminal=False)
                        agent.observe(reward=rewards[1], terminal=False)
                        agent.observe(reward=rewards[2], terminal=True)
                    testState[i] = [[moveA], [moveB], [moveC]]
                if x % 1000 == 0:
                    agent.save(directory=checkpointPath, filename=None)
    else:
        try:
            agent.restore(directory=checkpointPath, filename=None)
            print("restoration successful")
        except Exception:
            # No usable checkpoint: train the agent by self-play in the
            # two-player game, then save it.
            agent.initialize()
            for x in tqdm(range(count)):
                testState = np.full(config[game]["states"]["shape"], 0)
                for i in range(10):
                    moveA = agent.act(testState)
                    moveB = agent.act(testState)
                    rewards = payoffs(game, [moveA, moveB])
                    if i < 9:
                        agent.observe(reward=rewards[0], terminal=False)
                        agent.observe(reward=rewards[1], terminal=False)
                    else:
                        # Last round of the episode: mark it terminal.
                        agent.observe(reward=rewards[0], terminal=False)
                        agent.observe(reward=rewards[1], terminal=True)
                    testState[i] = [[moveA], [moveB]]
            checkpointPath = "./games/agents/" + game + "/" + agentType + "/"
            agent.save(directory=checkpointPath, filename=None)
            print("saving successful")
    return agent
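# Usage sketch for get_agent(). It assumes `config` and `payoffs` are defined as in the
# listing above; the chosen game ("3pd") and agent type ("ppo") appear in the code, but
# the exact state shape is project-specific, so this is illustrative rather than
# authoritative.
agent = get_agent("3pd", "ppo")              # restore or train a PPO agent for the 3-player game
state = np.full(config["3pd"]["states"]["shape"], 0)
move = agent.act(state)                      # sample one action from the trained policy
agent.observe(reward=0.0, terminal=True)     # close out the episode opened by act()
print("sampled move:", move)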
startstep = 0
timestep = 1
model_name = 'VPG'

while timestep < MAXSTEPS and len(durations) <= 100:
    action = VPG_agent.act(observation)
    observation, done, reward = env.execute(action)

    # A zero reward marks the end of an episode.
    if reward == 0.0:
        ep_done = True
    else:
        ep_done = False

    if ep_done:
        print(timestep - startstep, ' steps needed to finish this episode')
        durations.append(timestep - startstep)
        startstep = timestep + 1

    # Add experience; the agent automatically updates its model according to the batch size.
    VPG_agent.observe(reward=reward, terminal=done)

    if ep_done:
        env.reset()  # reinitialize the environment for the next episode
        ep_done = False

    agg_sum = agg_sum + reward
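# The evaluation loop above relies on setup that is outside this listing: a `VPG_agent`,
# an `env` whose execute() returns (next_state, terminal, reward) as used above, and some
# bookkeeping variables. The lines below are a minimal, hypothetical sketch of that setup;
# the names match the loop, but the values are assumptions, not the project's configuration.
MAXSTEPS = 100000          # step budget for the whole evaluation run
durations = []             # recorded episode lengths, the second stopping criterion
agg_sum = 0.0              # running sum of rewards across the run
ep_done = False
observation = env.reset()  # initial observation fed to VPG_agent.act()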