Exemple #1
0
        if infrastructure.attempts < infrastructure.peers:
            agent_dqn.observe(reward=reward, terminal=False)
        else:
            agent_dqn.observe(reward=reward, terminal=True)

        rl_dqn.append(reward)

        #vpg agent (acts through agent_vpg; rewards are collected in rl_vpg)
        action = agent_vpg.act(state)
        action = action.values()

        reward = infrastructure.monkey(action)

        if infrastructure.attempts < infrastructure.peers:
            agent_vpg.observe(reward=reward, terminal=False)
        else:
            agent_vpg.observe(reward=reward, terminal=True)

        rl_vpg.append(reward)

        #monkey
        action = [
            randint(1,
                    len(infrastructure.peers) - 1)
            for x in range(int(len(infrastructure.peers) * .2))
        ]
        reward = infrastructure.monkey(action)

        #print("monkey", action)
def _create_agent(game, agentType):
    """Instantiate an untrained agent of the requested type for ``game``.

    The state and action specifications are read from ``config[game]``.

    Raises:
        ValueError: if ``agentType`` is not ``"vpg"``, ``"ppo"`` or ``"dqn"``.
    """
    # An if/elif chain (rather than a dict of classes) keeps name lookup
    # lazy: only the agent class that is actually requested is resolved.
    if agentType == "vpg":
        agentClass = VPGAgent
    elif agentType == "ppo":
        agentClass = PPOAgent
    elif agentType == "dqn":
        agentClass = DQNAgent
    else:
        raise ValueError(
            "unknown agentType %r; expected 'vpg', 'ppo' or 'dqn'"
            % (agentType,)
        )

    return agentClass(
        states=config[game]["states"],
        actions=config[game]["actions"],
        memory=1000,
        network="auto",
    )


def _self_play_episode(agent, game, players, fillValue, rounds=10):
    """Play one episode in which ``agent`` chooses the move of every player.

    Each round the agent acts once per player on the shared state, the moves
    are scored with ``payoffs`` and every player's reward is fed back through
    ``agent.observe``. Only the very last observation of the episode is
    flagged ``terminal=True``.
    """
    testState = np.full(config[game]["states"]["shape"], fillValue)
    lastRound = rounds - 1
    lastPlayer = players - 1

    for i in range(rounds):
        moves = [agent.act(testState) for _ in range(players)]
        rewards = payoffs(game, moves)

        for player in range(players):
            # Close the episode on the final reward of the final round.
            isTerminal = i == lastRound and player == lastPlayer
            agent.observe(reward=rewards[player], terminal=isTerminal)

        # Record this round's moves in the state seen by later rounds.
        # NOTE(review): assumes the configured state shape has one row per
        # round holding a [move] entry per player - confirm against config.
        testState[i] = [[move] for move in moves]


def get_agent(game, agentType):
    """Return an agent for ``game``, restored from disk or freshly trained.

    The agent is first restored from
    ``./games/agents/<game>/<agentType>/``. If restoring fails for any
    reason, the agent is initialized, trained through self-play and saved to
    that same directory.

    Args:
        game: key into ``config``; ``"3pd"`` is trained with three players,
            every other game with two.
        agentType: ``"vpg"``, ``"ppo"`` or ``"dqn"``.

    Returns:
        The restored or newly trained agent.

    Raises:
        ValueError: if ``agentType`` is not one of the supported types.
    """
    # Number of self-play episodes for each kind of game.
    threePlayerEpisodes = 1000001
    twoPlayerEpisodes = 1
    # The three-player run is long, so it is checkpointed periodically.
    saveEvery = 1000

    checkpointPath = "./games/agents/" + game + "/" + agentType + "/"
    agent = _create_agent(game, agentType)

    try:
        agent.restore(directory=checkpointPath, filename=None)
    except Exception:
        # Deliberately broad: any failure to restore (for example, no
        # checkpoint written yet) means the agent is trained from scratch.
        pass
    else:
        print("restoration successful")
        return agent

    agent.initialize()

    if game == "3pd":
        for x in tqdm(range(threePlayerEpisodes)):
            # NOTE(review): the three-player state is filled with None, which
            # yields an object-dtype array - confirm this is intended.
            _self_play_episode(agent, game, players=3, fillValue=None)
            if x % saveEvery == 0:
                agent.save(directory=checkpointPath, filename=None)
    else:
        for x in tqdm(range(twoPlayerEpisodes)):
            # The two-player loop used to test ``i < 10`` inside
            # ``range(10)``, so it never sent terminal=True; the shared
            # helper now closes the episode on the last round.
            _self_play_episode(agent, game, players=2, fillValue=0)
        agent.save(directory=checkpointPath, filename=None)
        print("saving successful")

    return agent
    model_name = 'VPG'

    while timestep < MAXSTEPS and len(durations) <= 100:

        action = VPG_agent.act(observation)
        observation, done, reward = env.execute(action)
        if reward == 0.0:
            ep_done = True
        else:
            ep_done = False
        if ep_done:
            print(timestep - startstep, ' steps needed to finish this episode')
            durations.append(timestep - startstep)
            startstep = timestep + 1
        # Add experience, agent automatically updates model according to batch size
        VPG_agent.observe(reward=reward, terminal=done)
        if ep_done:
            env.reset()  #reinitialize
            ep_done = False
        agg_sum = agg_sum + reward
        agg_rewards.append(agg_sum / timestep)
        rewards.append(reward)
        if timestep % 1000 == 0:
            plotter(timestep,
                    model_name=model_name,
                    title='%s Rewards' % model_name,
                    save_step=1000)
        timestep = timestep + 1
    if timestep > MAXSTEPS:
        print("Timed out. Too many timesteps.")
    else: