Example #1
import pickle

def evaluate(name, environment, n=0, uncertainty=0):
    """Load a saved agent and report its (normalized) score on one environment."""
    disablePrint()  # silence setup output; restored below
    name = name + '-' + str(n)
    # Agents are stored as outputs/<run>/Agents/<run>-<n>.agent
    with open(f"outputs/{'-'.join(name.split('-')[:-1])}/Agents/{name}.agent",
              "rb") as file:
        agent = pickle.load(file)
    agent.explore = agent.exploration.greedy  # evaluate greedily, no exploration
    agent.uncertainty = True
    agent.state_avoidance = False
    agent.uncertainty_weight = uncertainty
    env = Environments(render=False,
                       envs=[environment for _ in range(20)],
                       agent=agent)
    rews, dones = [], []
    for _ in range(20000):
        obs, hn, cn = env.start()
        act, obs_old, h0, c0, hn, cn, _, _ = agent.chooseMulti(obs, hn, cn)
        obs, rew, done, info = env.step(act, hn, cn)
        rews.append(sum(rew))
        dones.append(sum(done))
    enablePrint()
    score = sum(rews) / sum(dones)  # mean reward per finished episode
    # Min-max normalize against per-environment reward bounds, truncated
    # to two decimals.
    print(
        name.ljust(20, ' '),
        int(100 * (score - rMin[environment]) /
            (rMax[environment] - rMin[environment])) / 100, "  ", score)
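
Example #1 calls `disablePrint`/`enablePrint` without defining them, and `rMin`/`rMax` are evidently per-environment reward bounds used for the min-max normalization above. The print helpers are most likely the usual stdout-silencing pattern; a minimal sketch, assuming that implementation (the real project may differ):

import os
import sys

def disablePrint():
    # Assumed implementation: silence all print() output by routing
    # stdout to the null device.
    sys.stdout = open(os.devnull, 'w')

def enablePrint():
    # Restore the interpreter's original stdout.
    sys.stdout = sys.__stdout__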
Example #2
from time import time

def main():
    # params, Agent, Environments, Collector, saveAgent and saveCollector
    # are defined elsewhere in the project.
    name = params['name']
    environment = params['environment']
    hours = params['hours']
    total_agents = params['total_agents']
    done = None  # no terminal information before the first step
    agent = Agent(**params)
    env = Environments(render=False,
                       envs=[environment for _ in range(total_agents)],
                       agent=agent)
    collector = Collector(**params)
    # Run until five minutes before the allotted wall-clock budget expires.
    tid, f = time() + 3600 * hours - 300, 0
    while time() < tid:
        f += 1
        obs, hn, cn = env.start()
        act, obs_old, h0, c0, hn, cn, before_trace, after_trace = \
            agent.chooseMulti(obs, hn, cn, done=done)
        obs, rew, done, info = env.step(act, hn, cn)
        collector.collect(rew, done, act, agent.onpolicy)
        # Off-policy agents store transitions once the recurrent state has
        # had a few frames to warm up.
        if not agent.onpolicy and f > 10:
            agent.rememberMulti(obs_old, act, obs, rew, h0, c0, hn, cn,
                                done, before_trace, after_trace)
        agent.learn()
    saveAgent(agent, name)
    saveCollector(collector, name)
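
`saveAgent` and `saveCollector` are not shown either. Given that Example #1 loads agents back with `pickle.load` from `outputs/<run>/Agents/<run>-<n>.agent`, a plausible sketch is plain pickling to that layout; the exact directories and the `.collector` extension below are assumptions:

import os
import pickle

def saveAgent(agent, name):
    # Mirrors the load path in Example #1 (minus the per-agent "-<n>"
    # suffix, which the real code presumably appends).
    path = f"outputs/{name}/Agents"
    os.makedirs(path, exist_ok=True)
    with open(f"{path}/{name}.agent", "wb") as file:
        pickle.dump(agent, file)

def saveCollector(collector, name):
    # Hypothetical companion helper; directory and extension are guesses.
    path = f"outputs/{name}/Collectors"
    os.makedirs(path, exist_ok=True)
    with open(f"{path}/{name}.collector", "wb") as file:
        pickle.dump(collector, file)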
Example #3
from torch.nn import MSELoss
from torch.optim import Adam
import matplotlib.pyplot as plt

# Environments, NetWork, device, agent, count_parameters, imageBig and the
# showPrint flag come from the surrounding project.
env = Environments(render=True,
                   envs=[
                       'bigfish', 'bossfight', 'caveflyer', 'chaser',
                       'climber', 'coinrun', 'dodgeball', 'fruitbot', 'heist',
                       'jumper', 'leaper', 'maze', 'miner', 'ninja', 'plunder',
                       'starpilot'
                   ],
                   agent=agent)
network = NetWork().to(device)
print("Number of parameters in network:", count_parameters(network))
print("Number of parameters in encoder:", count_parameters(network.encoder))
print("Number of parameters in decoder:", count_parameters(network.decoder))
criterion = MSELoss()
optimizer = Adam(network.parameters(), lr=1e-4, weight_decay=1e-5)
for f in range(10_000_000):
    # Act in all 16 Procgen games in parallel and store the transitions.
    obs, hn, cn = env.start()
    act, obs_old, h0, c0, hn, cn = agent.chooseMulti(obs, hn, cn)
    obs, rew, done, info = env.step(act, hn, cn)
    agent.rememberMulti(obs_old, act, obs, rew, h0, c0, hn, cn, done)
    # Train the autoencoder to reconstruct observations drawn from replay memory.
    obs, action, obs_next, reward, h0, c0, hn, cn, done = agent.memory.sample(
        256)
    guess = network(obs)
    loss = criterion(guess, obs)  # pixel-wise reconstruction (MSE) loss
    loss.backward()
    optimizer.step()
    optimizer.zero_grad()  # clear gradients before the next iteration
    if showPrint:
        # Show one observation/reconstruction pair, then stop plotting.
        plt.close('all')
        imageBig(obs[0].cpu(), y=200, x=600)
        imageBig(guess[0].detach().cpu().clamp(-1, 1), y=200, x=1200)
        showPrint = False
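
`count_parameters` is also undefined in the snippet; a minimal sketch using the standard PyTorch idiom (counting only trainable parameters, which may or may not match the original helper):

def count_parameters(model):
    # Total number of trainable parameters in a torch.nn.Module.
    return sum(p.numel() for p in model.parameters() if p.requires_grad)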