import pickle
from time import time

import matplotlib.pyplot as plt
from torch.nn import MSELoss
from torch.optim import Adam

# Project-local names used below (Agent, Environments, Collector, NetWork,
# disablePrint, enablePrint, imageBig, saveAgent, saveCollector, params,
# device, rMin, rMax) are assumed to come from the surrounding codebase.


def evaluate(name, environment, n=0, uncertainty=0):
    """Evaluate a saved agent checkpoint on one environment."""
    disablePrint()
    name = name + '-' + str(n)
    # Checkpoints are stored as outputs/<run-name>/Agents/<run-name>-<n>.agent.
    agent = pickle.load(
        open(f"outputs/{'-'.join(name.split('-')[:-1])}/Agents/{name}.agent", "rb"))
    agent.explore = agent.exploration.greedy
    agent.uncertainty = True
    agent.state_avoidance = False
    agent.uncertainty_weight = uncertainty
    env = Environments(render=False,
                       envs=[environment for _ in range(20)],
                       agent=agent)
    rews, dones = [], []
    for i in range(20000):
        obs, hn, cn = env.start()
        act, obs_old, h0, c0, hn, cn, _, _ = agent.chooseMulti(obs, hn, cn)
        obs, rew, done, info = env.step(act, hn, cn)
        rews.append(sum(rew))
        dones.append(sum(done))
    enablePrint()
    # Mean episodic return, plus a min-max-normalised score (rounded to two
    # decimals) using the per-game reward bounds rMin/rMax.
    score = sum(rews) / sum(dones)
    print(name.ljust(20, ' '),
          int(100 * (score - rMin[environment])
              / (rMax[environment] - rMin[environment])) / 100,
          " ", score)
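# A minimal usage sketch (assumption, not in the original source): sweep a few
# saved checkpoints of one run over a single game. The run name "run1", the
# game, and the checkpoint range are hypothetical.
def evaluate_checkpoints():
    for n in range(5):
        evaluate("run1", "coinrun", n=n, uncertainty=0.5)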
def main():
    name, environment, hours, total_agents, done = (
        params['name'], params['environment'], params['hours'],
        params['total_agents'], None)
    agent = Agent(**params)
    env = Environments(render=False,
                       envs=[environment for _ in range(total_agents)],
                       agent=agent)
    collector = Collector(**params)
    # Train until 5 minutes before the allotted wall-clock budget expires.
    tid, f = time() + 3600 * hours - 300, 0
    while time() < tid:
        f += 1
        obs, hn, cn = env.start()
        act, obs_old, h0, c0, hn, cn, before_trace, after_trace = agent.chooseMulti(
            obs, hn, cn, done=done)
        obs, rew, done, info = env.step(act, hn, cn)
        collector.collect(rew, done, act, agent.onpolicy)
        # Off-policy agents skip the first 10 steps before storing transitions.
        if not agent.onpolicy and f > 10:
            agent.rememberMulti(obs_old, act, obs, rew, h0, c0, hn, cn, done,
                                before_trace, after_trace)
        agent.learn()
    saveAgent(agent, name)
    saveCollector(collector, name)
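# `count_parameters` is used below but not defined in this excerpt; a standard
# PyTorch implementation is sketched here as an assumption.
def count_parameters(model):
    # Count only trainable parameters.
    return sum(p.numel() for p in model.parameters() if p.requires_grad)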
# Autoencoder pre-training across all 16 Procgen games. The enclosing
# `Environments(...)` call was truncated in this excerpt; it is reconstructed
# here following the pattern used above (an assumption).
env = Environments(render=False,
                   envs=[
                       'bigfish', 'bossfight', 'caveflyer', 'chaser',
                       'climber', 'coinrun', 'dodgeball', 'fruitbot',
                       'heist', 'jumper', 'leaper', 'maze',
                       'miner', 'ninja', 'plunder', 'starpilot'
                   ],
                   agent=agent)
network = NetWork().to(device)
print("Number of parameters in network:", count_parameters(network))
print("Number of parameters in encoder:", count_parameters(network.encoder))
print("Number of parameters in decoder:", count_parameters(network.decoder))
criterion = MSELoss()
optimizer = Adam(network.parameters(), lr=1e-4, weight_decay=1e-5)
for f in range(10000000):
    obs, hn, cn = env.start()
    act, obs_old, h0, c0, hn, cn = agent.chooseMulti(obs, hn, cn)
    obs, rew, done, info = env.step(act, hn, cn)
    agent.rememberMulti(obs_old, act, obs, rew, h0, c0, hn, cn, done)
    # Train the autoencoder to reconstruct observations sampled from replay.
    # (The original unpacked `sn` here, apparently a typo for `cn`.)
    obs, action, obs_next, reward, h0, c0, hn, cn, done = agent.memory.sample(256)
    guess = network(obs)
    loss = criterion(guess, obs)
    loss.backward()
    optimizer.step()
    optimizer.zero_grad()
    if showPrint:  # flag toggled externally to visualise reconstructions
        plt.close('all')
        imageBig(obs[0].cpu(), y=200, x=600)
        imageBig(guess[0].detach().cpu().clamp(-1, 1), y=200, x=1200)
        showPrint = False
    if save:
        with open(f"Encoders/encoder{f}.obj", "wb") as file:
            pickle.dump(network.encoder, file)  # assumed body; truncated in the original
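# Sketch (assumption, not in the original source): reloading a pickled encoder
# checkpoint for reuse, e.g. as a frozen feature extractor. The checkpoint
# index 1000 is hypothetical.
def load_encoder(f=1000):
    with open(f"Encoders/encoder{f}.obj", "rb") as file:
        return pickle.load(file)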