# INITIALIZE THE MAIN AGENT CLASS
ai = MERL(args)
print('Running', args.config.env_choice, 'with config', args.config.config,
      'State_dim:', args.state_dim, 'Action_dim:', args.action_dim)
time_start = time.time()

###### TRAINING LOOP ########
for gen in range(1, 10000000000):  # RUN VIRTUALLY FOREVER

    # ONE EPOCH OF TRAINING
    popn_fits, pg_fits, test_fits = ai.train(gen, test_tracker)

    # PRINT PROGRESS
    print('Gen/Frames:', gen, '/', ai.total_frames,
          'Popn stat:', mod.list_stat(popn_fits),
          'PG stat:', mod.list_stat(pg_fits),
          'Test_trace:', [pprint(i) for i in ai.test_trace[-5:]],
          'FPS:', pprint(ai.total_frames / (time.time() - time_start)),
          'Evo:', args.scheme, 'PS:', args.ps)

    # PRINT MORE DETAILED STATS PERIODICALLY
    if gen % 5 == 0:
        print()
        print('Test_stat:', mod.list_stat(test_fits), 'SAVETAG:', args.savetag)
        print('Weight Stats: min/max/average', pprint(ai.test_bucket[0].get_norm_stats()))
        print('Buffer Lens:', [len(ag.buffer[0]) for ag in ai.agents] if args.ps == 'trunk'
              else [len(ag.buffer) for ag in ai.agents])
        print()

    if gen % 10 == 0 and args.rollout_size > 0:
        print()
        print('Q:', pprint(ai.agents[0].algo.q))
        print('Q_loss:', pprint(ai.agents[0].algo.q_loss))
        print('Policy_loss:', pprint(ai.agents[0].algo.policy_loss))
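# The loop above leans on two small helpers, pprint and mod.list_stat, whose
# implementations live in the repo's utility module and are not shown here. The
# versions below are a minimal sketch (assumption), consistent only with how the
# helpers are called in the loop above.

import numpy as np

def pprint(x):
    """Format a float (or list/tuple of floats) to 2 decimal places; pass None through."""
    if x is None: return None
    if isinstance(x, (list, tuple)): return [pprint(i) for i in x]
    return '%.2f' % x

def list_stat(scores):
    """Return (mean, std, min, max) of a list of fitness scores, or None if empty."""
    if len(scores) == 0: return None
    arr = np.array(scores)
    return pprint(arr.mean()), pprint(arr.std()), pprint(arr.min()), pprint(arr.max())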
print('Running', parameters.algo, 'State_dim:', parameters.state_dim,
      'Action_dim:', parameters.action_dim, 'for', 'Round 1' if DIFFICULTY == 0 else 'Round 2')
time_start = time.time()
num_frames = 0.0

###### TRAINING LOOP ########
for epoch in range(1, 1000000000):  # RUN VIRTUALLY FOREVER
    gen_time = time.time()

    # ONE EPOCH OF TRAINING
    agent.train(epoch)

    # PRINT PROGRESS
    print('Ep:', epoch, 'Score cur/best:', [pprint(score) for score in agent.test_score], pprint(agent.best_score),
          'Time:', pprint(time.time() - gen_time), 'Len', pprint(agent.test_len),
          'Best_action_noise_score', pprint(agent.best_action_noise_score),
          'Best_Agent_scores', [pprint(score) for score in agent.best_agent_scores])

    # PRINT MORE DETAILED STATS PERIODICALLY
    if epoch % 5 == 0:  # Special Stats
        print()
        print('#Data_Created', agent.buffer_added,
              'Q_Val Stats', pprint(list_mean(agent.rl_agent.q['min'])),
              pprint(list_mean(agent.rl_agent.q['max'])), pprint(list_mean(agent.rl_agent.q['mean'])),
              'Val Stats', pprint(list_mean(agent.rl_agent.val['min'])),
              pprint(list_mean(agent.rl_agent.val['max'])), pprint(list_mean(agent.rl_agent.val['mean'])))
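# list_mean is another tiny utility assumed by the stats printout above. A sketch
# (assumption) that guards against the empty stat lists present before any learning
# updates have run, returning None so the pprint sketch above passes it through:

def list_mean(values):
    """Mean of a list of floats; None if the list is empty."""
    if len(values) == 0: return None
    return sum(values) / len(values)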
        self.buffers[rover_id].push(exp[0], exp[1], exp[2], exp[3], exp[4])
        self.update_budget += 1


if __name__ == "__main__":
    args = Parameters()  # Create the Parameters class
    gen_tracker = utils.Tracker(args.metric_save, [args.log_fname], '.csv')  # Initiate tracker
    torch.manual_seed(SEED); np.random.seed(SEED); random.seed(SEED)  # Seeds

    # INITIALIZE THE MAIN AGENT CLASS
    ai = IDPP(args)
    print('State_dim:', args.state_dim)
    time_start = time.time()

    ###### TRAINING LOOP ########
    for gen in range(1, 1000000000):  # RUN VIRTUALLY FOREVER
        gen_time = time.time()

        # ONE EPOCH OF TRAINING
        ai.train(gen, gen_tracker)

        # PRINT PROGRESS
        print('Ep:', gen, 'Score: cur/best:', pprint(ai.test_score), pprint(ai.best_score),
              'Time:', pprint(time.time() - gen_time),
              'Best_rollout_score', pprint(ai.best_rollout_score), 'DPP', DPP)
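# utils.Tracker logs per-generation scores to CSV. The real class is defined in the
# repo's utility module; the sketch below is an assumption, consistent with the two
# calls visible here (Tracker(save_folder, [name], '.csv') and update([score], gen))
# and with reading the running average back as all_tracker[0][1] elsewhere.

import os
import numpy as np

class Tracker:
    def __init__(self, save_folder, var_names, file_ext):
        self.var_names, self.file_ext = var_names, file_ext
        self.folder = save_folder
        self.all_tracker = [[[], 0.0, []] for _ in var_names]  # [raw scores, running avg, (gen, avg) log]
        os.makedirs(self.folder, exist_ok=True)

    def update(self, updates, gen):
        for update, slot in zip(updates, self.all_tracker):
            if update is None: continue
            slot[0].append(update)
            slot[1] = sum(slot[0]) / len(slot[0])  # Running average, readable as all_tracker[i][1]
            slot[2].append([gen, slot[1]])
        # Flush each variable's (generation, average) log to its own CSV
        for name, slot in zip(self.var_names, self.all_tracker):
            if slot[2]:
                np.savetxt(os.path.join(self.folder, name + self.file_ext),
                           np.array(slot[2]), fmt='%.3f', delimiter=',')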
# INITIALIZE THE MAIN AGENT CLASS
ai = MERL(args)
print('Running', args.config.env_choice, 'with config', args.config.config,
      'Predator State_dim:', args.pred_state_dim, 'Prey State_dim:', args.prey_state_dim,
      'Action_dim:', args.action_dim)
time_start = time.time()

###### TRAINING LOOP ########
for gen in range(1, 10000000000):  # RUN VIRTUALLY FOREVER

    # ONE EPOCH OF TRAINING
    popn_fits, pg_fits, test_fits, prey_score = ai.train(gen, test_tracker, prey_tracker)

    # PRINT PROGRESS
    print('Gen/Frames:', gen, '/', ai.total_frames,
          'Popn stat:', mod.list_stat(popn_fits),
          'PG stat:', mod.list_stat(pg_fits),
          'Test_trace:', [pprint(i) for i in ai.test_trace[-5:]],
          'FPS:', pprint(ai.total_frames / (time.time() - time_start)),
          'Evo:', args.scheme, 'Prey Score:', prey_score)

    # Update elites tracker
    if gen > 2 and args.popn_size > 0:
        # elites_tracker.update([ai.agents[0].evolver.rl_res['elites']], gen)
        selects_tracker.update([ai.agents[0].evolver.rl_res['selects']], gen)

    if ai.total_frames > args.frames_bound:
        break

### Kill all processes
try: ai.pg_task_pipes[0].send('TERMINATE')
except: pass
try: ai.test_task_pipes[0].send('TERMINATE')
except: pass
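# The TERMINATE messages above assume rollout workers blocked on a multiprocessing
# Pipe that exit cleanly when that string arrives. Below is a minimal sketch of the
# worker side of this protocol (assumption; the repo's actual rollout workers take
# the policy, environment, and replay buffer as additional arguments, and the
# task * 2 line is a stand-in for running an episode and returning its fitness).

from multiprocessing import Pipe, Process

def rollout_worker(task_pipe, result_pipe):
    while True:
        task = task_pipe.recv()        # Block until the trainer sends work
        if task == 'TERMINATE': break  # Shutdown signal from the main loop
        result_pipe.send(task * 2)     # Stand-in for an episode rollout

if __name__ == '__main__':
    task_master, task_slave = Pipe()
    result_master, result_slave = Pipe()
    p = Process(target=rollout_worker, args=(task_slave, result_slave))
    p.start()
    task_master.send(21); print(result_master.recv())  # -> 42
    task_master.send('TERMINATE'); p.join()            # Clean shutdown, as in the loop above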
torch.manual_seed(args.seed); np.random.seed(args.seed); random.seed(args.seed)

# INITIALIZE THE MAIN AGENT CLASS
agent = CERL_Agent(args)  # Initialize the agent
print('Running CERL for', ENV_NAME, 'State_dim:', args.state_dim, 'Action_dim:', args.action_dim)
time_start = time.time()

for gen in range(1, 1000000000):  # Infinite generations

    # Train one iteration
    best_score, test_len, all_fitness, all_eplen, test_mean, test_std, champ_wwid = agent.train(gen, frame_tracker)

    # PRINT PROGRESS
    print('Env', ENV_NAME, 'Gen', gen, 'Frames', agent.total_frames,
          'Pop_max/max_ever:', '%.2f' % best_score, '/', '%.2f' % agent.best_score,
          'Avg:', '%.2f' % frame_tracker.all_tracker[0][1],
          'Frames/sec:', '%.2f' % (agent.total_frames / (time.time() - time_start)),
          'Champ_len', '%.2f' % test_len,
          'Test_score u/std', utils.pprint(test_mean), utils.pprint(test_std),
          'savetag', SAVETAG)

    # PRINT MORE DETAILED STATS PERIODICALLY
    if gen % 5 == 0:
        print('Learner Fitness', [utils.pprint(learner.value) for learner in agent.portfolio],
              'Sum_stats_resource_allocation', [learner.visit_count for learner in agent.portfolio])
        print('Pop/rollout size', args.pop_size, '/', args.rollout_size,
              'gradperstep', args.gradperstep, 'Seed', SEED, 'Portfolio_id', PORTFOLIO_ID)
        try:
            print('Best Policy ever genealogy:', agent.genealogy.tree[int(agent.best_policy.wwid.item())].history)
            print('Champ genealogy:', agent.genealogy.tree[champ_wwid].history)
        except:
            pass
        print()

    max_tracker.update([best_score], agent.total_frames)
    if agent.total_frames > TOTAL_STEPS:
        break
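# The per-learner value and visit_count fields printed above suggest a bandit-style
# allocator that decides how many rollouts each portfolio learner receives. A minimal
# UCB-style sketch (assumption; the agent's actual allocation logic is defined
# elsewhere, and the Learner namedtuple here is a hypothetical stand-in):

import math
from collections import namedtuple

Learner = namedtuple('Learner', ['value', 'visit_count'])  # Hypothetical stand-in

def ucb_scores(portfolio, c=1.0):
    """Value estimate plus an exploration bonus that shrinks with visits."""
    total = sum(l.visit_count for l in portfolio) + 1
    return [l.value + c * math.sqrt(math.log(total) / (l.visit_count + 1)) for l in portfolio]

# Usage: allocate the next rollout to the argmax learner, then bump its visit_count.
demo = [Learner(1.2, 50), Learner(0.9, 5)]
scores = ucb_scores(demo)
print(scores.index(max(scores)))  # -> 1: the under-visited learner wins the bonus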