def train(self, frame_limit):
    """Run generations of training until *frame_limit* frames are consumed,
    then signal every worker pipe to terminate.

    Args:
        frame_limit: total environment frames after which training stops.
    """
    # Define Tracker class to track scores
    test_tracker = utils.Tracker(self.args.savefolder, ['score_' + self.args.savetag], '.csv')  # Tracker class to log progress
    time_start = time.time()
    for gen in range(1, 1000000000):  # effectively infinite generations
        # Train one iteration
        max_fitness, champ_len, all_eplens, test_mean, test_std, rollout_fitness, rollout_eplens = self.forward_generation(gen, test_tracker)
        if test_mean:
            self.args.writer.add_scalar('test_score', test_mean, gen)
        print('Gen/Frames:', gen, '/', self.total_frames,
              ' Gen_max_score:', '%.2f' % max_fitness,
              ' Champ_len', '%.2f' % champ_len,
              ' Test_score u/std', utils.pprint(test_mean), utils.pprint(test_std),
              ' Rollout_u/std:', utils.pprint(np.mean(np.array(rollout_fitness))),
              utils.pprint(np.std(np.array(rollout_fitness))),
              ' Rollout_mean_eplen:',
              # Guard against division by zero when no rollouts finished this gen.
              utils.pprint(sum(rollout_eplens) / len(rollout_eplens)) if rollout_eplens else None)
        if gen % 5 == 0:
            # fixed: 'Best_score_ever:''/' were adjacent literals that silently
            # concatenated into one string — a comma was clearly intended.
            print('Best_score_ever:', '/', '%.2f' % self.best_score,
                  ' FPS:', '%.2f' % (self.total_frames / (time.time() - time_start)),
                  'savetag', self.args.savetag)
            print()
        if self.total_frames > frame_limit:
            break
    # Kill all worker processes (best-effort: pipes may already be closed).
    try:
        for p in self.task_pipes:
            p[0].send('TERMINATE')
        for p in self.test_task_pipes:
            p[0].send('TERMINATE')
        for p in self.evo_task_pipes:
            p[0].send('TERMINATE')
    except Exception:  # fixed: bare `except: None` hid even KeyboardInterrupt
        pass
def pprint(self, string, label, color=None, labelColor=None):
    """Format output so that *string* goes on the left and *label* on the
    right side, with padding in the center.

    Delegates rendering to the module-level ``pprint`` helper.
    NOTE(review): presumably that helper is a project formatting utility
    (e.g. ``utils.pprint``) — confirm the intended target.

    Raises:
        AssertionError: if ``string`` or ``label`` is empty/None.
    """
    assert string, "there is no string value, or it is None!"
    # fixed: was `assert lable` — a NameError typo that broke every call
    assert label, "there is no label or it is None!"
    pprint(string, label, color, labelColor)
def test_locally():
    """Roll out a single episode in a fresh L2M environment, clamping
    negative actions to zero, and print running reward diagnostics
    until the episode terminates."""
    env = L2M(frameskip=FRAMESKIP, difficulty=0, action_clamp=False, visualize=False)
    state = torch.Tensor(env.reset())
    elapsed = 0
    total_reward = 0
    while True:
        elapsed = elapsed + FRAMESKIP
        action = net.clean_action(state).detach()
        # Zero out negative activations before stepping the environment.
        hack_action = [max(action[0, i].item(), 0.0) for i in range(22)]
        state, reward, done, info = env.step(hack_action)
        state = torch.Tensor(state)
        total_reward += reward
        print('Seed', seed, 'Local Test', elapsed,
              'R1_Reward', '%.2f' % env.original_reward,
              'Shaped_Reward', '%.2f' % total_reward,
              utils.pprint(hack_action))
        if done:
            break
def train(self, frame_limit):
    """Train for generations until *frame_limit* frames have been used,
    logging test score and R1 reward to a CSV tracker and printing
    per-generation diagnostics (gradient stats every 5 generations).

    Args:
        frame_limit: total environment frames after which training stops.
    """
    # CSV tracker: one column for test score, one for the R1 reward.
    score_tracker = utils.Tracker(
        self.args.savefolder,
        ['score_' + self.args.savetag, 'r1_' + self.args.savetag],
        '.csv')
    start = time.time()
    for gen in range(1, 1000000000):  # effectively never terminates on its own
        # One training iteration.
        test_mean, test_std = self.forward_epoch(gen, score_tracker)
        fps = self.total_frames / (time.time() - start)
        print('Gen/Frames', gen, '/', self.total_frames,
              'max_ever:', format(self.best_score, '.2f'),
              ' Avg:', format(score_tracker.all_tracker[0][1], '.2f'),
              ' Frames/sec:', format(fps, '.2f'),
              ' Test/RolloutScore', format(self.test_trace[-1], '.2f'),
              format(self.rollout_fits_trace[-1], '.2f'),
              'Ep_len', format(self.ep_len, '.2f'),
              '#Footsteps', format(self.num_footsteps, '.2f'),
              'R1_Reward', format(self.r1_reward, '.2f'),
              'savetag', self.args.savetag)
        if gen % 5 == 0:
            print()
            print('Entropy', utils.pprint(math.exp(self.algo.entropy['mean'])),
                  'Next_Entropy', utils.pprint(math.exp(self.algo.next_entropy['mean'])),
                  'Q_Loss', utils.pprint(self.algo.critic_loss['mean']),
                  'Q', utils.pprint(self.algo.policy_q['mean']),
                  'Next_Q', utils.pprint(self.algo.next_q['mean']))
            print()
        if self.total_frames > frame_limit:
            break
def train(self, frame_limit):
    """Run evolutionary generations until *frame_limit* frames have been
    consumed, printing per-generation population diagnostics, then signal
    every worker pipe to terminate.

    Args:
        frame_limit: total environment frames after which training stops.
    """
    time_start = time.time()
    for gen in range(1, 1000000000):  # effectively infinite generations
        # Train one iteration
        gen_best = self.forward_generation(gen, time_start)
        print()
        print('Gen/Frames', gen, '/', self.total_frames,
              'Gen_Score', '%.2f' % gen_best,
              'Best_Score', '%.2f' % self.best_score,
              ' Speedup', '%.2f' % self.best_speedup,
              ' Frames/sec:', '%.2f' % (self.total_frames / (time.time() - time_start)),
              'Buffer', len(self.replay_buffer),  # fixed: was replay_buffer.__len__()
              'Savetag', self.args.savetag)
        for net in self.population:
            print(net.model_type, net.fitness_stats)
            if net.model_type == 'BoltzmanChromosome':
                print(net.temperature_stats)
        print()
        print()
        # Best-effort: these attributes only exist for some evolver configs,
        # so only missing-attribute errors are tolerated (was bare `except: None`).
        try:
            print('Initial Ratio', self.args.ratio,
                  'Current Ratio', self.evolver.ratio,
                  'Chamption Type', self.champ_type)
        except AttributeError:
            pass
        if gen % 5 == 0:
            print('Learner Fitness',
                  [utils.pprint(learner.value) for learner in self.portfolio])
        if self.total_frames > frame_limit:
            break
    # Kill all worker processes (best-effort: pipes may already be closed).
    try:
        for p in self.task_pipes:
            p[0].send('TERMINATE')
        for p in self.test_task_pipes:
            p[0].send('TERMINATE')
        for p in self.evo_task_pipes:
            p[0].send('TERMINATE')
    except Exception:  # fixed: bare `except: None` hid even KeyboardInterrupt
        pass
def train(self, frame_limit):
    """Run portfolio-based generations until *frame_limit* frames have been
    consumed, logging scores and per-learner gradient stats to CSV trackers,
    then signal every worker pipe to terminate.

    Args:
        frame_limit: total environment frames after which training stops.
    """
    # Define Tracker class to track scores
    test_tracker = utils.Tracker(
        self.args.savefolder,
        ['score_' + self.args.savetag, 'r2_' + self.args.savetag],
        '.csv')  # Tracker class to log progress
    # One entropy column and one policy-Q column per learner in the portfolio.
    grad_temp = [str(i) + 'entropy_' + self.args.savetag for i in range(len(self.portfolio))] + \
                [str(i) + 'policyQ_' + self.args.savetag for i in range(len(self.portfolio))]
    grad_tracker = utils.Tracker(self.args.aux_folder, grad_temp, '.csv')  # Tracker class to log progress
    time_start = time.time()
    for gen in range(1, 1000000000):  # effectively infinite generations
        # Train one iteration
        max_fitness, champ_len, all_eplens, test_mean, test_std = self.forward_generation(gen, test_tracker)
        print('Gen/Frames', gen, '/', self.total_frames,
              ' Pop_max/max_ever:', '%.2f' % max_fitness, '/', '%.2f' % self.best_score,
              ' Avg:', '%.2f' % test_tracker.all_tracker[0][1],
              ' Frames/sec:', '%.2f' % (self.total_frames / (time.time() - time_start)),
              ' Champ_len', '%.2f' % champ_len,
              ' Test_score u/std', utils.pprint(test_mean), utils.pprint(test_std),
              'Ep_len', '%.2f' % self.ep_len,
              '#Footsteps', '%.2f' % self.num_footsteps,
              # NOTE(review): label says R2 but the value printed is
              # self.r1_reward — confirm which is intended.
              'R2_Reward', '%.2f' % self.r1_reward,
              'savetag', self.args.savetag)
        grad_temp = [algo.algo.entropy['mean'] for algo in self.portfolio] + \
                    [algo.algo.policy_q['mean'] for algo in self.portfolio]
        grad_tracker.update(grad_temp, self.total_frames)
        if gen % 5 == 0:
            print('Learner Fitness',
                  [utils.pprint(learner.value) for learner in self.portfolio],
                  'Sum_stats_resource_allocation',
                  [learner.visit_count for learner in self.portfolio])
            # Best-effort stats print: learner stats may not be populated yet
            # (was a bare `except: None` that hid all errors).
            try:
                print('Entropy', ['%.2f' % algo.algo.entropy['mean'] for algo in self.portfolio],
                      'Next_Entropy', ['%.2f' % algo.algo.next_entropy['mean'] for algo in self.portfolio],
                      'Poilcy_Q', ['%.2f' % algo.algo.policy_q['mean'] for algo in self.portfolio],
                      'Critic_Loss', ['%.2f' % algo.algo.critic_loss['mean'] for algo in self.portfolio])
                print()
            except Exception:
                pass
        if self.total_frames > frame_limit:
            break
    ###Kill all processes
    try:
        for p in self.task_pipes:
            p[0].send('TERMINATE')
        for p in self.test_task_pipes:
            p[0].send('TERMINATE')
        for p in self.evo_task_pipes:
            p[0].send('TERMINATE')
    except Exception:  # fixed: bare `except: None` hid even KeyboardInterrupt
        pass