def train(self, frame_limit):
	"""Run the training loop until ``frame_limit`` total frames are consumed.

	Each generation calls ``self.forward_generation`` once, logs the test
	score to TensorBoard and stdout, and finally sends TERMINATE to all
	worker pipes (best effort).

	Args:
		frame_limit (int): total environment frames after which training stops.
	"""
	# Tracker class to log test-score progress to a .csv in the save folder
	test_tracker = utils.Tracker(self.args.savefolder, ['score_' + self.args.savetag], '.csv')
	time_start = time.time()

	for gen in range(1, 1000000000):  # Effectively infinite generations

		# Train one iteration
		max_fitness, champ_len, all_eplens, test_mean, test_std, rollout_fitness, rollout_eplens = self.forward_generation(gen, test_tracker)
		# Compare against None explicitly: a legitimate test score of 0.0 is
		# falsy and was previously (incorrectly) skipped from TensorBoard
		if test_mean is not None:
			self.args.writer.add_scalar('test_score', test_mean, gen)

		print('Gen/Frames:', gen, '/', self.total_frames,
			  ' Gen_max_score:', '%.2f' % max_fitness,
			  ' Champ_len', '%.2f' % champ_len, ' Test_score u/std', utils.pprint(test_mean), utils.pprint(test_std),
			  ' Rollout_u/std:', utils.pprint(np.mean(np.array(rollout_fitness))), utils.pprint(np.std(np.array(rollout_fitness))),
			  ' Rollout_mean_eplen:', utils.pprint(sum(rollout_eplens) / len(rollout_eplens)) if rollout_eplens else None)

		if gen % 5 == 0:
			# Fixed: original had 'Best_score_ever:''/' -- implicit string
			# literal concatenation from a missing comma
			print('Best_score_ever:', '/', '%.2f' % self.best_score,
				  ' FPS:', '%.2f' % (self.total_frames / (time.time() - time_start)),
				  'savetag', self.args.savetag)
			print()

		if self.total_frames > frame_limit:
			break

	### Kill all worker processes (best effort; pipes may already be closed)
	try:
		for p in self.task_pipes: p[0].send('TERMINATE')
		for p in self.test_task_pipes: p[0].send('TERMINATE')
		for p in self.evo_task_pipes: p[0].send('TERMINATE')
	except Exception:
		pass
Example #2
0
 def pprint(self, string, label, color=None, labelColor=None):
     """Format output so *string* is on the left and *label* on the right,
     with padding in the center, by delegating to the module-level
     ``pprint`` helper.

     Args:
         string: left-hand text; must be truthy.
         label: right-hand text; must be truthy.
         color: optional color for the string.
         labelColor: optional color for the label.

     Raises:
         AssertionError: if ``string`` or ``label`` is falsy/None.
     """
     assert string, "there is no string value, or it is None!"
     # Fixed NameError: original referenced undefined name 'lable'
     assert label, "there is no label or it is None!"
     # Delegate to the module-level pprint function (not this method)
     pprint(string, label, color, labelColor)
Example #3
0
def test_locally():
    """Roll out the global policy ``net`` for one episode in a local L2M
    environment, clamping negative action components to zero, and print the
    running rewards each step.
    """
    env = L2M(frameskip=FRAMESKIP,
              difficulty=0,
              action_clamp=False,
              visualize=False)
    state = torch.Tensor(env.reset())

    # Renamed from 'time' to avoid shadowing the stdlib time module
    sim_time = 0
    total_reward = 0
    while True:
        sim_time = sim_time + FRAMESKIP

        action = net.clean_action(state).detach()
        # Clamp negative activations to 0.0 -- presumably muscle activations
        # must be non-negative (TODO confirm against env action semantics).
        # Equivalent to the original per-element `if action_i < 0` loop.
        hack_action = [max(action[0, i].item(), 0.0) for i in range(22)]

        state, reward, done, info = env.step(hack_action)
        state = torch.Tensor(state)
        total_reward += reward

        print('Seed', seed, 'Local Test', sim_time, 'R1_Reward',
              '%.2f' % env.original_reward, 'Shaped_Reward',
              '%.2f' % total_reward, utils.pprint(hack_action))

        if done:
            break
Example #4
0
    def train(self, frame_limit):
        """Run forward_epoch once per generation until ``frame_limit`` total
        frames have been consumed, logging progress to a CSV tracker and
        stdout; every 5th generation also prints algorithm diagnostics.

        Args:
            frame_limit (int): total environment frames after which to stop.
        """
        # CSV tracker for the test-score and R1-reward curves
        test_tracker = utils.Tracker(
            self.args.savefolder,
            ['score_' + self.args.savetag, 'r1_' + self.args.savetag],
            '.csv')
        time_start = time.time()

        gen = 0
        while True:  # Effectively infinite generations
            gen += 1

            # Train one iteration
            test_mean, test_std = self.forward_epoch(gen, test_tracker)

            # Stage the status fields, then emit them in a single print call
            fps = self.total_frames / (time.time() - time_start)
            status = ('Gen/Frames', gen, '/', self.total_frames, 'max_ever:',
                      '%.2f' % self.best_score, ' Avg:',
                      '%.2f' % test_tracker.all_tracker[0][1], ' Frames/sec:',
                      '%.2f' % fps,
                      ' Test/RolloutScore', '%.2f' % self.test_trace[-1],
                      '%.2f' % self.rollout_fits_trace[-1], 'Ep_len',
                      '%.2f' % self.ep_len, '#Footsteps',
                      '%.2f' % self.num_footsteps, 'R1_Reward',
                      '%.2f' % self.r1_reward, 'savetag', self.args.savetag)
            print(*status)

            # Periodic diagnostics from the gradient-based learner
            if gen % 5 == 0:
                print()
                print('Entropy',
                      utils.pprint(math.exp(self.algo.entropy['mean'])),
                      'Next_Entropy',
                      utils.pprint(math.exp(self.algo.next_entropy['mean'])),
                      'Q_Loss', utils.pprint(self.algo.critic_loss['mean']),
                      'Q', utils.pprint(self.algo.policy_q['mean']), 'Next_Q',
                      utils.pprint(self.algo.next_q['mean']))
                print()

            if self.total_frames > frame_limit:
                break
Example #5
0
    def train(self, frame_limit):
        """Evolutionary training loop: run generations until ``frame_limit``
        total frames, printing per-generation and per-individual stats, then
        terminate all worker processes (best effort).

        Args:
            frame_limit (int): total environment frames after which to stop.
        """
        time_start = time.time()

        for gen in range(1, 1000000000):  # Effectively infinite generations

            # Train one iteration
            gen_best = self.forward_generation(gen, time_start)

            print()
            print('Gen/Frames', gen, '/', self.total_frames, 'Gen_Score',
                  '%.2f' % gen_best, 'Best_Score', '%.2f' % self.best_score,
                  ' Speedup', '%.2f' % self.best_speedup, ' Frames/sec:',
                  '%.2f' % (self.total_frames / (time.time() - time_start)),
                  # Use len() rather than calling __len__() directly
                  'Buffer', len(self.replay_buffer), 'Savetag',
                  self.args.savetag)
            for net in self.population:
                print(net.model_type, net.fitness_stats)
                # NOTE: 'BoltzmanChromosome' spelling matches the model_type
                # value set elsewhere in the project -- do not "correct" it
                if net.model_type == 'BoltzmanChromosome':
                    print(net.temperature_stats)
                print()
            print()

            # Not every configuration defines evolver.ratio / champ_type;
            # skip this diagnostic print if any attribute is missing.
            # (Also fixed typo in the printed label: 'Chamption' -> 'Champion')
            try:
                print('Initial Ratio', self.args.ratio, 'Current Ratio',
                      self.evolver.ratio, 'Champion Type', self.champ_type)
            except AttributeError:
                pass

            if gen % 5 == 0:
                print('Learner Fitness', [
                    utils.pprint(learner.value) for learner in self.portfolio
                ])

            if self.total_frames > frame_limit:
                break

        ### Kill all worker processes (best effort; pipes may already be closed)
        try:
            for p in self.task_pipes:
                p[0].send('TERMINATE')
            for p in self.test_task_pipes:
                p[0].send('TERMINATE')
            for p in self.evo_task_pipes:
                p[0].send('TERMINATE')
        except Exception:
            pass
Example #6
0
    def train(self, frame_limit):
        """Portfolio training loop: run generations until ``frame_limit``
        total frames, logging test scores and per-learner gradient statistics
        to CSV trackers, then terminate all worker processes (best effort).

        Args:
            frame_limit (int): total environment frames after which to stop.
        """
        # Tracker for test score and reward curves
        test_tracker = utils.Tracker(
            self.args.savefolder,
            ['score_' + self.args.savetag, 'r2_' + self.args.savetag],
            '.csv')

        # Column names for the per-learner gradient-statistics tracker
        grad_fields = [
            str(i) + 'entropy_' + self.args.savetag
            for i in range(len(self.portfolio))
        ] + [
            str(i) + 'policyQ_' + self.args.savetag
            for i in range(len(self.portfolio))
        ]
        grad_tracker = utils.Tracker(self.args.aux_folder, grad_fields,
                                     '.csv')
        time_start = time.time()

        for gen in range(1, 1000000000):  # Effectively infinite generations

            # Train one iteration
            max_fitness, champ_len, all_eplens, test_mean, test_std = self.forward_generation(
                gen, test_tracker)

            # NOTE(review): label says 'R2_Reward' but the value printed is
            # self.r1_reward -- confirm which is intended
            print('Gen/Frames', gen, '/', self.total_frames,
                  ' Pop_max/max_ever:', '%.2f' % max_fitness, '/',
                  '%.2f' % self.best_score, ' Avg:',
                  '%.2f' % test_tracker.all_tracker[0][1], ' Frames/sec:',
                  '%.2f' % (self.total_frames / (time.time() - time_start)),
                  ' Champ_len', '%.2f' % champ_len, ' Test_score u/std',
                  utils.pprint(test_mean), utils.pprint(test_std), 'Ep_len',
                  '%.2f' % self.ep_len, '#Footsteps',
                  '%.2f' % self.num_footsteps, 'R2_Reward',
                  '%.2f' % self.r1_reward, 'savetag', self.args.savetag)

            # Log current entropy and policy-Q means for every learner
            # (distinct name from grad_fields: these are values, not headers)
            grad_values = [
                algo.algo.entropy['mean'] for algo in self.portfolio
            ] + [algo.algo.policy_q['mean'] for algo in self.portfolio]
            grad_tracker.update(grad_values, self.total_frames)

            if gen % 5 == 0:
                print('Learner Fitness', [
                    utils.pprint(learner.value) for learner in self.portfolio
                ], 'Sum_stats_resource_allocation',
                      [learner.visit_count for learner in self.portfolio])
                # Stats dicts may not be populated early on; skip if missing.
                # (Also fixed typo in the printed label: 'Poilcy_Q' -> 'Policy_Q')
                try:
                    print('Entropy', [
                        '%.2f' % algo.algo.entropy['mean']
                        for algo in self.portfolio
                    ], 'Next_Entropy', [
                        '%.2f' % algo.algo.next_entropy['mean']
                        for algo in self.portfolio
                    ], 'Policy_Q', [
                        '%.2f' % algo.algo.policy_q['mean']
                        for algo in self.portfolio
                    ], 'Critic_Loss', [
                        '%.2f' % algo.algo.critic_loss['mean']
                        for algo in self.portfolio
                    ])
                    print()
                except (KeyError, AttributeError):
                    pass

            if self.total_frames > frame_limit:
                break

        ### Kill all worker processes (best effort; pipes may already be closed)
        try:
            for p in self.task_pipes:
                p[0].send('TERMINATE')
            for p in self.test_task_pipes:
                p[0].send('TERMINATE')
            for p in self.evo_task_pipes:
                p[0].send('TERMINATE')
        except Exception:
            pass