Beispiel #1
0
	def train(self, frame_limit):
		"""Run training epochs until the frame budget is exhausted.

		Args:
			frame_limit: stop once self.total_frames exceeds this count.
		"""
		# CSV loggers: test scores and gradient statistics
		test_tracker = utils.Tracker(self.args.savefolder, ['shaped_' + self.args.savetag, 'r2_' + self.args.savetag], '.csv')
		grad_tracker = utils.Tracker(self.args.aux_folder, ['entropy_' + self.args.savetag, 'policyQ_' + self.args.savetag], '.csv')
		#writer = SummaryWriter(self.args.savefolder)

		time_start = time.time()

		for gen in range(1, 1000000000):  # effectively unbounded generation loop

			# Train one iteration
			test_mean, test_std = self.forward_epoch(gen, test_tracker)

			# Pre-compute display values to keep the print call readable
			frames_per_sec = self.total_frames / (time.time() - time_start)
			last_test = '%.2f' % self.test_trace[-1] if self.test_trace else None
			print('Gen/Frames', gen, '/', self.total_frames,
				  'max_ever:', '%.2f' % self.best_score,
				  ' Avg:', '%.2f' % test_tracker.all_tracker[0][1],
				  ' Frames/sec:', '%.2f' % frames_per_sec,
				  ' Test/RolloutScore', last_test, '%.2f' % self.rollout_fits_trace[-1],
				  'Ep_len', '%.2f' % self.ep_len,
				  '#Footsteps', '%.2f' % self.num_footsteps,
				  'R2_Reward', '%.2f' % self.r1_reward,
				  'savetag', self.args.savetag)

			grad_tracker.update([self.algo.entropy['mean'], self.algo.policy_q['mean']], self.total_frames)

			# Periodic algorithm diagnostics
			if gen % 5 == 0:
				print()
				print('Entropy', self.algo.entropy['mean'], 'Next_Entropy', self.algo.next_entropy['mean'], 'Poilcy_Q', self.algo.policy_q['mean'], 'Critic_Loss', self.algo.critic_loss['mean'])
				print()

			if self.total_frames > frame_limit:
				break
Beispiel #2
0
    def train(self, frame_limit):
        """Train until the frame budget is exhausted, then shut down workers.

        Args:
            frame_limit: stop the generation loop once self.total_frames
                exceeds this count.
        """
        # Define Tracker class to track scores
        #if len(self.env_constructor.params['train_workloads']) == 1:
        # NOTE(review): 'speedup' lacks the '_' separator used elsewhere
        # ('speedup_' + savetag) — confirm the column name is intended.
        test_tracker = utils.Tracker(
            self.args.plot_folder,
            ['score_' + self.args.savetag, 'speedup' + self.args.savetag],
            '.csv')  # Tracker class to log progress

        time_start = time.time()

        for gen in range(1, 1000000000):  # Infinite generations

            # Train one iteration
            self.forward_generation(gen, test_tracker)

            print('Gen/Frames', gen, '/', self.total_frames, ' Score',
                  '%.2f' % self.best_score, ' Speedup',
                  '%.2f' % self.best_speedup, ' Frames/sec:',
                  '%.2f' % (self.total_frames / (time.time() - time_start)),
                  ' Savetag', self.args.savetag)

            if self.total_frames > frame_limit:
                break

        ### Kill all worker processes (best-effort: pipes may already be closed)
        try:
            for pipes in (self.task_pipes, self.test_task_pipes,
                          self.evo_task_pipes):
                for p in pipes:
                    p[0].send('TERMINATE')
        except Exception:  # was a bare `except:` — don't swallow SystemExit/KeyboardInterrupt
            pass
	def train(self, frame_limit):
		"""Run evolutionary generations until the frame budget is hit, then
		terminate the rollout workers.

		Args:
			frame_limit: stop once self.total_frames exceeds this count.
		"""
		# Define Tracker class to track scores
		test_tracker = utils.Tracker(self.args.savefolder, ['score_' + self.args.savetag], '.csv')  # Tracker class to log progress
		time_start = time.time()

		for gen in range(1, 1000000000):  # Infinite generations

			# Train one iteration
			max_fitness, champ_len, all_eplens, test_mean, test_std, rollout_fitness, rollout_eplens = self.forward_generation(gen, test_tracker)
			# NOTE(review): a test_mean of exactly 0.0 is not logged; use
			# `is not None` if zero is a valid score — confirm.
			if test_mean: self.args.writer.add_scalar('test_score', test_mean, gen)

			print('Gen/Frames:', gen,'/',self.total_frames,
				  ' Gen_max_score:', '%.2f'%max_fitness,
				  ' Champ_len', '%.2f'%champ_len, ' Test_score u/std', utils.pprint(test_mean), utils.pprint(test_std),
				  ' Rollout_u/std:', utils.pprint(np.mean(np.array(rollout_fitness))), utils.pprint(np.std(np.array(rollout_fitness))),
				  ' Rollout_mean_eplen:', utils.pprint(sum(rollout_eplens)/len(rollout_eplens)) if rollout_eplens else None)

			if gen % 5 == 0:
				# NOTE(review): 'Best_score_ever:''/' is implicit string
				# concatenation (prints 'Best_score_ever:/'); a comma may have
				# been intended — confirm before changing the output format.
				print('Best_score_ever:''/','%.2f'%self.best_score, ' FPS:','%.2f'%(self.total_frames/(time.time()-time_start)), 'savetag', self.args.savetag)
				print()

			if self.total_frames > frame_limit:
				break

		### Kill all worker processes (best-effort: pipes may already be closed)
		try:
			for pipes in (self.task_pipes, self.test_task_pipes, self.evo_task_pipes):
				for p in pipes:
					p[0].send('TERMINATE')
		except Exception:  # was a bare `except:` with `None` as a no-op statement
			pass
Beispiel #4
0
    def train(self, frame_limit):
        """Run training epochs until the frame budget is exhausted.

        Args:
            frame_limit: stop once self.total_frames exceeds this count.
        """
        # CSV logger for test scores and the r1 reward channel
        test_tracker = utils.Tracker(
            self.args.savefolder,
            ['score_' + self.args.savetag, 'r1_' + self.args.savetag],
            '.csv')
        time_start = time.time()

        for gen in range(1, 1000000000):  # effectively unbounded generation loop
            # Train one iteration
            test_mean, test_std = self.forward_epoch(gen, test_tracker)

            # Pre-compute display values to keep the print call readable
            frames_per_sec = self.total_frames / (time.time() - time_start)
            recent_trace = ['%.2f' % score for score in self.test_trace[-5:]]
            print('Gen/Frames', gen, '/', self.total_frames,
                  'max_ever:', '%.2f' % self.best_score,
                  ' Avg:', '%.2f' % test_tracker.all_tracker[0][1],
                  ' Frames/sec:', '%.2f' % frames_per_sec,
                  ' Test Trace', recent_trace,
                  'Ep_len', '%.2f' % self.ep_len,
                  '#Footsteps', '%.2f' % self.num_footsteps,
                  'R1_Reward', '%.2f' % self.r1_reward,
                  'savetag', self.args.savetag)

            if self.total_frames > frame_limit:
                break
Beispiel #5
0
    def __init__(self, args):
        """Set up evaluation metrics (Inception Score / FID) and score tracking.

        Args:
            args: experiment config; reads device, batch_size, D_iters,
                g_loss_mode, eval_size, use_pytorch_scores, test_name,
                dataset_name, savefolder and savetag.

        Raises:
            ValueError: if TF-based FID is requested for a dataset with no
                precalculated statistics file.
        """
        self.args = args
        self.device = args.device
        # Number of real samples consumed by discriminator updates per step
        self.D_train_sample = args.batch_size * args.D_iters
        # Extra slices of the real batch for relativistic loss / fitness eval
        self.batch_sample1 = self.D_train_sample + args.batch_size if 'rsgan' in args.g_loss_mode else self.D_train_sample
        self.batch_sample2 = self.batch_sample1 + args.eval_size if 'fitness' in args.g_loss_mode else self.batch_sample1

        self.no_FID = True
        self.no_IS = True
        self.sess, self.mu_real, self.sigma_real, self.get_inception_metrics = None, None, None, None
        if self.args.use_pytorch_scores and self.args.test_name:
            parallel = False
            if 'FID' in args.test_name:
                self.no_FID = False
            if 'IS' in args.test_name:
                self.no_IS = False
            self.get_inception_metrics = prepare_inception_metrics(
                args.dataset_name, parallel, self.no_IS, self.no_FID)
        elif args.test_name:  # guard: original branch indexed into test_name even when it was None/empty
            if 'FID' in args.test_name:
                if self.args.dataset_name == 'CIFAR10':
                    STAT_FILE = './tflib/TTUR/stats/fid_stats_cifar10_train.npz'
                elif self.args.dataset_name == 'CelebA':
                    STAT_FILE = './tflib/TTUR/stats/fid_stats_celeba.npz'
                elif self.args.dataset_name == 'LSUN':
                    STAT_FILE = './tflib/TTUR/stats/fid_stats_lsun_train.npz'
                else:
                    # Previously fell through with STAT_FILE unbound (NameError at np.load)
                    raise ValueError(
                        'No precalculated FID statistics for dataset %s' %
                        self.args.dataset_name)
                INCEPTION_PATH = './tflib/IS/imagenet'

                print("load train stats.. ")
                # load precalculated training set statistics
                f = np.load(STAT_FILE)
                self.mu_real, self.sigma_real = f['mu'][:], f['sigma'][:]
                f.close()
                print("ok")

                inception_path = fid.check_or_download_inception(
                    INCEPTION_PATH)  # download inception network
                fid.create_inception_graph(
                    inception_path)  # load the graph into the current TF graph

                config = tf.ConfigProto()
                config.gpu_options.allow_growth = True
                self.sess = tf.Session(config=config)
                self.sess.run(tf.global_variables_initializer())

                self.no_FID = False
            if 'IS' in args.test_name:
                self.no_IS = False
        # Define Tracker class to track scores
        self.test_tracker = utils.Tracker(
            self.args.savefolder, ['score_' + self.args.savetag],
            '.csv')  # Tracker class to log progress
Beispiel #6
0
    def __init__(self, args, model_constructor, env_constructor,
                 observation_space, action_space, env, state_template,
                 test_envs, platform):
        """Build the mixed neuro-evolution agent: population, learner
        portfolio, replay buffer, CSV trackers and (optionally)
        multiprocessing rollout workers.

        Args:
            args: experiment config (pop_size, ratio, rollout_size, use_mp,
                buffer_size, gpu, plot_folder, aux_folder, savetag, ...).
            model_constructor: policy-network factory; its state_dim is
                widened by 2 below.
            env_constructor: environment factory handed to rollout workers.
            observation_space: not read in this constructor.
            action_space: action space passed to the replay buffer.
            env: training environment instance.
            state_template: template state; its .x is extended in-place.
            test_envs: held-out environments for generalization tests.
            platform: platform descriptor, stored as-is.
        """
        self.args = args
        # Two extra input features — presumably to match the two dram_action
        # columns concatenated onto state_template.x below; TODO confirm.
        model_constructor.state_dim += 2
        self.platform = platform

        self.policy_string = self.compute_policy_type()
        # GPU only when both requested (args.gpu) and available
        self.device = torch.device("cuda" if torch.cuda.is_available(
        ) else "cpu") if self.args.gpu else torch.device('cpu')

        #Evolution
        # Append two columns of 2.0 (ones + 1) per node to the template state
        dram_action = torch.ones((len(state_template.x), 2)) + 1
        state_template.x = torch.cat([state_template.x, dram_action], axis=1)
        self.evolver = MixedSSNE(
            self.args, state_template
        )  #GA(self.args) if args.boltzman else SSNE(self.args)
        self.env_constructor = env_constructor

        # CSV trackers — each logs a pair of columns to args.plot_folder
        self.test_tracker = utils.Tracker(
            self.args.plot_folder,
            ['score_' + self.args.savetag, 'speedup_' + self.args.savetag],
            '.csv')  # Tracker class to log progress
        self.time_tracker = utils.Tracker(self.args.plot_folder, [
            'timed_score_' + self.args.savetag,
            'timed_speedup_' + self.args.savetag
        ], '.csv')
        self.champ_tracker = utils.Tracker(self.args.plot_folder, [
            'champ_score_' + self.args.savetag,
            'champ_speedup_' + self.args.savetag
        ], '.csv')
        self.pg_tracker = utils.Tracker(self.args.plot_folder, [
            'pg_noisy_speedup_' + self.args.savetag,
            'pg_clean_speedup_' + self.args.savetag
        ], '.csv')
        self.migration_tracker = utils.Tracker(self.args.plot_folder, [
            'selection_rate_' + self.args.savetag,
            'elite_rate_' + self.args.savetag
        ], '.csv')

        #Generalization Trackers
        self.r50_tracker = utils.Tracker(self.args.plot_folder, [
            'r50_score_' + self.args.savetag,
            'r50_speedup_' + self.args.savetag
        ], '.csv')
        self.r101_tracker = utils.Tracker(self.args.plot_folder, [
            'r101_score_' + self.args.savetag,
            'r101_speedup_' + self.args.savetag
        ], '.csv')
        self.bert_tracker = utils.Tracker(self.args.plot_folder, [
            'bert_score_' + self.args.savetag,
            'bert_speedup_' + self.args.savetag
        ], '.csv')

        # NOTE(review): the *_frames trackers reuse the same column names as
        # the trackers above (e.g. 'r50_score_...') — confirm the filenames
        # don't collide in utils.Tracker.
        self.r50_frames_tracker = utils.Tracker(self.args.plot_folder, [
            'r50_score_' + self.args.savetag,
            'r50_speedup_' + self.args.savetag
        ], '.csv')
        self.r101_frames_tracker = utils.Tracker(self.args.plot_folder, [
            'r101_score_' + self.args.savetag,
            'r101_speedup_' + self.args.savetag
        ], '.csv')
        self.bert_frames_tracker = utils.Tracker(self.args.plot_folder, [
            'bert_score_' + self.args.savetag,
            'bert_speedup_' + self.args.savetag
        ], '.csv')

        #Genealogy tool
        self.genealogy = Genealogy()

        self.env = env
        self.test_envs = test_envs

        if self.args.use_mp:
            #MP TOOLS: manager-backed list so worker processes share the population
            self.manager = Manager()
            #Initialize Mixed Population
            self.population = self.manager.list()

        else:
            self.population = []

        # Split the population: args.ratio fraction Boltzmann chromosomes,
        # the rest neural-network policies (in eval mode)
        boltzman_count = int(args.pop_size * args.ratio)
        rest = args.pop_size - boltzman_count
        for _ in range(boltzman_count):
            self.population.append(
                BoltzmannChromosome(model_constructor.num_nodes,
                                    model_constructor.action_dim))

        for _ in range(rest):
            self.population.append(
                model_constructor.make_model(self.policy_string))
            self.population[-1].eval()

        #Save best policy
        self.best_policy = model_constructor.make_model(self.policy_string)

        #Init BUFFER
        self.replay_buffer = Buffer(args.buffer_size, state_template,
                                    action_space,
                                    args.aux_folder + args.savetag)
        self.data_bucket = self.replay_buffer.tuples

        #Initialize portfolio of learners (only when PG rollouts are enabled)
        self.portfolio = []
        if args.rollout_size > 0:
            self.portfolio = initialize_portfolio(self.portfolio, self.args,
                                                  self.genealogy,
                                                  args.portfolio_id,
                                                  model_constructor)

        #Initialize Rollout Bucket: one policy slot per learner in the portfolio
        self.rollout_bucket = self.manager.list() if self.args.use_mp else []
        for _ in range(len(self.portfolio)):
            self.rollout_bucket.append(
                model_constructor.make_model(self.policy_string))

        if self.args.use_mp:
            ############## MULTIPROCESSING TOOLS ###################
            #Evolutionary population Rollout workers
            data_bucket = self.data_bucket if args.rollout_size > 0 else None  #If strictly evolutionary - don't store data
            self.evo_task_pipes = [Pipe() for _ in range(args.pop_size)]
            self.evo_result_pipes = [Pipe() for _ in range(args.pop_size)]
            self.evo_workers = [
                Process(target=rollout_worker,
                        args=(id, 'evo', self.evo_task_pipes[id][1],
                              self.evo_result_pipes[id][0], data_bucket,
                              self.population, env_constructor))
                for id in range(args.pop_size)
            ]
            for worker in self.evo_workers:
                worker.start()

            #Learner rollout workers
            self.task_pipes = [Pipe() for _ in range(args.rollout_size)]
            self.result_pipes = [Pipe() for _ in range(args.rollout_size)]
            self.workers = [
                Process(target=rollout_worker,
                        args=(id, 'pg', self.task_pipes[id][1],
                              self.result_pipes[id][0], data_bucket,
                              self.rollout_bucket, env_constructor))
                for id in range(args.rollout_size)
            ]
            for worker in self.workers:
                worker.start()

        # Per-worker flags marking which rollouts may be (re)launched
        self.roll_flag = [True for _ in range(args.rollout_size)]
        self.evo_flag = [True for _ in range(args.pop_size)]

        #Meta-learning controller (Resource Distribution)
        self.allocation = [
        ]  #Allocation controls the resource allocation across learners
        for i in range(args.rollout_size):
            self.allocation.append(
                i % len(self.portfolio))  #Start uniformly (equal resources)

        #Trackers
        self.best_score = -float('inf')
        self.gen_frames = 0
        self.total_frames = 0
        self.best_speedup = -float('inf')
        self.champ_type = None
Beispiel #7
0
    def train(self, iterations_limit):
        """Train the evolutionary GAN for iterations_limit generator steps.

        Iterates the training dataset epoch by epoch, calling
        forward_generation once per batch; periodically logs losses, saves
        generator checkpoints, evaluates IS/FID (image datasets) or MMD
        (toy 2-D datasets), and renders samples.

        Args:
            iterations_limit: total number of generator iterations to run.
        """
        # Define Tracker class to track scores
        test_tracker = utils.Tracker(self.args.savefolder,
                                     ['score_' + self.args.savetag],
                                     '.csv')  # Tracker class to log progress

        # Toy 2-D datasets are scored with MMD instead of IS/FID
        if self.args.dataset_name == '8gaussians' or self.args.dataset_name == '25gaussians' or self.args.dataset_name == 'swissroll':
            mmd_computer = MMD_loss()

        # NOTE(review): time_start is only referenced in commented-out code below
        time_start = time.time()

        epoch_num = 0
        gen = 1
        while gen < iterations_limit + 1:  # run until the iteration budget is spent
            epoch_start_time = time.time()  # timer for entire epoch
            iter_data_time = time.time(
            )  # timer for data loading per iteration

            for _, real_samples in enumerate(self.env.train_dataset):
                iter_start_time = time.time(
                )  # timer for computation per iteration

                if self.args.dataset_name != '8gaussians' and self.args.dataset_name != '25gaussians' and self.args.dataset_name != 'swissroll':
                    # Image loaders yield (images, labels); keep only images
                    real_samples = real_samples[0]
                else:
                    # Toy datasets: hold out the first 512 samples for MMD testing
                    test_samples = real_samples[:512]
                    real_samples = real_samples[512:]

                # Train one iteration — the real batch is sliced into
                # D-training / rsgan / fitness / evaluation segments
                # (boundaries set in __init__ via D_train_sample,
                # batch_sample1, batch_sample2)
                selected_operator = self.forward_generation(
                    gen, test_tracker,
                    real_samples[:self.D_train_sample].to(device=self.device),
                    real_samples[self.D_train_sample:self.batch_sample1].to(
                        device=self.device),
                    real_samples[self.batch_sample1:self.batch_sample2].to(
                        device=self.device),
                    real_samples[-self.args.eval_size:].to(device=self.device))

                # Periodic loss/timing report
                if gen % 1000 == 0:
                    t_data = iter_start_time - iter_data_time
                    t_comp = (time.time() -
                              iter_start_time) / self.args.batch_size
                    utils.print_current_losses(epoch_num, gen, t_comp, t_data,
                                               selected_operator)

                    # print('Gen:', gen, 'selected_operator:', selected_operator, ' GPS:',
                    #       '%.2f' % (gen / (time.time() - time_start)), ' IS_score u/std',
                    #       utils.pprint(IS_mean) if IS_mean is not None else None,
                    #       utils.pprint(IS_var) if IS_var is not None else None,
                    #       ' FID_score', utils.pprint(FID) if FID is not None else None)
                    # print()
                # if gen % 100 == 0:
                #     self.args.writer.add_scalar('GPS', gen / (time.time() - time_start), gen)

                ###### TEST SCORE ######
                # Checkpoint the current champion generator (last evolver gene)
                if gen % 5000 == 0:
                    self.learner.netG.load_state_dict(self.evolver.genes[-1])
                    torch.save(
                        self.learner.netG,
                        './checkpoint/{0}/netG_{1}.pth'.format(
                            self.args.dataset_name, gen))

                if self.args.test_name and gen % self.args.test_frequency == 0:
                    # FIGURE OUT THE CHAMP POLICY AND SYNC IT TO TEST
                    self.learner.netG.load_state_dict(self.evolver.genes[-1])

                    scores = tester(self.args, self.learner.netG,
                                    not self.no_FID, not self.no_IS, self.sess,
                                    self.mu_real, self.sigma_real,
                                    self.get_inception_metrics)

                    if not self.no_IS:
                        test_tracker.update([scores['IS_mean']], gen)
                        test_tracker.update([scores['IS_var']], gen)
                        self.args.writer.add_scalar('IS_score',
                                                    scores['IS_mean'], gen)
                    if not self.no_FID:
                        test_tracker.update([scores['FID']], gen)
                        self.args.writer.add_scalar('FID_score', scores['FID'],
                                                    gen)
                    utils.print_current_scores(epoch_num, gen, scores)

                # Periodic visualization (and MMD scoring for toy datasets)
                if gen % 1000 == 0:
                    self.learner.netG.load_state_dict(self.evolver.genes[-1])
                    if self.args.dataset_name == '8gaussians' or self.args.dataset_name == '25gaussians' or self.args.dataset_name == 'swissroll':
                        # toy_true_dist_kde(self.args, real_samples)
                        utils.toy_generate_kde(self.args, self.learner.netG)

                        with torch.no_grad():
                            noisev = torch.randn(512,
                                                 self.args.z_dim,
                                                 device=self.args.device)
                        gen_samples = self.learner.netG(noisev).detach()
                        test_samples = test_samples.to(device=self.device)
                        mmd2 = mmd_computer.forward(gen_samples, test_samples)
                        # mmd2 = abs(compute_metric_mmd2(gen_samples, test_samples))
                        test_tracker.update([mmd2], gen)
                        self.args.writer.add_scalar('mmd2', mmd2, gen)

                        test_samples = test_samples.detach().cpu().numpy()
                        gen_samples = gen_samples.detach().cpu().numpy()
                        utils.toy_generate_image(self.args, test_samples,
                                                 gen_samples)
                    else:
                        utils.generate_image(gen, self.args, self.learner.netG)

                gen += 1

            epoch_num += 1
            print('(epoch_%d) End of giters %d / %d \t Time Taken: %d sec' %
                  (epoch_num, gen, iterations_limit,
                   time.time() - epoch_start_time))

        self.args.writer.close()
Beispiel #8
0
    def train(self, frame_limit):
        """Train the portfolio of learners until the frame budget is hit,
        then terminate the rollout workers.

        Args:
            frame_limit: stop once self.total_frames exceeds this count.
        """
        # Define Tracker class to track scores
        test_tracker = utils.Tracker(
            self.args.savefolder,
            ['score_' + self.args.savetag, 'r2_' + self.args.savetag],
            '.csv')  # Tracker class to log progress

        # One entropy and one policy-Q column per learner in the portfolio
        grad_temp = [
            str(i) + 'entropy_' + self.args.savetag
            for i in range(len(self.portfolio))
        ] + [
            str(i) + 'policyQ_' + self.args.savetag
            for i in range(len(self.portfolio))
        ]
        grad_tracker = utils.Tracker(self.args.aux_folder, grad_temp,
                                     '.csv')  # Tracker class to log progress
        time_start = time.time()

        for gen in range(1, 1000000000):  # Infinite generations

            # Train one iteration
            max_fitness, champ_len, all_eplens, test_mean, test_std = self.forward_generation(
                gen, test_tracker)

            print('Gen/Frames', gen, '/', self.total_frames,
                  ' Pop_max/max_ever:', '%.2f' % max_fitness, '/',
                  '%.2f' % self.best_score, ' Avg:',
                  '%.2f' % test_tracker.all_tracker[0][1], ' Frames/sec:',
                  '%.2f' % (self.total_frames / (time.time() - time_start)),
                  ' Champ_len', '%.2f' % champ_len, ' Test_score u/std',
                  utils.pprint(test_mean), utils.pprint(test_std), 'Ep_len',
                  '%.2f' % self.ep_len, '#Footsteps',
                  '%.2f' % self.num_footsteps, 'R2_Reward',
                  '%.2f' % self.r1_reward, 'savetag', self.args.savetag)

            # Log per-learner gradient statistics against the frame count
            grad_temp = [
                algo.algo.entropy['mean'] for algo in self.portfolio
            ] + [algo.algo.policy_q['mean'] for algo in self.portfolio]
            grad_tracker.update(grad_temp, self.total_frames)

            if gen % 5 == 0:
                print('Learner Fitness', [
                    utils.pprint(learner.value) for learner in self.portfolio
                ], 'Sum_stats_resource_allocation',
                      [learner.visit_count for learner in self.portfolio])
                # Best-effort diagnostics: stats may be unpopulated early on
                try:
                    print('Entropy', [
                        '%.2f' % algo.algo.entropy['mean']
                        for algo in self.portfolio
                    ], 'Next_Entropy', [
                        '%.2f' % algo.algo.next_entropy['mean']
                        for algo in self.portfolio
                    ], 'Poilcy_Q', [
                        '%.2f' % algo.algo.policy_q['mean']
                        for algo in self.portfolio
                    ], 'Critic_Loss', [
                        '%.2f' % algo.algo.critic_loss['mean']
                        for algo in self.portfolio
                    ])
                    print()
                except Exception:  # was a bare `except:` with `None` as a no-op
                    pass

            if self.total_frames > frame_limit:
                break

        ### Kill all worker processes (best-effort: pipes may already be closed)
        try:
            for pipes in (self.task_pipes, self.test_task_pipes,
                          self.evo_task_pipes):
                for p in pipes:
                    p[0].send('TERMINATE')
        except Exception:  # was a bare `except:` — don't swallow SystemExit/KeyboardInterrupt
            pass
Beispiel #9
0
    # Model / run options (parser is defined above this fragment)
    parser.add_argument('--model_file', type=str, default='conv3x256value.h5')
    parser.add_argument('--profile')
    # NOTE(review): action="store_false" with default=False means passing
    # --render still yields False — the flag is a no-op; "store_true" was
    # probably intended. Confirm before changing behavior.
    parser.add_argument('--render', action="store_false", default=False)
    # runner params
    parser.add_argument('--num_epochs', type=int, default=10000000)
    parser.add_argument('--num_runners', type=int, default=1)
    # MCTS params
    parser.add_argument('--mcts_iters', type=int, default=50)
    parser.add_argument('--mcts_c_puct', type=float, default=1.0)
    # RL params
    parser.add_argument('--discount', type=float, default=0.9)
    parser.add_argument('--temperature', type=float, default=0.4)
    args = parser.parse_args()

    #Trackers: CSV loggers for expert and network rewards under ./log
    expert_tracker = utils.Tracker('log', ['expert_reward'],
                                   '.csv')  # Initiate tracker
    net_tracker = utils.Tracker('log', ['net_reward'],
                                '.csv')  # Initiate tracker

    #Load/Create Model: resume from checkpoint or build a fresh conv model
    if LOAD_TRAINED_MODEL: model = torch.load('pretrained_model.pth')
    else: model = models.Conv_model(z_dim=250)
    model.cuda()  # requires a CUDA device
    #Initialize learning agent
    agent = Agent()
    #Initialize expert
    expert = eXit.Expert(0, args)
    #Initialize imitation engine and evaluator
    imitation_engine = models.Imitation(model)
    evaluator = Evaluate(agent)