def train(self, frame_limit):
    # Trackers to log test scores and gradient statistics
    test_tracker = utils.Tracker(self.args.savefolder,
                                 ['shaped_' + self.args.savetag, 'r2_' + self.args.savetag],
                                 '.csv')
    grad_tracker = utils.Tracker(self.args.aux_folder,
                                 ['entropy_' + self.args.savetag, 'policyQ_' + self.args.savetag],
                                 '.csv')
    # writer = SummaryWriter(self.args.savefolder)

    time_start = time.time()
    for gen in range(1, 1000000000):  # Effectively infinite generations

        # Train one iteration
        test_mean, test_std = self.forward_epoch(gen, test_tracker)

        print('Gen/Frames', gen, '/', self.total_frames,
              'max_ever:', '%.2f' % self.best_score,
              ' Avg:', '%.2f' % test_tracker.all_tracker[0][1],
              ' Frames/sec:', '%.2f' % (self.total_frames / (time.time() - time_start)),
              ' Test/RolloutScore',
              '%.2f' % self.test_trace[-1] if len(self.test_trace) > 0 else None,
              '%.2f' % self.rollout_fits_trace[-1],
              'Ep_len', '%.2f' % self.ep_len,
              '#Footsteps', '%.2f' % self.num_footsteps,
              'R2_Reward', '%.2f' % self.r1_reward,
              'savetag', self.args.savetag)

        grad_tracker.update([self.algo.entropy['mean'], self.algo.policy_q['mean']],
                            self.total_frames)

        if gen % 5 == 0:
            print()
            print('Entropy', self.algo.entropy['mean'],
                  'Next_Entropy', self.algo.next_entropy['mean'],
                  'Policy_Q', self.algo.policy_q['mean'],
                  'Critic_Loss', self.algo.critic_loss['mean'])
            print()

        if self.total_frames > frame_limit:
            break
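# All of the train() loops in this file rely on a small `utils.Tracker` logging
# helper. Its real implementation lives in the project's utils module; the
# sketch below only illustrates the interface these call sites assume: a
# constructor taking (folder, list of tags, extension), an update(values, step)
# method appending one row per tag, and an all_tracker structure whose [0][1]
# slot holds a running average. Names and internals here are assumptions, not
# the actual class.
import os


class Tracker:
    def __init__(self, save_folder, tags, extension):
        os.makedirs(save_folder, exist_ok=True)
        # One [history, running_average, file_path] entry per tag
        self.all_tracker = [[[], 0.0, os.path.join(save_folder, tag + extension)]
                            for tag in tags]

    def update(self, values, step):
        for entry, value in zip(self.all_tracker, values):
            entry[0].append(value)
            entry[1] = sum(entry[0]) / len(entry[0])  # running mean read by callers
            with open(entry[2], 'a') as f:
                f.write('%d,%f\n' % (step, value))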
def train(self, frame_limit):
    # Tracker to log test scores
    # if len(self.env_constructor.params['train_workloads']) == 1:
    test_tracker = utils.Tracker(
        self.args.plot_folder,
        ['score_' + self.args.savetag, 'speedup_' + self.args.savetag],
        '.csv')

    time_start = time.time()
    for gen in range(1, 1000000000):  # Effectively infinite generations

        # Train one iteration
        self.forward_generation(gen, test_tracker)

        print('Gen/Frames', gen, '/', self.total_frames,
              ' Score', '%.2f' % self.best_score,
              ' Speedup', '%.2f' % self.best_speedup,
              ' Frames/sec:', '%.2f' % (self.total_frames / (time.time() - time_start)),
              ' Savetag', self.args.savetag)

        if self.total_frames > frame_limit:
            break

    ### Kill all processes
    try:
        for p in self.task_pipes:
            p[0].send('TERMINATE')
        for p in self.test_task_pipes:
            p[0].send('TERMINATE')
        for p in self.evo_task_pipes:
            p[0].send('TERMINATE')
    except:
        pass  # workers may already have exited
def train(self, frame_limit):
    # Tracker to log test scores
    test_tracker = utils.Tracker(self.args.savefolder,
                                 ['score_' + self.args.savetag], '.csv')

    time_start = time.time()
    for gen in range(1, 1000000000):  # Effectively infinite generations

        # Train one iteration
        max_fitness, champ_len, all_eplens, test_mean, test_std, \
            rollout_fitness, rollout_eplens = self.forward_generation(gen, test_tracker)
        if test_mean is not None:
            self.args.writer.add_scalar('test_score', test_mean, gen)

        print('Gen/Frames:', gen, '/', self.total_frames,
              ' Gen_max_score:', '%.2f' % max_fitness,
              ' Champ_len', '%.2f' % champ_len,
              ' Test_score u/std', utils.pprint(test_mean), utils.pprint(test_std),
              ' Rollout_u/std:', utils.pprint(np.mean(np.array(rollout_fitness))),
              utils.pprint(np.std(np.array(rollout_fitness))),
              ' Rollout_mean_eplen:',
              utils.pprint(sum(rollout_eplens) / len(rollout_eplens)) if rollout_eplens else None)

        if gen % 5 == 0:
            print('Best_score_ever:', '%.2f' % self.best_score,
                  ' FPS:', '%.2f' % (self.total_frames / (time.time() - time_start)),
                  'savetag', self.args.savetag)
            print()

        if self.total_frames > frame_limit:
            break

    ### Kill all processes
    try:
        for p in self.task_pipes:
            p[0].send('TERMINATE')
        for p in self.test_task_pipes:
            p[0].send('TERMINATE')
        for p in self.evo_task_pipes:
            p[0].send('TERMINATE')
    except:
        pass  # workers may already have exited
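# The shutdown block above pushes the string 'TERMINATE' down each task pipe.
# For that handshake to work, every rollout worker must poll its pipe and exit
# on the sentinel. A minimal sketch of the consuming side, assuming the
# (task_pipe, result_pipe) wiring used when the workers are spawned; the
# run_episode helper is a placeholder for the worker's actual rollout logic:
def _worker_loop(task_pipe, result_pipe, run_episode):
    while True:
        msg = task_pipe.recv()       # blocks until the trainer sends work
        if msg == 'TERMINATE':       # sentinel sent by train() on shutdown
            break
        fitness, num_frames = run_episode(msg)  # msg identifies the policy to roll out
        result_pipe.send((msg, fitness, num_frames))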
def train(self, frame_limit):
    # Tracker to log test scores
    test_tracker = utils.Tracker(
        self.args.savefolder,
        ['score_' + self.args.savetag, 'r1_' + self.args.savetag],
        '.csv')

    time_start = time.time()
    for gen in range(1, 1000000000):  # Effectively infinite generations

        # Train one iteration
        test_mean, test_std = self.forward_epoch(gen, test_tracker)

        print('Gen/Frames', gen, '/', self.total_frames,
              'max_ever:', '%.2f' % self.best_score,
              ' Avg:', '%.2f' % test_tracker.all_tracker[0][1],
              ' Frames/sec:', '%.2f' % (self.total_frames / (time.time() - time_start)),
              ' Test Trace', ['%.2f' % i for i in self.test_trace[-5:]],
              'Ep_len', '%.2f' % self.ep_len,
              '#Footsteps', '%.2f' % self.num_footsteps,
              'R1_Reward', '%.2f' % self.r1_reward,
              'savetag', self.args.savetag)

        # if gen % 5 == 0:
        #     print()
        #     print('Entropy', self.algo.entropy, 'Next_Entropy', self.algo.next_entropy,
        #           'Policy_Q', self.algo.policy_q, 'Critic_Loss', self.algo.critic_loss)
        #     print()

        if self.total_frames > frame_limit:
            break
def __init__(self, args):
    self.args = args
    self.device = args.device

    # Sample-count boundaries for slicing each real batch: D updates first,
    # then (optionally) an RSGAN batch, then (optionally) a fitness-eval batch
    self.D_train_sample = args.batch_size * args.D_iters
    self.batch_sample1 = (self.D_train_sample + args.batch_size
                          if 'rsgan' in args.g_loss_mode else self.D_train_sample)
    self.batch_sample2 = (self.batch_sample1 + args.eval_size
                          if 'fitness' in args.g_loss_mode else self.batch_sample1)

    self.no_FID = True
    self.no_IS = True
    self.sess, self.mu_real, self.sigma_real, self.get_inception_metrics = None, None, None, None

    if self.args.use_pytorch_scores and self.args.test_name:
        parallel = False
        if 'FID' in args.test_name:
            self.no_FID = False
        if 'IS' in args.test_name:
            self.no_IS = False
        self.get_inception_metrics = prepare_inception_metrics(
            args.dataset_name, parallel, self.no_IS, self.no_FID)
    else:
        if args.test_name and 'FID' in args.test_name:
            if self.args.dataset_name == 'CIFAR10':
                STAT_FILE = './tflib/TTUR/stats/fid_stats_cifar10_train.npz'
            elif self.args.dataset_name == 'CelebA':
                STAT_FILE = './tflib/TTUR/stats/fid_stats_celeba.npz'
            elif self.args.dataset_name == 'LSUN':
                STAT_FILE = './tflib/TTUR/stats/fid_stats_lsun_train.npz'
            else:
                raise ValueError('No precalculated FID stats for dataset %s'
                                 % self.args.dataset_name)
            INCEPTION_PATH = './tflib/IS/imagenet'

            print("load train stats.. ")
            # Load precalculated training set statistics
            f = np.load(STAT_FILE)
            self.mu_real, self.sigma_real = f['mu'][:], f['sigma'][:]
            f.close()
            print("ok")

            inception_path = fid.check_or_download_inception(
                INCEPTION_PATH)  # download inception network
            fid.create_inception_graph(
                inception_path)  # load the graph into the current TF graph
            config = tf.ConfigProto()
            config.gpu_options.allow_growth = True
            self.sess = tf.Session(config=config)
            self.sess.run(tf.global_variables_initializer())
            self.no_FID = False
        if args.test_name and 'IS' in args.test_name:
            self.no_IS = False

    # Tracker to log test scores
    self.test_tracker = utils.Tracker(self.args.savefolder,
                                      ['score_' + self.args.savetag], '.csv')
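# With mu_real/sigma_real cached above, the TensorFlow path scores a generator
# by comparing those statistics against statistics of generated samples. A
# sketch of how a tester might use the TTUR fid module for this (the helper
# name and the NHWC uint8 image format are assumptions about the surrounding
# code; calculate_activation_statistics and calculate_frechet_distance are the
# TTUR fid.py entry points):
def compute_fid(generated_images, sess, mu_real, sigma_real):
    # generated_images: NHWC uint8 batch fed through the inception graph
    mu_gen, sigma_gen = fid.calculate_activation_statistics(generated_images, sess)
    return fid.calculate_frechet_distance(mu_gen, sigma_gen, mu_real, sigma_real)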
def __init__(self, args, model_constructor, env_constructor, observation_space,
             action_space, env, state_template, test_envs, platform):
    self.args = args
    model_constructor.state_dim += 2
    self.platform = platform
    self.policy_string = self.compute_policy_type()
    self.device = torch.device(
        "cuda" if torch.cuda.is_available() else "cpu"
    ) if self.args.gpu else torch.device('cpu')

    # Evolution
    dram_action = torch.ones((len(state_template.x), 2)) + 1  # constant placeholder action
    state_template.x = torch.cat([state_template.x, dram_action], axis=1)
    self.evolver = MixedSSNE(self.args, state_template)  # GA(self.args) if args.boltzman else SSNE(self.args)
    self.env_constructor = env_constructor

    # Trackers to log progress
    self.test_tracker = utils.Tracker(
        self.args.plot_folder,
        ['score_' + self.args.savetag, 'speedup_' + self.args.savetag], '.csv')
    self.time_tracker = utils.Tracker(
        self.args.plot_folder,
        ['timed_score_' + self.args.savetag, 'timed_speedup_' + self.args.savetag], '.csv')
    self.champ_tracker = utils.Tracker(
        self.args.plot_folder,
        ['champ_score_' + self.args.savetag, 'champ_speedup_' + self.args.savetag], '.csv')
    self.pg_tracker = utils.Tracker(
        self.args.plot_folder,
        ['pg_noisy_speedup_' + self.args.savetag, 'pg_clean_speedup_' + self.args.savetag], '.csv')
    self.migration_tracker = utils.Tracker(
        self.args.plot_folder,
        ['selection_rate_' + self.args.savetag, 'elite_rate_' + self.args.savetag], '.csv')

    # Generalization trackers
    self.r50_tracker = utils.Tracker(
        self.args.plot_folder,
        ['r50_score_' + self.args.savetag, 'r50_speedup_' + self.args.savetag], '.csv')
    self.r101_tracker = utils.Tracker(
        self.args.plot_folder,
        ['r101_score_' + self.args.savetag, 'r101_speedup_' + self.args.savetag], '.csv')
    self.bert_tracker = utils.Tracker(
        self.args.plot_folder,
        ['bert_score_' + self.args.savetag, 'bert_speedup_' + self.args.savetag], '.csv')
    self.r50_frames_tracker = utils.Tracker(
        self.args.plot_folder,
        ['r50_score_' + self.args.savetag, 'r50_speedup_' + self.args.savetag], '.csv')
    self.r101_frames_tracker = utils.Tracker(
        self.args.plot_folder,
        ['r101_score_' + self.args.savetag, 'r101_speedup_' + self.args.savetag], '.csv')
    self.bert_frames_tracker = utils.Tracker(
        self.args.plot_folder,
        ['bert_score_' + self.args.savetag, 'bert_speedup_' + self.args.savetag], '.csv')

    # Genealogy tool
    self.genealogy = Genealogy()

    self.env = env
    self.test_envs = test_envs

    if self.args.use_mp:
        # MP TOOLS
        self.manager = Manager()
        # Initialize mixed population
        self.population = self.manager.list()
    else:
        self.population = []

    boltzman_count = int(args.pop_size * args.ratio)
    rest = args.pop_size - boltzman_count
    for _ in range(boltzman_count):
        self.population.append(
            BoltzmannChromosome(model_constructor.num_nodes,
                                model_constructor.action_dim))
    for _ in range(rest):
        self.population.append(model_constructor.make_model(self.policy_string))
        self.population[-1].eval()

    # Save best policy
    self.best_policy = model_constructor.make_model(self.policy_string)

    # Init replay buffer
    self.replay_buffer = Buffer(args.buffer_size, state_template, action_space,
                                args.aux_folder + args.savetag)
    self.data_bucket = self.replay_buffer.tuples

    # Initialize portfolio of learners
    self.portfolio = []
    if args.rollout_size > 0:
        self.portfolio = initialize_portfolio(self.portfolio, self.args,
                                              self.genealogy, args.portfolio_id,
                                              model_constructor)

    # Initialize rollout bucket
    self.rollout_bucket = self.manager.list() if self.args.use_mp else []
    for _ in range(len(self.portfolio)):
        self.rollout_bucket.append(model_constructor.make_model(self.policy_string))

    if self.args.use_mp:
        ############## MULTIPROCESSING TOOLS ##############
        # Evolutionary population rollout workers
        data_bucket = self.data_bucket if args.rollout_size > 0 else None  # if strictly evo, don't store data
        self.evo_task_pipes = [Pipe() for _ in range(args.pop_size)]
        self.evo_result_pipes = [Pipe() for _ in range(args.pop_size)]
        self.evo_workers = [
            Process(target=rollout_worker,
                    args=(id, 'evo', self.evo_task_pipes[id][1],
                          self.evo_result_pipes[id][0], data_bucket,
                          self.population, env_constructor))
            for id in range(args.pop_size)
        ]
        for worker in self.evo_workers:
            worker.start()

        # Learner rollout workers
        self.task_pipes = [Pipe() for _ in range(args.rollout_size)]
        self.result_pipes = [Pipe() for _ in range(args.rollout_size)]
        self.workers = [
            Process(target=rollout_worker,
                    args=(id, 'pg', self.task_pipes[id][1],
                          self.result_pipes[id][0], data_bucket,
                          self.rollout_bucket, env_constructor))
            for id in range(args.rollout_size)
        ]
        for worker in self.workers:
            worker.start()

    self.roll_flag = [True for _ in range(args.rollout_size)]
    self.evo_flag = [True for _ in range(args.pop_size)]

    # Meta-learning controller (resource distribution)
    self.allocation = []  # Allocation controls the resource allocation across learners
    for i in range(args.rollout_size):
        self.allocation.append(i % len(self.portfolio))  # Start uniformly (equal resources)

    # Trackers
    self.best_score = -float('inf')
    self.gen_frames = 0
    self.total_frames = 0
    self.best_speedup = -float('inf')
    self.champ_type = None
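# The Process targets above receive (id, worker_type, task_pipe_end,
# result_pipe_end, data_bucket, model_bucket, env_constructor). rollout_worker
# itself is defined elsewhere in the repo; the sketch below is only the loop
# shape those arguments imply. make_env() and run_episode() are hypothetical
# stand-ins for the project's actual environment and rollout helpers:
def rollout_worker_sketch(worker_id, worker_type, task_pipe, result_pipe,
                          data_bucket, model_bucket, env_constructor):
    env = env_constructor.make_env()        # hypothetical constructor API
    while True:
        msg = task_pipe.recv()
        if msg == 'TERMINATE':
            break
        net = model_bucket[msg]             # index sent by the trainer
        fitness, frames, transitions = run_episode(env, net)  # assumed helper
        if data_bucket is not None:         # 'pg' workers feed the replay buffer
            data_bucket.extend(transitions)
        result_pipe.send((worker_id, msg, fitness, frames))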
def train(self, iterations_limit):
    # Tracker to log test scores
    test_tracker = utils.Tracker(self.args.savefolder,
                                 ['score_' + self.args.savetag], '.csv')

    is_toy = self.args.dataset_name in ('8gaussians', '25gaussians', 'swissroll')
    if is_toy:
        mmd_computer = MMD_loss()

    time_start = time.time()
    epoch_num = 0
    gen = 1
    while gen < iterations_limit + 1:  # Loop until the generator-iteration limit
        epoch_start_time = time.time()  # timer for entire epoch
        iter_data_time = time.time()    # timer for data loading per iteration

        for _, real_samples in enumerate(self.env.train_dataset):
            iter_start_time = time.time()  # timer for computation per iteration

            if not is_toy:
                real_samples = real_samples[0]
            else:
                test_samples = real_samples[:512]
                real_samples = real_samples[512:]

            # Train one iteration
            selected_operator = self.forward_generation(
                gen, test_tracker,
                real_samples[:self.D_train_sample].to(device=self.device),
                real_samples[self.D_train_sample:self.batch_sample1].to(device=self.device),
                real_samples[self.batch_sample1:self.batch_sample2].to(device=self.device),
                real_samples[-self.args.eval_size:].to(device=self.device))

            if gen % 1000 == 0:
                t_data = iter_start_time - iter_data_time
                t_comp = (time.time() - iter_start_time) / self.args.batch_size
                utils.print_current_losses(epoch_num, gen, t_comp, t_data,
                                           selected_operator)
                # print('Gen:', gen, 'selected_operator:', selected_operator, ' GPS:',
                #       '%.2f' % (gen / (time.time() - time_start)), ' IS_score u/std',
                #       utils.pprint(IS_mean) if IS_mean is not None else None,
                #       utils.pprint(IS_var) if IS_var is not None else None,
                #       ' FID_score', utils.pprint(FID) if FID is not None else None)
                # print()

            # if gen % 100 == 0:
            #     self.args.writer.add_scalar('GPS', gen / (time.time() - time_start), gen)

            ###### TEST SCORE ######
            if gen % 5000 == 0:
                self.learner.netG.load_state_dict(self.evolver.genes[-1])
                torch.save(self.learner.netG,
                           './checkpoint/{0}/netG_{1}.pth'.format(
                               self.args.dataset_name, gen))

            if self.args.test_name and gen % self.args.test_frequency == 0:
                # Figure out the champ policy and sync it to test
                self.learner.netG.load_state_dict(self.evolver.genes[-1])
                scores = tester(self.args, self.learner.netG, not self.no_FID,
                                not self.no_IS, self.sess, self.mu_real,
                                self.sigma_real, self.get_inception_metrics)
                if not self.no_IS:
                    test_tracker.update([scores['IS_mean']], gen)
                    test_tracker.update([scores['IS_var']], gen)
                    self.args.writer.add_scalar('IS_score', scores['IS_mean'], gen)
                if not self.no_FID:
                    test_tracker.update([scores['FID']], gen)
                    self.args.writer.add_scalar('FID_score', scores['FID'], gen)
                utils.print_current_scores(epoch_num, gen, scores)

            if gen % 1000 == 0:
                self.learner.netG.load_state_dict(self.evolver.genes[-1])
                if is_toy:
                    # toy_true_dist_kde(self.args, real_samples)
                    utils.toy_generate_kde(self.args, self.learner.netG)
                    with torch.no_grad():
                        noisev = torch.randn(512, self.args.z_dim,
                                             device=self.args.device)
                        gen_samples = self.learner.netG(noisev).detach()
                    test_samples = test_samples.to(device=self.device)
                    mmd2 = mmd_computer.forward(gen_samples, test_samples)
                    # mmd2 = abs(compute_metric_mmd2(gen_samples, test_samples))
                    test_tracker.update([mmd2], gen)
                    self.args.writer.add_scalar('mmd2', mmd2, gen)
                    test_samples = test_samples.detach().cpu().numpy()
                    gen_samples = gen_samples.detach().cpu().numpy()
                    utils.toy_generate_image(self.args, test_samples, gen_samples)
                else:
                    utils.generate_image(gen, self.args, self.learner.netG)

            gen += 1

        epoch_num += 1
        print('(epoch_%d) End of giters %d / %d \t Time Taken: %d sec' %
              (epoch_num, gen, iterations_limit, time.time() - epoch_start_time))

    self.args.writer.close()
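# MMD_loss above is a project module; the quantity logged as mmd2 is the
# squared maximum mean discrepancy between generated and held-out toy samples.
# A self-contained sketch of a biased RBF-kernel MMD^2 estimator with a fixed
# bandwidth (the real class may combine several bandwidths or use a median
# heuristic), for illustration only:
import torch


def mmd2_rbf(x, y, sigma=1.0):
    # x: (n, d) generated samples, y: (m, d) reference samples
    xy = torch.cat([x, y], dim=0)
    d2 = torch.cdist(xy, xy, p=2).pow(2)   # pairwise squared distances
    k = torch.exp(-d2 / (2 * sigma ** 2))  # Gaussian kernel matrix
    n = x.size(0)
    kxx, kyy, kxy = k[:n, :n], k[n:, n:], k[:n, n:]
    return kxx.mean() + kyy.mean() - 2 * kxy.mean()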
def train(self, frame_limit):
    # Trackers to log test scores and per-learner gradient statistics
    test_tracker = utils.Tracker(
        self.args.savefolder,
        ['score_' + self.args.savetag, 'r2_' + self.args.savetag], '.csv')
    grad_temp = [str(i) + 'entropy_' + self.args.savetag
                 for i in range(len(self.portfolio))] + \
                [str(i) + 'policyQ_' + self.args.savetag
                 for i in range(len(self.portfolio))]
    grad_tracker = utils.Tracker(self.args.aux_folder, grad_temp, '.csv')

    time_start = time.time()
    for gen in range(1, 1000000000):  # Effectively infinite generations

        # Train one iteration
        max_fitness, champ_len, all_eplens, test_mean, test_std = \
            self.forward_generation(gen, test_tracker)

        print('Gen/Frames', gen, '/', self.total_frames,
              ' Pop_max/max_ever:', '%.2f' % max_fitness, '/', '%.2f' % self.best_score,
              ' Avg:', '%.2f' % test_tracker.all_tracker[0][1],
              ' Frames/sec:', '%.2f' % (self.total_frames / (time.time() - time_start)),
              ' Champ_len', '%.2f' % champ_len,
              ' Test_score u/std', utils.pprint(test_mean), utils.pprint(test_std),
              'Ep_len', '%.2f' % self.ep_len,
              '#Footsteps', '%.2f' % self.num_footsteps,
              'R2_Reward', '%.2f' % self.r1_reward,
              'savetag', self.args.savetag)

        grad_temp = [algo.algo.entropy['mean'] for algo in self.portfolio] + \
                    [algo.algo.policy_q['mean'] for algo in self.portfolio]
        grad_tracker.update(grad_temp, self.total_frames)

        if gen % 5 == 0:
            print('Learner Fitness',
                  [utils.pprint(learner.value) for learner in self.portfolio],
                  'Sum_stats_resource_allocation',
                  [learner.visit_count for learner in self.portfolio])
            try:
                print('Entropy',
                      ['%.2f' % algo.algo.entropy['mean'] for algo in self.portfolio],
                      'Next_Entropy',
                      ['%.2f' % algo.algo.next_entropy['mean'] for algo in self.portfolio],
                      'Policy_Q',
                      ['%.2f' % algo.algo.policy_q['mean'] for algo in self.portfolio],
                      'Critic_Loss',
                      ['%.2f' % algo.algo.critic_loss['mean'] for algo in self.portfolio])
                print()
            except:
                pass  # stats may not be populated yet

        if self.total_frames > frame_limit:
            break

    ### Kill all processes
    try:
        for p in self.task_pipes:
            p[0].send('TERMINATE')
        for p in self.test_task_pipes:
            p[0].send('TERMINATE')
        for p in self.evo_task_pipes:
            p[0].send('TERMINATE')
    except:
        pass  # workers may already have exited
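# The logging above reads each learner's statistics as dicts with a 'mean' key
# (algo.algo.entropy['mean'], etc.). How those running means are maintained is
# not visible in this file; one plausible sketch is an incremental mean updated
# once per gradient step. The field names mirror the call sites, but the update
# rule itself is an assumption:
def update_running_stat(stat, value):
    # stat: e.g. {'mean': 0.0, 'count': 0}, mutated in place
    stat['count'] += 1
    stat['mean'] += (value - stat['mean']) / stat['count']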
parser.add_argument('--model_file', type=str, default='conv3x256value.h5')
parser.add_argument('--profile')
parser.add_argument('--render', action="store_true", default=False)

# runner params
parser.add_argument('--num_epochs', type=int, default=10000000)
parser.add_argument('--num_runners', type=int, default=1)

# MCTS params
parser.add_argument('--mcts_iters', type=int, default=50)
parser.add_argument('--mcts_c_puct', type=float, default=1.0)

# RL params
parser.add_argument('--discount', type=float, default=0.9)
parser.add_argument('--temperature', type=float, default=0.4)

args = parser.parse_args()

# Trackers
expert_tracker = utils.Tracker('log', ['expert_reward'], '.csv')  # Initiate tracker
net_tracker = utils.Tracker('log', ['net_reward'], '.csv')  # Initiate tracker

# Load/create model
if LOAD_TRAINED_MODEL:
    model = torch.load('pretrained_model.pth')
else:
    model = models.Conv_model(z_dim=250)
model.cuda()

# Initialize learning agent
agent = Agent()

# Initialize expert
expert = eXit.Expert(0, args)

# Initialize imitation engine and evaluator
imitation_engine = models.Imitation(model)
evaluator = Evaluate(agent)