def main(_):
    hps = get_hps().parse(FLAGS.hpconfig)
    if hps.agent_index == 0:
        print('Writing to {}'.format(hps.result_dir))
    print('Starting process {}'.format(hps.agent_index))

    if not hps.slurm:
        raise ValueError(
            'This script should only be called from setup.py to run on a slurm cluster')
    elif not hps.distributed:
        Agent(hps)
    else:
        # Build the cluster
        cluster_spec, hps = make_cluster(hps)
        if hps.agent_index == 0:
            print('Agent {} sees cluster {}'.format(
                hps.agent_index, cluster_spec.as_cluster_def()))
        # cluster = tf.train.ClusterSpec(cluster)

        if hps.job_type == 'ps':
            print('Starting parameter server {}'.format(hps.job_index))
            ps_function(hps, cluster_spec)
        else:
            print('Starting agent {}'.format(hps.job_index))
            Agent(hps, cluster_spec)
def main(_):
    '''Set-up training'''
    # parsing
    hps = get_hps().parse(FLAGS.hpconfig)
    import_matplotlib(hps)

    # Logging and saving
    hps.base_result_dir, hps.checkpoint_dir = make_result_dir(hps)
    logger = make_logger(hps.base_result_dir, name='root', level=hps.level)
    logger.info('Created new base results folder at: {}'.format(
        hps.base_result_dir))
    logger.info('Starting experiment {} on environment {}'.format(
        hps.name, hps.game))

    # Write hyperparameters
    with open(hps.base_result_dir + 'hps.txt', 'w') as file:
        file.write(pformat(hps_to_dict(hps)))
    with open(hps.base_result_dir + 'hps_raw.txt', 'w') as file:
        file.write(hps_to_list(hps))

    run(hps)
        # Tail of next_token: mask padding, take the top-k logits at the next
        # position, and sample one token with temperature
        pad_mask = self._create_pad_mask(x)
        _, pred = self.net(x, pad_mask=pad_mask)
        topk = torch.topk(pred[:, nb_existing], topk, dim=-1)
        prob = torch.softmax(topk.values / temperature, dim=-1)
        # Gather the sampled index per batch row (plain column indexing would mix
        # rows when batch_size > 1)
        return torch.gather(topk.indices, -1,
                            torch.multinomial(prob, 1, replacement=True)).squeeze()

    def save_checkpoint(self, path: str):
        '''Save network, optimiser and (optional) gradient scaler state.'''
        chk = {
            'net': self.net.state_dict(),
            'opt': self.opt.state_dict(),
            'scaler': self.scaler.state_dict() if self.scaler else None,
        }
        torch.save(chk, path)

    def load_checkpoint(self, path: str):
        '''Restore network, optimiser and (optional) gradient scaler state.'''
        chk = torch.load(path, map_location=self.device)
        self.net.load_state_dict(chk['net'])
        self.opt.load_state_dict(chk['opt'])
        if self.scaler:
            self.scaler.load_state_dict(chk['scaler'])


if __name__ == '__main__':
    from hps import get_hps

    HPS = get_hps('maestro')
    l = TchAIkovskyLearner(HPS)
    print(f"> Number of learnable parameters: {l.get_parameter_count()}")

    x = torch.randint(0, HPS.nb_tokens,
                      (HPS.batch_size, HPS.sequence_length)).to(HPS.device)
    for _ in range(1000):
        print(l.next_token(x, 10))
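# Hedged usage sketch (not part of the original module): the checkpoint methods
# above can be round-tripped to pause and resume training; the filename
# 'learner_checkpoint.pt' is an illustrative assumption, not a path used by the
# source.
#
#     learner = TchAIkovskyLearner(get_hps('maestro'))
#     learner.save_checkpoint('learner_checkpoint.pt')
#     learner.load_checkpoint('learner_checkpoint.pt')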
def process(base_result_dir, overview_dir, rep_dir):
    print('Processing folder {}'.format(base_result_dir))

    # Load the hps associated with this folder
    try:
        with open(base_result_dir + '/hps_raw.txt', 'r') as f:
            hps_list = f.read()
        hps = get_hps().parse(hps_list)
    except Exception as e:
        print('Failed to read hps from base experiment folder {}: {}'.format(
            base_result_dir, e))
        print('Removing empty folder {}'.format(base_result_dir))
        shutil.rmtree(base_result_dir)
        return

    # Set-up plots
    ep_run_plot = xy_subplot(xlabel='episodes', ylabel='Episode reward',
                             row_item=hps.item4, row_seq=hps.seq4,
                             col_item=hps.item3, col_seq=hps.seq3)
    # av_run_plot = xy_subplot(xlabel='episodes',ylabel='Average reward',row_item=hps.item4,row_seq=hps.seq4,col_item=hps.item3,col_seq=hps.seq3)
    # Qsa_run_plot = xy_subplot(xlabel='episodes',ylabel='Qsa norm',row_item=hps.item4,row_seq=hps.seq4,col_item=hps.item3,col_seq=hps.seq3)
    # grad_run_plot = xy_subplot(xlabel='episodes',ylabel='Gradient norm',row_item=hps.item4,row_seq=hps.seq4,col_item=hps.item3,col_seq=hps.seq3)
    # loss_run_plot = xy_subplot(xlabel='episodes',ylabel='Loss',row_item=hps.item4,row_seq=hps.seq4,col_item=hps.item3,col_seq=hps.seq3)

    # Load in all data
    all_empty = True
    for it1, item1 in enumerate(hps.seq1):
        for it2, item2 in enumerate(hps.seq2):
            for it3, item3 in enumerate(hps.seq3):
                for it4, item4 in enumerate(hps.seq4):
                    result_dir = base_result_dir + '/subplots/'
                    if hps.loop_hyper:
                        result_dir += make_name('', hps.item1, item1, hps.item2, item2,
                                                hps.item3, item3, hps.item4, item4) + '/'
                    if not os.path.exists(result_dir):
                        continue
                    # ep_R_plot = xy_plot(ylabel='Episode reward',xlabel='episodes')
                    # av_R_plot = xy_plot(ylabel='Average reward',xlabel='episodes')

                    ep_c, ep_R_c, av_R_c, Qsa_c, grad_c, loss_c = (
                        np.array([]), np.array([]), np.array([]),
                        np.array([]), np.array([]), np.array([]))

                    for rep in range(hps.n_rep):
                        read_dir = result_dir + 'rep:{}'.format(rep) + '/'
                        if not os.path.exists(read_dir):
                            continue

                        # Load raw data
                        try:
                            eps = np.loadtxt(read_dir + 'episode_raw.txt')
                            ep_R = np.loadtxt(read_dir + 'ep_reward_raw.txt')
                            if (len(eps) > 0) and (len(ep_R) > 0):
                                # a txt file sometimes ends up empty when writing is cut off by a time limit
                                ep_c = np.append(ep_c, eps)
                                ep_R_c = np.append(ep_R_c, ep_R)
                                all_empty = False
                            # av_R = np.loadtxt(read_dir+'av_reward_raw.txt')
                            # Qsa_norm = np.loadtxt(read_dir+'Qsa_norm_raw.txt')
                            # grad_norm = np.loadtxt(read_dir+'grad_norm_raw.txt')
                            # loss = np.loadtxt(read_dir+'loss_raw.txt')
                        except Exception as e:
                            print(e)
                            continue
                        # av_R_c = np.append(av_R_c,av_R)
                        # Qsa_c = np.append(Qsa_c,Qsa_norm)
                        # grad_c = np.append(grad_c,grad_norm)
                        # loss_c = np.append(loss_c,loss)

                    # Finish repetition plots
                    # ep_R_plot.finish()
                    # av_R_plot.finish()
                    # ep_R_plot.save(result_dir+'episode_reward')
                    # av_R_plot.save(result_dir+'average_reward')

                    if len(ep_c) == 0 or len(ep_R_c) == 0:
                        print('No data found in {}, inserting placeholder data'.format(
                            base_result_dir))
                        ep_c = np.array([0, 1])
                        ep_R_c = np.array([0, 0])

                    # Do smoothing over repetitions
                    ep_run, ep_R_run = downsample_smooth(ep_c, ep_R_c,
                                                         down_len=1000, window=50)
                    np.savetxt(result_dir + 'episode_run.txt', ep_run, fmt='%.3g')
                    np.savetxt(result_dir + 'ep_reward_run.txt', ep_R_run, fmt='%.3g')

                    label = make_name('', hps.item1, item1, hps.item2, item2)
                    max_ep = 10000
                    ep_R_run = ep_R_run[ep_run < max_ep]
                    ep_run = ep_run[ep_run < max_ep]
                    ep_run_plot.add(x=ep_run, y=ep_R_run, row=it4, col=it3, label=label)

                    # try:
                    #     ep_run,ep_R_run,av_R_run,Qsa_run,grad_run,loss_run = downsample_smooth(ep_c,ep_R_c,av_R_c,Qsa_c,grad_c,loss_c,down_len=1000,window=50)
                    #     np.savetxt(result_dir+'av_reward_run.txt',av_R_run,fmt='%.3g')
                    #     np.savetxt(result_dir+'Qsa_run.txt',Qsa_run,fmt='%.3g')
                    #     np.savetxt(result_dir+'grad_run.txt',grad_run,fmt='%.3g')
                    #     np.savetxt(result_dir+'loss_run.txt',loss_run,fmt='%.3g')
                    #
                    #     # add to the higher level plot
                    #     label = make_name('',hps.item1,item1,hps.item2,item2)
                    #     av_run_plot.add(x=ep_run,y=av_R_run,row=it4,col=it3,label=label)
                    #     Qsa_run_plot.add(x=ep_run,y=Qsa_run,row=it4,col=it3,label=label)
                    #     grad_run_plot.add(x=ep_run,y=grad_run,row=it4,col=it3,label=label)
                    #     loss_run_plot.add(x=ep_run,y=loss_run,row=it4,col=it3,label=label)
                    # except:
                    #     pass

                    # try:
                    #     Qsa_run = downsample_smooth(ep_c,ep_R_c,av_R_c,down_len=1000,window=50)
                    #     np.savetxt(result_dir+'Qsa_norm_run.txt',Qsa_run,fmt='%.3g')
                    #     Qsa_run_plot.add(x=ep_run,y=Qsa_run,row=it4,col=it3,label=label)
                    # except:
                    #     pass

    if all_empty:
        print('Removing empty folder {}'.format(base_result_dir))
        shutil.rmtree(base_result_dir)
    else:
        ep_run_plot.finish()
        ep_run_plot.save(base_result_dir + '/Episode_reward_running', close=False)
        ep_run_plot.save(overview_dir + 'Episode_reward/{}_{}_{}'.format(
            hps.game, hps.name, rep_dir))

        # av_run_plot.finish()
        # av_run_plot.save(base_result_dir+'/Average_reward_running',close=False)
        # av_run_plot.save(overview_dir+'Average_reward/{}_{}_{}'.format(hps.game,hps.name,rep_dir))
        # Qsa_run_plot.finish()
        # Qsa_run_plot.save(base_result_dir+'/Qsa_norm_running',close=False)
        # Qsa_run_plot.save(overview_dir+'Qsa_norm/{}_{}_{}'.format(hps.game,hps.name,rep_dir))
        # grad_run_plot.finish()
        # grad_run_plot.save(base_result_dir+'/grad_norm_running',close=False)
        # grad_run_plot.save(overview_dir+'grad_norm/{}_{}_{}'.format(hps.game,hps.name,rep_dir))
        # loss_run_plot.finish()
        # loss_run_plot.save(base_result_dir+'/loss_running',close=False)
        # loss_run_plot.save(overview_dir+'loss/{}_{}_{}'.format(hps.game,hps.name,rep_dir))

    # Mark this folder as processed
    # os.rename(base_result_dir,base_result_dir+'d')
    print('Processed folder')
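# Hedged usage sketch (not part of the original file): process() appears to be
# invoked once per experiment folder by a surrounding crawl over result
# directories; the paths below are illustrative assumptions only.
#
#     process(base_result_dir='results/test/CartPole-v0/run0',
#             overview_dir='results/overview/',
#             rep_dir='run0')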
        # result.update({'seed':seed,'actions':a_store,'R':best_R})
        # best_R = R
        store_safely(hps.result_dir, 'result', result)

        if (global_t_mcts > hps.n_t) or (ep > hps.n_eps):
            break  # break out of episode loop

    return result


if __name__ == '__main__':
    '''Set-up training'''
    parser = argparse.ArgumentParser()
    parser.add_argument('--hp', help='Hyperparameter configuration', default='')
    parser.add_argument('--no_plot', action='store_true', default=False)
    args = parser.parse_args()

    hps = get_hps().parse(args.hp)
    hps = override_hps_settings(hps)

    # set-up result folder if not prespecified
    if hps.result_dir == '':
        result_folder = os.getcwd() + '/results/{}/{}/'.format(hps.name, hps.game)
        hps.result_dir = make_unique_subfolder(result_folder, hyperloop=False)
    else:
        with open(hps.result_dir + 'hps.txt', 'w') as file:
            file.write(pformat(hps_to_dict(hps)))
        # with open(subfolder + 'hps_raw.txt','w') as file:
        #     file.write(hps_to_list(hps))

    print(' ________________________________________ ')
    print('Start learning on game {}'.format(hps.game))
    result = agent(hps)
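# Hedged usage sketch (not part of the original script): the entry point above is
# presumably launched directly from the command line; the script name 'agent.py'
# and the keys in the --hp string are illustrative assumptions only.
#
#     python agent.py --hp 'game=CartPole-v0,n_eps=100' --no_plot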