def main(_):
    hps = get_hps().parse(FLAGS.hpconfig)
    if hps.agent_index == 0:
        print('Writing to {}'.format(hps.result_dir))
    print('Starting process {}'.format(hps.agent_index))

    if not hps.slurm:
        raise ValueError(
            'This script should only be called from setup.py to run on a slurm cluster'
        )
    elif not hps.distributed:
        Agent(hps)
    else:
        # Build the cluster
        cluster_spec, hps = make_cluster(hps)
        if hps.agent_index == 0:
            print('Agent {} sees cluster {}'.format(
                hps.agent_index, cluster_spec.as_cluster_def()))
        #cluster = tf.train.ClusterSpec(cluster)
        if hps.job_type == 'ps':
            print('Starting parameter server {}'.format(hps.job_index))
            ps_function(hps, cluster_spec)
        else:
            print('Starting agent {}'.format(hps.job_index))
            Agent(hps, cluster_spec)
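
# A minimal sketch of what ps_function might look like (assumed; the real
# helper lives elsewhere in this repo). It starts a TF1-style parameter
# server for the job index given in hps and blocks, serving variables to
# the worker agents built above.
import tensorflow as tf

def ps_function(hps, cluster_spec):
    server = tf.train.Server(cluster_spec,
                             job_name='ps',
                             task_index=hps.job_index)
    server.join()  # block forever; workers read/write variables through this server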
Example 2
def main(_):
    """Set up training."""
    # parsing
    hps = get_hps().parse(FLAGS.hpconfig)
    import_matplotlib(hps)

    # Logging and saving
    hps.base_result_dir, hps.checkpoint_dir = make_result_dir(hps)
    logger = make_logger(hps.base_result_dir, name='root', level=hps.level)
    logger.info('Created new base results folder at: {}'.format(
        hps.base_result_dir))
    logger.info('Starting experiment {} on environment {}'.format(
        hps.name, hps.game))

    # Write hyperparameters
    with open(hps.base_result_dir + 'hps.txt', 'w') as file:
        file.write(pformat(hps_to_dict(hps)))
    with open(hps.base_result_dir + 'hps_raw.txt', 'w') as file:
        file.write(hps_to_list(hps))
    run(hps)
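
# A minimal sketch of a make_logger helper consistent with the call above
# (assumed; the project's implementation may differ). It writes log records
# both to stdout and to a file inside the result directory.
import logging
import os

def make_logger(base_result_dir, name='root', level='INFO'):
    logger = logging.getLogger(name)
    logger.setLevel(level)
    formatter = logging.Formatter('%(asctime)s %(levelname)s %(message)s')
    for handler in (logging.StreamHandler(),
                    logging.FileHandler(os.path.join(base_result_dir, 'log.txt'))):
        handler.setFormatter(formatter)
        logger.addHandler(handler)
    return logger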
Example 3
            pad_mask = self._create_pad_mask(x)
            _, pred = self.net(x, pad_mask=pad_mask)

            # keep the top-k logits at the position of interest
            top = torch.topk(pred[:, nb_existing], topk, dim=-1)
            prob = torch.softmax(top.values / temperature, dim=-1)
            # sample one of the k candidates per batch element
            return torch.gather(top.indices, -1,
                                torch.multinomial(prob, 1, replacement=True)).squeeze()

    def save_checkpoint(self, path: str):
        chk = {
            'net': self.net.state_dict(), 
            'opt': self.opt.state_dict(),
            'scaler': self.scaler.state_dict() if self.scaler else None,
        }
        torch.save(chk, path)

    def load_checkpoint(self, path: str):
        chk = torch.load(path, map_location=self.device)
        self.net.load_state_dict(chk['net'])
        self.opt.load_state_dict(chk['opt'])
        if self.scaler:
            self.scaler.load_state_dict(chk['scaler'])

if __name__ == '__main__':
    from hps import get_hps
    HPS = get_hps('maestro')
    l = TchAIkovskyLearner(HPS)
    print(f"> Number of learnable parameters: {l.get_parameter_count()}")
    x = torch.randint(0, HPS.nb_tokens, (HPS.batch_size, HPS.sequence_length)).to(HPS.device)
    for _ in range(1000):
        print(l.next_token(x, 10))
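
    # Hypothetical round-trip of the checkpoint helpers defined above
    # (the file name is illustrative, not part of the original example):
    l.save_checkpoint('tchaikovsky_latest.pt')
    l.load_checkpoint('tchaikovsky_latest.pt')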
Example 4
def process(base_result_dir, overview_dir, rep_dir):
    print('Processing folder {}'.format(base_result_dir))

    # load hps associated with this folder
    try:
        with open(base_result_dir + '/hps_raw.txt', 'r') as f:
            hps_list = f.read()
        hps = get_hps().parse(hps_list)
    except Exception as e:
        print('Failed to load hps from base experiment folder {}: {}'.format(
            base_result_dir, e))
        print('Removing empty folder {}'.format(base_result_dir))
        shutil.rmtree(base_result_dir)
        return

    # Set-up plots
    ep_run_plot = xy_subplot(xlabel='episodes',
                             ylabel='Episode reward',
                             row_item=hps.item4,
                             row_seq=hps.seq4,
                             col_item=hps.item3,
                             col_seq=hps.seq3)
    #    av_run_plot = xy_subplot(xlabel='episodes',ylabel='Average reward',row_item=hps.item4,row_seq=hps.seq4,col_item=hps.item3,col_seq=hps.seq3)
    #    Qsa_run_plot = xy_subplot(xlabel='episodes',ylabel='Qsa norm',row_item=hps.item4,row_seq=hps.seq4,col_item=hps.item3,col_seq=hps.seq3)
    #    grad_run_plot = xy_subplot(xlabel='episodes',ylabel='Gradient norm',row_item=hps.item4,row_seq=hps.seq4,col_item=hps.item3,col_seq=hps.seq3)
    #    loss_run_plot = xy_subplot(xlabel='episodes',ylabel='Loss',row_item=hps.item4,row_seq=hps.seq4,col_item=hps.item3,col_seq=hps.seq3)

    # load in all data
    all_empty = True
    for it1, item1 in enumerate(hps.seq1):
        for it2, item2 in enumerate(hps.seq2):
            for it3, item3 in enumerate(hps.seq3):
                for it4, item4 in enumerate(hps.seq4):
                    result_dir = base_result_dir + '/subplots/'
                    if hps.loop_hyper:
                        result_dir += make_name('', hps.item1, item1,
                                                hps.item2, item2, hps.item3,
                                                item3, hps.item4, item4) + '/'

                    if not os.path.exists(result_dir):
                        continue

                    #ep_R_plot = xy_plot(ylabel='Episode reward',xlabel='episodes')
                    #av_R_plot = xy_plot(ylabel='Average reward',xlabel='episodes')
                    ep_c, ep_R_c, av_R_c, Qsa_c, grad_c, loss_c = (
                        np.array([]), np.array([]), np.array([]),
                        np.array([]), np.array([]), np.array([]))

                    for rep in range(hps.n_rep):
                        read_dir = result_dir + 'rep:{}'.format(rep) + '/'
                        if not os.path.exists(read_dir):
                            continue

                        # Load raw data
                        try:
                            eps = np.loadtxt(read_dir + 'episode_raw.txt')
                            ep_R = np.loadtxt(read_dir + 'ep_reward_raw.txt')

                            if (len(eps) > 0) and (len(ep_R) > 0):
                                # a results file can be left empty when the job
                                # hits its time limit while writing; only append
                                # non-empty data
                                ep_c = np.append(ep_c, eps)
                                ep_R_c = np.append(ep_R_c, ep_R)

                            all_empty = False

                            #av_R = np.loadtxt(read_dir+'av_reward_raw.txt')
                            #Qsa_norm = np.loadtxt(read_dir+'Qsa_norm_raw.txt')
                            #grad_norm = np.loadtxt(read_dir+'grad_norm_raw.txt')
                            #loss = np.loadtxt(read_dir+'loss_raw.txt')
                        except Exception as e:
                            print(e)
                            continue

                            #av_R_c = np.append(av_R_c,av_R)
                            #Qsa_c = np.append(Qsa_c,Qsa_norm)
                            #grad_c = np.append(grad_c,grad_norm)
                            #loss_c = np.append(loss_c,loss)

                    # Finish repetition plots
                    #ep_R_plot.finish()
                    #av_R_plot.finish()
                    #ep_R_plot.save(result_dir+'episode_reward')
                    #av_R_plot.save(result_dir+'average_reward')

                    if len(ep_c) == 0 or len(ep_R_c) == 0:
                        print('No data found in {}, using placeholder values'.format(
                            base_result_dir))
                        ep_c = np.array([0, 1])
                        ep_R_c = np.array([0, 0])

                    # Do smoothing over repetitions
                    ep_run, ep_R_run = downsample_smooth(ep_c,
                                                         ep_R_c,
                                                         down_len=1000,
                                                         window=50)

                    np.savetxt(result_dir + 'episode_run.txt',
                               ep_run,
                               fmt='%.3g')
                    np.savetxt(result_dir + 'ep_reward_run.txt',
                               ep_R_run,
                               fmt='%.3g')

                    label = make_name('', hps.item1, item1, hps.item2, item2)

                    max_ep = 10000
                    ep_R_run = ep_R_run[ep_run < max_ep]
                    ep_run = ep_run[ep_run < max_ep]
                    ep_run_plot.add(x=ep_run,
                                    y=ep_R_run,
                                    row=it4,
                                    col=it3,
                                    label=label)

#                    try:
#                        ep_run,ep_R_run,av_R_run,Qsa_run,grad_run,loss_run = downsample_smooth(ep_c,ep_R_c,av_R_c,Qsa_c,grad_c,loss_c,down_len=1000,window=50)
#                        np.savetxt(result_dir+'av_reward_run.txt',av_R_run,fmt='%.3g')
#                        np.savetxt(result_dir+'Qsa_run.txt',Qsa_run,fmt='%.3g')
#                        np.savetxt(result_dir+'grad_run.txt',grad_run,fmt='%.3g')
#                        np.savetxt(result_dir+'loss_run.txt',loss_run,fmt='%.3g')
#
#
#                        # add to the higher level plot
#                        label = make_name('',hps.item1,item1,hps.item2,item2)
#                        av_run_plot.add(x=ep_run,y=av_R_run,row=it4,col=it3,label=label)
#                        Qsa_run_plot.add(x=ep_run,y=Qsa_run,row=it4,col=it3,label=label)
#                        grad_run_plot.add(x=ep_run,y=grad_run,row=it4,col=it3,label=label)
#                        loss_run_plot.add(x=ep_run,y=loss_run,row=it4,col=it3,label=label)
#                    except:
#                        pass
#try:
#    Qsa_run = downsample_smooth(ep_c,ep_R_c,av_R_c,down_len=1000,window=50)
#    np.savetxt(result_dir+'Qsa_norm_run.txt',Qsa_run,fmt='%.3g')
#    Qsa_run_plot.add(x=ep_run,y=Qsa_run,row=it4,col=it3,label=label)
#except:
#    pass
    if all_empty:
        print('Removing empty folder {}'.format(base_result_dir))
        shutil.rmtree(base_result_dir)
    else:
        ep_run_plot.finish()
        ep_run_plot.save(base_result_dir + '/Episode_reward_running',
                         close=False)
        ep_run_plot.save(
            overview_dir +
            'Episode_reward/{}_{}_{}'.format(hps.game, hps.name, rep_dir))


#        av_run_plot.finish()
#        av_run_plot.save(base_result_dir+'/Average_reward_running',close=False)
#        av_run_plot.save(overview_dir+'Average_reward/{}_{}_{}'.format(hps.game,hps.name,rep_dir))
#        Qsa_run_plot.finish()
#        Qsa_run_plot.save(base_result_dir+'/Qsa_norm_running',close=False)
#        Qsa_run_plot.save(overview_dir+'Qsa_norm/{}_{}_{}'.format(hps.game,hps.name,rep_dir))
#        grad_run_plot.finish()
#        grad_run_plot.save(base_result_dir+'/grad_norm_running',close=False)
#        grad_run_plot.save(overview_dir+'grad_norm/{}_{}_{}'.format(hps.game,hps.name,rep_dir))
#        loss_run_plot.finish()
#        loss_run_plot.save(base_result_dir+'/loss_running',close=False)
#        loss_run_plot.save(overview_dir+'loss/{}_{}_{}'.format(hps.game,hps.name,rep_dir))

# Mark this folder as processed
#os.rename(base_result_dir,base_result_dir+'d')
    print('Processed folder')
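
# A minimal sketch of downsample_smooth consistent with how process() calls it
# (assumed; the project's implementation may differ). It sorts the (episode,
# reward) pairs collected over repetitions, smooths the rewards with a running
# mean of `window` points, and resamples the curve to at most `down_len` points.
import numpy as np

def downsample_smooth(x, y, down_len=1000, window=50):
    order = np.argsort(x)
    x, y = np.asarray(x)[order], np.asarray(y)[order]
    kernel = np.ones(window) / window
    y_smooth = np.convolve(y, kernel, mode='same')  # running mean
    idx = np.linspace(0, len(x) - 1, num=min(down_len, len(x))).astype(int)
    return x[idx], y_smooth[idx]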
Example 5
            #    result.update({'seed':seed,'actions':a_store,'R':best_R})
            #    best_R = R
            store_safely(hps.result_dir, 'result', result)
  
            if (global_t_mcts > hps.n_t) or (ep > hps.n_eps):
                break # break out of episode loop
    
    return result
    
if __name__ == '__main__':
    # Set up training
    parser = argparse.ArgumentParser()
    parser.add_argument('--hp', help='Hyperparameter configuration', default='')
    parser.add_argument('--no_plot', action='store_true', default=False)
    args = parser.parse_args()
    hps = get_hps().parse(args.hp)
    hps = override_hps_settings(hps)

    # set up result folder if not prespecified
    if hps.result_dir == '':
        result_folder = os.getcwd() + '/results/{}/{}/'.format(hps.name, hps.game)
        hps.result_dir = make_unique_subfolder(result_folder, hyperloop=False)
    else:
        with open(hps.result_dir + 'hps.txt', 'w') as file:
            file.write(pformat(hps_to_dict(hps)))
    #with open(subfolder + 'hps_raw.txt','w') as file:
    #    file.write(hps_to_list(hps))
    print(' ________________________________________ ')
    print('Start learning on game {}'.format(hps.game))
    result = agent(hps)
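
# A minimal sketch of store_safely as it is used in the episode loop above
# (assumed; the real helper may differ). The result dict is written to a
# temporary file first and then renamed, so a job that hits its time limit
# mid-write cannot leave a corrupted result file behind.
import os
import numpy as np

def store_safely(folder, name, to_store):
    tmp_file = os.path.join(folder, name + '_tmp.npy')
    final_file = os.path.join(folder, name + '.npy')
    np.save(tmp_file, to_store)
    os.replace(tmp_file, final_file)  # atomic rename on POSIX filesystems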