Example #1
0
    def test_get_job_stats_logs_errors(self):
        """Submit one experiment as a job and cross-check the summaries.

        The test launches ``exp_list`` through ``hjb.run_exp_list_jobs``,
        asserts that ``borgy_dict.json`` exists in the folder named after the
        experiment's hash, asserts that the ``'table'`` entries returned by
        ``JobManager.get_summary`` and ``ResultManager.get_job_summary`` are
        equal, then kills the jobs and asserts that ``'CANCELLED'`` appears in
        the first reported status.
        """
        savedir_base = '/mnt/datasets/public/issam/tmp'
        exp_list = [
            {'model': {'name': 'mlp', 'n_layers': 30},
             'dataset': 'mnist',
             'batch_size': 1},
        ]
        job_config = {
            'volume': ['/mnt:/mnt'],
            'image': 'images.borgy.elementai.net/issam.laradji/main',
            'bid': '1',
            'restartable': '1',
            'gpu': '1',
            'mem': '20',
            'cpu': '2',
        }
        run_command = 'python example.py -ei <exp_id> -sb %s' % savedir_base
        # Jobs are launched from the directory that contains this test file.
        workdir = os.path.dirname(os.path.realpath(__file__))

        hjb.run_exp_list_jobs(
            exp_list,
            savedir_base=savedir_base,
            workdir=workdir,
            run_command=run_command,
            job_config=job_config,
            force_run=True,
            wait_seconds=0,
        )

        # The launch must have written borgy_dict.json for the experiment.
        borgy_dict_path = os.path.join(
            savedir_base, hu.hash_dict(exp_list[0]), 'borgy_dict.json')
        assert os.path.exists(borgy_dict_path)

        # Both managers must report the same summary table.
        jm = hjb.JobManager(exp_list=exp_list, savedir_base=savedir_base)
        job_manager_summary = jm.get_summary()
        rm = hr.ResultManager(exp_list=exp_list, savedir_base=savedir_base)
        result_manager_summary = rm.get_job_summary()
        assert result_manager_summary['table'].equals(
            job_manager_summary['table'])

        # After killing, the first status must mention CANCELLED.
        jm.kill_jobs()
        first_status = jm.get_summary()['status'][0]
        assert 'CANCELLED' in first_status
Example #2
0
    def update_rm(self):
        """Rebuild ``self.rm`` from the current save directory and filter.

        A new ``hr.ResultManager`` is created over the original manager's
        complete experiment list (``self.rm_original.exp_list_all``). The save
        directory and the filter are read from the ``.value`` of
        ``self.t_savedir_base`` and ``self.t_filterby_list``; ``verbose``,
        ``mode_key`` and ``has_score_list`` are copied from
        ``self.rm_original``.

        If the rebuilt manager ends up with no experiments, a message is
        displayed and the method returns. When experiments exist but none
        matched the filter, the score table of all experiments is displayed
        as well.
        """
        original = self.rm_original
        self.rm = hr.ResultManager(
            exp_list=original.exp_list_all,
            savedir_base=str(self.t_savedir_base.value),
            filterby_list=get_dict_from_str(str(self.t_filterby_list.value)),
            verbose=original.verbose,
            mode_key=original.mode_key,
            has_score_list=original.has_score_list,
        )

        if len(self.rm.exp_list) == 0:
            if self.rm.n_exp_all > 0:
                # The filter removed everything: report it and fall back to
                # showing the unfiltered score table.
                display('No experiments selected out of %d '
                        'for filterby_list %s' % (self.rm.n_exp_all,
                                                  self.rm.filterby_list))
                display('Table below shows all experiments.')
                score_table = hr.get_score_df(
                    exp_list=original.exp_list_all,
                    savedir_base=original.savedir_base)
                display(score_table)
            else:
                display('No experiments exist...')
            return
Example #3
0
    # NOTE(review): this excerpt starts mid-function; job_list_old, job_list,
    # jm, exp_list and savedir_base are defined above the visible lines.
    # Print both job counts, then require job_list to hold exactly one more
    # entry than job_list_old.
    print('jobs:', len(job_list_old), len(job_list))
    assert (len(job_list_old) + 1) == len(job_list)

    # command_list = []
    # for exp_dict in exp_list:
    #     command_list += []

    # hjb.run_command_list(command_list)
    # jm.launch_menu(command=command)
    # Launch the experiment list through the job manager with an echo command.
    jm.launch_exp_list(command='echo 2 -e <exp_id>',
                       reset=1,
                       in_parallel=False)

    # A job_dict.json must now exist in the folder named after the hash of
    # the first experiment.
    assert (os.path.exists(
        os.path.join(savedir_base, hu.hash_dict(exp_list[0]),
                     'job_dict.json')))
    # Print the summaries whose job_state is SUCCEEDED, then the summaries
    # grouped by job_state (called with return_count=True).
    summary_list = jm.get_summary_list()
    print(hr.filter_list(summary_list, {'job_state': 'SUCCEEDED'}))
    print(hr.group_list(summary_list, key='job_state', return_count=True))

    # NOTE(review): account_id is hard-coded here — confirm it is meant to be
    # committed.
    rm = hr.ResultManager(exp_list=exp_list,
                          savedir_base=savedir_base,
                          account_id='75ce4cee-6829-4274-80e1-77e89559ddfb')
    # The job summary is fetched but not checked; the comparison further down
    # is commented out.
    rm_summary_list = rm.get_job_summary()

    # Build a dashboard for the result manager and display it.
    db = hj.get_dashboard(rm, wide_display=True)
    db.display()
    # assert(rm_summary_list['table'].equals(jm_summary_list['table']))

    # jm.kill_jobs()
    # assert('CANCELLED' in jm.get_summary()['status'][0])
Example #4
0
    def test_get_result_manager(self):
        """Smoke-test ``hr.ResultManager`` on four saved experiments.

        The test clears ``.tmp_plots``, saves a ``score_list.pkl`` and an
        ``exp_dict.json`` for each experiment under a folder named after the
        experiment's hash, builds a ``ResultManager`` from that directory,
        calls its table/score/image getters, and saves every figure returned
        by ``get_plot`` and ``get_plot_all`` as a PNG. Returned values are not
        asserted on.
        """
        savedir_base = '.tmp_plots'
        # Start from an empty output directory.
        if os.path.exists(savedir_base):
            shutil.rmtree(savedir_base)

        # (exp_dict, score_list) pairs, saved in this order.
        experiments = [
            ({'model': {'name': 'mlp', 'n_layers': 30},
              'dataset': 'mnist', 'batch_size': 1},
             [{'epoch': 0, 'acc': 0.5}, {'epoch': 1, 'acc': 0.9}]),
            ({'model': {'name': 'mlp', 'n_layers': 30},
              'dataset': 'cifar10', 'batch_size': 1},
             [{'epoch': 0, 'acc': 0.25}, {'epoch': 1, 'acc': 1.24},
              {'epoch': 2, 'acc': 1.5}]),
            ({'model': {'name': 'lenet', 'n_layers': 30},
              'dataset': 'cifar10', 'batch_size': 1},
             [{'epoch': 0, 'acc': 0.35}, {'epoch': 1, 'acc': 1.2},
              {'epoch': 2, 'acc': 1.3}]),
            ({'model': {'name': 'lenet', 'n_layers': 30},
              'dataset': 'cifar10', 'batch_size': 5},
             [{'epoch': 0, 'acc': 0.15}, {'epoch': 1, 'acc': 1.21},
              {'epoch': 2, 'acc': 1.7}]),
        ]
        for exp_dict, score_list in experiments:
            exp_id = hu.hash_dict(exp_dict)
            hu.save_pkl(
                os.path.join(savedir_base, exp_id, 'score_list.pkl'),
                score_list)
            hu.save_json(
                os.path.join(savedir_base, exp_id, 'exp_dict.json'),
                exp_dict)

        rm = hr.ResultManager(savedir_base=savedir_base)

        # assert(len(rm.exp_groups) == 2)
        # for exp_list in rm.exp_groups:
        #     assert(exp_list[0]['dataset'] in ['mnist', 'cifar10'])

        # The getters are only exercised; their results are not inspected.
        rm.get_exp_list_df()
        rm.get_score_df(avg_across='dataset')
        rm.get_score_df(avg_across='dataset', add_prefix=True)
        rm.get_score_df()
        rm.get_score_lists()
        rm.get_images()
        rm.get_score_table()
        rm.get_exp_table()

        # Single-metric plot: one PNG per returned figure.
        figures = rm.get_plot(
            x_metric='epoch',
            y_metric='acc',
            title_list=['dataset'],
            legend_list=['model'],
        )
        for idx, figure in enumerate(figures):
            png_name = '%d.png' % idx
            figure.savefig(os.path.join(savedir_base, png_name))

        # Multi-metric plots, first ordered as groups-by-metrics.
        order = 'groups_by_metrics'
        figures = rm.get_plot_all(
            order=order,
            x_metric='epoch',
            y_metric_list=['acc', 'epoch'],
            title_list=['dataset'],
            legend_list=['model'],
            groupby_list=['dataset'],
            log_metric_list=['acc'],
            map_title_list=[{'mnist': 'MNIST'}, {'cifar10': 'CIFAR-10'}],
            map_xlabel_list=[{'epoch': 'EPOCHS'}],
            map_ylabel_list=[{'acc': 'Score'}],
            ylim_list=[[(0.5, 0.8), (0.5, 0.8)],
                       [(0.5, 0.8), (0.5, 0.8)]],
        )
        for idx, figure in enumerate(figures):
            png_name = '%s_%d.png' % (order, idx)
            figure.savefig(os.path.join(savedir_base, png_name))

        # Then ordered as metrics-by-groups, averaging across batch size.
        order = 'metrics_by_groups'
        figures = rm.get_plot_all(
            order=order,
            x_metric='epoch',
            y_metric_list=['acc', 'epoch'],
            title_list=['dataset'],
            legend_list=['model'],
            avg_across='batch_size',
        )
        for idx, figure in enumerate(figures):
            png_name = '%s_%d.png' % (order, idx)
            figure.savefig(os.path.join(savedir_base, png_name))
Example #5
0
    # NOTE(review): this excerpt starts mid-function; jm, job_id,
    # job_list_old, exp_list and savedir_base are defined above the visible
    # lines.
    # List the jobs and require the first listed job to carry job_id. The
    # single job is also fetched, but its value is not used below.
    job_list = jm.get_jobs()
    job = jm.get_job(job_id)
    assert job_list[0].id == job_id
    
    # jm.kill_job(job_list[0].id)
    # run
    # Print both job counts, then require job_list to hold exactly one more
    # entry than job_list_old.
    print('jobs:', len(job_list_old), len(job_list))
    assert (len(job_list_old) + 1) ==  len(job_list)

    # command_list = []
    # for exp_dict in exp_list:
    #     command_list += []

    # hjb.run_command_list(command_list)
    # jm.launch_menu(command=command)
    # Launch the experiment list through the job manager with an echo command.
    jm.launch_exp_list(command='echo 2 -e <exp_id>', reset=1, in_parallel=False)
    
    # A job_dict.json must now exist in the folder named after the hash of
    # the first experiment.
    assert(os.path.exists(os.path.join(savedir_base, hu.hash_dict(exp_list[0]), 'job_dict.json')))
    # Print the summaries whose job_state is SUCCEEDED, then the summaries
    # grouped by job_state (called with return_count=True).
    summary_list = jm.get_summary_list()
    print(hr.filter_list(summary_list, {'job_state':'SUCCEEDED'}))
    print(hr.group_list(summary_list, key='job_state', return_count=True))
    
    # The job summary is fetched but not checked; the comparison further down
    # is commented out.
    rm = hr.ResultManager(exp_list=exp_list, savedir_base=savedir_base)
    rm_summary_list = rm.get_job_summary()

    # Build a dashboard for the result manager and display it.
    db = hj.get_dashboard(rm,  wide_display=True)
    db.display()
    # assert(rm_summary_list['table'].equals(jm_summary_list['table']))
    
    # jm.kill_jobs()
    # assert('CANCELLED' in jm.get_summary()['status'][0])
Example #6
0
def test_toolkit():
    """Run the job manager through submit, launch, summary and dashboard.

    The test creates a ``hjb.JobManager`` for one experiment, submits a single
    ``echo`` command and asserts that the job listing grew by one with the new
    job first, launches the experiment list and asserts that
    ``job_dict.json`` was written, prints filtered and grouped job summaries,
    and finally displays a dashboard built from a ``hr.ResultManager``.
    """
    # toolkit tests
    import job_configs

    savedir_base = os.path.realpath(".tmp")
    os.makedirs(savedir_base, exist_ok=True)
    exp_list = [
        {
            "model": {"name": "mlp", "n_layers": 20},
            "dataset": "mnist",
            "batch_size": 1,
        },
    ]
    # Jobs are run from the directory that contains this test file.
    here = os.path.dirname(os.path.realpath(__file__))
    jm = hjb.JobManager(exp_list=exp_list,
                        savedir_base=savedir_base,
                        workdir=here,
                        job_config=job_configs.JOB_CONFIG)

    # Snapshot of the job listing before anything is submitted.
    jobs_before = jm.get_jobs()

    # Submit a single command, logging into a randomly numbered folder.
    logs_dir = "%s/%s" % (savedir_base, np.random.randint(1000))
    os.makedirs(logs_dir, exist_ok=True)
    command = "echo 2"
    job_id = jm.submit_job(command, workdir=jm.workdir, savedir_logs=logs_dir)

    # The new job must be listed first; the single-job lookup is only
    # exercised and its result is not inspected.
    jobs_after = jm.get_jobs()
    jm.get_job(job_id)
    assert jobs_after[0]["id"] == job_id

    # jm.kill_job(job_list[0].id)
    # run
    print("jobs:", len(jobs_before), len(jobs_after))
    assert len(jobs_before) + 1 == len(jobs_after)

    # command_list = []
    # for exp_dict in exp_list:
    #     command_list += []

    # hjb.run_command_list(command_list)
    # jm.launch_menu(command=command)
    jm.launch_exp_list(command="echo 2 -e <exp_id>", reset=1,
                       in_parallel=False)

    # Launching must have written job_dict.json for the experiment.
    job_dict_path = os.path.join(savedir_base, hu.hash_dict(exp_list[0]),
                                 "job_dict.json")
    assert os.path.exists(job_dict_path)

    summaries = jm.get_summary_list()
    print(hu.filter_list(summaries, {"job_state": "SUCCEEDED"}))
    print(hu.group_list(summaries, key="job_state", return_count=True))

    # The job summary is requested but its value is not checked.
    rm = hr.ResultManager(exp_list=exp_list, savedir_base=savedir_base)
    rm.get_job_summary()

    dashboard = hj.get_dashboard(rm, wide_display=True)
    dashboard.display()
Example #7
0
def test_get_result_manager():
    """Smoke-test ``hr.ResultManager`` on four saved experiments.

    The test clears ``.tmp_plots``, saves a ``score_list.pkl`` and an
    ``exp_dict.json`` for each experiment under a folder named after the
    experiment's hash, builds a ``ResultManager`` from that directory, calls
    its table/score/image getters, and saves every figure returned by
    ``get_plot`` and ``get_plot_all`` as a PNG. Returned values are not
    asserted on.
    """
    savedir_base = ".tmp_plots"
    # Start from an empty output directory.
    if os.path.exists(savedir_base):
        shutil.rmtree(savedir_base)

    # (exp_dict, score_list) pairs, saved in this order.
    experiments = [
        ({"model": {"name": "mlp", "n_layers": 30},
          "dataset": "mnist", "batch_size": 1},
         [{"epoch": 0, "acc": 0.5}, {"epoch": 1, "acc": 0.9}]),
        ({"model": {"name": "mlp", "n_layers": 30},
          "dataset": "cifar10", "batch_size": 1},
         [{"epoch": 0, "acc": 0.25}, {"epoch": 1, "acc": 1.24},
          {"epoch": 2, "acc": 1.5}]),
        ({"model": {"name": "lenet", "n_layers": 30},
          "dataset": "cifar10", "batch_size": 1},
         [{"epoch": 0, "acc": 0.35}, {"epoch": 1, "acc": 1.2},
          {"epoch": 2, "acc": 1.3}]),
        ({"model": {"name": "lenet", "n_layers": 30},
          "dataset": "cifar10", "batch_size": 5},
         [{"epoch": 0, "acc": 0.15}, {"epoch": 1, "acc": 1.21},
          {"epoch": 2, "acc": 1.7}]),
    ]
    for exp_dict, score_list in experiments:
        exp_id = hu.hash_dict(exp_dict)
        hu.save_pkl(os.path.join(savedir_base, exp_id, "score_list.pkl"),
                    score_list)
        hu.save_json(os.path.join(savedir_base, exp_id, "exp_dict.json"),
                     exp_dict)

    rm = hr.ResultManager(savedir_base=savedir_base)

    # assert(len(rm.exp_groups) == 2)
    # for exp_list in rm.exp_groups:
    #     assert(exp_list[0]['dataset'] in ['mnist', 'cifar10'])

    # The getters are only exercised; their results are not inspected.
    rm.get_exp_list_df()
    rm.get_score_df(avg_across="dataset")
    rm.get_score_df(avg_across="dataset")
    rm.get_score_df()
    rm.get_score_lists()
    rm.get_images()
    rm.get_score_table()
    rm.get_exp_table()

    # Single-metric plot: one PNG per returned figure.
    figures = rm.get_plot(x_metric="epoch", y_metric="acc",
                          title_list=["dataset"], legend_list=["model"])
    for idx, figure in enumerate(figures):
        png_name = "%d.png" % idx
        figure.savefig(os.path.join(savedir_base, png_name))

    # Multi-metric plots, first ordered as groups-by-metrics.
    order = "groups_by_metrics"
    figures = rm.get_plot_all(
        order=order,
        x_metric="epoch",
        y_metric_list=["acc", "epoch"],
        title_list=["dataset"],
        legend_list=["model"],
        groupby_list=["dataset"],
        log_metric_list=["acc"],
        map_title_list=[{"mnist": "MNIST"}, {"cifar10": "CIFAR-10"}],
        map_xlabel_list=[{"epoch": "EPOCHS"}],
        map_ylabel_list=[{"acc": "Score"}],
        ylim_list=[[(0.5, 0.8), (0.5, 0.8)], [(0.5, 0.8), (0.5, 0.8)]],
    )
    for idx, figure in enumerate(figures):
        png_name = "%s_%d.png" % (order, idx)
        figure.savefig(os.path.join(savedir_base, png_name))

    # Then ordered as metrics-by-groups, averaging across batch size.
    order = "metrics_by_groups"
    figures = rm.get_plot_all(
        order=order,
        x_metric="epoch",
        y_metric_list=["acc", "epoch"],
        title_list=["dataset"],
        legend_list=["model"],
        avg_across="batch_size",
    )
    for idx, figure in enumerate(figures):
        png_name = "%s_%d.png" % (order, idx)
        figure.savefig(os.path.join(savedir_base, png_name))