def test_get_job_stats_logs_errors(self):
    # return
    exp_list = [{'model': {'name': 'mlp', 'n_layers': 30},
                 'dataset': 'mnist', 'batch_size': 1}]
    savedir_base = '/mnt/datasets/public/issam/tmp'
    job_config = {
        'volume': ['/mnt:/mnt'],
        'image': 'images.borgy.elementai.net/issam.laradji/main',
        'bid': '1',
        'restartable': '1',
        'gpu': '1',
        'mem': '20',
        'cpu': '2',
    }
    run_command = ('python example.py -ei <exp_id> -sb %s' % (savedir_base))

    # submit the experiment list with the given job_config
    hjb.run_exp_list_jobs(exp_list,
                          savedir_base=savedir_base,
                          workdir=os.path.dirname(os.path.realpath(__file__)),
                          run_command=run_command,
                          job_config=job_config,
                          force_run=True,
                          wait_seconds=0)

    # the job metadata should have been saved under the experiment's hash folder
    assert os.path.exists(os.path.join(savedir_base,
                                       hu.hash_dict(exp_list[0]),
                                       'borgy_dict.json'))

    # summaries from JobManager and ResultManager should agree
    jm = hjb.JobManager(exp_list=exp_list, savedir_base=savedir_base)
    jm_summary_list = jm.get_summary()

    rm = hr.ResultManager(exp_list=exp_list, savedir_base=savedir_base)
    rm_summary_list = rm.get_job_summary()
    assert rm_summary_list['table'].equals(jm_summary_list['table'])

    # kill the jobs and confirm they were cancelled
    jm.kill_jobs()
    assert 'CANCELLED' in jm.get_summary()['status'][0]
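# The snippets in this file rely on a common set of imports. Below is a
# minimal sketch of the assumed header: the haven module aliases
# (hu, hr, hj, hjb) are inferred from how they are used in these tests and
# may not match the actual source file exactly.
import os
import shutil
import numpy as np
from haven import haven_utils as hu
from haven import haven_results as hr
from haven import haven_jobs as hjb
from haven import haven_jupyter as hj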
def update_rm(self):
    self.rm = hr.ResultManager(exp_list=self.rm_original.exp_list_all,
                               savedir_base=str(self.t_savedir_base.value),
                               filterby_list=get_dict_from_str(str(self.t_filterby_list.value)),
                               verbose=self.rm_original.verbose,
                               mode_key=self.rm_original.mode_key,
                               has_score_list=self.rm_original.has_score_list)

    if len(self.rm.exp_list) == 0:
        if self.rm.n_exp_all > 0:
            display('No experiments selected out of %d '
                    'for filterby_list %s' % (self.rm.n_exp_all,
                                              self.rm.filterby_list))
            display('Table below shows all experiments.')
            score_table = hr.get_score_df(exp_list=self.rm_original.exp_list_all,
                                          savedir_base=self.rm_original.savedir_base)
            display(score_table)
        else:
            display('No experiments exist...')
        return
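# `update_rm` above parses the filterby text box with `get_dict_from_str`.
# A minimal sketch of what such a helper could look like is shown below;
# this is an assumption for illustration, not the library's actual implementation.
import ast

def get_dict_from_str(string):
    """Parse a Python-literal string (e.g. "[{'dataset': 'mnist'}]") into an
    object; return None when the text box is empty."""
    if string is None or string.strip() == '':
        return None
    return ast.literal_eval(string)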
print('jobs:', len(job_list_old), len(job_list))
assert (len(job_list_old) + 1) == len(job_list)

# command_list = []
# for exp_dict in exp_list:
#     command_list += []
# hjb.run_command_list(command_list)
# jm.launch_menu(command=command)
jm.launch_exp_list(command='echo 2 -e <exp_id>', reset=1, in_parallel=False)
assert os.path.exists(os.path.join(savedir_base,
                                   hu.hash_dict(exp_list[0]),
                                   'job_dict.json'))

summary_list = jm.get_summary_list()
print(hr.filter_list(summary_list, {'job_state': 'SUCCEEDED'}))
print(hr.group_list(summary_list, key='job_state', return_count=True))

rm = hr.ResultManager(exp_list=exp_list,
                      savedir_base=savedir_base,
                      account_id='75ce4cee-6829-4274-80e1-77e89559ddfb')
rm_summary_list = rm.get_job_summary()

db = hj.get_dashboard(rm, wide_display=True)
db.display()

# assert(rm_summary_list['table'].equals(jm_summary_list['table']))
# jm.kill_jobs()
# assert('CANCELLED' in jm.get_summary()['status'][0])
def test_get_result_manager(self):
    # save a score_list and exp_dict for a few fake experiments
    savedir_base = '.tmp_plots'
    if os.path.exists(savedir_base):
        shutil.rmtree(savedir_base)

    exp_dict = {'model': {'name': 'mlp', 'n_layers': 30}, 'dataset': 'mnist', 'batch_size': 1}
    score_list = [{'epoch': 0, 'acc': 0.5}, {'epoch': 1, 'acc': 0.9}]
    hu.save_pkl(os.path.join(savedir_base, hu.hash_dict(exp_dict), 'score_list.pkl'), score_list)
    hu.save_json(os.path.join(savedir_base, hu.hash_dict(exp_dict), 'exp_dict.json'), exp_dict)

    exp_dict = {'model': {'name': 'mlp', 'n_layers': 30}, 'dataset': 'cifar10', 'batch_size': 1}
    score_list = [{'epoch': 0, 'acc': 0.25}, {'epoch': 1, 'acc': 1.24}, {'epoch': 2, 'acc': 1.5}]
    hu.save_pkl(os.path.join(savedir_base, hu.hash_dict(exp_dict), 'score_list.pkl'), score_list)
    hu.save_json(os.path.join(savedir_base, hu.hash_dict(exp_dict), 'exp_dict.json'), exp_dict)

    exp_dict = {'model': {'name': 'lenet', 'n_layers': 30}, 'dataset': 'cifar10', 'batch_size': 1}
    score_list = [{'epoch': 0, 'acc': 0.35}, {'epoch': 1, 'acc': 1.2}, {'epoch': 2, 'acc': 1.3}]
    hu.save_pkl(os.path.join(savedir_base, hu.hash_dict(exp_dict), 'score_list.pkl'), score_list)
    hu.save_json(os.path.join(savedir_base, hu.hash_dict(exp_dict), 'exp_dict.json'), exp_dict)

    exp_dict = {'model': {'name': 'lenet', 'n_layers': 30}, 'dataset': 'cifar10', 'batch_size': 5}
    score_list = [{'epoch': 0, 'acc': 0.15}, {'epoch': 1, 'acc': 1.21}, {'epoch': 2, 'acc': 1.7}]
    hu.save_pkl(os.path.join(savedir_base, hu.hash_dict(exp_dict), 'score_list.pkl'), score_list)
    hu.save_json(os.path.join(savedir_base, hu.hash_dict(exp_dict), 'exp_dict.json'), exp_dict)

    # build a ResultManager over the saved results
    rm = hr.ResultManager(savedir_base=savedir_base)

    # assert(len(rm.exp_groups) == 2)
    # for exp_list in rm.exp_groups:
    #     assert(exp_list[0]['dataset'] in ['mnist', 'cifar10'])

    # exercise the dataframe, score-list, and image getters
    rm.get_exp_list_df()
    rm.get_score_df(avg_across='dataset')
    rm.get_score_df(avg_across='dataset', add_prefix=True)
    rm.get_score_df()
    rm.get_score_lists()
    rm.get_images()

    table = rm.get_score_table()
    table = rm.get_exp_table()

    # plot acc vs. epoch and save the figures
    fig_list = rm.get_plot(x_metric='epoch', y_metric='acc',
                           title_list=['dataset'], legend_list=['model'])
    for i, fig in enumerate(fig_list):
        fig.savefig(os.path.join(savedir_base, '%d.png' % i))

    order = 'groups_by_metrics'
    fig_list = rm.get_plot_all(order=order,
                               x_metric='epoch',
                               y_metric_list=['acc', 'epoch'],
                               title_list=['dataset'],
                               legend_list=['model'],
                               groupby_list=['dataset'],
                               log_metric_list=['acc'],
                               map_title_list=[{'mnist': 'MNIST'}, {'cifar10': 'CIFAR-10'}],
                               map_xlabel_list=[{'epoch': 'EPOCHS'}],
                               map_ylabel_list=[{'acc': 'Score'}],
                               ylim_list=[[(0.5, 0.8), (0.5, 0.8)],
                                          [(0.5, 0.8), (0.5, 0.8)]])
    for i, fig in enumerate(fig_list):
        fig.savefig(os.path.join(savedir_base, '%s_%d.png' % (order, i)))

    order = 'metrics_by_groups'
    fig_list = rm.get_plot_all(order=order,
                               x_metric='epoch',
                               y_metric_list=['acc', 'epoch'],
                               title_list=['dataset'],
                               legend_list=['model'],
                               avg_across='batch_size')
    for i, fig in enumerate(fig_list):
        fig.savefig(os.path.join(savedir_base, '%s_%d.png' % (order, i)))
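# For reference, the writes above produce one folder per experiment, named by
# hu.hash_dict(exp_dict), which is the layout ResultManager scans:
#
#   .tmp_plots/
#       <hash of mnist/mlp exp_dict>/
#           exp_dict.json
#           score_list.pkl
#       <hash of cifar10/mlp exp_dict>/
#           exp_dict.json
#           score_list.pkl
#       ...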
job_list = jm.get_jobs()
job = jm.get_job(job_id)
assert job_list[0].id == job_id
# jm.kill_job(job_list[0].id)

# run
print('jobs:', len(job_list_old), len(job_list))
assert (len(job_list_old) + 1) == len(job_list)

# command_list = []
# for exp_dict in exp_list:
#     command_list += []
# hjb.run_command_list(command_list)
# jm.launch_menu(command=command)
jm.launch_exp_list(command='echo 2 -e <exp_id>', reset=1, in_parallel=False)
assert os.path.exists(os.path.join(savedir_base,
                                   hu.hash_dict(exp_list[0]),
                                   'job_dict.json'))

summary_list = jm.get_summary_list()
print(hr.filter_list(summary_list, {'job_state': 'SUCCEEDED'}))
print(hr.group_list(summary_list, key='job_state', return_count=True))

rm = hr.ResultManager(exp_list=exp_list, savedir_base=savedir_base)
rm_summary_list = rm.get_job_summary()

db = hj.get_dashboard(rm, wide_display=True)
db.display()

# assert(rm_summary_list['table'].equals(jm_summary_list['table']))
# jm.kill_jobs()
# assert('CANCELLED' in jm.get_summary()['status'][0])
def test_toolkit():
    # toolkit tests
    import job_configs

    exp_list = [{"model": {"name": "mlp", "n_layers": 20},
                 "dataset": "mnist", "batch_size": 1}]
    savedir_base = os.path.realpath(".tmp")
    os.makedirs(savedir_base, exist_ok=True)

    jm = hjb.JobManager(
        exp_list=exp_list,
        savedir_base=savedir_base,
        workdir=os.path.dirname(os.path.realpath(__file__)),
        job_config=job_configs.JOB_CONFIG,
    )

    # get jobs
    job_list_old = jm.get_jobs()

    # run single command
    savedir_logs = "%s/%s" % (savedir_base, np.random.randint(1000))
    os.makedirs(savedir_logs, exist_ok=True)
    command = "echo 2"
    job_id = jm.submit_job(command, workdir=jm.workdir, savedir_logs=savedir_logs)

    # get jobs
    job_list = jm.get_jobs()
    job = jm.get_job(job_id)
    assert job_list[0]["id"] == job_id
    # jm.kill_job(job_list[0].id)

    # run
    print("jobs:", len(job_list_old), len(job_list))
    assert (len(job_list_old) + 1) == len(job_list)

    # command_list = []
    # for exp_dict in exp_list:
    #     command_list += []
    # hjb.run_command_list(command_list)
    # jm.launch_menu(command=command)
    jm.launch_exp_list(command="echo 2 -e <exp_id>", reset=1, in_parallel=False)
    assert os.path.exists(
        os.path.join(savedir_base, hu.hash_dict(exp_list[0]), "job_dict.json"))

    summary_list = jm.get_summary_list()
    print(hu.filter_list(summary_list, {"job_state": "SUCCEEDED"}))
    print(hu.group_list(summary_list, key="job_state", return_count=True))

    rm = hr.ResultManager(exp_list=exp_list, savedir_base=savedir_base)
    rm_summary_list = rm.get_job_summary()

    db = hj.get_dashboard(rm, wide_display=True)
    db.display()
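# `test_toolkit` reads JOB_CONFIG from a local `job_configs` module that is not
# shown here. A plausible job_configs.py, mirroring the job_config dict used in
# test_get_job_stats_logs_errors above, could look like the sketch below
# (all values are placeholders, not the actual configuration):
JOB_CONFIG = {
    'volume': ['/mnt:/mnt'],
    'image': 'registry.example.com/user/image',  # placeholder image name
    'bid': '1',
    'restartable': '1',
    'gpu': '1',
    'mem': '20',
    'cpu': '2',
}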
def test_get_result_manager():
    # save a score_list
    savedir_base = ".tmp_plots"
    if os.path.exists(savedir_base):
        shutil.rmtree(savedir_base)

    exp_dict = {"model": {"name": "mlp", "n_layers": 30}, "dataset": "mnist", "batch_size": 1}
    score_list = [{"epoch": 0, "acc": 0.5}, {"epoch": 1, "acc": 0.9}]
    hu.save_pkl(os.path.join(savedir_base, hu.hash_dict(exp_dict), "score_list.pkl"), score_list)
    hu.save_json(os.path.join(savedir_base, hu.hash_dict(exp_dict), "exp_dict.json"), exp_dict)

    exp_dict = {"model": {"name": "mlp", "n_layers": 30}, "dataset": "cifar10", "batch_size": 1}
    score_list = [{"epoch": 0, "acc": 0.25}, {"epoch": 1, "acc": 1.24}, {"epoch": 2, "acc": 1.5}]
    hu.save_pkl(os.path.join(savedir_base, hu.hash_dict(exp_dict), "score_list.pkl"), score_list)
    hu.save_json(os.path.join(savedir_base, hu.hash_dict(exp_dict), "exp_dict.json"), exp_dict)

    exp_dict = {"model": {"name": "lenet", "n_layers": 30}, "dataset": "cifar10", "batch_size": 1}
    score_list = [{"epoch": 0, "acc": 0.35}, {"epoch": 1, "acc": 1.2}, {"epoch": 2, "acc": 1.3}]
    hu.save_pkl(os.path.join(savedir_base, hu.hash_dict(exp_dict), "score_list.pkl"), score_list)
    hu.save_json(os.path.join(savedir_base, hu.hash_dict(exp_dict), "exp_dict.json"), exp_dict)

    exp_dict = {"model": {"name": "lenet", "n_layers": 30}, "dataset": "cifar10", "batch_size": 5}
    score_list = [{"epoch": 0, "acc": 0.15}, {"epoch": 1, "acc": 1.21}, {"epoch": 2, "acc": 1.7}]
    hu.save_pkl(os.path.join(savedir_base, hu.hash_dict(exp_dict), "score_list.pkl"), score_list)
    hu.save_json(os.path.join(savedir_base, hu.hash_dict(exp_dict), "exp_dict.json"), exp_dict)

    rm = hr.ResultManager(savedir_base=savedir_base)

    # assert(len(rm.exp_groups) == 2)
    # for exp_list in rm.exp_groups:
    #     assert(exp_list[0]['dataset'] in ['mnist', 'cifar10'])

    rm.get_exp_list_df()
    rm.get_score_df(avg_across="dataset")
    rm.get_score_df(avg_across="dataset")
    rm.get_score_df()
    rm.get_score_lists()
    rm.get_images()

    table = rm.get_score_table()
    table = rm.get_exp_table()

    fig_list = rm.get_plot(x_metric="epoch", y_metric="acc",
                           title_list=["dataset"], legend_list=["model"])
    for i, fig in enumerate(fig_list):
        fig.savefig(os.path.join(savedir_base, "%d.png" % i))

    order = "groups_by_metrics"
    fig_list = rm.get_plot_all(
        order=order,
        x_metric="epoch",
        y_metric_list=["acc", "epoch"],
        title_list=["dataset"],
        legend_list=["model"],
        groupby_list=["dataset"],
        log_metric_list=["acc"],
        map_title_list=[{"mnist": "MNIST"}, {"cifar10": "CIFAR-10"}],
        map_xlabel_list=[{"epoch": "EPOCHS"}],
        map_ylabel_list=[{"acc": "Score"}],
        ylim_list=[[(0.5, 0.8), (0.5, 0.8)], [(0.5, 0.8), (0.5, 0.8)]],
    )
    for i, fig in enumerate(fig_list):
        fig.savefig(os.path.join(savedir_base, "%s_%d.png" % (order, i)))

    order = "metrics_by_groups"
    fig_list = rm.get_plot_all(
        order=order,
        x_metric="epoch",
        y_metric_list=["acc", "epoch"],
        title_list=["dataset"],
        legend_list=["model"],
        avg_across="batch_size",
    )
    for i, fig in enumerate(fig_list):
        fig.savefig(os.path.join(savedir_base, "%s_%d.png" % (order, i)))
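# Usage sketch (the file name below is a placeholder, assuming this file is
# collected by pytest): the module-level tests can be run with
#   python -m pytest test_haven.py -k "toolkit or result_manager" -s
# or test_get_result_manager() can be called directly from a script, since it
# only writes to the local .tmp_plots folder.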