def test_wizard():
    """Smoke-test hw.run_wizard with a single one-experiment list.

    Runs the wizard under a throwaway savedir (".tmp") and writes a results
    notebook there, then removes the directory so reruns start clean.
    """
    savedir_base = ".tmp"
    try:
        hw.run_wizard(
            func=test_trainval,
            exp_list=[{"lr": 1e-3}],
            savedir_base=savedir_base,
            reset=0,
            results_fname=f"{savedir_base}/results.ipynb",
        )
    finally:
        # Clean up even when the wizard raises; the original removed the
        # directory only on success, leaking ".tmp" after a failed run.
        shutil.rmtree(savedir_base)
# 7. create main
if __name__ == '__main__':
    # 8. define a list of experiments: a learning-rate sweep on the
    # synthetic dataset with a linear model.
    exp_list = [
        {'lr': lr, 'dataset': 'syn', 'model': 'linear'}
        for lr in [1, 1e-1, 1e-2, 1e-3, 1e-4, 1e-5]
    ]

    # 9. Launch experiments using magic command
    parser = argparse.ArgumentParser()
    parser.add_argument('-sb', '--savedir_base', default=None,
                        help='Define the base directory where the experiments will be saved.')
    parser.add_argument("-r", "--reset", default=0, type=int,
                        help='Reset or resume the experiment.')
    parser.add_argument("-j", "--run_jobs", default=0, type=int,
                        help='Run jobs in cluster.')
    args, others = parser.parse_known_args()

    # Select the scheduler configuration requested on the command line:
    # 1 -> slurm, 2 -> toolkit job configs, anything else -> run locally.
    if args.run_jobs == 1:
        import slurm_config
        job_config = slurm_config.JOB_CONFIG
    elif args.run_jobs == 2:
        import job_configs
        job_config = job_configs.JOB_CONFIG
    else:
        job_config = None

    hw.run_wizard(func=trainval,
                  exp_list=exp_list,
                  savedir_base=args.savedir_base,
                  reset=args.reset,
                  job_config=job_config)
# NOTE(review): this chunk begins mid-statement — the opening of the
# "--reset" parser.add_argument(...) call lies before this view.
        default=0, type=int, help='Reset or resume the experiment.')

# Command-line flags controlling how the experiments run.
parser.add_argument("--debug", default=False, type=int, help='Debug mode.')
# NOTE(review): "--debug" mixes default=False with type=int — presumably a
# 0/1 flag (False == 0 so behavior matches); confirm intent.
parser.add_argument("-ei", "--exp_id", default=None,
                    help='Run a specific experiment based on its id.')
parser.add_argument("-j", "--run_jobs", default=0, type=int,
                    help='Run the experiments as jobs in the cluster.')
parser.add_argument(
    "-nw", "--num_workers", type=int, default=0,
    help='Specify the number of workers in the dataloader.')
parser.add_argument("-v", "--visualize_notebook", type=str, default='',
                    help='Create a jupyter file to visualize the results.')
parser.add_argument("-uc", "--use_cuda", type=int, default=1)
args, others = parser.parse_known_args()

# 9. Launch experiments using magic command
hw.run_wizard(func=trainval, exp_groups=exp_configs.EXP_GROUPS, args=args)
model = he.get_model(name=exp_dict['model'], exp_dict=exp_dict) # 3. load checkpoint chk_dict = hw.get_checkpoint(savedir) # 4. Add main loop for epoch in tqdm.tqdm(range(chk_dict['epoch'], 10), desc="Running Experiment"): # 5. train for one epoch train_dict = model.train_on_loader(train_loader, epoch=epoch) # 6. get and save metrics score_dict = {'epoch':epoch, 'acc': train_dict['train_acc'], 'loss':train_dict['train_loss']} chk_dict['score_list'] += [score_dict] images = model.vis_on_loader(train_loader) hw.save_checkpoint(savedir, score_list=chk_dict['score_list'], images=[images]) print('Experiment done\n') # 7. create main if __name__ == '__main__': # 8. define a list of experiments exp_list = [] for lr in [1, 1e-1, 1e-2, 1e-3, 1e-4, 1e-5, "bug"]: exp_list += [{'lr':lr, 'dataset':'mnist', 'model':'linear'}] # 9. Launch experiments using magic command hw.run_wizard(func=trainval, exp_list=exp_list, job_config=job_config.JOB_CONFIG)
# Savedir Arguments parser.add_argument('-sb', '--savedir_base', required=True) parser.add_argument('-d', '--datadir', required=True) # Others parser.add_argument("-r", "--reset", default=0, type=int) parser.add_argument("-j", "--run_jobs", default=0, type=int) parser.add_argument("-v", "--visualize_notebook", type=str, default='') args, others = parser.parse_known_args() # -- Launch Experiments # Get Experiment Groups import exp_configs exp_groups = exp_configs.EXP_GROUPS print('Launching exp_group: %s' % args.exp_group_list) if os.path.exists('job_configs.py'): import job_configs job_config = job_configs.JOB_CONFIG else: job_config = None # Run Selected Experiments hw.run_wizard(func=trainval, exp_groups=exp_groups, args=args, job_config=job_config)
args, others = parser.parse_known_args()

# Define a list of experiments for the requested group.
if args.exp_group == "syn":
    # Learning-rate sweep on the synthetic dataset with a linear model.
    exp_list = []
    for lr in [1, 1e-1, 1e-2, 1e-3, 1e-4, 1e-5]:
        exp_list += [{"lr": lr, "dataset": "syn", "model": "linear"}]
else:
    # Fail fast with a clear message; previously an unrecognized group
    # left exp_list unbound and the script crashed later with a NameError.
    raise ValueError(f"unknown exp_group: {args.exp_group!r}")

# Choose Job Scheduler
if args.job_scheduler == "slurm":
    import slurm_config
    job_config = slurm_config.JOB_CONFIG
elif args.job_scheduler == "toolkit":
    import job_configs
    job_config = job_configs.JOB_CONFIG
else:
    # No scheduler: run the experiments sequentially in-process.
    job_config = None

# Run experiments and create results file
hw.run_wizard(
    func=trainval,
    exp_list=exp_list,
    savedir_base=args.savedir_base,
    reset=args.reset,
    job_config=job_config,
    results_fname="results.ipynb",
)
# NOTE(review): chunk begins inside the training loop of a trainval-style
# function — epoch, train_dict, chk_dict, model, train_loader and savedir
# come from before this view; indentation below is reconstructed.
score_dict = {
    'epoch': epoch,
    'acc': train_dict['train_acc'],
    'loss': train_dict['train_loss']
}
chk_dict['score_list'] += [score_dict]

images = model.vis_on_loader(train_loader)
hw.save_checkpoint(savedir, score_list=chk_dict['score_list'],
                   images=[images])

print('Experiment done\n')


# 7. create main
if __name__ == '__main__':
    # 8. define a list of experiments
    exp_list = []
    # NOTE(review): "bug" is presumably a deliberately invalid learning
    # rate used to demonstrate failure handling — confirm.
    for lr in [1, 1e-1, 1e-2, 1e-3, 1e-4, 1e-5, "bug"]:
        exp_list += [{'lr': lr, 'dataset': 'mnist', 'model': 'linear'}]

    # 9. Launch experiments using magic command
    # Use the cluster job configuration only when job_config.py exists
    # locally; otherwise run without a scheduler.
    if os.path.exists('job_config.py'):
        import job_config
        jc = job_config.JOB_CONFIG
    else:
        jc = None

    hw.run_wizard(func=trainval, exp_list=exp_list, job_config=jc)
# NOTE(review): chunk begins at the tail of a trainval-style function —
# model_path, model and savedir are defined before this view.
ut.torch_save(model_path, model.get_state_dict())
print("Saved: %s" % savedir)


if __name__ == "__main__":
    import exp_configs

    # Command-line interface for launching the experiment groups.
    parser = argparse.ArgumentParser()
    parser.add_argument('-e', '--exp_group_list', nargs="+")
    parser.add_argument('-sb', '--savedir_base', required=True)
    parser.add_argument('-d', '--datadir', required=True)
    parser.add_argument("-r", "--reset", default=0, type=int)
    parser.add_argument("-c", "--cuda", default=1, type=int)
    parser.add_argument("-j", "--job_scheduler", default=None)
    parser.add_argument("-p", "--python_binary_path", default=None)
    args, others = parser.parse_known_args()

    # Get job configuration to launch experiments in the cluster
    # (only when a job_configs.py is present on disk).
    job_config = None
    if os.path.exists('job_configs.py'):
        import job_configs
        job_config = job_configs.JOB_CONFIG

    # Run experiments either sequentially or in the cluster
    hw.run_wizard(func=trainval,
                  exp_groups=exp_configs.EXP_GROUPS,
                  job_config=job_config,
                  job_scheduler=args.job_scheduler,
                  python_binary_path=args.python_binary_path,
                  use_threads=True,
                  args=args)