import os

save_dir = f'output/{os.path.basename(__file__)[:-3]}'

from run_utils import common_parser
parser = common_parser()
args = parser.parse_args()

from run_utils import add_suffix
save_dir = add_suffix(save_dir, args)

from run_utils import get_config_multi_loss
use_multi_loss, loss_weights = get_config_multi_loss(args)

from run_utils import preprocessing
saver, storegate, task_scheduler, metric = preprocessing(
    save_dir=save_dir,
    args=args,
    tau4vec_tasks=['MLP'],
    higgsId_tasks=['lstm'],
)

# Time measurements
from timer import timer
timer_reg = {}

# Agent
from multiml.agent.keras import KerasConnectionRandomSearchAgent
with timer(timer_reg, "initialize"):
    from my_tasks import mapping_truth_corr
    agent = KerasConnectionRandomSearchAgent(
        # BaseAgent
        saver=saver,
        storegate=storegate,
        task_scheduler=task_scheduler,
        metric=metric,
        # (the remaining agent options are truncated in the original snippet)
    )
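# ---------------------------------------------------------------------------
# The `timer` helper imported above is project-local and not shown in this
# snippet. A minimal sketch consistent with how it is used here (a context
# manager that stores [start, elapsed] under a label in the registry, which
# matches the timer_reg['execute'][1] walltime lookup in the ASNG-NAS script
# further below) could look like this -- an assumption, not the actual helper:
import time
from contextlib import contextmanager

@contextmanager
def timer(registry, label):
    # Record wall-clock start and elapsed seconds for the labelled block.
    start = time.time()
    try:
        yield
    finally:
        registry[label] = [start, time.time() - start]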
def main(conf: str,
         seed: int,
         gpu_index: int,
         data_path: str,
         event: int,
         weight: float,
         load_weights: bool,
         epoch: int,
         nopretraining: bool):
    global DEVICE
    from utils import load_config
    from run_utils import get_multi_loss, set_seed
    config = load_config(conf)
    if seed is not None:
        config.seed = seed
    if gpu_index is not None and DEVICE == device('cuda'):
        DEVICE = device(f'cuda:{gpu_index}')
    if data_path is not None:
        config['dataset']['params']['data_path'] = data_path
    if event is not None:
        config['dataset']['params']['max_events'] = int(event)
    set_seed(config.seed)

    use_multi_loss, loss_weights = get_multi_loss(weight)

    from run_utils import preprocessing
    saver, storegate, task_scheduler, metric = preprocessing(
        save_dir=save_dir,
        config=config,
        device=DEVICE,
        tau4vec_tasks=['MLP', 'conv2D', 'SF'],
        higgsId_tasks=['mlp', 'lstm', 'mass'],
    )

    # Time measurements
    from timer import timer
    timer_reg = {}

    phases = ['test'] if load_weights else ['train', 'valid', 'test']

    # Agent
    from multiml.agent.pytorch import PytorchSPOSNASAgent
    with timer(timer_reg, "initialize"):
        from my_tasks import mapping_truth_corr
        agent = PytorchSPOSNASAgent(
            # BaseAgent
            saver=saver,
            storegate=storegate,
            task_scheduler=task_scheduler,
            metric=metric,
            # EnsembleAgent
            training_choiceblock_model=['test'],
            # ConnectionSimpleAgent
            freeze_model_weights=False,
            do_pretraining=not nopretraining,
            connectiontask_args={
                "num_epochs": epoch,
                "max_patience": 10,
                "batch_size": 100,
                "load_weights": load_weights,
                "phases": phases,
                "loss_weights": loss_weights,
                "optimizer": "Adam",
                "optimizer_args": dict(lr=1e-3),
                "variable_mapping": mapping_truth_corr,
                "device": DEVICE,
            },
        )

    with timer(timer_reg, "execute"):
        agent.execute()

    with timer(timer_reg, "finalize"):
        agent.finalize()

    if not load_weights:
        with open(f"{saver.save_dir}/timer.pkl", 'wb') as f:
            import pickle
            pickle.dump(timer_reg, f)
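# ---------------------------------------------------------------------------
# How `main` receives its arguments is outside this snippet. A hypothetical
# click-based entry point (option names and defaults are assumptions, not
# taken from the source) could wire it up like this:
import click

@click.command()
@click.option('--conf', type=str, default='config.yaml')
@click.option('--seed', type=int, default=None)
@click.option('--gpu_index', type=int, default=None)
@click.option('--data_path', type=str, default=None)
@click.option('--event', type=int, default=None)
@click.option('--weight', type=float, default=0.5)
@click.option('--load_weights', is_flag=True)
@click.option('--epoch', type=int, default=100)
@click.option('--nopretraining', is_flag=True)
def cli(**kwargs):
    main(**kwargs)

if __name__ == '__main__':
    cli()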
def main(conf: str,
         seed: int,
         gpu_index: int,
         data_path: str,
         event: int,
         weight: float,
         load_weights: bool,
         nopretraining: bool):
    global DEVICE
    from utils import load_config
    from run_utils import get_multi_loss, set_seed
    config = load_config(conf)
    if seed is not None:
        config.seed = seed
    if gpu_index is not None and DEVICE == device('cuda'):
        DEVICE = device(f'cuda:{gpu_index}')
    if data_path is not None:
        config['dataset']['params']['data_path'] = data_path
    if event is not None:
        config['dataset']['params']['max_events'] = int(event)
    set_seed(config.seed)

    use_multi_loss, loss_weights = get_multi_loss(weight)

    from run_utils import preprocessing
    saver, storegate, task_scheduler, metric = preprocessing(
        save_dir=save_dir,
        config=config,
        device=DEVICE,
        tau4vec_tasks=['conv2D', 'MLP', 'SF'],
        higgsId_tasks=['lstm', 'mlp', 'mass'],
    )

    # Time measurements
    from timer import timer
    timer_reg = {}

    fix_submodel_weights = False
    phases = ['test'] if load_weights else ['train', 'valid', 'test']

    # Agent
    from multiml.agent.pytorch import PytorchConnectionGridSearchAgent
    with timer(timer_reg, "initialize"):
        from my_tasks import mapping_truth_corr
        agent = PytorchConnectionGridSearchAgent(
            # BaseAgent
            saver=saver,
            storegate=storegate,
            task_scheduler=task_scheduler,
            metric=metric,
            metric_type='max',
            dump_all_results=True,
            # ConnectionGridAgent
            reuse_pretraining=True,
            # ConnectionSimpleAgent
            freeze_model_weights=fix_submodel_weights,
            do_pretraining=not nopretraining,
            connectiontask_args={
                "num_epochs": 100,
                "max_patience": 10,
                "batch_size": 100,
                "save_weights": not load_weights,
                "load_weights": load_weights,
                "phases": phases,
                "loss_weights": loss_weights,
                "optimizer": "Adam",
                "optimizer_args": dict(lr=1e-3),
                "variable_mapping": mapping_truth_corr,
                "device": DEVICE,
            },
        )

    with timer(timer_reg, "execute"):
        agent.execute()

    with timer(timer_reg, "finalize"):
        agent.finalize()

    if not load_weights:
        with open(f"{saver.save_dir}/timer.pkl", 'wb') as f:
            import pickle
            pickle.dump(timer_reg, f)

    # Evaluate the best parameters
    result, config = agent.get_best_result()
    subtasks = []
    for task_id, subtask_id, params in zip(result['task_ids'],
                                           result['subtask_ids'],
                                           result['subtask_hps']):
        subtask = task_scheduler.get_subtask(task_id=task_id,
                                             subtask_id=subtask_id)
        params.update(load_weights=True, phases=['test'])
        subtask.env.set_hps(params)
        agent._execute_subtask(subtask, is_pretraining=True)
        subtasks.append(subtask.env)

    subtask = agent._build_connected_models(subtasks,
                                            job_id=config["job_id"],
                                            use_task_scheduler=False)
    subtask.env.task_id = 'connection'
    subtask.env.subtask_id = subtask.env.name
    subtask.env.set_hps({"load_weights": True, "phases": ['test']})
    agent._execute_subtask(subtask, is_pretraining=False)

    metric.storegate = storegate
    result_metric = metric.calculate()

    from multiml import logger
    logger.info(f'metric = {result_metric}')
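# ---------------------------------------------------------------------------
# The scripts above persist `timer_reg` to timer.pkl. Assuming the
# [start, elapsed] layout sketched earlier, the measurements can be inspected
# offline like this (the run directory is a placeholder):
import pickle

with open('output/<run_dir>/timer.pkl', 'rb') as f:
    timer_reg = pickle.load(f)

for label, (start, elapsed) in timer_reg.items():
    print(f'{label:<12} {elapsed:8.1f} s')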
import os

save_dir = f'output/{os.path.basename(__file__)[:-3]}'

from run_utils import common_parser
parser = common_parser()
args = parser.parse_args()

from run_utils import add_suffix
save_dir = add_suffix(save_dir, args)

from run_utils import preprocessing
saver, storegate, task_scheduler, metric = preprocessing(
    save_dir=save_dir,
    args=args,
    tau4vec_tasks=['MLP', 'conv2D', 'SF', 'zero', 'noise'],
    higgsId_tasks=['mlp', 'lstm', 'mass', 'zero', 'noise'],
    truth_intermediate_inputs=False,
)

# Time measurements
from timer import timer
timer_reg = {}

# Agent
from multiml.agent.basic import GridSearchAgent
with timer(timer_reg, "initialize"):
    agent = GridSearchAgent(
        # BaseAgent
        saver=saver,
        storegate=storegate,
        task_scheduler=task_scheduler,
        metric=metric,
        # (the remaining agent options are truncated in the original snippet)
    )
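# ---------------------------------------------------------------------------
# For scale: a grid search over the two candidate lists enumerates their
# Cartesian product, so the five tau4vec and five higgsId models above give
# 25 combinations ('zero' and 'noise' presumably act as control baselines):
from itertools import product

tau4vec_tasks = ['MLP', 'conv2D', 'SF', 'zero', 'noise']
higgsId_tasks = ['mlp', 'lstm', 'mass', 'zero', 'noise']

grid = list(product(tau4vec_tasks, higgsId_tasks))
print(len(grid))  # 25, e.g. ('MLP', 'mlp'), ('MLP', 'lstm'), ...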
def main(opts):
    logger.set_level(opts.loglevel)
    global DEVICE
    from utils import load_config
    from run_utils import get_multi_loss, set_seed
    config = load_config(opts.config)
    verbose = 1

    if opts.seed is not None:
        config['seed'] = opts.seed
    if opts.gpu_index is not None and DEVICE == device('cuda'):
        DEVICE = device(f'cuda:{opts.gpu_index}')
    if opts.data_path is not None:
        config['dataset']['params']['data_path'] = opts.data_path
    if opts.event is not None:
        config['dataset']['params']['max_events'] = int(opts.event)
    if opts.clip_value is not None:
        config['ASNG']['clip'] = opts.clip_value
    if opts.alpha is not None:
        config['ASNG']['alpha'] = opts.alpha
    if opts.lam is not None:
        config['ASNG']['lam'] = opts.lam
    if opts.delta is not None:
        config['ASNG']['delta'] = opts.delta
    if opts.epochs is not None:
        config['ASNG']['epochs'] = opts.epochs
    set_seed(config.seed)

    if opts.do_pretrain:
        jobid = 'pretrain_' + opts.jobid
    else:
        jobid = 'no_train_' + opts.jobid
    save_dir = (f'output/{os.path.basename(__file__)[:-3]}'
                f'_{opts.event}evt_weight{opts.weight}_{jobid}')

    use_multi_loss, loss_weights = get_multi_loss(opts.weight)

    from run_utils import preprocessing
    saver, storegate, task_scheduler, metric = preprocessing(
        save_dir=save_dir,
        config=config,
        device=DEVICE,
        tau4vec_tasks=['conv2D', 'MLP', 'SF'],
        higgsId_tasks=['lstm', 'mlp', 'mass'],
    )

    # Time measurements
    from timer import timer
    timer_reg = {}

    phases = ['test'] if opts.load_weights else ['train', 'valid', 'test']

    # Agent
    logger.info(f'lambda / alpha / delta is '
                f'{config.ASNG.lam} / {config.ASNG.alpha} / {config.ASNG.delta}')
    from multiml.agent.pytorch import PytorchASNGNASAgent
    with timer(timer_reg, "initialize"):
        from my_tasks import mapping_truth_corr
        config['ASNG']['connectiontask_args']['phases'] = phases
        config['ASNG']['connectiontask_args']['variable_mapping'] = mapping_truth_corr
        config['ASNG']['connectiontask_args']['device'] = DEVICE
        config['ASNG']['connectiontask_args']['loss_weights'] = loss_weights

        agent = PytorchASNGNASAgent(
            verbose=verbose,
            num_epochs=config.ASNG.epochs,
            max_patience=config.ASNG.patience,
            batch_size=config.ASNG.batch_size,
            asng_args=config.ASNG.asng_args,
            optimizer=config.ASNG.optimizer.name,
            optimizer_args=config.ASNG.optimizer.params,
            scheduler=config.ASNG.scheduler,
            # BaseAgent
            saver=saver,
            storegate=storegate,
            task_scheduler=task_scheduler,
            metric=metric,
            # EnsembleAgent
            # ConnectionSimpleAgent
            freeze_model_weights=False,
            do_pretraining=opts.do_pretrain,
            connectiontask_args=config.ASNG.connectiontask_args,
        )

    with timer(timer_reg, "execute"):
        agent.execute()

    with timer(timer_reg, "finalize"):
        agent.finalize()

    results = agent.results_json
    results['walltime'] = timer_reg['execute'][1]
    results['timer_reg'] = timer_reg
    results['seed'] = opts.seed
    results['nevents'] = opts.event * 2

    def print_dict(key, val):
        if isinstance(val, dict):
            for k, v in val.items():
                print_dict(f'{key} {k}', v)
        else:
            logger.info(f'{key: <30} : {val}')

    for key, val in results.items():
        print_dict(key, val)

    with open(f'{saver.save_dir}/result.run_connection_asngnas_'
              f'{opts.event}evt_weight{opts.weight}.json', 'w') as fo:
        json.dump([results], fo, indent=2)

    if not opts.load_weights:
        with open(f"{saver.save_dir}/timer.pkl", 'wb') as f:
            import pickle
            pickle.dump(timer_reg, f)

    # Post-processing: dump predictions for each phase
    variables = []
    from my_tasks import corr_tau_4vec
    variables.extend(corr_tau_4vec)
    variables.extend(['probability'])

    for phase in phases:
        storegate.set_data_id("")
        y_pred = np.array(storegate.get_data(phase=phase, var_names=variables))
        os.makedirs(f'{saver.save_dir}/pred/{phase}', exist_ok=True)
        for i, v in enumerate(variables):
            np.save(f'{saver.save_dir}/pred/{phase}/{v}', y_pred[i])
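# ---------------------------------------------------------------------------
# The per-variable arrays dumped above can be read back with np.load (np.save
# appends the .npy extension). A minimal reader sketch for the test phase,
# with the run directory as a placeholder:
import numpy as np
from my_tasks import corr_tau_4vec  # same variable list as used for the dump

variables = list(corr_tau_4vec) + ['probability']
pred_dir = 'output/<run_dir>/pred/test'  # fill in the actual saver.save_dir
preds = {v: np.load(f'{pred_dir}/{v}.npy') for v in variables}
print({v: a.shape for v, a in preds.items()})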