def _build_connected_models(self, subtasks, job_id=None, use_task_scheduler=True):
    """Wrap *subtasks* in a connection task and register or return it.

    The connection task is given a job-id-qualified subtask name.  When a
    ``job_id`` is supplied it is recorded in the hps and appended to any
    string-valued weight file names so parallel jobs do not collide.
    Registered through the task scheduler when ``use_task_scheduler`` is
    True; otherwise a bare ``subtasktuple`` is returned.
    """
    conn_task_id = 'connection'
    conn_subtask_id = self.connectiontask_name_with_jobid(job_id)

    conn_task = self._ModelConnectionTask(
        name=conn_subtask_id,
        saver=self._saver,
        subtasks=subtasks,
        **self._connectiontask_args,
    )
    conn_task.task_id = conn_task_id
    conn_task.subtask_id = conn_subtask_id

    if job_id is not None:
        conn_task.set_hps({"job_id": job_id})
        # Qualify weight file names per job so concurrent jobs don't clash.
        if isinstance(conn_task._save_weights, str):
            conn_task._save_weights += f'__{job_id}'
        if isinstance(conn_task._load_weights, str):
            conn_task._load_weights += f'__{job_id}'

    if not use_task_scheduler:
        from multiml.hyperparameter import Hyperparameters
        from multiml.task_scheduler import subtasktuple
        return subtasktuple(conn_task_id, conn_subtask_id, conn_task, Hyperparameters())

    # Register outside the DAG and hand back the scheduler's own record.
    self._task_scheduler.add_task(task_id=conn_task_id, add_to_dag=False)
    self._task_scheduler.add_subtask(conn_task_id, conn_subtask_id, env=conn_task)
    return self._task_scheduler.get_subtask(conn_task_id, conn_subtask_id)
def test_agent():
    """Exercise GridSearchAgent end to end on a two-step scheduler."""
    envs = [MyTask() for _ in range(4)]
    hps = Hyperparameters({'hp_layer': [5, 10, 15], 'hp_epoch': [128, 256, 512]})

    task_scheduler = TaskScheduler(['step0', 'step1'])
    task_scheduler.add_subtask('step0', 'task0', env=envs[0], hps=hps)
    task_scheduler.add_subtask('step0', 'task1', env=envs[1])
    task_scheduler.add_subtask('step1', 'task2', env=envs[2], hps=hps)
    task_scheduler.add_subtask('step1', 'task3', env=envs[3])
    task_scheduler.show_info()
    task_scheduler.get_sorted_task_ids()

    # DAG structure checks.
    assert task_scheduler.get_sorted_task_ids() == ['step0', 'step1']
    assert task_scheduler.get_subtask_ids('step0') == ['task0', 'task1']
    assert task_scheduler.get_children_task_ids('step0') == ['step1']
    assert task_scheduler.get_parents_task_ids('step1') == ['step0']
    assert task_scheduler.get_subtask('step0', 'task0').env == envs[0]

    storegate = StoreGate(backend='numpy', data_id='test_agent')
    saver = Saver()
    metric = RandomMetric()
    logger.set_level(logger.DEBUG)

    agent = GridSearchAgent(saver=saver,
                            storegate=storegate,
                            task_scheduler=task_scheduler,
                            metric=metric,
                            dump_all_results=True)

    # Constructor wiring.
    assert agent._storegate is storegate
    assert agent._saver is saver
    assert agent._task_scheduler is task_scheduler
    assert agent._metric is metric

    # Property setters round-trip to the same objects.
    agent.storegate = storegate
    agent.saver = saver
    agent.task_scheduler = task_scheduler
    agent.metric = metric
    assert agent.storegate is storegate
    assert agent.saver is saver
    assert agent.task_scheduler is task_scheduler
    assert agent.metric is metric

    agent.execute()
    agent.finalize()
    assert agent.get_best_result().metric_value > 0
def test_agent_basic_grid_scan():
    """Run GridSearchAgent for both metric orderings, then reject a bad one."""
    saver = Saver()
    storegate = StoreGate(backend='numpy', data_id='test_agent')
    metric = RandomMetric()
    subtask0 = BaseTask()
    subtask1 = BaseTask()

    task_scheduler = TaskScheduler()
    task_scheduler.add_task('step0')
    task_scheduler.add_subtask('step0', 'task0', env=subtask0)
    task_scheduler.add_task('step1', parents=['step0'])
    task_scheduler.add_subtask('step1',
                               'task1',
                               env=subtask1,
                               hps=Hyperparameters({'job_id': [0, 1]}))

    # Both supported metric orderings must run to completion.
    for metric_type in ['min', 'max']:
        agent = GridSearchAgent(saver=saver,
                                storegate=storegate,
                                task_scheduler=task_scheduler,
                                metric=metric,
                                metric_type=metric_type,
                                dump_all_results=True)
        agent.execute()
        agent.finalize()
        agent.get_best_result()

    # An unknown metric_type must surface as NotImplementedError.
    with pytest.raises(NotImplementedError):
        bad_agent = GridSearchAgent(
            saver=saver,
            storegate=storegate,
            task_scheduler=task_scheduler,
            metric=metric,
            metric_type='dummy',
            dump_all_results=True,
        )
        bad_agent.execute()
        bad_agent.finalize()
def test_agent_basic_simple():
    """Smoke-test RandomSearchAgent on a minimal two-step scheduler."""
    saver = Saver()
    storegate = StoreGate(backend='numpy', data_id='test_agent')
    metric = RandomMetric()
    subtask0 = BaseTask()
    subtask1 = BaseTask()

    scheduler = TaskScheduler()
    scheduler.add_task('step0')
    scheduler.add_subtask('step0', 'task0', env=subtask0)
    scheduler.add_task('step1', parents=['step0'])
    scheduler.add_subtask('step1',
                          'task1',
                          env=subtask1,
                          hps=Hyperparameters({'job_id': [0, 1]}))

    agent = RandomSearchAgent(saver=saver,
                              storegate=storegate,
                              task_scheduler=scheduler,
                              metric=metric)
    agent.execute()
    agent.finalize()
def get_higgsId_subtasks(saver,
                         subtask_names=(),
                         truth_input=True,
                         batch_norm=False,
                         load_weights=True,
                         use_logits=True,
                         run_eagerly=None):
    """Build HiggsID Keras subtask definitions for the task scheduler.

    Fix: the default for ``subtask_names`` was a mutable list (``[]``);
    it is now an immutable empty tuple with identical behavior.

    Args:
        saver: multiml ``Saver`` forwarded to every task.
        subtask_names: names of models to build; each must be one of
            'mlp', 'lstm', 'mass', 'zero' or 'noise'.
        truth_input: if True feed truth tau 4-vectors, otherwise the
            corrected 4-vectors.
        batch_norm: enable batch normalization in the models.
        load_weights: if True run in test-only mode with pretrained
            weights (saving disabled, phases limited to 'test').
        use_logits: if True use ``BinaryCrossentropy(from_logits=True)``
            with a linear last activation, otherwise string BCE + sigmoid.
        run_eagerly: forwarded to the Keras tasks.

    Returns:
        list of dicts, each with 'subtask_id', 'env' and 'hps' keys.

    Raises:
        KeyError: if an entry of ``subtask_names`` is not recognized.
    """
    subtasks = []
    higgsId_args = {
        'saver': saver,
        'output_var_names': ('probability',),
        'true_var_names': 'label',
        'optimizer': 'adam',
        'optimizer_args': dict(learning_rate=1e-3),
        'num_epochs': 2,
        'max_patience': 1,
        'batch_size': 100,
        'phases': None,
        'save_weights': True,
        'run_eagerly': run_eagerly,
    }

    if load_weights:
        # Pretrained weights: evaluate only, never overwrite the weights.
        higgsId_args['load_weights'] = True
        higgsId_args['save_weights'] = False
        higgsId_args['phases'] = ['test']

    if use_logits:
        from tensorflow.keras.losses import BinaryCrossentropy
        higgsId_args['loss'] = BinaryCrossentropy(from_logits=True)
        activation_last = 'linear'
    else:
        higgsId_args['loss'] = 'binary_crossentropy'
        activation_last = 'sigmoid'

    higgsId_args['input_var_names'] = truth_tau_4vec if truth_input else corr_tau_4vec

    from multiml.hyperparameter import Hyperparameters

    for subtask_name in subtask_names:
        subtask = {}
        if subtask_name == 'mlp':
            from multiml.task.keras import MLPTask
            subtask['subtask_id'] = 'higgsId-mlp'
            subtask['env'] = MLPTask(name='higgsId-mlp',
                                     activation='relu',
                                     activation_last=activation_last,
                                     batch_norm=batch_norm,
                                     **higgsId_args)
            subtask['hps'] = Hyperparameters({'layers': [[32, 32, 32, 1]]})
        elif subtask_name == 'lstm':
            from multiml_htautau.task.keras import HiggsID_LSTMTask
            subtask['subtask_id'] = 'higgsId-lstm'
            subtask['env'] = HiggsID_LSTMTask(name='higgsId-lstm',
                                              input_njets=2,
                                              activation_last=activation_last,
                                              batch_norm=batch_norm,
                                              **higgsId_args)
            subtask['hps'] = Hyperparameters({'nodes': [[32, 32, 32, 1]]})
        elif subtask_name == 'mass':
            from multiml_htautau.task.keras import HiggsID_MassTask
            subtask['subtask_id'] = 'higgsId-mass'
            # scale_mass=1/125 — presumably normalizes to the 125 GeV
            # Higgs mass scale; confirm against the task implementation.
            subtask['env'] = HiggsID_MassTask(name='higgsId-mass',
                                              input_njets=2,
                                              activation='relu',
                                              activation_last=activation_last,
                                              batch_norm=batch_norm,
                                              scale_mass=1. / 125.,
                                              **higgsId_args)
            subtask['hps'] = Hyperparameters({'layers': [[64, 64, 1]]})
        elif subtask_name == 'zero':
            from multiml_htautau.task.keras import HiggsID_ZeroTask
            subtask['subtask_id'] = 'higgsId-zero'
            subtask['env'] = HiggsID_ZeroTask(name='higgsId-zero',
                                              input_njets=2,
                                              **higgsId_args)
            subtask['hps'] = None
        elif subtask_name == 'noise':
            from multiml_htautau.task.keras import HiggsID_NoiseTask
            subtask['subtask_id'] = 'higgsId-noise'
            subtask['env'] = HiggsID_NoiseTask(name='higgsId-noise',
                                               input_njets=2,
                                               **higgsId_args)
            subtask['hps'] = None
        else:
            raise KeyError(f"subtask_name = {subtask_name} is not defined.")
        subtasks.append(subtask)
    return subtasks
def get_tau4vec_subtasks(saver,
                         subtask_names=(),
                         batch_norm=False,
                         load_weights=True,
                         run_eagerly=None):
    """Build tau-4vec calibration Keras subtask definitions.

    Fix: the default for ``subtask_names`` was a mutable list (``[]``);
    it is now an immutable empty tuple with identical behavior.

    Args:
        saver: multiml ``Saver`` forwarded to every task.
        subtask_names: names of models to build; each must be one of
            'MLP', 'conv2D', 'SF', 'zero' or 'noise'.
        batch_norm: enable batch normalization where supported.
        load_weights: if True run in test-only mode with pretrained
            weights (phases limited to 'test', saving disabled).
        run_eagerly: forwarded to the Keras tasks.

    Returns:
        list of dicts, each with 'subtask_id', 'env' and 'hps' keys.

    Raises:
        KeyError: if an entry of ``subtask_names`` is not recognized.
    """
    subtasks = []
    from multiml_htautau.task.loss import Tau4vecCalibLoss_tf
    tau4vec_args = {
        'saver': saver,
        'true_var_names': truth_tau_4vec,
        'input_vars_energy': ('1stRecoJetEnergyMap', '2ndRecoJetEnergyMap'),
        'input_vars_jet': reco_tau_4vec,
        'output_var_names': corr_tau_4vec,
        'input_njets': 2,
        'optimizer': 'adam',
        'optimizer_args': dict(learning_rate=1e-3),
        'loss': Tau4vecCalibLoss_tf(pt_scale=1e-2, use_pxyz=True),
        'num_epochs': 2,
        'max_patience': 1,
        'batch_size': 100,
        'phases': None,
        'save_weights': True,
        'run_eagerly': run_eagerly,
    }

    if load_weights:
        # Pretrained weights: evaluate only, never overwrite the weights.
        tau4vec_args['load_weights'] = True
        tau4vec_args['save_weights'] = False
        tau4vec_args['phases'] = ['test']

    from multiml.hyperparameter import Hyperparameters

    for subtask_name in subtask_names:
        subtask = {}
        if subtask_name == 'MLP':
            from multiml_htautau.task.keras import Tau4vec_MLPTask
            subtask['subtask_id'] = 'tau4vec-MLP'
            subtask['env'] = Tau4vec_MLPTask(name='tau4vec-MLP',
                                             batch_norm=batch_norm,
                                             activation='relu',
                                             **tau4vec_args)
            subtask['hps'] = Hyperparameters({
                'layers_images': [[16, 16, 16, 4]],
                'layers_calib': [[32, 32, 3]],
            })
        elif subtask_name == 'conv2D':
            from multiml_htautau.task.keras import Tau4vec_Conv2DTask
            subtask['subtask_id'] = 'tau4vec-conv2D'
            layers_conv2d = [
                ('conv2d', {'filters': 32, 'kernel_size': (3, 3)}),
                ('conv2d', {'filters': 16, 'kernel_size': (3, 3)}),
                ('maxpooling2d', {'pool_size': (2, 2)}),
                ('conv2d', {'filters': 16, 'kernel_size': (2, 2)}),
                ('conv2d', {'filters': 8, 'kernel_size': (2, 2)}),
            ]
            subtask['env'] = Tau4vec_Conv2DTask(name='tau4vec-conv2D',
                                                batch_norm=batch_norm,
                                                activation='relu',
                                                **tau4vec_args)
            subtask['hps'] = Hyperparameters({
                'layers_conv2d': [layers_conv2d],
                'layers_images': [[16, 16, 16, 4]],
                'layers_calib': [[64, 64, 64, 3]],
            })
        elif subtask_name == 'SF':
            from multiml_htautau.task.keras import Tau4vec_SFTask
            subtask['subtask_id'] = 'tau4vec-SF'
            subtask['env'] = Tau4vec_SFTask(name='tau4vec-SF', **tau4vec_args)
            subtask['hps'] = None
        elif subtask_name == 'zero':
            from multiml_htautau.task.keras import Tau4vec_ZeroTask
            subtask['subtask_id'] = 'tau4vec-zero'
            subtask['env'] = Tau4vec_ZeroTask(name='tau4vec-zero', **tau4vec_args)
            subtask['hps'] = None
        elif subtask_name == 'noise':
            from multiml_htautau.task.keras import Tau4vec_NoiseTask
            subtask['subtask_id'] = 'tau4vec-noise'
            subtask['env'] = Tau4vec_NoiseTask(name='tau4vec-noise', **tau4vec_args)
            subtask['hps'] = None
        else:
            raise KeyError(f"subtask_name = {subtask_name} is not defined.")
        subtasks.append(subtask)
    return subtasks
[16, 16, 16, 1], [128, 128, 1], [64, 64, 1], [32, 32, 1], [16, 16, 1], [128, 1], [64, 1], [32, 1], [16, 1], ] subtask['env'] = MLPTask(name='MLP', activation='relu', activation_last=activation_last, batch_norm=batch_norm, **subtask_args) subtask['hps'] = Hyperparameters({'layers': layers}) subtasks.append(subtask) elif args.model == 'LSTM': from multiml_htautau.task.keras import HiggsID_LSTMTask subtask = {} subtask['subtask_id'] = 'LSTM' subtask['env'] = HiggsID_LSTMTask() nodes = [ [128, 128, 128, 1], [64, 64, 64, 1], [32, 32, 32, 1], [16, 16, 16, 1], [128, 128, 1], [64, 64, 1],
def get_higgsId_subtasks(config,
                         saver,
                         device='cpu',
                         subtask_names=[],
                         truth_input=True,
                         batch_norm=False,
                         load_weights=True,
                         use_logits=True):
    """Build HiggsID PyTorch subtask definitions from *config*.

    Fixes: removed the unused local ``save_dir`` and renamed the inner
    loop variable that shadowed the pretrain config object.

    Args:
        config: configuration object with ``pretrain`` and per-task
            sections under ``config.tasks``.
        saver: multiml ``Saver`` forwarded to every task.
        device: torch device string passed to the tasks.
        subtask_names: names of models to build; each must be one of
            'mlp', 'lstm' or 'mass'.
        truth_input: if True feed truth tau 4-vectors, otherwise the
            corrected 4-vectors.
        batch_norm: kept for interface compatibility; the per-task config
            value is what the tasks actually receive.
        load_weights: if True run in test-only mode with pretrained weights.
        use_logits: if True use 'BCEWithLogitsLoss' with an 'Identity'
            last activation, otherwise 'BCELoss' + 'Sigmoid'.

    Returns:
        list of dicts, each with 'subtask_id', 'env' and 'hps' keys.

    Raises:
        KeyError: if an entry of ``subtask_names`` is not recognized.
    """
    subtasks = []
    pretrain_conf = config.pretrain
    higgsId_args = {
        'saver': saver,
        'output_var_names': ('probability',),
        'true_var_names': ('label',),
        'optimizer': pretrain_conf.optimizer.name,
        'optimizer_args': dict(**pretrain_conf.optimizer.params),
        'num_epochs': pretrain_conf.epochs,
        'max_patience': pretrain_conf.patience,
        'batch_size': pretrain_conf.batch_size,
        'save_weights': True,
        'load_weights': False,
        'device': device,
        'verbose': pretrain_conf.verbose,
    }

    if load_weights:
        # Pretrained weights: evaluate only.
        higgsId_args['load_weights'] = True
        higgsId_args['phases'] = ['test']

    if use_logits:
        higgsId_args['loss'] = 'BCEWithLogitsLoss'
        activation_last = 'Identity'
    else:
        higgsId_args['loss'] = 'BCELoss'
        activation_last = 'Sigmoid'

    higgsId_args['input_var_names'] = truth_tau_4vec if truth_input else corr_tau_4vec

    from multiml.hyperparameter import Hyperparameters

    for subtask_name in subtask_names:
        subtask = {}
        if subtask_name == 'mlp':
            from multiml_htautau.task.pytorch import HiggsID_MLPTask
            task_conf = config.tasks.HiggsID_MLPTask
            subtask['subtask_id'] = 'higgsId-mlp'
            subtask['env'] = HiggsID_MLPTask(activation=task_conf.params.activation,
                                             activation_last=activation_last,
                                             batch_norm=task_conf.params.batch_norm,
                                             **higgsId_args)
            subtask['hps'] = Hyperparameters({'layers': [task_conf.params.layers]})
        elif subtask_name == 'lstm':
            from multiml_htautau.task.pytorch import HiggsID_LSTMTask
            task_conf = config.tasks.HiggsID_LSTMTask
            subtask['subtask_id'] = 'higgsId-lstm'
            subtask['env'] = HiggsID_LSTMTask(layers_mlp=task_conf.params.layers_mlp,
                                              n_jets=task_conf.params.n_jets,
                                              activation_last=activation_last,
                                              batch_norm=task_conf.params.batch_norm,
                                              **higgsId_args)
            subtask['hps'] = Hyperparameters({'layers_lstm': [task_conf.params.layers_lstm]})
        elif subtask_name == 'mass':
            from multiml_htautau.task.pytorch import HiggsID_MassTask
            task_conf = config.tasks.HiggsID_MassTask
            subtask['subtask_id'] = 'higgsId-mass'
            subtask['env'] = HiggsID_MassTask(n_jets=task_conf.params.n_jets,
                                              n_input_vars=task_conf.params.n_input_vars,
                                              activation=task_conf.params.activation,
                                              activation_last=activation_last,
                                              batch_norm=task_conf.params.batch_norm,
                                              scale_mass=task_conf.params.scale_mass,
                                              **higgsId_args)
            subtask['hps'] = Hyperparameters({'layers': [task_conf.params.layers]})
        else:
            raise KeyError(f"subtask_name = {subtask_name} is not defined.")
        subtasks.append(subtask)
    return subtasks
def get_tau4vec_subtasks(config,
                         saver,
                         subtask_names=[],
                         device='cpu',
                         batch_norm=False,
                         load_weights=True):
    """Build tau-4vec calibration PyTorch subtask definitions from *config*.

    Fixes: removed the unused local ``save_dir`` and renamed the inner
    loop variable that shadowed the pretrain config object.

    Args:
        config: configuration object with ``pretrain`` and per-task
            sections under ``config.tasks``.
        saver: multiml ``Saver`` forwarded to every task.
        subtask_names: names of models to build; each must be one of
            'MLP', 'conv2D' or 'SF'.
        device: torch device string passed to the tasks.
        batch_norm: kept for interface compatibility; the per-task config
            value is what the tasks actually receive.
        load_weights: if True run in test-only mode with pretrained weights.

    Returns:
        list of dicts, each with 'subtask_id', 'env' and 'hps' keys.

    Raises:
        KeyError: if an entry of ``subtask_names`` is not recognized.
    """
    subtasks = []
    pretrain_conf = config.pretrain
    from multiml_htautau.task.loss import Tau4vecCalibLoss_torch
    tau4vec_args = {
        'saver': saver,
        'true_var_names': truth_tau_4vec,
        'input_vars_energy': ('1stRecoJetEnergyMap', '2ndRecoJetEnergyMap'),
        'input_vars_jet': reco_tau_4vec,
        'output_var_names': corr_tau_4vec,
        'input_njets': 2,
        'optimizer': pretrain_conf.optimizer.name,
        'optimizer_args': dict(**pretrain_conf.optimizer.params),
        'loss': Tau4vecCalibLoss_torch(pt_scale=1e-2, use_pxyz=True),
        'num_epochs': pretrain_conf.epochs,
        'max_patience': pretrain_conf.patience,
        'batch_size': pretrain_conf.batch_size,
        'save_weights': True,
        'load_weights': False,
        'device': device,
        'verbose': pretrain_conf.verbose,
    }

    if load_weights:
        # Pretrained weights: evaluate only.
        tau4vec_args['load_weights'] = True
        tau4vec_args['phases'] = ['test']

    from multiml import Hyperparameters

    for subtask_name in subtask_names:
        subtask = {}
        if subtask_name == 'MLP':
            from multiml_htautau.task.pytorch import Tau4vec_MLPTask
            task_conf = config.tasks.Tau4vec_MLPTask
            subtask['subtask_id'] = 'tau4vec-MLP'
            subtask['env'] = Tau4vec_MLPTask(batch_norm=task_conf.params.batch_norm,
                                             activation=task_conf.params.activation,
                                             **tau4vec_args)
            subtask['hps'] = Hyperparameters({
                'layers_images': [task_conf.params.layers_images],
                'layers_calib': [task_conf.params.layers_calib],
            })
        elif subtask_name == 'conv2D':
            from multiml_htautau.task.pytorch import Tau4vec_Conv2DTask
            task_conf = config.tasks.Tau4vec_Conv2DTask
            subtask['subtask_id'] = 'tau4vec-conv2D'
            subtask['env'] = Tau4vec_Conv2DTask(batch_norm=task_conf.params.batch_norm,
                                                activation=task_conf.params.activation,
                                                **tau4vec_args)
            subtask['hps'] = Hyperparameters({
                'layers_conv2d': [task_conf.params.layers_conv2d],
                'layers_images': [task_conf.params.layers_images],
                'layers_calib': [task_conf.params.layers_calib],
            })
        elif subtask_name == 'SF':
            from multiml_htautau.task.pytorch import Tau4vec_SFTask
            subtask['subtask_id'] = 'tau4vec-SF'
            subtask['env'] = Tau4vec_SFTask(**tau4vec_args)
            subtask['hps'] = None
        else:
            raise KeyError(f"subtask_name = {subtask_name} is not defined.")
        subtasks.append(subtask)
    return subtasks