def _build_connected_models(self,
                                subtasks,
                                job_id=None,
                                use_task_scheduler=True):
        """Create the model-connection subtask and return it wrapped.

        Args:
            subtasks: Forwarded as ``subtasks`` to ``self._ModelConnectionTask``.
            job_id: Optional job identifier. When given, it is stored as the
                ``job_id`` hyperparameter and appended (as ``__<job_id>``) to
                the save/load weight settings whenever those are strings.
            use_task_scheduler: If true, the subtask is registered under the
                ``'connection'`` task of ``self._task_scheduler`` and the
                scheduler's entry is returned; otherwise a standalone
                ``subtasktuple`` with empty hyperparameters is returned.

        Returns:
            The scheduler's subtask entry, or a freshly built ``subtasktuple``.
        """
        connection_task_id = 'connection'
        connection_name = self.connectiontask_name_with_jobid(job_id)

        connection = self._ModelConnectionTask(
            name=connection_name,
            saver=self._saver,
            subtasks=subtasks,
            **self._connectiontask_args,
        )
        connection.task_id = connection_task_id
        connection.subtask_id = connection_name

        if job_id is not None:
            connection.set_hps({"job_id": job_id})
            # Only string-valued weight settings get the per-job suffix;
            # other values (e.g. booleans) are left as they are.
            suffix = f'__{job_id}'
            for attr_name in ('_save_weights', '_load_weights'):
                current = getattr(connection, attr_name)
                if isinstance(current, str):
                    setattr(connection, attr_name, current + suffix)

        if not use_task_scheduler:
            from multiml.hyperparameter import Hyperparameters
            from multiml.task_scheduler import subtasktuple
            return subtasktuple(connection_task_id, connection_name,
                                connection, Hyperparameters())

        scheduler = self._task_scheduler
        scheduler.add_task(task_id=connection_task_id, add_to_dag=False)
        scheduler.add_subtask(connection_task_id, connection_name,
                              env=connection)
        return scheduler.get_subtask(connection_task_id, connection_name)
Esempio n. 2
0
def test_agent():
    """Check scheduler wiring, agent accessors and a full grid-search run."""
    envs = [MyTask() for _ in range(4)]

    search_space = Hyperparameters({
        'hp_layer': [5, 10, 15],
        'hp_epoch': [128, 256, 512],
    })

    scheduler = TaskScheduler(['step0', 'step1'])

    # In each step the first subtask carries the grid, the second has none.
    scheduler.add_subtask('step0', 'task0', env=envs[0], hps=search_space)
    scheduler.add_subtask('step0', 'task1', env=envs[1])
    scheduler.add_subtask('step1', 'task2', env=envs[2], hps=search_space)
    scheduler.add_subtask('step1', 'task3', env=envs[3])

    scheduler.show_info()
    scheduler.get_sorted_task_ids()

    assert scheduler.get_sorted_task_ids() == ['step0', 'step1']
    assert scheduler.get_subtask_ids('step0') == ['task0', 'task1']
    assert scheduler.get_children_task_ids('step0') == ['step1']
    assert scheduler.get_parents_task_ids('step1') == ['step0']
    assert scheduler.get_subtask('step0', 'task0').env == envs[0]

    store = StoreGate(backend='numpy', data_id='test_agent')
    saver = Saver()
    metric = RandomMetric()

    logger.set_level(logger.DEBUG)
    agent = GridSearchAgent(saver=saver,
                            storegate=store,
                            task_scheduler=scheduler,
                            metric=metric,
                            dump_all_results=True)

    components = {
        'storegate': store,
        'saver': saver,
        'task_scheduler': scheduler,
        'metric': metric,
    }

    # The constructor must keep the very objects it was given.
    for attr_name, expected in components.items():
        assert getattr(agent, f'_{attr_name}') is expected

    # Round-trip each component through its public attribute.
    for attr_name, value in components.items():
        setattr(agent, attr_name, value)

    for attr_name, expected in components.items():
        assert getattr(agent, attr_name) is expected

    agent.execute()
    agent.finalize()

    best = agent.get_best_result()
    assert best.metric_value > 0
def test_agent_basic_grid_scan():
    """Run GridSearchAgent for both metric types and reject an unknown one."""
    saver = Saver()
    storegate = StoreGate(backend='numpy', data_id='test_agent')
    scheduler = TaskScheduler()
    metric = RandomMetric()

    first_env = BaseTask()
    second_env = BaseTask()

    # Two-step chain: step0 -> step1, with a two-point job_id grid on step1.
    scheduler.add_task('step0')
    scheduler.add_subtask('step0', 'task0', env=first_env)
    scheduler.add_task('step1', parents=['step0'])
    job_grid = Hyperparameters({'job_id': [0, 1]})
    scheduler.add_subtask('step1', 'task1', env=second_env, hps=job_grid)

    agent_kwargs = dict(
        saver=saver,
        storegate=storegate,
        task_scheduler=scheduler,
        metric=metric,
        dump_all_results=True,
    )

    for metric_type in ('min', 'max'):
        agent = GridSearchAgent(metric_type=metric_type, **agent_kwargs)
        agent.execute()
        agent.finalize()

        agent.get_best_result()

    # An unsupported metric type is expected to raise somewhere between
    # construction and finalization.
    with pytest.raises(NotImplementedError):
        agent = GridSearchAgent(metric_type='dummy', **agent_kwargs)
        agent.execute()
        agent.finalize()
Esempio n. 4
0
def test_agent_basic_simple():
    """Smoke-test RandomSearchAgent on a two-step task chain."""
    saver = Saver()
    storegate = StoreGate(backend='numpy', data_id='test_agent')
    scheduler = TaskScheduler()
    metric = RandomMetric()

    first_env = BaseTask()
    second_env = BaseTask()

    # step0 has a single plain subtask; step1 depends on it and carries a
    # two-point job_id grid.
    scheduler.add_task('step0')
    scheduler.add_subtask('step0', 'task0', env=first_env)
    scheduler.add_task('step1', parents=['step0'])
    job_grid = Hyperparameters({'job_id': [0, 1]})
    scheduler.add_subtask('step1', 'task1', env=second_env, hps=job_grid)

    agent_kwargs = dict(
        saver=saver,
        storegate=storegate,
        task_scheduler=scheduler,
        metric=metric,
    )
    agent = RandomSearchAgent(**agent_kwargs)
    agent.execute()
    agent.finalize()
Esempio n. 5
0
def get_higgsId_subtasks(saver,
                         subtask_names=None,
                         truth_input=True,
                         batch_norm=False,
                         load_weights=True,
                         use_logits=True,
                         run_eagerly=None):
    """Build the Keras Higgs-ID subtask definitions for the requested models.

    Args:
        saver: Passed to every task as its ``saver`` argument.
        subtask_names: Iterable of model keys. Supported keys are ``'mlp'``,
            ``'lstm'``, ``'mass'``, ``'zero'`` and ``'noise'``. ``None`` (the
            default) is treated as an empty selection.
        truth_input: If true the tasks read ``truth_tau_4vec`` as input
            variables, otherwise ``corr_tau_4vec``.
        batch_norm: Forwarded to the ``'mlp'``, ``'lstm'`` and ``'mass'``
            tasks.
        load_weights: If true, tasks are configured to load weights, not to
            save them, and to run only the ``'test'`` phase.
        use_logits: If true the loss is ``BinaryCrossentropy(from_logits=True)``
            with a linear last activation; otherwise ``'binary_crossentropy'``
            with a sigmoid last activation.
        run_eagerly: Forwarded to every task as ``run_eagerly``.

    Returns:
        list[dict]: One dict per requested name, in request order, with the
        keys ``'subtask_id'``, ``'env'`` and ``'hps'``.

    Raises:
        KeyError: If a requested name is not one of the supported keys.
    """
    # A None sentinel replaces the former mutable ``[]`` default, which would
    # have been one list object shared by every call.
    if subtask_names is None:
        subtask_names = ()

    subtasks = []

    higgsId_args = {
        'saver': saver,
        'output_var_names': ('probability',),
        'true_var_names': 'label',
        'optimizer': 'adam',
        'optimizer_args': dict(learning_rate=1e-3),
        'num_epochs': 2,
        "max_patience": 1,
        'batch_size': 100,
        'phases': None,
        'save_weights': True,
        'run_eagerly': run_eagerly,
    }
    if load_weights:
        # Evaluation-only setup: reuse stored weights and skip training.
        higgsId_args['load_weights'] = True
        higgsId_args['save_weights'] = False
        higgsId_args['phases'] = ['test']

    if use_logits:
        # Imported lazily so TensorFlow is only required on this path.
        from tensorflow.keras.losses import BinaryCrossentropy
        higgsId_args['loss'] = BinaryCrossentropy(from_logits=True)
        activation_last = 'linear'
    else:
        higgsId_args['loss'] = 'binary_crossentropy'
        activation_last = 'sigmoid'

    if truth_input:
        higgsId_args['input_var_names'] = truth_tau_4vec
    else:
        higgsId_args['input_var_names'] = corr_tau_4vec

    from multiml.hyperparameter import Hyperparameters
    for subtask_name in subtask_names:
        subtask = {}
        # Task classes are imported per branch so that only the selected
        # models need to be importable.
        if subtask_name == 'mlp':
            from multiml.task.keras import MLPTask
            subtask['subtask_id'] = 'higgsId-mlp'
            subtask['env'] = MLPTask(name='higgsId-mlp',
                                     activation='relu',
                                     activation_last=activation_last,
                                     batch_norm=batch_norm,
                                     **higgsId_args)
            subtask['hps'] = Hyperparameters({'layers': [[32, 32, 32, 1]]})

        elif subtask_name == 'lstm':
            from multiml_htautau.task.keras import HiggsID_LSTMTask
            subtask['subtask_id'] = 'higgsId-lstm'
            subtask['env'] = HiggsID_LSTMTask(name='higgsId-lstm',
                                              input_njets=2,
                                              activation_last=activation_last,
                                              batch_norm=batch_norm,
                                              **higgsId_args)
            subtask['hps'] = Hyperparameters({'nodes': [[32, 32, 32, 1]]})

        elif subtask_name == 'mass':
            from multiml_htautau.task.keras import HiggsID_MassTask
            subtask['subtask_id'] = 'higgsId-mass'
            subtask['env'] = HiggsID_MassTask(name='higgsId-mass',
                                              input_njets=2,
                                              activation='relu',
                                              activation_last=activation_last,
                                              batch_norm=batch_norm,
                                              scale_mass=1. / 125.,
                                              **higgsId_args)
            subtask['hps'] = Hyperparameters({'layers': [[64, 64, 1]]})

        elif subtask_name == 'zero':
            from multiml_htautau.task.keras import HiggsID_ZeroTask
            subtask['subtask_id'] = 'higgsId-zero'
            subtask['env'] = HiggsID_ZeroTask(name='higgsId-zero',
                                              input_njets=2,
                                              **higgsId_args)
            subtask['hps'] = None

        elif subtask_name == 'noise':
            from multiml_htautau.task.keras import HiggsID_NoiseTask
            subtask['subtask_id'] = 'higgsId-noise'
            subtask['env'] = HiggsID_NoiseTask(name='higgsId-noise',
                                               input_njets=2,
                                               **higgsId_args)
            subtask['hps'] = None

        else:
            raise KeyError(f"subtask_name = {subtask_name} is not defined.")

        subtasks.append(subtask)
    return subtasks
Esempio n. 6
0
def get_tau4vec_subtasks(saver,
                         subtask_names=None,
                         batch_norm=False,
                         load_weights=True,
                         run_eagerly=None):
    """Build the Keras tau four-vector subtask definitions.

    Args:
        saver: Passed to every task as its ``saver`` argument.
        subtask_names: Iterable of model keys. Supported keys are ``'MLP'``,
            ``'conv2D'``, ``'SF'``, ``'zero'`` and ``'noise'``. ``None`` (the
            default) is treated as an empty selection.
        batch_norm: Forwarded to the ``'MLP'`` and ``'conv2D'`` tasks.
        load_weights: If true, tasks are configured to load weights, not to
            save them, and to run only the ``'test'`` phase.
        run_eagerly: Forwarded to every task as ``run_eagerly``.

    Returns:
        list[dict]: One dict per requested name, in request order, with the
        keys ``'subtask_id'``, ``'env'`` and ``'hps'``.

    Raises:
        KeyError: If a requested name is not one of the supported keys.
    """
    # A None sentinel replaces the former mutable ``[]`` default, which would
    # have been one list object shared by every call.
    if subtask_names is None:
        subtask_names = ()

    subtasks = []

    from multiml_htautau.task.loss import Tau4vecCalibLoss_tf
    tau4vec_args = {
        'saver': saver,
        'true_var_names': truth_tau_4vec,
        'input_vars_energy': ('1stRecoJetEnergyMap', '2ndRecoJetEnergyMap'),
        'input_vars_jet': reco_tau_4vec,
        'output_var_names': corr_tau_4vec,
        'input_njets': 2,
        'optimizer': 'adam',
        'optimizer_args': dict(learning_rate=1e-3),
        'loss': Tau4vecCalibLoss_tf(pt_scale=1e-2, use_pxyz=True),
        'num_epochs': 2,
        "max_patience": 1,
        'batch_size': 100,
        'phases': None,
        'save_weights': True,
        'run_eagerly': run_eagerly,
    }
    if load_weights:
        # Evaluation-only setup: reuse stored weights and skip training.
        tau4vec_args['load_weights'] = True
        tau4vec_args['save_weights'] = False
        tau4vec_args['phases'] = ['test']

    from multiml.hyperparameter import Hyperparameters
    for subtask_name in subtask_names:
        subtask = {}
        # Task classes are imported per branch so that only the selected
        # models need to be importable.
        if subtask_name == 'MLP':
            from multiml_htautau.task.keras import Tau4vec_MLPTask
            subtask['subtask_id'] = 'tau4vec-MLP'
            subtask['env'] = Tau4vec_MLPTask(name='tau4vec-MLP',
                                             batch_norm=batch_norm,
                                             activation='relu',
                                             **tau4vec_args)
            subtask['hps'] = Hyperparameters({
                'layers_images': [[16, 16, 16, 4]],
                'layers_calib': [[32, 32, 3]],
            })

        elif subtask_name == 'conv2D':
            from multiml_htautau.task.keras import Tau4vec_Conv2DTask
            subtask['subtask_id'] = 'tau4vec-conv2D'
            layers_conv2d = [
                ('conv2d', {
                    'filters': 32,
                    'kernel_size': (3, 3)
                }),
                ('conv2d', {
                    'filters': 16,
                    'kernel_size': (3, 3)
                }),
                ('maxpooling2d', {
                    'pool_size': (2, 2)
                }),
                ('conv2d', {
                    'filters': 16,
                    'kernel_size': (2, 2)
                }),
                ('conv2d', {
                    'filters': 8,
                    'kernel_size': (2, 2)
                }),
            ]
            subtask['env'] = Tau4vec_Conv2DTask(name='tau4vec-conv2D',
                                                batch_norm=batch_norm,
                                                activation='relu',
                                                **tau4vec_args)
            subtask['hps'] = Hyperparameters({
                'layers_conv2d': [layers_conv2d],
                'layers_images': [[16, 16, 16, 4]],
                'layers_calib': [[64, 64, 64, 3]],
            })

        elif subtask_name == 'SF':
            from multiml_htautau.task.keras import Tau4vec_SFTask
            subtask['subtask_id'] = 'tau4vec-SF'
            subtask['env'] = Tau4vec_SFTask(name='tau4vec-SF',
                                            **tau4vec_args)
            subtask['hps'] = None

        elif subtask_name == 'zero':
            from multiml_htautau.task.keras import Tau4vec_ZeroTask
            subtask['subtask_id'] = 'tau4vec-zero'
            subtask['env'] = Tau4vec_ZeroTask(name='tau4vec-zero',
                                              **tau4vec_args)
            subtask['hps'] = None

        elif subtask_name == 'noise':
            from multiml_htautau.task.keras import Tau4vec_NoiseTask
            subtask['subtask_id'] = 'tau4vec-noise'
            subtask['env'] = Tau4vec_NoiseTask(name='tau4vec-noise',
                                               **tau4vec_args)
            subtask['hps'] = None

        else:
            raise KeyError(f"subtask_name = {subtask_name} is not defined.")

        subtasks.append(subtask)
    return subtasks
        [16, 16, 16, 1],
        [128, 128, 1],
        [64, 64, 1],
        [32, 32, 1],
        [16, 16, 1],
        [128, 1],
        [64, 1],
        [32, 1],
        [16, 1],
    ]
    subtask['env'] = MLPTask(name='MLP',
                             activation='relu',
                             activation_last=activation_last,
                             batch_norm=batch_norm,
                             **subtask_args)
    subtask['hps'] = Hyperparameters({'layers': layers})

    subtasks.append(subtask)

elif args.model == 'LSTM':
    from multiml_htautau.task.keras import HiggsID_LSTMTask
    subtask = {}
    subtask['subtask_id'] = 'LSTM'
    subtask['env'] = HiggsID_LSTMTask()
    nodes = [
        [128, 128, 128, 1],
        [64, 64, 64, 1],
        [32, 32, 32, 1],
        [16, 16, 16, 1],
        [128, 128, 1],
        [64, 64, 1],
Esempio n. 8
0
def get_higgsId_subtasks(config,
                         saver,
                         device='cpu',
                         subtask_names=None,
                         truth_input=True,
                         batch_norm=False,
                         load_weights=True,
                         use_logits=True):
    """Build the PyTorch Higgs-ID subtask definitions from a configuration.

    Args:
        config: Configuration object. ``config.pretrain`` supplies the
            optimizer, epochs, patience, batch size and verbosity shared by
            all tasks; ``config.tasks.<TaskName>.params`` supplies the
            per-model settings.
        saver: Passed to every task as its ``saver`` argument.
        device: Forwarded to every task as ``device``.
        subtask_names: Iterable of model keys. Supported keys are ``'mlp'``,
            ``'lstm'`` and ``'mass'``. ``None`` (the default) is treated as
            an empty selection.
        truth_input: If true the tasks read ``truth_tau_4vec`` as input
            variables, otherwise ``corr_tau_4vec``.
        batch_norm: Not used by this function; each task takes its batch-norm
            setting from ``config.tasks.<TaskName>.params.batch_norm``. Kept
            for interface compatibility.
        load_weights: If true, tasks are configured to load weights and to
            run only the ``'test'`` phase.
        use_logits: If true the loss is ``'BCEWithLogitsLoss'`` with an
            ``'Identity'`` last activation; otherwise ``'BCELoss'`` with a
            ``'Sigmoid'`` last activation.

    Returns:
        list[dict]: One dict per requested name, in request order, with the
        keys ``'subtask_id'``, ``'env'`` and ``'hps'``.

    Raises:
        KeyError: If a requested name is not one of the supported keys.
    """
    # A None sentinel replaces the former mutable ``[]`` default, which would
    # have been one list object shared by every call.
    if subtask_names is None:
        subtask_names = ()

    subtasks = []

    pretrain_conf = config.pretrain
    higgsId_args = {
        'saver': saver,
        'output_var_names': ('probability',),
        'true_var_names': ('label',),
        'optimizer': pretrain_conf.optimizer.name,
        'optimizer_args': dict(**pretrain_conf.optimizer.params),
        'num_epochs': pretrain_conf.epochs,
        "max_patience": pretrain_conf.patience,
        'batch_size': pretrain_conf.batch_size,
        'save_weights': True,
        'load_weights': False,
        'device': device,
        'verbose': pretrain_conf.verbose,
    }
    if load_weights:
        # Evaluation-only setup: reuse stored weights and skip training.
        higgsId_args['load_weights'] = True
        higgsId_args['phases'] = ['test']

    if use_logits:
        higgsId_args['loss'] = 'BCEWithLogitsLoss'
        activation_last = 'Identity'
    else:
        higgsId_args['loss'] = 'BCELoss'
        activation_last = 'Sigmoid'

    if truth_input:
        higgsId_args['input_var_names'] = truth_tau_4vec
    else:
        higgsId_args['input_var_names'] = corr_tau_4vec

    from multiml.hyperparameter import Hyperparameters
    for subtask_name in subtask_names:
        subtask = {}
        # Task classes are imported per branch so that only the selected
        # models need to be importable.
        if subtask_name == 'mlp':
            from multiml_htautau.task.pytorch import HiggsID_MLPTask
            params = config.tasks.HiggsID_MLPTask.params
            subtask['subtask_id'] = 'higgsId-mlp'
            subtask['env'] = HiggsID_MLPTask(activation=params.activation,
                                             activation_last=activation_last,
                                             batch_norm=params.batch_norm,
                                             **higgsId_args)
            subtask['hps'] = Hyperparameters({'layers': [params.layers]})

        elif subtask_name == 'lstm':
            from multiml_htautau.task.pytorch import HiggsID_LSTMTask
            params = config.tasks.HiggsID_LSTMTask.params
            subtask['subtask_id'] = 'higgsId-lstm'
            subtask['env'] = HiggsID_LSTMTask(layers_mlp=params.layers_mlp,
                                              n_jets=params.n_jets,
                                              activation_last=activation_last,
                                              batch_norm=params.batch_norm,
                                              **higgsId_args)
            subtask['hps'] = Hyperparameters(
                {'layers_lstm': [params.layers_lstm]})

        elif subtask_name == 'mass':
            from multiml_htautau.task.pytorch import HiggsID_MassTask
            params = config.tasks.HiggsID_MassTask.params
            subtask['subtask_id'] = 'higgsId-mass'
            subtask['env'] = HiggsID_MassTask(n_jets=params.n_jets,
                                              n_input_vars=params.n_input_vars,
                                              activation=params.activation,
                                              activation_last=activation_last,
                                              batch_norm=params.batch_norm,
                                              scale_mass=params.scale_mass,
                                              **higgsId_args)
            subtask['hps'] = Hyperparameters({'layers': [params.layers]})

        else:
            raise KeyError(f"subtask_name = {subtask_name} is not defined.")

        subtasks.append(subtask)
    return subtasks
Esempio n. 9
0
def get_tau4vec_subtasks(config,
                         saver,
                         subtask_names=None,
                         device='cpu',
                         batch_norm=False,
                         load_weights=True):
    """Build the PyTorch tau four-vector subtask definitions from a config.

    Args:
        config: Configuration object. ``config.pretrain`` supplies the
            optimizer, epochs, patience, batch size and verbosity shared by
            all tasks; ``config.tasks.<TaskName>.params`` supplies the
            per-model settings.
        saver: Passed to every task as its ``saver`` argument.
        subtask_names: Iterable of model keys. Supported keys are ``'MLP'``,
            ``'conv2D'`` and ``'SF'``. ``None`` (the default) is treated as
            an empty selection.
        device: Forwarded to every task as ``device``.
        batch_norm: Not used by this function; each task takes its batch-norm
            setting from ``config.tasks.<TaskName>.params.batch_norm``. Kept
            for interface compatibility.
        load_weights: If true, tasks are configured to load weights and to
            run only the ``'test'`` phase.

    Returns:
        list[dict]: One dict per requested name, in request order, with the
        keys ``'subtask_id'``, ``'env'`` and ``'hps'``.

    Raises:
        KeyError: If a requested name is not one of the supported keys.
    """
    # A None sentinel replaces the former mutable ``[]`` default, which would
    # have been one list object shared by every call.
    if subtask_names is None:
        subtask_names = ()

    subtasks = []

    pretrain_conf = config.pretrain

    from multiml_htautau.task.loss import Tau4vecCalibLoss_torch
    tau4vec_args = {
        'saver': saver,
        'true_var_names': truth_tau_4vec,
        'input_vars_energy': ('1stRecoJetEnergyMap', '2ndRecoJetEnergyMap'),
        'input_vars_jet': reco_tau_4vec,
        'output_var_names': corr_tau_4vec,
        'input_njets': 2,
        'optimizer': pretrain_conf.optimizer.name,
        'optimizer_args': dict(**pretrain_conf.optimizer.params),
        'loss': Tau4vecCalibLoss_torch(pt_scale=1e-2, use_pxyz=True),
        'num_epochs': pretrain_conf.epochs,
        "max_patience": pretrain_conf.patience,
        'batch_size': pretrain_conf.batch_size,
        'save_weights': True,
        'load_weights': False,
        'device': device,
        'verbose': pretrain_conf.verbose,
    }
    if load_weights:
        # Evaluation-only setup: reuse stored weights and skip training.
        tau4vec_args['load_weights'] = True
        tau4vec_args['phases'] = ['test']

    from multiml import Hyperparameters
    for subtask_name in subtask_names:
        subtask = {}
        # Task classes are imported per branch so that only the selected
        # models need to be importable.
        if subtask_name == 'MLP':
            from multiml_htautau.task.pytorch import Tau4vec_MLPTask
            params = config.tasks.Tau4vec_MLPTask.params
            subtask['subtask_id'] = 'tau4vec-MLP'
            subtask['env'] = Tau4vec_MLPTask(batch_norm=params.batch_norm,
                                             activation=params.activation,
                                             **tau4vec_args)
            subtask['hps'] = Hyperparameters({
                'layers_images': [params.layers_images],
                'layers_calib': [params.layers_calib],
            })

        elif subtask_name == 'conv2D':
            from multiml_htautau.task.pytorch import Tau4vec_Conv2DTask
            params = config.tasks.Tau4vec_Conv2DTask.params
            subtask['subtask_id'] = 'tau4vec-conv2D'
            subtask['env'] = Tau4vec_Conv2DTask(batch_norm=params.batch_norm,
                                                activation=params.activation,
                                                **tau4vec_args)
            subtask['hps'] = Hyperparameters({
                'layers_conv2d': [params.layers_conv2d],
                'layers_images': [params.layers_images],
                'layers_calib': [params.layers_calib],
            })

        elif subtask_name == 'SF':
            from multiml_htautau.task.pytorch import Tau4vec_SFTask
            subtask['subtask_id'] = 'tau4vec-SF'
            subtask['env'] = Tau4vec_SFTask(**tau4vec_args)
            subtask['hps'] = None

        else:
            raise KeyError(f"subtask_name = {subtask_name} is not defined.")

        subtasks.append(subtask)
    return subtasks