예제 #1
0
def test_agent():
    """Run ``GridSearchAgent`` end to end on a two-step, four-subtask schedule.

    Checks the scheduler's ordering and lookup queries, the agent's
    constructor wiring and attribute setters, and finally that the best
    result reported after ``execute``/``finalize`` has a positive metric
    value.
    """
    # One separate MyTask instance per subtask.
    env0, env1, env2, env3 = MyTask(), MyTask(), MyTask(), MyTask()

    hps = Hyperparameters({
        'hp_layer': [5, 10, 15],
        'hp_epoch': [128, 256, 512],
    })

    task_scheduler = TaskScheduler(['step0', 'step1'])

    # In each step only the first subtask is registered with ``hps``.
    subtask_specs = (
        ('step0', 'task0', env0, {'hps': hps}),
        ('step0', 'task1', env1, {}),
        ('step1', 'task2', env2, {'hps': hps}),
        ('step1', 'task3', env3, {}),
    )
    for task_id, subtask_id, env, extra_kwargs in subtask_specs:
        task_scheduler.add_subtask(task_id,
                                   subtask_id,
                                   env=env,
                                   **extra_kwargs)

    task_scheduler.show_info()
    # Return value is discarded here; the same query is asserted below.
    task_scheduler.get_sorted_task_ids()

    assert task_scheduler.get_sorted_task_ids() == ['step0', 'step1']
    assert task_scheduler.get_subtask_ids('step0') == ['task0', 'task1']
    assert task_scheduler.get_children_task_ids('step0') == ['step1']
    assert task_scheduler.get_parents_task_ids('step1') == ['step0']
    assert task_scheduler.get_subtask('step0', 'task0').env == env0

    storegate = StoreGate(backend='numpy', data_id='test_agent')
    saver = Saver()
    metric = RandomMetric()

    logger.set_level(logger.DEBUG)
    agent = GridSearchAgent(
        saver=saver,
        storegate=storegate,
        task_scheduler=task_scheduler,
        metric=metric,
        dump_all_results=True,
    )

    # (public attribute name, object handed to the agent)
    components = (
        ('storegate', storegate),
        ('saver', saver),
        ('task_scheduler', task_scheduler),
        ('metric', metric),
    )

    # The constructor must store each object on the underscore attribute.
    for attr_name, component in components:
        assert getattr(agent, '_' + attr_name) is component

    # Assign every public attribute, then read each one back.
    for attr_name, component in components:
        setattr(agent, attr_name, component)

    for attr_name, component in components:
        assert getattr(agent, attr_name) is component

    agent.execute()
    agent.finalize()

    best_result = agent.get_best_result()
    assert best_result.metric_value > 0
예제 #2
0
def get_storegate(data_path='/tmp/onlyDiTau/', max_events=50000):
    """Build a numpy-backed ``StoreGate`` from the di-tau ``.npy`` files.

    For each file kind (``jet``, ``tau``, ``istau``, ``energy``) the first
    ``max_events`` rows of ``Htautau_<kind>.npy`` and ``Zpure_tau_<kind>.npy``
    are concatenated in that order, reordered with one fixed permutation and
    registered under the listed variable names. A ``label`` array (ones
    followed by zeros, ``max_events`` each) is reordered with the same
    permutation, so labels stay aligned with the data rows.

    Args:
        data_path: Directory containing the ``.npy`` files. A trailing path
            separator is optional.
        max_events: Number of rows taken from each file. The permutation
            covers ``2 * max_events`` indices, so a file with fewer rows
            makes the permuted indexing go out of range.

    Returns:
        The compiled ``StoreGate``.
    """
    import os

    phase = (0.6, 0.2, 0.2)

    # Draw the shuffle index from a fixed seed, then put the caller's global
    # RNG state back -- even if drawing the permutation fails.
    cur_seed = np.random.get_state()
    try:
        np.random.seed(1)
        permute = np.random.permutation(2 * max_events)
    finally:
        np.random.set_state(cur_seed)

    storegate = StoreGate(backend='numpy', data_id='')

    # (file-name suffix, variable names registered for that file)
    file_specs = [
        ("jet.npy",
         ('1stRecoJetPt', '1stRecoJetEta', '1stRecoJetPhi', '1stRecoJetMass',
          '2ndRecoJetPt', '2ndRecoJetEta', '2ndRecoJetPhi', '2ndRecoJetMass')),
        ("tau.npy",
         ('1stTruthTauJetPt', '1stTruthTauJetEta', '1stTruthTauJetPhi',
          '1stTruthTauJetMass', '2ndTruthTauJetPt', '2ndTruthTauJetEta',
          '2ndTruthTauJetPhi', '2ndTruthTauJetMass')),
        ("istau.npy", ('tauFlag1stJet', 'tauFlag2ndJet')),
        ("energy.npy", ('1stRecoJetEnergyMap', '2ndRecoJetEnergyMap')),
    ]

    for path, var_names in file_specs:
        # os.path.join (instead of string "+") keeps the path valid whether
        # or not ``data_path`` ends with a separator.
        data_list = [
            np.load(os.path.join(data_path, f"{label}_{path}"))[:max_events]
            for label in ('Htautau', 'Zpure_tau')
        ]
        data_loaded = np.concatenate(data_list)[permute]

        storegate.update_data(data=data_loaded,
                              var_names=var_names,
                              phase=phase)

    # Labels: 1 for the first (Htautau) half, 0 for the second (Zpure_tau)
    # half, reordered exactly like the data above.
    labels = np.concatenate([
        np.ones(max_events),
        np.zeros(max_events),
    ])[permute]

    storegate.update_data(data=labels, var_names='label', phase=phase)

    storegate.compile()
    storegate.show_info()

    return storegate
예제 #3
0
def build_storegate():
    """Create a compiled numpy ``StoreGate`` holding random toy data.

    Registers two normally distributed variables (``var0``, ``var1``) and a
    0/1 ``label`` drawn from a fair binomial, 100 rows each, all with the
    phase tuple ``(0.8, 0.1, 0.1)``.

    Returns:
        The compiled ``StoreGate`` with data id ``test_keras_mlp``.
    """
    storegate = StoreGate(backend='numpy', data_id='test_keras_mlp')

    # Draw order (features first, then labels) matters for reproducibility
    # under a seeded global RNG.
    features = np.random.normal(size=(100, 2))
    targets = np.random.binomial(n=1, p=0.5, size=(100, ))
    split = (0.8, 0.1, 0.1)

    for names, values in ((['var0', 'var1'], features), ('label', targets)):
        storegate.add_data(var_names=names, data=values, phase=split)

    storegate.compile()
    storegate.show_info()

    return storegate
예제 #4
0
def get_storegate(max_events=50000):
    """Create a compiled numpy ``StoreGate`` filled with random toy data.

    The four-vector variable names come from ``my_tasks`` (reco names followed
    by truth names); their values are drawn from a normal distribution. Two
    energy-map variables are drawn uniformly with shape
    ``(max_events, 2, 16, 16, 3)`` and ``label`` is a fair 0/1 binomial draw.

    Args:
        max_events: Number of rows generated for every variable.

    Returns:
        The compiled ``StoreGate``.
    """
    from my_tasks import reco_tau_4vec, truth_tau_4vec

    four_vector_names = tuple(reco_tau_4vec + truth_tau_4vec)
    energy_map_names = ('1stRecoJetEnergyMap', '2ndRecoJetEnergyMap')

    storegate = StoreGate(backend='numpy', data_id='')

    # Keep this draw order (four-vectors, energy maps, labels) so a seeded
    # global RNG yields the same arrays.
    four_vectors = np.random.normal(size=(max_events, len(four_vector_names)))
    energy_maps = np.random.uniform(size=(max_events, 2, 16, 16, 3))
    labels = np.random.binomial(n=1, p=0.5, size=(max_events, ))
    split = (0.6, 0.2, 0.2)

    datasets = (
        (four_vector_names, four_vectors),
        (energy_map_names, energy_maps),
        ('label', labels),
    )
    for names, values in datasets:
        storegate.add_data(var_names=names, data=values, phase=split)

    storegate.compile()
    storegate.show_info()

    return storegate
def test_agent_basic_grid_scan():
    """Run ``GridSearchAgent`` with each supported ``metric_type`` and a bad one.

    A two-step schedule is built (``step1`` depends on ``step0``; only
    ``task1`` has a hyperparameter grid). The agent is executed for
    ``metric_type`` ``'min'`` and ``'max'``; with ``'dummy'`` a
    ``NotImplementedError`` is expected somewhere between construction and
    ``finalize``.
    """
    saver = Saver()
    storegate = StoreGate(backend='numpy', data_id='test_agent')
    task_scheduler = TaskScheduler()
    metric = RandomMetric()

    subtask0 = BaseTask()
    subtask1 = BaseTask()

    task_scheduler.add_task('step0')
    task_scheduler.add_subtask('step0', 'task0', env=subtask0)
    task_scheduler.add_task('step1', parents=['step0'])
    task_scheduler.add_subtask(
        'step1',
        'task1',
        env=subtask1,
        hps=Hyperparameters({'job_id': [0, 1]}),
    )

    # Keyword arguments shared by every agent built in this test.
    agent_kwargs = {
        'saver': saver,
        'storegate': storegate,
        'task_scheduler': task_scheduler,
        'metric': metric,
        'dump_all_results': True,
    }

    for metric_type in ('min', 'max'):
        agent = GridSearchAgent(metric_type=metric_type, **agent_kwargs)
        agent.execute()
        agent.finalize()

        agent.get_best_result()

    # Construction, execute and finalize all stay inside the context because
    # the step that raises for an unknown metric_type is not visible here.
    with pytest.raises(NotImplementedError):
        agent = GridSearchAgent(metric_type='dummy', **agent_kwargs)
        agent.execute()
        agent.finalize()
예제 #6
0
def test_agent_basic_base():
    """Check ``BaseAgent`` attribute round-trips and that it runs without setup.

    Each of ``storegate``, ``saver``, ``task_scheduler`` and ``metric`` is
    assigned on a default-constructed agent and read back by identity; the
    agent is then executed, finalized and printed.
    """
    saver = Saver()
    storegate = StoreGate(backend='numpy', data_id='test_agent')
    task_scheduler = TaskScheduler()
    metric = RandomMetric()

    agent = BaseAgent()

    # (attribute name, object to assign), in assignment order.
    components = (
        ('storegate', storegate),
        ('saver', saver),
        ('task_scheduler', task_scheduler),
        ('metric', metric),
    )
    for attr_name, component in components:
        setattr(agent, attr_name, component)

    for attr_name, component in components:
        assert getattr(agent, attr_name) is component

    agent.execute()
    agent.finalize()

    print(agent)
예제 #7
0
def test_agent_basic_simple():
    """Smoke-test ``RandomSearchAgent`` on a two-step schedule.

    ``step1`` is registered with ``step0`` as its parent and its single
    subtask carries a two-value ``job_id`` hyperparameter grid. The test only
    requires ``execute`` and ``finalize`` to complete without raising.
    """
    saver = Saver()
    storegate = StoreGate(backend='numpy', data_id='test_agent')
    task_scheduler = TaskScheduler()
    metric = RandomMetric()

    first_env = BaseTask()
    second_env = BaseTask()

    task_scheduler.add_task('step0')
    task_scheduler.add_subtask('step0', 'task0', env=first_env)
    task_scheduler.add_task('step1', parents=['step0'])
    task_scheduler.add_subtask(
        'step1',
        'task1',
        env=second_env,
        hps=Hyperparameters({'job_id': [0, 1]}),
    )

    agent = RandomSearchAgent(
        saver=saver,
        storegate=storegate,
        task_scheduler=task_scheduler,
        metric=metric,
    )
    agent.execute()
    agent.finalize()
예제 #8
0
def get_storegate(data_path='/tmp/onlyDiTau/', max_events=50000):
    """Build a numpy-backed ``StoreGate`` from the di-tau ``.npy`` files.

    For each file kind (``jet``, ``tau``, ``istau``, ``energy``) the first
    ``max_events`` rows of ``Htautau_<kind>.npy`` and ``Zpure_tau_<kind>.npy``
    are concatenated in that order and reordered with one fixed permutation.
    The energy maps additionally have their last axis moved to position 2
    before being stored. A ``label`` array (ones followed by zeros,
    ``max_events`` each) is reordered with the same permutation.

    Args:
        data_path: Directory containing the ``.npy`` files. A trailing path
            separator is optional.
        max_events: Number of rows taken from each file. The permutation
            covers ``2 * max_events`` indices, so a file with fewer rows
            makes the permuted indexing go out of range.

    Returns:
        The compiled ``StoreGate``.
    """
    import os

    phase = (0.6, 0.2, 0.2)

    # Index for signal/background shuffle: drawn from a fixed seed, with the
    # caller's global RNG state restored afterwards even on failure.
    cur_seed = np.random.get_state()
    try:
        np.random.seed(1)
        permute = np.random.permutation(2 * max_events)
    finally:
        np.random.set_state(cur_seed)

    storegate = StoreGate(backend='numpy', data_id='')

    # (file-name suffix, variable names registered for that file)
    file_specs = [
        ("jet.npy", (
            '1stRecoJetPt', '1stRecoJetEta', '1stRecoJetPhi', '1stRecoJetMass',
            '2ndRecoJetPt', '2ndRecoJetEta', '2ndRecoJetPhi', '2ndRecoJetMass',
        )),
        ("tau.npy", (
            '1stTruthTauJetPt', '1stTruthTauJetEta', '1stTruthTauJetPhi',
            '1stTruthTauJetMass', '2ndTruthTauJetPt', '2ndTruthTauJetEta',
            '2ndTruthTauJetPhi', '2ndTruthTauJetMass',
        )),
        ("istau.npy", ('tauFlag1stJet', 'tauFlag2ndJet')),
        ("energy.npy", ('1stRecoJetEnergyMap', '2ndRecoJetEnergyMap')),
    ]

    for path, var_names in file_specs:
        # os.path.join (instead of string "+") keeps the path valid whether
        # or not ``data_path`` ends with a separator.
        data_list = [
            np.load(os.path.join(data_path, f"{label}_{path}"))[:max_events]
            for label in ('Htautau', 'Zpure_tau')
        ]
        data_loaded = np.concatenate(data_list)[permute]

        if path == "energy.npy":
            # For the PyTorch image axis order: move the last axis to
            # position 2 (presumably channels-last -> channels-first;
            # confirm against the stored map layout).
            data_loaded = np.transpose(data_loaded, (0, 1, 4, 2, 3))

        storegate.update_data(data=data_loaded,
                              var_names=var_names,
                              phase=phase)

    # Setting labels: 1 for the first (Htautau) half, 0 for the second
    # (Zpure_tau) half, reordered exactly like the data above.
    labels = np.concatenate([
        np.ones(max_events),
        np.zeros(max_events),
    ])[permute]

    storegate.update_data(data=labels, var_names='label', phase=phase)

    storegate.compile()

    return storegate
예제 #9
0
    def __init__(self, max_events, data_path, phase=None):
        """Load the di-tau ``.npy`` files into ``self.storegate``.

        For each file kind (``jet``, ``tau``, ``istau``, ``energy``) the first
        ``max_events`` rows of the ``Htautau`` and ``Zpure_tau`` files are
        concatenated in that order, reordered with one fixed permutation and
        stored under the matching ``self.*_vals`` names. A ``label`` array
        (ones followed by zeros, ``max_events`` each) is reordered with the
        same permutation. The store is then compiled and the requested phase
        is activated via ``self._swich_phase()``.

        Args:
            max_events: Number of rows taken from each file. The permutation
                covers ``2 * max_events`` indices, so a file with fewer rows
                makes the permuted indexing go out of range.
            data_path: Directory containing the ``.npy`` files. A trailing
                path separator is optional.
            phase: Phase name stored on ``self.phase``; ``None`` selects
                ``'train'``.
        """
        import os

        from multiml.storegate import StoreGate
        from multiml.data.numpy import NumpyFlatData

        split = (0.6, 0.2, 0.2)

        # Draw the shuffle index from a fixed seed, then restore the caller's
        # global RNG state -- even if drawing the permutation fails.
        cur_seed = np.random.get_state()
        try:
            np.random.seed(1)
            permute = np.random.permutation(2 * max_events)
        finally:
            np.random.set_state(cur_seed)

        self.storegate = StoreGate(backend='numpy', data_id='')
        self.jet_vals = [
            '1stRecoJetPt', '1stRecoJetEta', '1stRecoJetPhi', '1stRecoJetMass',
            '2ndRecoJetPt', '2ndRecoJetEta', '2ndRecoJetPhi', '2ndRecoJetMass'
        ]
        self.tau_vals = [
            '1stTruthTauJetPt', '1stTruthTauJetEta', '1stTruthTauJetPhi',
            '1stTruthTauJetMass', '2ndTruthTauJetPt', '2ndTruthTauJetEta',
            '2ndTruthTauJetPhi', '2ndTruthTauJetMass'
        ]
        self.tau_vals_wo_mass = [
            '1stTruthTauJetPt',
            '1stTruthTauJetEta',
            '1stTruthTauJetPhi',
            '2ndTruthTauJetPt',
            '2ndTruthTauJetEta',
            '2ndTruthTauJetPhi',
        ]
        self.istau_vals = ['tauFlag1stJet', 'tauFlag2ndJet']
        self.label_vals = ['label']
        self.energy_vals = ['1stRecoJetEnergyMap', '2ndRecoJetEnergyMap']

        # (file-name suffix, variable names registered for that file)
        file_specs = [
            ("jet.npy", self.jet_vals),
            ("tau.npy", self.tau_vals),
            ("istau.npy", self.istau_vals),
            ("energy.npy", self.energy_vals),
        ]
        for path, var_names in file_specs:
            data_list = []
            for label in ['Htautau', 'Zpure_tau']:
                # os.path.join (instead of string "+") keeps the path valid
                # whether or not ``data_path`` ends with a separator.
                file_path = os.path.join(data_path, f"{label}_{path}")
                data_loaded = NumpyFlatData().load_file(file_path)
                data_list.append(data_loaded[:max_events])
            data_loaded = np.concatenate(data_list)[permute]

            self.storegate.update_data(data_id='',
                                       data=data_loaded,
                                       var_names=var_names,
                                       phase=split)

        # 1 for the first (Htautau) half, 0 for the second (Zpure_tau) half,
        # reordered exactly like the data above.
        labels = np.concatenate([
            np.ones(max_events),
            np.zeros(max_events),
        ])[permute]

        self.storegate.update_data(data_id='',
                                   data=labels,
                                   var_names='label',
                                   phase=split)
        self.storegate.compile()

        self.phase = 'train' if phase is None else phase
        self._swich_phase()
예제 #10
0
def test_storegate_zarr():
    """Exercise ``StoreGate`` with the ``hybrid`` backend, then numpy shuffling.

    Covers adding and updating variables (bound and unbound), moving data
    between storage and memory, compiling, reading by phase, deleting
    variables, dtype conversion / one-hot / argmax, and finally that two
    variables added with ``shuffle=True`` to a numpy-backed store stay
    aligned.

    Relies on module-level fixtures (``data_id``, ``var_names01``,
    ``var_names23``, ``data0``, ``data01_bind``, ``data23``, ``data23_bind``).
    """
    storegate = StoreGate(backend='hybrid',
                          backend_args={'mode': 'w'},
                          data_id=data_id)

    assert storegate.data_id == data_id
    storegate.set_data_id(data_id)
    assert storegate.data_id == data_id

    phase = (0.8, 0.1, 0.1)

    # add new variables (bind_vars=True)
    storegate.add_data(var_names=var_names01, data=data01_bind, phase=phase)

    assert storegate.get_data_ids() == [data_id]

    # change hybrid mode
    storegate.set_mode('numpy')

    # add new variables (bind_vars=False)
    storegate.add_data(var_names=var_names23,
                       data=data23,
                       phase=phase,
                       bind_vars=False)
    storegate.to_storage(var_names=var_names23, phase='all')
    storegate.to_memory(var_names=var_names23, phase='train')

    # update existing variables and data (bind_vars=True)
    storegate.update_data(var_names=var_names23, data=data23_bind, phase=phase)

    # update existing variables and data (bind_vars=False)
    storegate.update_data(var_names=var_names23,
                          data=data23,
                          phase=phase,
                          bind_vars=False)

    # compile for multiai
    storegate.compile()
    storegate.get_metadata()
    storegate.show_info()

    # update data by auto mode
    storegate.update_data(var_names=var_names23,
                          data=data23,
                          phase='all',
                          bind_vars=False)
    storegate['all'][var_names23][:] = data23_bind
    storegate.compile()

    # get data (results are discarded; these calls only need to succeed)
    storegate.get_data(var_names=var_names23, phase='train', index=0)
    storegate.get_data(var_names=var_names23, phase='all')
    storegate['all'][var_names23][:]

    # tests: expected number of events per phase
    total_events = len(data0)
    train_events = total_events * phase[0]
    valid_events = total_events * phase[1]
    test_events = total_events * phase[2]

    assert len(storegate['train']) == train_events
    assert len(storegate['valid']) == valid_events
    assert len(storegate['test']) == test_events

    assert var_names23[0] in storegate['train']
    assert var_names23[1] in storegate['train']

    storegate_train = storegate['train']

    assert storegate_train[var_names23][:].shape == (train_events,
                                                     len(var_names23))
    assert storegate_train[var_names23][0].shape == (len(var_names23), )
    assert storegate_train[var_names23[0]][:].shape == (train_events, )
    assert storegate_train[var_names23[0]][0] == 20

    # delete data
    storegate.delete_data(var_names='var2', phase='train')
    del storegate['train'][['var0', 'var1']]
    assert storegate.get_var_names(phase='train') == ['var3']

    # convert dtype
    # ``np.int`` was only an alias of the builtin ``int`` and no longer exists
    # in NumPy >= 1.24, so the builtin is passed directly.
    storegate.astype(var_names='var3', dtype=int)
    storegate.onehot(var_names='var3', num_classes=40)
    storegate.argmax(var_names='var3', axis=1)
    assert storegate['train']['var3'][0] == 30

    # test shuffle with numpy mode: the same array is added under two sets of
    # variable names, so the first training entries must still match.
    storegate = StoreGate(backend='numpy', data_id=data_id)
    storegate.add_data(var_names=var_names01,
                       data=data01_bind,
                       phase=phase,
                       shuffle=True)
    storegate.add_data(var_names=var_names23,
                       data=data01_bind,
                       phase=phase,
                       shuffle=True)
    storegate.compile()

    storegate_train = storegate['train']
    assert storegate_train[var_names01][0][0] == storegate_train[var_names23][
        0][0]