Beispiel #1
0
def get_storegate(data_path='/tmp/onlyDiTau/', max_events=50000):
    """Load the Htautau / Zpure_tau ``.npy`` files into a numpy StoreGate.

    For every file kind (``jet``, ``tau``, ``istau``, ``energy``) the first
    ``max_events`` rows of the ``Htautau`` and ``Zpure_tau`` files are
    concatenated, reordered with one fixed permutation, and registered under
    the corresponding variable names. A ``label`` variable (1 for the
    Htautau rows, 0 for the Zpure_tau rows) is reordered with the same
    permutation, so rows stay aligned across all variables.

    Args:
        data_path: Directory holding the ``{label}_{kind}.npy`` files. A
            trailing path separator is optional.
        max_events: Number of rows taken from each sample file. The
            permutation has ``2 * max_events`` entries, so a file with fewer
            rows makes the permutation indexing raise ``IndexError``.

    Returns:
        The StoreGate instance after ``compile()`` has been called.
    """
    import os  # local import: only needed to build the input file paths

    # Index for signal/background shuffle.
    # A private generator seeded with 1 yields the same permutation as
    # seeding the global generator would, without touching the global state.
    permute = np.random.RandomState(1).permutation(2 * max_events)

    # Same phase tuple is handed to every update_data call below.
    phase = (0.6, 0.2, 0.2)

    storegate = StoreGate(backend='numpy', data_id='')

    for path, var_names in [
        ("jet.npy",
         ('1stRecoJetPt', '1stRecoJetEta', '1stRecoJetPhi', '1stRecoJetMass',
          '2ndRecoJetPt', '2ndRecoJetEta', '2ndRecoJetPhi', '2ndRecoJetMass')),
        ("tau.npy",
         ('1stTruthTauJetPt', '1stTruthTauJetEta', '1stTruthTauJetPhi',
          '1stTruthTauJetMass', '2ndTruthTauJetPt', '2ndTruthTauJetEta',
          '2ndTruthTauJetPhi', '2ndTruthTauJetMass')),
        ("istau.npy", ('tauFlag1stJet', 'tauFlag2ndJet')),
        ("energy.npy", ('1stRecoJetEnergyMap', '2ndRecoJetEnergyMap')),
    ]:
        data_list = []
        for label in ['Htautau', 'Zpure_tau']:
            # os.path.join works with or without a trailing separator on
            # data_path, unlike plain string concatenation.
            file_name = os.path.join(data_path, f"{label}_{path}")
            data_list.append(np.load(file_name)[:max_events])

        # Signal rows first, background rows second, then shuffle.
        data_loaded = np.concatenate(data_list)[permute]

        storegate.update_data(data=data_loaded,
                              var_names=var_names,
                              phase=phase)

    # # Added TauMass
    # tau_mass = np.full(shape=2 * max_events, fill_value=1.777)
    # storegate.update_data(
    #     data_id='',
    #     data=tau_mass,
    #     var_names=['TauMass'],
    # )
    # storegate._num_events['TauMass'] += len(tau_mass)

    # Setting labels: ones for the first sample, zeros for the second,
    # reordered with the same permutation as the data above.
    labels = np.concatenate([
        np.ones(max_events),
        np.zeros(max_events),
    ])[permute]

    storegate.update_data(data=labels,
                          var_names='label',
                          phase=phase)

    storegate.compile()
    storegate.show_info()

    return storegate
Beispiel #2
0
def build_storegate():
    """Return a compiled numpy StoreGate filled with random toy data.

    Registers 100 events under data_id ``'test_keras_mlp'``: two
    normally-distributed variables (``var0``, ``var1``) and a binary
    ``label`` drawn with p=0.5. Every ``add_data`` call receives the phase
    tuple ``(0.8, 0.1, 0.1)``.
    """
    n_events = 100
    split = (0.8, 0.1, 0.1)

    sg = StoreGate(backend='numpy', data_id='test_keras_mlp')

    # Features are drawn before labels so the sequence of RNG calls is fixed.
    features = np.random.normal(size=(n_events, 2))
    targets = np.random.binomial(n=1, p=0.5, size=(n_events, ))

    toy_variables = (
        (['var0', 'var1'], features),
        ('label', targets),
    )
    for names, values in toy_variables:
        sg.add_data(var_names=names, data=values, phase=split)

    sg.compile()
    sg.show_info()

    return sg
Beispiel #3
0
def get_storegate(max_events=50000):
    """Return a compiled numpy StoreGate filled with random toy data.

    Three groups of variables are registered, each with the phase tuple
    ``(0.6, 0.2, 0.2)``:

    * the names in ``reco_tau_4vec + truth_tau_4vec`` (from ``my_tasks``),
      filled with normally-distributed values, one column per name;
    * ``1stRecoJetEnergyMap`` / ``2ndRecoJetEnergyMap``, filled with uniform
      values of shape ``(max_events, 2, 16, 16, 3)``;
    * ``label``, a binary variable drawn with p=0.5.

    Args:
        max_events: Number of toy events to generate.
    """
    from my_tasks import reco_tau_4vec, truth_tau_4vec

    four_vector_names = tuple(reco_tau_4vec + truth_tau_4vec)
    energy_map_names = ('1stRecoJetEnergyMap', '2ndRecoJetEnergyMap')
    split = (0.6, 0.2, 0.2)

    sg = StoreGate(backend='numpy', data_id='')

    # Draw order (normal, uniform, binomial) is kept fixed so the sequence
    # of RNG calls does not change.
    four_vectors = np.random.normal(
        size=(max_events, len(four_vector_names)))
    energy_maps = np.random.uniform(size=(max_events, 2, 16, 16, 3))
    targets = np.random.binomial(n=1, p=0.5, size=(max_events, ))

    toy_variables = (
        (four_vector_names, four_vectors),
        (energy_map_names, energy_maps),
        ('label', targets),
    )
    for names, values in toy_variables:
        sg.add_data(var_names=names, data=values, phase=split)

    sg.compile()
    sg.show_info()

    return sg
Beispiel #4
0
def test_storegate_zarr():
    """Exercise the StoreGate API on the 'hybrid' backend, then shuffling.

    Walks through adding, updating, moving, reading, deleting and converting
    data, asserting on the resulting phase sizes, shapes and values, and
    finally checks that two variables added with ``shuffle=True`` from the
    same data end up in the same order on the 'numpy' backend.

    Relies on fixtures defined outside this function: ``data_id``,
    ``var_names01``, ``var_names23``, ``data0``, ``data01_bind``, ``data23``
    and ``data23_bind``.
    """
    storegate = StoreGate(backend='hybrid',
                          backend_args={'mode': 'w'},
                          data_id=data_id)

    assert storegate.data_id == data_id
    storegate.set_data_id(data_id)
    assert storegate.data_id == data_id

    phase = (0.8, 0.1, 0.1)

    # add new variables (bind_vars=True)
    storegate.add_data(var_names=var_names01, data=data01_bind, phase=phase)

    assert storegate.get_data_ids() == [data_id]

    # change hybrid mode
    storegate.set_mode('numpy')

    # add new variables (bind_vars=False)
    storegate.add_data(var_names=var_names23,
                       data=data23,
                       phase=phase,
                       bind_vars=False)
    storegate.to_storage(var_names=var_names23, phase='all')
    storegate.to_memory(var_names=var_names23, phase='train')

    # update existing variables and data (bind_vars=True)
    storegate.update_data(var_names=var_names23, data=data23_bind, phase=phase)

    # update existing variables and data (bind_vars=False)
    storegate.update_data(var_names=var_names23,
                          data=data23,
                          phase=phase,
                          bind_vars=False)

    # compile for multiai
    storegate.compile()
    storegate.get_metadata()
    storegate.show_info()

    # update data by auto mode
    storegate.update_data(var_names=var_names23,
                          data=data23,
                          phase='all',
                          bind_vars=False)
    storegate['all'][var_names23][:] = data23_bind
    storegate.compile()

    # get data (results are discarded; these calls only need to succeed)
    storegate.get_data(var_names=var_names23, phase='train', index=0)
    storegate.get_data(var_names=var_names23, phase='all')
    storegate['all'][var_names23][:]

    # tests: expected number of events per phase, from the phase fractions
    total_events = len(data0)
    train_events = total_events * phase[0]
    valid_events = total_events * phase[1]
    test_events = total_events * phase[2]

    assert len(storegate['train']) == train_events
    assert len(storegate['valid']) == valid_events
    assert len(storegate['test']) == test_events

    assert var_names23[0] in storegate['train']
    assert var_names23[1] in storegate['train']

    storegate_train = storegate['train']

    assert storegate_train[var_names23][:].shape == (train_events,
                                                     len(var_names23))
    assert storegate_train[var_names23][0].shape == (len(var_names23), )
    assert storegate_train[var_names23[0]][:].shape == (train_events, )
    assert storegate_train[var_names23[0]][0] == 20

    # delete data
    storegate.delete_data(var_names='var2', phase='train')
    del storegate['train'][['var0', 'var1']]
    assert storegate.get_var_names(phase='train') == ['var3']

    # convert dtype
    # The builtin int replaces np.int, which was only an alias for it and
    # was removed in NumPy 1.24.
    storegate.astype(var_names='var3', dtype=int)
    storegate.onehot(var_names='var3', num_classes=40)
    storegate.argmax(var_names='var3', axis=1)
    assert storegate['train']['var3'][0] == 30

    # test shuffle with numpy mode
    # Both variable groups are filled from data01_bind, so their first
    # training entries can be compared after shuffling.
    storegate = StoreGate(backend='numpy', data_id=data_id)
    storegate.add_data(var_names=var_names01,
                       data=data01_bind,
                       phase=phase,
                       shuffle=True)
    storegate.add_data(var_names=var_names23,
                       data=data01_bind,
                       phase=phase,
                       shuffle=True)
    storegate.compile()

    storegate_train = storegate['train']
    first_of_01 = storegate_train[var_names01][0][0]
    first_of_23 = storegate_train[var_names23][0][0]
    assert first_of_01 == first_of_23