def build_storegate():
    """Create a small StoreGate populated with random toy data.

    The store is built with the ``numpy`` backend and
    ``data_id='test_keras_mlp'``. It receives 100 events made of two
    normally distributed variables (``var0``, ``var1``) and one binary
    ``label`` variable; every variable is added with the phase tuple
    ``(0.8, 0.1, 0.1)``.

    Returns:
        The StoreGate instance after ``compile()`` and ``show_info()``.
    """
    num_events = 100
    split = (0.8, 0.1, 0.1)

    storegate = StoreGate(backend='numpy', data_id='test_keras_mlp')

    # Features are drawn before labels so the global random stream is
    # consumed in a fixed order.
    features = np.random.normal(size=(num_events, 2))
    labels = np.random.binomial(n=1, p=0.5, size=(num_events, ))

    datasets = (
        (['var0', 'var1'], features),
        ('label', labels),
    )
    for names, values in datasets:
        storegate.add_data(var_names=names, data=values, phase=split)

    storegate.compile()
    storegate.show_info()
    return storegate
def get_storegate(max_events=50000):
    """Create a StoreGate with random four-vector, jet-map and label data.

    The variable names of the four-vector block come from
    ``my_tasks.reco_tau_4vec`` followed by ``my_tasks.truth_tau_4vec``.
    The store uses the ``numpy`` backend with an empty ``data_id`` and
    every variable is added with the phase tuple ``(0.6, 0.2, 0.2)``.

    Args:
        max_events: number of events (first axis) generated for every
            variable.

    Returns:
        The StoreGate instance after ``compile()`` and ``show_info()``.
    """
    from my_tasks import reco_tau_4vec, truth_tau_4vec

    fourvec_var_list = tuple(reco_tau_4vec + truth_tau_4vec)
    jet_map_names = ('1stRecoJetEnergyMap', '2ndRecoJetEnergyMap')
    split = (0.6, 0.2, 0.2)

    storegate = StoreGate(backend='numpy', data_id='')

    # Generation order (normal, uniform, binomial) is kept fixed so the
    # global random stream is consumed in the same sequence.
    fourvec_values = np.random.normal(
        size=(max_events, len(fourvec_var_list)))
    jet_map_values = np.random.uniform(size=(max_events, 2, 16, 16, 3))
    label_values = np.random.binomial(n=1, p=0.5, size=(max_events, ))

    datasets = (
        (fourvec_var_list, fourvec_values),
        (jet_map_names, jet_map_values),
        ('label', label_values),
    )
    for names, values in datasets:
        storegate.add_data(var_names=names, data=values, phase=split)

    storegate.compile()
    storegate.show_info()
    return storegate
def test_storegate_zarr():
    """Exercise the StoreGate API using the ``hybrid`` backend.

    The test walks through adding, moving, updating, reading, deleting and
    converting data, then checks that shuffled additions in ``numpy`` mode
    stay aligned. It relies on module-level fixtures defined elsewhere in
    this file (``data_id``, ``var_names01``, ``var_names23``, ``data0``,
    ``data01_bind``, ``data23`` and ``data23_bind``).
    """
    storegate = StoreGate(backend='hybrid',
                          backend_args={'mode': 'w'},
                          data_id=data_id)
    assert storegate.data_id == data_id

    storegate.set_data_id(data_id)
    assert storegate.data_id == data_id

    phase = (0.8, 0.1, 0.1)

    # add new variables (bind_vars=True)
    storegate.add_data(var_names=var_names01, data=data01_bind, phase=phase)
    assert storegate.get_data_ids() == [data_id]

    # change hybrid mode
    storegate.set_mode('numpy')

    # add new variables (bind_vars=False)
    storegate.add_data(var_names=var_names23,
                       data=data23,
                       phase=phase,
                       bind_vars=False)

    storegate.to_storage(var_names=var_names23, phase='all')
    storegate.to_memory(var_names=var_names23, phase='train')

    # update existing variables and data (bind_vars=True)
    storegate.update_data(var_names=var_names23,
                          data=data23_bind,
                          phase=phase)

    # update existing variables and data (bind_vars=False)
    storegate.update_data(var_names=var_names23,
                          data=data23,
                          phase=phase,
                          bind_vars=False)

    # compile for multiai
    storegate.compile()
    storegate.get_metadata()
    storegate.show_info()

    # update data by auto mode
    storegate.update_data(var_names=var_names23,
                          data=data23,
                          phase='all',
                          bind_vars=False)
    storegate['all'][var_names23][:] = data23_bind
    storegate.compile()

    # get data (results are discarded; these calls only need to succeed)
    storegate.get_data(var_names=var_names23, phase='train', index=0)
    storegate.get_data(var_names=var_names23, phase='all')
    storegate['all'][var_names23][:]

    # tests: expected number of events per phase
    total_events = len(data0)
    train_events = total_events * phase[0]
    valid_events = total_events * phase[1]
    test_events = total_events * phase[2]

    assert len(storegate['train']) == train_events
    assert len(storegate['valid']) == valid_events
    assert len(storegate['test']) == test_events

    assert var_names23[0] in storegate['train']
    assert var_names23[1] in storegate['train']

    storegate_train = storegate['train']
    assert storegate_train[var_names23][:].shape == (train_events,
                                                     len(var_names23))
    assert storegate_train[var_names23][0].shape == (len(var_names23), )
    assert storegate_train[var_names23[0]][:].shape == (train_events, )
    assert storegate_train[var_names23[0]][0] == 20

    # delete data
    storegate.delete_data(var_names='var2', phase='train')
    del storegate['train'][['var0', 'var1']]
    assert storegate.get_var_names(phase='train') == ['var3']

    # convert dtype
    # Builtin int is used because np.int was only an alias for it and has
    # been removed from NumPy (AttributeError on NumPy >= 1.24).
    storegate.astype(var_names='var3', dtype=int)
    storegate.onehot(var_names='var3', num_classes=40)
    storegate.argmax(var_names='var3', axis=1)
    assert storegate['train']['var3'][0] == 30

    # test shuffle with numpy mode: the same data is added twice under
    # different names, so the first training rows must match.
    storegate = StoreGate(backend='numpy', data_id=data_id)
    storegate.add_data(var_names=var_names01,
                       data=data01_bind,
                       phase=phase,
                       shuffle=True)
    storegate.add_data(var_names=var_names23,
                       data=data01_bind,
                       phase=phase,
                       shuffle=True)
    storegate.compile()

    storegate_train = storegate['train']
    assert (storegate_train[var_names01][0][0] ==
            storegate_train[var_names23][0][0])