def divide_run(kdst, folder_out):
    """Split a kdst DataFrame into a 20x20 grid of (x, y) spatial bins and
    write each non-trivial bin to its own table of a 'kdst_chunked' HDF5 file.

    Parameters
    ----------
    kdst : pd.DataFrame
        Event table with at least `X` and `Y` columns (mm).
    folder_out : str
        Directory (with trailing separator) where the output file is written.

    Side effects
    ------------
    Creates ``folder_out + <basename of fname with 'kdst' -> 'kdst_chunked'>``
    containing a 'BINS/BinInfo' table of bin centres and one 'KDST/b_<i>_<j>'
    table per (xbin, ybin) group.

    NOTE(review): `fname` is not a parameter here -- it resolves as a
    module-level name at call time. Confirm the enclosing script defines it
    before this function runs.
    """
    # 20 bins of 20 mm covering [-200, 200] mm in each coordinate.
    xbins = np.linspace(-200, 200, 21)
    ybins = np.linspace(-200, 200, 21)
    xcenters = 0.5 * (xbins[1:] + xbins[:-1])
    ycenters = 0.5 * (ybins[1:] + ybins[:-1])

    # Bin-centre lookup table saved alongside the chunked data.
    bin_centres = pd.DataFrame({'x': xcenters, 'y': ycenters})

    # Label every event with the centre of the spatial bin it falls in.
    kdst = kdst.assign(xbin = pd.cut(kdst.X, xbins, labels=xcenters),
                       ybin = pd.cut(kdst.Y, ybins, labels=ycenters))

    # Output path: same basename as the input file, 'kdst' -> 'kdst_chunked'.
    out_path = folder_out + fname.split('/')[-1].replace('kdst', 'kdst_chunked')

    with tb.open_file(out_path, 'w') as h5out:
        df_writer(h5out, bin_centres, 'BINS', 'BinInfo')
        for (x, y), chunk in kdst.groupby(['xbin', 'ybin']):
            # Recover the integer bin indices from the bin-centre labels so
            # the table name encodes the grid position.
            ix = np.digitize(x, xbins) - 1
            iy = np.digitize(y, ybins) - 1
            df_writer(h5out, chunk, 'KDST', f'b_{ix}_{iy}')
xcenters = (xbins[1:] + xbins[:-1]) / 2 ycenters = (ybins[1:] + ybins[:-1]) / 2 data = data.assign(xbin=pd.cut(data.X, xbins, labels=xcenters), ybin=pd.cut(data.Y, ybins, labels=ycenters)) #correct columns ecols = [f'E_{i}' for i in range(12)] + ['S2e'] for col in ecols: data[col] = data[col] * np.exp(data.Z / data['lt']) / data['e0cen'] #find mean and std of gaussians means = data.groupby(['xbin', 'ybin']).apply(mean_and_std).reset_index() mp_chunk.append(means) fullmap = pd.concat(mp_chunk, ignore_index=True) with tb.open_file(fout, 'w') as tab: df_writer(tab, fullmap, 'LT', 'LightTable') fnames_tp = fnames runs = re.findall('\d+', fnames_tp[0])[0] rune = re.findall('\d+', fnames_tp[-1])[0] fout = folder_out + 'runs_' + runs + '_' + rune + '.h5' mp_chunk = [] for tbname in tablenames: data = load_dsts(fnames_tp, 'KDST', tbname) xbin, ybin = data.xbin.unique()[0], data.ybin.unique()[0] xbins = np.linspace(xbin - 10, xbin + 10, 21) #1mm bins ybins = np.linspace(ybin - 10, ybin + 10, 21) xcenters = (xbins[1:] + xbins[:-1]) / 2 ycenters = (ybins[1:] + ybins[:-1]) / 2 data = data.assign(xbin=pd.cut(data.X, xbins, labels=xcenters), ybin=pd.cut(data.Y, ybins, labels=ycenters))
augmentation = parameters.augmentation) if action == 'predict': gen = predict_gen(data_path = parameters.predict_file, label_type = parameters.labeltype, net = net, batch_size = parameters.predict_batch, nevents = parameters.nevents_predict) coorname = ['xbin', 'ybin', 'zbin'] output_name = parameters.out_file if parameters.labeltype == LabelType.Segmentation: tname = 'VoxelsPred' else: tname = 'EventPred' with tb.open_file(output_name, 'w') as h5out: for dct in gen: if 'coords' in dct: coords = dct.pop('coords') #unpack coords and add them to dictionary dct.update({coorname[i]:coords[:, i] for i in range(3)}) predictions = dct.pop('predictions') #unpack predictions and add them back to dictionary dct.update({f'class_{i}':predictions[:, i] for i in range(predictions.shape[1])}) #create pandas dataframe and save to output file df = pd.DataFrame(dct) df_writer(h5out, df, 'DATASET', tname, columns_to_index=['dataset_id']) index_tables(output_name)
'nbins_y': config.nbins_y, 'min_z': min_z, 'max_z': max_z, 'nbins_z': config.nbins_z, 'Rmax': config.Rmax }).to_frame().T return eventInfo, binsInfo, hits if __name__ == "__main__": config = configure(sys.argv).as_namespace filesin = glob(os.path.expandvars(config.files_in)) start_id = 0 for f in filesin: eventInfo, binsInfo, hits = get_MCtables(f, config, start_id) start_id += len(eventInfo) with tb.open_file(os.path.expandvars(config.file_out), 'w') as h5out: dio.df_writer(h5out, eventInfo, 'DATASET', 'EventsInfo', columns_to_index=['dataset_id'], str_col_length=64) dio.df_writer(h5out, binsInfo, 'DATASET', 'BinsInfo') dio.df_writer(h5out, hits, 'DATASET', 'Voxels', columns_to_index=['dataset_id'])
"xmax", "ymax", "zmax", "rmax", "xb1", "yb1", "zb1", "eb1", "xb2", "yb2", "zb2", "eb2", "ovlp_e" ] data = namedtuple("data", columns) # auxiliar namedtuple paolina_algorithm = track_blob_info_creator_extractor(**paolina_params) out_df = pd.DataFrame(columns=columns) try: DECO = pd.read_hdf(in_filename, "DECO/Events") except KeyError: # save empty file with tb.open_file(out_filename, "w") as h5out: df_writer(h5out, out_df, group_name="tracks", table_name="events") index_tables(out_filename) sys.exit() for (event, peak), deco in DECO.groupby(["event", "npeak"]): # pre-proccess deco.loc[:, "time"] = 0 deco.loc[:, "Ec"] = deco["E"] deco.loc[:, "Ep"] = deco["E"] deco.loc[:, ("Q", "Xrms", "Yrms", "nsipm")] = np.nan # Paolina hitc = hits_from_df(deco)[event] df, voxels, track_hitc, out_of_map = paolina_algorithm(hitc)