def read_dijet_events_and_features_from_dir(dir_path, max_num_evts=int(1.2e6)):
    """Read dijet constituents and event features from every file in *dir_path*.

    Files are read until the list is exhausted or more than *max_num_evts*
    events have been accumulated.  Events failing Mjj_cut on event feature 0
    (presumably m_jj -- TODO confirm) are dropped.

    Returns [constituents, particle_feature_names, features, evt_feature_names];
    the two name lists are None when no file could be read.
    """
    print('reading', dir_path)
    maxEvts = max_num_evts
    pb = ProgressBar(maxEvts)
    constituents_concat = []
    features_concat = []
    particle_feature_names = None
    evt_feature_names = None
    flist = get_file_list(dir_path)
    print('num files in dir:', len(flist))
    pb.show(0)
    i_file = -1  # keeps the summary print valid for an empty directory
    for i_file, fname in enumerate(flist):
        try:
            constituents, features = read_dijet_events_and_features_from_file(
                fname)
            # mass cut on event feature 0
            constituents, features = filter_arrays_on_value(
                [constituents, features], features[:, 0], Mjj_cut)
            constituents_concat.extend(constituents)
            features_concat.extend(features)
            pb.show(len(constituents_concat))
            if particle_feature_names is None:
                # feature names are assumed identical across files; read them
                # from the first readable file.  (The original read them
                # unguarded from the *last* file after the loop, which raised
                # NameError on an empty directory and crashed if that last
                # file was unreadable.)
                particle_feature_names, = read_data_from_file(
                    fname, 'particleFeatureNames')
                evt_feature_names, = read_data_from_file(
                    fname, 'eventFeatureNames')
        except Exception as e:
            print("\nCould not read file ", fname, ': ', repr(e))
        if len(constituents_concat) > maxEvts:
            break
    print('\nnum files read in dir ', dir_path, ': ', i_file + 1)
    return [
        constituents_concat, particle_feature_names, features_concat,
        evt_feature_names
    ]
def read_dijet_events_and_features_pt_split(input_dir):
    """Read all *.h5 dijet files in *input_dir*, apply the Mjj_cut mass cut,
    and split events on the leading-jet pt cut.

    Events with feature[1] > pt_j1_cut (presumably pt of jet 1 -- TODO
    confirm) go into the "over" arrays, the rest into "under".

    Returns [const_over, feat_over, const_under, feat_under, keys].
    """
    flist = glob(input_dir + '/' + '*.h5')
    f = h5py.File(flist[0], 'r')  # deliberately kept open: `keys` is a live view on it
    keys = f.keys()
    maxEvts = int(1e9)
    pb = ProgressBar(maxEvts)
    # Empty templates preserve the original output shapes and dtype promotion
    # even when no event passes the cuts:
    # each event = 2 jets, each 100 particles, each (dEta, dPhi, pt);
    # 11 jet features per event.
    const_over_chunks = [np.empty((0, 2, 100, 3), int)]
    const_under_chunks = [np.empty((0, 2, 100, 3), int)]
    feat_over_chunks = [np.empty((0, 11), int)]
    feat_under_chunks = [np.empty((0, 11), int)]
    n_passed = 0
    evt_num = 0
    pb.show(evt_num)
    for fnum, fname in enumerate(flist):
        # close each per-file handle (the original leaked one handle per file)
        with h5py.File(fname, 'r') as fh:
            aux_features = np.array(fh.get('eventFeatures'))
            aux_constituents = np.array(fh.get('jetConstituentsList'))
        evt_num = evt_num + aux_constituents.shape[0]
        # mass cut
        aux_features, aux_constituents = filter_arrays_on_value(
            [aux_features, aux_constituents], aux_features[:, 0], Mjj_cut)
        # pt_j1 separation
        pt_cut_passed = aux_features[:, 1] > pt_j1_cut
        # Collect per-file chunks and concatenate once at the end: the original
        # np.append per file re-copied the full arrays every iteration
        # (accidentally quadratic).
        const_over_chunks.append(aux_constituents[pt_cut_passed])
        feat_over_chunks.append(aux_features[pt_cut_passed])
        const_under_chunks.append(aux_constituents[~pt_cut_passed])
        feat_under_chunks.append(aux_features[~pt_cut_passed])
        n_passed += aux_features.shape[0]
        pb.show(evt_num)
        if (fnum % 100 == 0):
            n_bytes = (sum(a.nbytes for a in const_over_chunks) +
                       sum(a.nbytes for a in const_under_chunks))
            print(evt_num, ' events read, ', n_passed,
                  ' events passed mass cut (size = ', n_bytes / 1000., ' KB)')
    jet_const_pt1_over = np.concatenate(const_over_chunks, axis=0)
    jet_feat_pt1_over = np.concatenate(feat_over_chunks, axis=0)
    jet_const_pt1_under = np.concatenate(const_under_chunks, axis=0)
    jet_feat_pt1_under = np.concatenate(feat_under_chunks, axis=0)
    print(evt_num, ' events read, ',
          jet_const_pt1_over.shape[0] + jet_const_pt1_under.shape[0],
          ' events passed mass cut')
    return [
        jet_const_pt1_over, jet_feat_pt1_over, jet_const_pt1_under,
        jet_feat_pt1_under, keys
    ]
def read_in_data(file_dir):
    """Concatenate the 'results' dataset of every *result.h5 file in *file_dir*,
    keeping only events with feature[0] > Mjj_selection.

    Returns a single 2D numpy array, or None when the directory holds no
    readable files (matching the original behavior).
    """
    # NOTE(review): maxEvts is resolved from enclosing/module scope here --
    # confirm it is defined wherever this function is imported/used.
    pb = ProgressBar(maxEvts)
    flist = glob.glob(file_dir + '/' + '*result.h5')
    labels = None
    chunks = []
    n_read = 0
    i_file = -1  # keeps the summary print valid for an empty directory
    for i_file, fname in enumerate(flist):
        # close each file handle (the original leaked one per file)
        with h5py.File(fname, 'r') as f:
            aux_evts = np.array(f.get('results'))
            if labels is None:
                labels = list(f.get('labels'))
                print('Labels:')
                print(labels)
                pb.show(0)
        aux_evts = aux_evts[aux_evts[:, 0] > Mjj_selection]
        # Collect chunks and concatenate once at the end instead of the
        # original per-file np.append, which re-copied all accumulated data
        # every iteration (accidentally quadratic).
        chunks.append(aux_evts)
        n_read += aux_evts.shape[0]
        pb.show(n_read)
    print('\nnum files read in dir ', file_dir, ': ', i_file + 1)
    return np.concatenate(chunks, axis=0) if chunks else None
def read_data_from_dir(dir_path, datakey, labelkey, mjj_cut=True):
    """Read *datakey* arrays from every file in *dir_path* and *labelkey*
    from the first readable file.

    When *mjj_cut* is true, rows with column 0 <= Mjj_cut are dropped.

    Returns [data_array, labels]; labels is None when no file provided them.
    """
    print('reading', dir_path)
    maxEvts = int(1e9)
    pb = ProgressBar(maxEvts)
    data = []
    flist = get_file_list(dir_path)
    print('num files in dir:', len(flist))
    pb.show(0)
    i_file = -1  # keeps the summary print valid for an empty directory
    for i_file, fname in enumerate(flist):
        try:
            results, = read_data_from_file(fname, datakey)
            if mjj_cut:
                results = results[results[:, 0] > Mjj_cut]
            data.extend(results)
            pb.show(len(data))
        except Exception as e:
            print("\nCould not read file ", fname, ': ', repr(e))
    # fix: initialize labels so the prints below don't raise NameError when
    # every file fails to read
    labels = None
    # fix: use a separate loop variable -- the original reused i_file here,
    # which corrupted the "num files read" count printed below
    for label_fname in flist:
        try:
            labels, = read_data_from_file(label_fname, labelkey)
            break
        except Exception as e:
            print("\nCould not read file ", label_fname, ': ', repr(e))
    print('Labels:', labels)
    print('\nnum files read in dir ', dir_path, ': ', i_file + 1)
    return [np.asarray(data), labels]
def __init__(self, state, actionList, regions, names):
    """Build the reward table self.rsa for every action in *actionList*.

    rw(s, a) = Q(s, a) - gamma * mean over next states s' of max_a' Q(s', a')
    (i.e. the reward recovered from the Bellman backup -- TODO confirm with
    the surrounding RL code).
    """
    discountFactor = 0.9
    self.rsa = {}
    self.regions = regions
    self.names = names
    p = ProgressBar(len(actionList), 'Querying', 'Complete')
    p.show(0)
    # enumerate replaces the original actionList.index(act) progress lookup,
    # which was O(n) per step and wrong when actionList holds duplicates
    for i, act in enumerate(actionList):
        ns = self.nextStates(state, act)
        nextqsa = 0
        for s in ns:
            A = self.possibleActions(s, regions)
            a = self.argmax(s, A)
            nextqsa += float(a[1])
        qsa = self.getQsa(state, act)
        if ns:
            rw = qsa - discountFactor * (1. / float(len(ns))) * nextqsa
        else:
            # no successor states: future term is zero (the original raised
            # ZeroDivisionError here)
            rw = qsa
        self.rsa[act] = rw
        p.show(i + 1)
def __init__(self, state, actionList, regions, names):
    """Build the reward table self.rsa for every action in *actionList*.

    rw(s, a) = Q(s, a) - gamma * mean over next states s' of max_a' Q(s', a')
    (reward recovered from the Bellman backup -- TODO confirm with the
    surrounding RL code).
    """
    discountFactor = 0.9
    self.rsa = {}
    self.regions = regions
    self.names = names
    p = ProgressBar(len(actionList), 'Querying', 'Complete')
    p.show(0)
    # enumerate replaces the original actionList.index(act) progress lookup,
    # which was O(n) per step and wrong when actionList holds duplicates
    for i, act in enumerate(actionList):
        ns = self.nextStates(state, act)
        nextqsa = 0
        for s in ns:
            A = self.possibleActions(s, A=None) if False else self.possibleActions(s, regions)
            a = self.argmax(s, A)
            nextqsa += float(a[1])
        qsa = self.getQsa(state, act)
        if ns:
            rw = qsa - discountFactor * (1. / float(len(ns))) * nextqsa
        else:
            # no successor states: future term is zero (the original raised
            # ZeroDivisionError here)
            rw = qsa
        self.rsa[act] = rw
        p.show(i + 1)
# --- luminosity estimate for the QCD training sample -------------------------
file_dir = '/eos/project/d/dshep/TOPCLASS/DijetAnomaly/VAE_results/'
maxEvts = int(1e6)
pb = ProgressBar(maxEvts)
name_tag = 'qcd_sqrtshatTeV_13TeV_PU40'
flist = glob(file_dir + name_tag + '/' + name_tag + '_*.h5')
# Collect per-file arrays and concatenate once: the original np.append per
# file re-copied the whole accumulated dataset each iteration (quadratic).
chunks = []
n_train = 0
for i_file, fname in enumerate(flist):
    f = h5py.File(fname, 'r')
    aux_evts = np.array(f.get('results'))
    aux_evts = aux_evts[aux_evts[:, 0] > Mjj_selection]
    if not chunks:
        # first file: show the column labels once
        print('Labels:')
        print(list(f.get('labels')))
        pb.show(0)
    chunks.append(aux_evts)
    n_train += aux_evts.shape[0]
    pb.show(n_train)
    if n_train > maxEvts:
        break
data_train = np.concatenate(chunks, axis=0)
# fix: the original read `N_evts = i_file + 1 * 10000`, which evaluates to
# i_file + 10000 due to precedence; intended is (files read) * events/file.
# Assumes each input file holds 10000 generated events -- TODO confirm.
N_evts = (i_file + 1) * 10000
QCD_xsec = 8.73e-6  # mb
QCD_xsec *= 1e9  # mb -> fb
print('\nLuminosity used for training')
print('{:.2} fb^-1'.format(N_evts / QCD_xsec))
class NewCorrecaoEngine ():
    """Runs the unified daltonism-correction filter over a list of image files.

    da / de / p / n arrive as percentages and are stored as fractions in
    [0, 1].  rgb=False selects the LMS color space; histograma enables
    histogram equalization (both are forwarded to FiltroDeImagem).
    """

    def __init__(self, da, de, p, n, listaDeArquivos, rgb, histograma, pb=True):
        self.da = float(da) / float(100)
        self.de = float(de) / float(100)
        self.p = float(p) / float(100)
        self.n = float(n) / float(100)
        self.listaDeArquivos = listaDeArquivos
        self.lms = not rgb
        self.equalizar = histograma
        # fix: always define self.pb -- the original only assigned it when a
        # progress bar was requested, so every later `if self.pb:` raised
        # AttributeError for pb=False
        self.pb = ProgressBar() if pb else None
        print("Iniciado correacao... da: " + str(self.da))
        self.resultado = []
        if self.pb:
            self.pb.setTotal(len(listaDeArquivos))

    def start(self, quandoTermina):
        """Filter every input file, save each result under default_output/
        and return a list of (label, source_path, output_path) tuples.

        NOTE(review): quandoTermina is accepted but never used -- kept for
        interface compatibility; confirm with callers.
        """
        if self.pb:
            self.pb.show()
        # The old 6-step Protan/Deutan/Fuzzy pipeline that used to live here
        # was already commented out in the original; the single filtrarNovo
        # call below replaced it.  The leftover `im3 = Image.open(...)` that
        # fed that dead pipeline has been dropped (it opened every image a
        # second time for nothing).
        for i, arquivo in enumerate(self.listaDeArquivos):
            label = str(str(arquivo).split('/')[-1])
            if self.pb:
                self.pb.setLabel('[1/1] Aplicando filtor unico em ' + str(label))
            filtro1 = FiltroDeImagem(debug=False)
            filtro1.carregarImg(str(arquivo))
            filtro1.callBackPogresso(self.percentagem)
            im1 = filtro1.filtrarNovo(self.lms, self.equalizar,
                                      self.p, self.de, self.da)
            out_path = "default_output/" + str(i) + ".bmp"
            im1.save(out_path, "BMP")
            self.resultado.append((label, str(arquivo), out_path))
        return self.resultado

    def percentagem(self, x):
        """Progress callback handed to the filter; forwards to the bar if any."""
        if self.pb:
            return self.pb.setPercentagem(x)
        return 0
        # success path: release the input file handle
        f.close()
    except KeyboardInterrupt:
        # user abort: clamp the requested event count to what was processed so
        # the merge/save below still runs on the partial data
        args.MaxEvts = N_evts_processed
        break
    except:
        # NOTE(review): bare except silently skips unreadable files; consider
        # narrowing the exception type and logging repr(e)
        errors += 1
        print('[ERROR {}]'.format(errors), fname, 'failed')
    # stop once the requested number of events has been processed
    if args.MaxEvts > 0 and N_evts_processed >= args.MaxEvts:
        break

# merge the per-chunk .npy files written above into the single output dataset
if N_chunks > 1:
    flist = glob(outname.replace('.npy', '_*.npy'))
    print('Merging {} chunks...'.format(len(flist)))
    pb = ProgressBar(1 + len(flist))
    pb.show(0)
    for i, fname in enumerate(flist):
        pb.show(i + 1)
        # NOTE(review): repeated concatenate re-copies the growing dataset on
        # every pass -- fine for a few chunks, quadratic for many
        dataset = np.concatenate((dataset, np.load(fname)))
    print('Cleaning chunks...')
    for fname in flist:
        os.system('rm ' + fname)
# NOTE(review): `i + 1` here is whichever loop index survived above (merge
# loop when N_chunks > 1, otherwise the earlier file loop) -- verify intent
print(
    'Saving dataset with {} entries from {} files: '.format(
        dataset.shape[0], i + 1), outname)
np.save(outname, dataset)
print('\n')