Ejemplo n.º 1
0
def read_dijet_events_and_features_from_dir(dir_path, max_num_evts=int(1.2e6)):
    """Read dijet constituents and event features from all files in a directory.

    Each file returned by ``get_file_list(dir_path)`` is read with
    ``read_dijet_events_and_features_from_file`` and passed through
    ``filter_arrays_on_value`` using column 0 of the features and the
    module-level ``Mjj_cut``. Files that raise while being read are reported
    on stdout and skipped.

    Args:
        dir_path: directory handed to ``get_file_list``.
        max_num_evts: reading stops after the first file that pushes the
            number of collected events above this value. Whole files are
            appended, so the result can hold more events than this.

    Returns:
        ``[constituents, particle_feature_names, features,
        evt_feature_names]`` where ``constituents`` and ``features`` are
        lists of per-event entries.

    Raises:
        IOError: if ``get_file_list`` returns no files.
    """
    print('reading', dir_path)
    pb = ProgressBar(max_num_evts)

    constituents_concat = []
    features_concat = []

    flist = get_file_list(dir_path)
    print('num files in dir:', len(flist))
    if not flist:
        # Without at least one file there is nothing to take the feature
        # names from; fail with a clear message instead of an unbound name.
        raise IOError('no input files found in ' + str(dir_path))
    pb.show(0)

    last_good_fname = None
    for i_file, fname in enumerate(flist):
        try:
            constituents, features = read_dijet_events_and_features_from_file(
                fname)
            constituents, features = filter_arrays_on_value(
                [constituents, features], features[:, 0], Mjj_cut)
            constituents_concat.extend(constituents)
            features_concat.extend(features)
            pb.show(len(constituents_concat))
            last_good_fname = fname
        except Exception as e:
            print("\nCould not read file ", fname, ': ', repr(e))
        if len(constituents_concat) > max_num_evts:
            break

    # Prefer a file that is known to be readable for the name look-up; fall
    # back to the last file visited when none could be read.
    names_fname = last_good_fname if last_good_fname is not None else fname
    particle_feature_names, = read_data_from_file(names_fname,
                                                  'particleFeatureNames')
    evt_feature_names, = read_data_from_file(names_fname, 'eventFeatureNames')

    print('\nnum files read in dir ', dir_path, ': ', i_file + 1)
    return [
        constituents_concat, particle_feature_names, features_concat,
        evt_feature_names
    ]
Ejemplo n.º 2
0
def read_dijet_events_and_features_pt_split(input_dir):
    """Read all ``*.h5`` files in a directory and split events on a pt cut.

    For every file the ``eventFeatures`` and ``jetConstituentsList`` datasets
    are loaded, filtered through ``filter_arrays_on_value`` (column 0 of the
    features against the module-level ``Mjj_cut``) and then split by whether
    feature column 1 is greater than the module-level ``pt_j1_cut``.

    Args:
        input_dir: directory that is searched for ``*.h5`` files.

    Returns:
        ``[const_over, feat_over, const_under, feat_under, keys]`` where
        "over"/"under" refer to the ``pt_j1_cut`` split and ``keys`` is the
        list of top-level dataset names of the first file.

    Raises:
        IndexError: if ``input_dir`` contains no ``*.h5`` file.
    """
    flist = glob(input_dir + '/' + '*.h5')
    # Copy the key names out so the file can be closed right away; a live
    # keys view would be unusable once its file is closed.
    with h5py.File(flist[0], 'r') as f:
        keys = list(f.keys())

    pb = ProgressBar(int(1e9))

    jet_const_pt1_over = np.empty(
        (0, 2, 100, 3), int
    )  # each event = 2 jets, each 100 particles, each ( dEta, dPhi, pt )
    jet_const_pt1_under = np.empty((0, 2, 100, 3), int)
    jet_feat_pt1_over = np.empty((0, 11), int)  # 11 jet features per event
    jet_feat_pt1_under = np.empty((0, 11), int)

    evt_num = 0

    pb.show(evt_num)

    for fnum, fname in enumerate(flist):
        # np.array(...) copies the datasets into memory, so the file handle
        # is only needed for the duration of the read.
        with h5py.File(fname, 'r') as f:
            aux_features = np.array(f.get('eventFeatures'))
            aux_constituents = np.array(f.get('jetConstituentsList'))
        evt_num = evt_num + aux_constituents.shape[0]
        # mass cut
        aux_features, aux_constituents = filter_arrays_on_value(
            [aux_features, aux_constituents], aux_features[:, 0], Mjj_cut)
        # pt_j1 separation
        pt_cut_passed = aux_features[:, 1] > pt_j1_cut
        jet_const_pt1_over = np.append(jet_const_pt1_over,
                                       aux_constituents[pt_cut_passed],
                                       axis=0)
        jet_feat_pt1_over = np.append(jet_feat_pt1_over,
                                      aux_features[pt_cut_passed],
                                      axis=0)
        jet_const_pt1_under = np.append(jet_const_pt1_under,
                                        aux_constituents[~pt_cut_passed],
                                        axis=0)
        jet_feat_pt1_under = np.append(jet_feat_pt1_under,
                                       aux_features[~pt_cut_passed],
                                       axis=0)

        pb.show(evt_num)

        # Periodic summary so long runs show the accumulated size.
        if (fnum % 100 == 0):
            print(evt_num, ' events read, ',
                  jet_const_pt1_over.shape[0] + jet_const_pt1_under.shape[0],
                  ' events passed mass cut (size = ',
                  (jet_const_pt1_over.nbytes + jet_const_pt1_under.nbytes) /
                  1000., ' KB)')

    print(evt_num, ' events read, ',
          jet_const_pt1_over.shape[0] + jet_const_pt1_under.shape[0],
          ' events passed mass cut')
    return [
        jet_const_pt1_over, jet_feat_pt1_over, jet_const_pt1_under,
        jet_feat_pt1_under, keys
    ]
Ejemplo n.º 3
0
def read_in_data(file_dir):
    """Load and stack the ``results`` dataset of every ``*result.h5`` file.

    Rows are kept only when their column 0 is greater than the module-level
    ``Mjj_selection``. The ``labels`` dataset of the first file is printed.

    Args:
        file_dir: directory that is searched for ``*result.h5`` files.

    Returns:
        A single array with the selected rows of all files stacked along
        axis 0, or ``None`` when no file matched.
    """
    pb = ProgressBar(maxEvts)

    # Collect per-file arrays and join them once at the end, so the
    # accumulated data is not re-copied for every file.
    chunks = []
    num_evts = 0
    num_files = 0
    for fname in glob.glob(file_dir + '/' + '*result.h5'):
        is_first = not chunks
        # The context manager closes the HDF5 handle as soon as the needed
        # datasets have been copied into memory.
        with h5py.File(fname, 'r') as f:
            aux_evts = np.array(f.get('results'))
            labels = list(f.get('labels')) if is_first else None
        aux_evts = aux_evts[aux_evts[:, 0] > Mjj_selection]
        if is_first:
            print('Labels:')
            print(labels)
            pb.show(0)
        chunks.append(aux_evts)
        num_evts += aux_evts.shape[0]
        num_files += 1

        pb.show(num_evts)

    print('\nnum files read in dir ', file_dir, ': ', num_files)
    if not chunks:
        return None
    return np.concatenate(chunks, axis=0)
Ejemplo n.º 4
0
def read_data_from_dir(dir_path, datakey, labelkey, mjj_cut=True):
    """Read one dataset from every file in a directory, plus its labels.

    Files that raise while being read are reported on stdout and skipped.

    Args:
        dir_path: directory handed to ``get_file_list``.
        datakey: key passed to ``read_data_from_file`` to fetch the data.
        labelkey: key passed to ``read_data_from_file`` to fetch the labels.
        mjj_cut: when true, keep only rows whose column 0 is greater than
            the module-level ``Mjj_cut``.

    Returns:
        ``[data, labels]`` where ``data`` is ``np.asarray`` of all collected
        rows and ``labels`` comes from the first file whose labels could be
        read (``None`` if there is no such file).
    """
    print('reading', dir_path)
    pb = ProgressBar(int(1e9))

    data = []

    flist = get_file_list(dir_path)
    print('num files in dir:', len(flist))
    pb.show(0)

    # Counted separately from any loop index: the label loop below must not
    # influence the number reported at the end.
    num_files_read = 0
    for fname in flist:
        try:
            results, = read_data_from_file(fname, datakey)
            if mjj_cut:
                results = results[results[:, 0] > Mjj_cut]
            data.extend(results)
            pb.show(len(data))
            num_files_read += 1
        except Exception as e:
            print("\nCould not read file ", fname, ': ', repr(e))

    # Labels are taken from the first file that provides them.
    labels = None
    for fname in flist:
        try:
            labels, = read_data_from_file(fname, labelkey)
            break
        except Exception as e:
            print("\nCould not read file ", fname, ': ', repr(e))
    print('Labels:', labels)

    print('\nnum files read in dir ', dir_path, ': ', num_files_read)
    return [np.asarray(data), labels]
Ejemplo n.º 5
0
    def __init__(self, state, actionList, regions, names):
        """Compute and store a reward estimate for every action in a state.

        For each action the stored value is
        ``getQsa(state, act) - 0.9 * mean(best)`` where ``best`` is element 1
        of ``self.argmax(s, self.possibleActions(s, regions))`` taken over
        every state ``s`` returned by ``self.nextStates(state, act)``.
        Results are stored in ``self.rsa`` keyed by action.

        Args:
            state: state the actions are evaluated from.
            actionList: sized iterable of actions to evaluate.
            regions: stored on the instance and forwarded to
                ``possibleActions``.
            names: stored on the instance.

        Raises:
            ZeroDivisionError: if ``nextStates`` returns an empty collection
                for some action.
        """
        discountFactor = 0.9
        self.rsa = {}
        self.regions = regions
        self.names = names

        p = ProgressBar(len(actionList), 'Querying', 'Complete')
        p.show(0)
        # enumerate supplies the progress count directly; looking it up with
        # actionList.index() rescans the list for every action and reports
        # the first occurrence when an action appears more than once.
        for num_done, act in enumerate(actionList, 1):
            ns = self.nextStates(state, act)
            nextqsa = 0
            for s in ns:
                A = self.possibleActions(s, regions)
                a = self.argmax(s, A)
                nextqsa += float(a[1])
            qsa = self.getQsa(state, act)
            rw = qsa - discountFactor * (1. / float(len(ns))) * nextqsa
            self.rsa[act] = rw
            p.show(num_done)
    def __init__(self, state, actionList, regions, names):
        """Compute and store a reward estimate for every action in a state.

        For each action the stored value is
        ``getQsa(state, act) - 0.9 * mean(best)`` where ``best`` is element 1
        of ``self.argmax(s, self.possibleActions(s, regions))`` taken over
        every state ``s`` returned by ``self.nextStates(state, act)``.
        Results are stored in ``self.rsa`` keyed by action.

        Args:
            state: state the actions are evaluated from.
            actionList: sized iterable of actions to evaluate.
            regions: stored on the instance and forwarded to
                ``possibleActions``.
            names: stored on the instance.

        Raises:
            ZeroDivisionError: if ``nextStates`` returns an empty collection
                for some action.
        """
        discountFactor = 0.9
        self.rsa = {}
        self.regions = regions
        self.names = names

        p = ProgressBar(len(actionList), 'Querying', 'Complete')
        p.show(0)
        # enumerate supplies the progress count directly; looking it up with
        # actionList.index() rescans the list for every action and reports
        # the first occurrence when an action appears more than once.
        for num_done, act in enumerate(actionList, 1):
            ns = self.nextStates(state, act)
            nextqsa = 0
            for s in ns:
                A = self.possibleActions(s, regions)
                a = self.argmax(s, A)
                nextqsa += float(a[1])
            qsa = self.getQsa(state, act)
            rw = qsa - discountFactor * (1. / float(len(ns))) * nextqsa
            self.rsa[act] = rw
            p.show(num_done)
Ejemplo n.º 7
0
# Load the QCD VAE result files, keep events above the Mjj_selection
# threshold, and report the integrated luminosity the sample corresponds to.
file_dir = '/eos/project/d/dshep/TOPCLASS/DijetAnomaly/VAE_results/'
maxEvts = int(1e6)
pb = ProgressBar(maxEvts)

data_train = None
name_tag = 'qcd_sqrtshatTeV_13TeV_PU40'
flist = glob(file_dir + name_tag + '/' + name_tag + '_*.h5')
# Keeps the luminosity computation below well-defined when no file matches.
i_file = -1
for i_file, fname in enumerate(flist):
    # Close each HDF5 file as soon as its datasets are copied into memory.
    with h5py.File(fname, 'r') as f:
        aux_evts = np.array(f.get('results'))
        if data_train is None:
            print('Labels:')
            print(list(f.get('labels')))
    aux_evts = aux_evts[aux_evts[:, 0] > Mjj_selection]
    if data_train is None:
        pb.show(0)
        data_train = aux_evts
    else:
        data_train = np.append(data_train, aux_evts, axis=0)

    pb.show(data_train.shape[0])

    # Stop once enough events have been collected.
    if data_train.shape[0] > maxEvts:
        break

# Number of files used times 10000.
# NOTE(review): presumably 10000 is the number of generated events per file
# - confirm. The parentheses are required: without them only the literal
# 10000 is multiplied.
N_evts = (i_file + 1) * 10000
QCD_xsec = 8.73e-6  #mb
QCD_xsec *= 1e9  #fb
print('\nLuminosity used for training')
print('{:.2} fb^-1'.format(N_evts / QCD_xsec))
Ejemplo n.º 8
0
class NewCorrecaoEngine ():
	"""Run the single-pass image filter over a list of files.

	Each file is processed with ``FiltroDeImagem.filtrarNovo`` and the result
	is written to ``default_output/<index>.bmp``. Progress is reported through
	an optional ``ProgressBar``.
	"""

	def __init__ (self, da, de, p, n, listaDeArquivos, rgb, histograma, pb=True):
		"""Store the filter settings and set up the optional progress bar.

		da, de, p, n    -- values given in percent; stored divided by 100
		listaDeArquivos -- sequence of image paths to process
		rgb             -- ``self.lms`` is set to ``not rgb``
		histograma      -- stored as ``self.equalizar`` and passed to the filter
		pb              -- when true, create a ``ProgressBar``
		"""
		self.da = float(da)/float(100)
		self.de = float(de)/float(100)
		self.p =  float(p)/float(100)
		self.n =  float(n)/float(100)
		self.listaDeArquivos = listaDeArquivos
		self.lms = not rgb
		self.equalizar = histograma

		# Always define the attribute: every later use is guarded by
		# ``if self.pb``, which fails if it was never assigned.
		self.pb = ProgressBar() if pb else None

		# Single parenthesised argument: prints the same on Python 2 and 3.
		print("Iniciado correacao... da: " + str(self.da))
		self.resultado = []

		if self.pb:
			self.pb.setTotal(len(listaDeArquivos))

	def start(self, quandoTermina):
		"""Filter every file and return the accumulated results.

		quandoTermina -- accepted for interface compatibility; not used here.

		Returns ``self.resultado``, a list of
		``(label, input_path, output_path)`` tuples, one per processed file.
		"""
		if self.pb:
			self.pb.show()

		# NOTE: an earlier six-step pipeline (Protan and Deutan filters,
		# three fuzzy multiplications and a matrix sum) was disabled in
		# favour of the single ``filtrarNovo`` pass below.
		for i, arquivo in enumerate(self.listaDeArquivos):
			caminho = str(arquivo)
			# Last path component is used as the display label.
			label = str(caminho.split('/')[-1])

			if self.pb:
				self.pb.setLabel('[1/1] Aplicando filtor unico em ' +str(label))
			filtro1 = FiltroDeImagem(debug=False)
			filtro1.carregarImg(caminho)
			filtro1.callBackPogresso(self.percentagem)
			im1 = filtro1.filtrarNovo(self.lms, self.equalizar, self.p, self.de, self.da)

			saida = "default_output/" + str(i) + ".bmp"
			im1.save(saida, "BMP")

			self.resultado.append((label, caminho, saida))

		return self.resultado

	def percentagem(self, x):
		"""Forward a progress value to the progress bar; return 0 without one."""
		if self.pb:
			return self.pb.setPercentagem(x)
		else:
			return 0
            f.close()
        except KeyboardInterrupt:
            args.MaxEvts = N_evts_processed
            break
        except:
            errors += 1
            print('[ERROR {}]'.format(errors), fname, 'failed')

        if args.MaxEvts > 0 and N_evts_processed >= args.MaxEvts:
            break

    if N_chunks > 1:
        flist = glob(outname.replace('.npy', '_*.npy'))
        print('Merging {} chunks...'.format(len(flist)))

        pb = ProgressBar(1 + len(flist))
        pb.show(0)
        for i, fname in enumerate(flist):
            pb.show(i + 1)
            dataset = np.concatenate((dataset, np.load(fname)))

        print('Cleaning chunks...')
        for fname in flist:
            os.system('rm ' + fname)

    print(
        'Saving dataset with {} entries from {} files: '.format(
            dataset.shape[0], i + 1), outname)
    np.save(outname, dataset)
    print('\n')