def load(self, fList):
    from Gaugi import load
    from Gaugi import csvStr2List, expandFolders, progressbar
    fList = csvStr2List(fList)
    fList = expandFolders(fList)
    from saphyra import TunedData_v1
    self._obj = TunedData_v1()
    for inputFile in progressbar(fList, len(fList),
                                 prefix="Reading tuned data collection...",
                                 logger=self._logger):
        raw = load(inputFile)
        # get the file version
        version = raw['__version']
        # the current file version
        if version == 1:
            obj = TunedData_v1.fromRawObj(raw)
            self._obj.merge(obj)
        else:
            # abort: this file version is not supported
            self._logger.fatal('File version (%d) not supported in (%s)', version, inputFile)
    # return the merged tuned-data collection (holds the keras models)
    return self._obj
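# Usage sketch for load() above (a sketch, not the project's documented API:
# the enclosing class name "TunedDataReader" and the file names are assumptions
# for illustration; load() accepts a csv string or a list of folders).
#
#   reader = TunedDataReader()
#   tuned  = reader.load('tunings/job.tuned.0.pic.gz, tunings/job.tuned.1.pic.gz')
#   # 'tuned' is the merged TunedData_v1 collection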
def __call__(self, inputFiles):
    obj = None
    for idx, f in progressbar(enumerate(inputFiles), len(inputFiles),
                              'Reading...: ', 60, logger=self._logger):
        #d = dict(np.load(f, allow_pickle=True))
        d = dict(load(f))
        obj = self.merge(d, obj, self._skip_these_keys) if obj else d
    return obj
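# Sketch of the expected call pattern for __call__ above (assumption: the
# enclosing class is the DataReader used by ReaderPool further down; the
# constructor argument and file names are illustrative). Each file is loaded
# as a dict and folded into the running 'obj' via merge(), skipping the keys
# listed in _skip_these_keys.
#
#   reader = DataReader(skip_these_keys=['etBins', 'etaBins'])
#   merged = reader(['chunk_0.npz', 'chunk_1.npz', 'chunk_2.npz'])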
def GetHistogramRootPaths(triggerList, removeInnefBefore=False, is_emulation=False, logger=None):
    plot_names = ['et', 'eta', 'mu']
    level_names = ['L1Calo', 'L2Calo', 'L2', 'EFCalo', 'HLT']
    levels_input = ['L1Calo', 'L1Calo', 'L1Calo', 'L2', 'EFCalo']
    from Gaugi import progressbar
    paths = []
    keys = []
    entries = len(triggerList)
    step = int(entries / 100) if int(entries / 100) > 0 else 1
    for trigItem in progressbar(triggerList, entries, step=step, logger=logger,
                                prefix='Making paths...'):
        isL1 = True if trigItem.startswith('L1_') else False
        these_level_names = ['L1Calo'] if isL1 else level_names
        ### Retrieve all paths
        for idx, level in enumerate(these_level_names):
            for histname in plot_names:
                if not isL1 and 'et' == histname and is_high_et(trigItem):
                    histname = 'highet'
                if is_emulation:
                    histpath = 'HLT/Egamma/Expert/{TRIGGER}/Emulation/{LEVEL}/{HIST}'
                else:
                    histpath = 'HLT/Egamma/Expert/{TRIGGER}/Efficiency/{LEVEL}/{HIST}'
                paths.append(histpath.format(TRIGGER=trigItem, HIST='match_' + histname, LEVEL=level))
                if removeInnefBefore:
                    paths.append(histpath.format(TRIGGER=trigItem,
                                                 HIST=('match_' + histname if idx != 0 else histname),
                                                 LEVEL=levels_input[idx]))
                else:
                    paths.append(histpath.format(TRIGGER=trigItem, HIST=histname, LEVEL='L1Calo'))
                if 'highet' == histname:
                    histname = 'et'
                keys.append(trigItem + '_' + level + '_match_' + histname)
                keys.append(trigItem + '_' + level + '_' + histname)
    # Loop over triggers
    return paths, keys
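# Example call (trigger names are illustrative; note that is_high_et() must be
# in scope). For each trigger the function emits a 'match_<hist>' numerator
# path per level plus a denominator path, and the matching lookup keys.
#
#   paths, keys = GetHistogramRootPaths(['HLT_e28_lhtight_nod0_ivarloose', 'L1_EM22VHI'])
#   # e.g. paths[0] -> 'HLT/Egamma/Expert/HLT_e28_lhtight_nod0_ivarloose/Efficiency/L1Calo/match_et'
#   #      keys[0]  -> 'HLT_e28_lhtight_nod0_ivarloose_L1Calo_match_et'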
def fill(self, generator, paths):
    hists = {branch: [[None for _ in range(len(self.__etabins) - 1)]
                      for __ in range(len(self.__etbins) - 1)]
             for branch in self.__hist.keys()}
    # Prepare all histograms
    for et_bin, eta_bin in progressbar(product(range(len(self.__etbins) - 1),
                                               range(len(self.__etabins) - 1)),
                                       (len(self.__etbins) - 1) * (len(self.__etabins) - 1),
                                       prefix="Reading... "):
        data, features = generator(paths[et_bin][eta_bin])
        for branch, hist in self.__hist.items():
            th1 = TH1F(branch + '_et' + str(et_bin) + '_eta' + str(eta_bin), "",
                       hist['bins'], hist['xmin'], hist['xmax'])
            values = data[:, features.index(branch)]
            w = array.array('d', np.ones_like(values))
            th1.FillN(len(values), array.array('d', values.tolist()), w)
            hists[branch][et_bin][eta_bin] = th1
    return hists
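# The 'generator' argument above is any callable returning (data, features):
# a (n_events, n_features) array plus the ordered list of branch names used to
# locate each column. A minimal runnable sketch, assuming the inputs are plain
# numpy .npz archives (an assumption; the real file format may differ):
import numpy as np

def npz_generator(path):
    # 'data' and 'features' array names are illustrative
    archive = np.load(path, allow_pickle=True)
    return archive['data'], list(archive['features'])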
def create(self, volume, taskname, dataFile, configFile, secondaryDS,
           execCommand, queue='gpu', bypass=False, dry_run=False, force_dummy=False):

    # check task policy (user.username)
    if taskname.split('.')[0] != 'user':
        return (StatusCode.FATAL, 'The task name must start with user.$USER.taskname.')

    # check task policy (username must exist in the database)
    username = taskname.split('.')[1]
    if not username in [user.getUserName() for user in self.__db.getAllUsers()]:
        return (StatusCode.FATAL, 'The username does not exist in the database.')

    if self.__db.getUser(username).getTask(taskname) is not None:
        return (StatusCode.FATAL, "The task already exists in the database. Abort.")

    #
    # Check that all datasets are registered in the database
    #
    if self.__db.getDataset(username, dataFile) is None:
        return (StatusCode.FATAL,
                "The file (%s) does not exist in the database. It must be registered first." % dataFile)

    if self.__db.getDataset(username, configFile) is None:
        return (StatusCode.FATAL,
                "The config file (%s) does not exist in the database. It must be registered first." % configFile)

    secondaryDS = eval(secondaryDS)
    for key in secondaryDS.keys():
        if self.__db.getDataset(username, secondaryDS[key]) is None:
            return (StatusCode.FATAL,
                    "The secondary data file (%s) does not exist in the database. It must be registered first." % secondaryDS[key])

    #
    # check exec command policy
    #
    if (not '%DATA' in execCommand):
        return (StatusCode.FATAL,
                "The exec command must include '%DATA' in the string. It will be substituted by the dataFile at start.")
    if (not '%IN' in execCommand):
        return (StatusCode.FATAL,
                "The exec command must include '%IN' in the string. It will be substituted by the configFile at start.")
    if not '%OUT' in execCommand:
        return (StatusCode.FATAL,
                "The exec command must include '%OUT' in the string. It will be substituted by the outputFile at start.")
    for key in secondaryDS.keys():
        if not key in execCommand:
            return (StatusCode.FATAL,
                    "The exec command must include %s in the string. It will be substituted by %s at start." % (key, secondaryDS[key]))

    #
    # Create the output dir
    #
    outputFile = volume + '/' + taskname
    if os.path.exists(outputFile):
        MSG_WARNING(self, "The task dir already exists in the storage. Beware!")
    else:
        # create the task dir
        MSG_INFO(self, "Creating the task dir in %s", outputFile)
        os.system('mkdir -p %s ' % (outputFile))

    #
    # create the task in the database
    #
    if not dry_run:
        try:
            user = self.__db.getUser(username)
            task = self.__db.createTask(user, taskname, configFile, dataFile, outputFile, "",
                                        secondaryDataPath=secondaryDS,
                                        templateExecArgs=execCommand,
                                        queueName=queue)
            task.setSignal(Signal.WAITING)
            task.setStatus(Status.HOLD)
            configFiles = self.__db.getDataset(username, configFile).getAllFiles()
            _dataFile = self.__db.getDataset(username, dataFile).getAllFiles()[0].getPath()
            _secondaryDS = {}
            for key in secondaryDS.keys():
                _secondaryDS[key] = self.__db.getDataset(username, secondaryDS[key]).getAllFiles()[0].getPath()
            offset_job_id = self.__db.generateId(Job)
            for idx, file in progressbar(enumerate(configFiles), len(configFiles), prefix='Creating...'):
                _outputFile = outputFile + '/job_configId_%d' % idx
                _configFile = file.getPath()
                command = execCommand
                command = command.replace('%DATA', _dataFile)
                command = command.replace('%IN', _configFile)
                command = command.replace('%OUT', _outputFile)
                for key in _secondaryDS:
                    command = command.replace(key, _secondaryDS[key])
                job = self.__db.createJob(task, _configFile, idx,
                                          execArgs=command, priority=-1,
                                          id=offset_job_id + idx)
                job.setStatus('assigned' if bypass else 'registered')
            task.setStatus('registered')
            self.__db.commit()
        except Exception as e:
            MSG_ERROR(self, e)
            return (StatusCode.FATAL, "Unknown error.")

    return (StatusCode.SUCCESS, "Successfully created.")
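# Usage sketch for create() (all names below are illustrative; only the
# 'user.$USER.taskname' policy and the %DATA/%IN/%OUT placeholders are enforced
# by the checks above). Note that secondaryDS is passed as a string and
# eval()'d into a dict mapping placeholder -> dataset name.
#
#   status, answer = parser.create(
#       volume      = '/mnt/cluster-volume',
#       taskname    = 'user.jodafons.my_tuning.v1',
#       dataFile    = 'user.jodafons.my_data.npz',
#       configFile  = 'user.jodafons.my_configs',
#       secondaryDS = "{'%REF': 'user.jodafons.my_references'}",
#       execCommand = 'job_tuning.py -d %DATA -c %IN -o %OUT -r %REF')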
    if type(group) is tuple:
        for t in group:
            triggerList.append(t)
    else:
        triggerList.append(group)

### Making all paths
entries = len(triggerList)
step = int(entries / 100) if int(entries / 100) > 0 else 1
paths_test = []
paths_ref = []
keys = []
for trigItem in progressbar(triggerList, entries, step=step,
                            prefix='Making paths...', logger=mainLogger):
    #mainLogger.info(trigItem)
    isL1 = True if trigItem.startswith('L1_') else False
    these_level_names = ['L1Calo'] if isL1 else level_names
    ### Retrieve all paths
    for level in these_level_names:
        for histname in plot_names:
            if not isL1 and 'et' == histname and is_high_et(trigItem):
                histname = 'highet'
            histpath = 'HLT/Egamma/Expert/{TRIGGER}/{CORE}/{LEVEL}/{HIST}'
            # Ref
            if args.reference:
def __call__(self, sgnFileList, bkgFileList, ofile, dump_csv=False):

    # get all keys
    paths = expandFolders(sgnFileList)
    jobIDs = sorted(list(set([self._pat.match(f).group('binID')
                              for f in paths if self._pat.match(f) is not None])))
    npatterns = {}
    etBins = None
    etaBins = None
    debug = False

    for id in jobIDs:
        sgnSubFileList = []
        for f in expandFolders(sgnFileList):
            if id in f:
                sgnSubFileList.append(f)
        if debug:
            sgnSubFileList = sgnSubFileList[0:11]

        reader = ReaderPool(sgnSubFileList, DataReader(self._skip_these_keys),
                            self._nFilesPerJob, self._nthreads)
        MSG_INFO(self, "Reading signal files...")
        outputs = reader()
        sgnDict = outputs.pop()
        if len(outputs) > 0:
            for from_dict in progressbar(outputs, len(outputs),
                                         'Merging signal files: ', 60,
                                         logger=self._logger):
                DataReader.merge(from_dict, sgnDict, self._skip_these_keys)

        bkgSubFileList = []
        for f in expandFolders(bkgFileList):
            if id in f:
                bkgSubFileList.append(f)
        if debug:
            bkgSubFileList = bkgSubFileList[0:11]

        reader = ReaderPool(bkgSubFileList, DataReader(self._skip_these_keys),
                            self._nFilesPerJob, self._nthreads)
        MSG_INFO(self, "Reading background files...")
        outputs = reader()
        bkgDict = outputs.pop()
        if len(outputs) > 0:
            for from_dict in progressbar(outputs, len(outputs),
                                         'Merging background files: ', 60,
                                         logger=self._logger):
                DataReader.merge(from_dict, bkgDict, self._skip_these_keys)

        # Loop over regions
        d = {
            "features": sgnDict["features"],
            "etBins": sgnDict["etBins"],
            "etaBins": sgnDict["etaBins"],
            "etBinIdx": sgnDict["etBinIdx"],
            "etaBinIdx": sgnDict["etaBinIdx"],
        }

        #if not etBins: etBins = sgnDict["etBins"]
        etBins = sgnDict["etBins"]
        #if not etaBins: etaBins = sgnDict["etaBins"]
        etaBins = sgnDict["etaBins"]

        d['data'] = np.concatenate((sgnDict['pattern_' + id],
                                    bkgDict['pattern_' + id])).astype('float32')
        d['target'] = np.concatenate((np.ones((sgnDict['pattern_' + id].shape[0],)),
                                      np.zeros((bkgDict['pattern_' + id].shape[0],)))).astype('int16')

        if sgnDict['pattern_' + id] is not None:
            MSG_INFO(self, 'sgnData_%s : (%d, %d)', id,
                     sgnDict['pattern_' + id].shape[0],
                     sgnDict['pattern_' + id].shape[1])
        else:
            MSG_INFO(self, 'sgnData_%s : empty', id)

        if bkgDict['pattern_' + id] is not None:
            MSG_INFO(self, 'bkgData_%s : (%d, %d)', id,
                     bkgDict['pattern_' + id].shape[0],
                     bkgDict['pattern_' + id].shape[1])
        else:
            MSG_INFO(self, 'bkgData_%s : empty', id)

        MSG_INFO(self, "Saving: %s", ofile + '_' + id)
        npatterns['sgnPattern_' + id] = int(sgnDict['pattern_' + id].shape[0])
        npatterns['bkgPattern_' + id] = int(bkgDict['pattern_' + id].shape[0])
        save(d, ofile + '_' + id, protocol='savez_compressed')

        if dump_csv:
            # Save as csv for pandas
            dd = {}
            for ikey, key in enumerate(d['features']):
                dd[key] = d['data'][:, ikey]
            dd['target'] = d['target']
            df = pd.DataFrame(dd)
            df.to_csv(ofile + '_' + id + '.csv')

    self.plotNSamples(npatterns, etBins, etaBins)
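# The jobIDs extraction above relies on self._pat exposing a named group
# 'binID'. A compatible, runnable sketch (this exact pattern and file name are
# assumptions, not the class's actual ones):
import re

_pat = re.compile(r'.+(?P<binID>et\d+_eta\d+)')
match = _pat.match('sample.mc16.el.probes_et2_eta0.npz')
print(match.group('binID'))  # -> 'et2_eta0'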
def fill(self, data_paths, models, reference_values, output_dir,
         verbose=False, except_these_bins=[]):

    from Gaugi.monet.AtlasStyle import SetAtlasStyle
    SetAtlasStyle()

    # create output directory
    localpath = os.getcwd() + '/' + output_dir
    try:
        if not os.path.exists(localpath):
            os.makedirs(localpath)
    except:
        MSG_WARNING(self, 'The directory %s already exists.', localpath)

    # make template dataframe
    dataframe = collections.OrderedDict({
        'name': [],
        'et_bin': [],
        'eta_bin': [],
        'reference_signal_passed': [],
        'reference_signal_total': [],
        'reference_signal_eff': [],
        'reference_background_passed': [],
        'reference_background_total': [],
        'reference_background_eff': [],
        'signal_passed': [],
        'signal_total': [],
        'signal_eff': [],
        'background_passed': [],
        'background_total': [],
        'background_eff': [],
        'signal_corrected_passed': [],
        'signal_corrected_total': [],
        'signal_corrected_eff': [],
        'background_corrected_passed': [],
        'background_corrected_total': [],
        'background_corrected_eff': [],
    })

    # reduce verbosity
    def add(key, value):
        dataframe[key].append(value)

    # Loop over all et/eta bins
    for et_bin, eta_bin in progressbar(product(range(len(self.__etbins) - 1),
                                               range(len(self.__etabins) - 1)),
                                       (len(self.__etbins) - 1) * (len(self.__etabins) - 1),
                                       prefix="Fitting... "):
        path = data_paths[et_bin][eta_bin]
        data, target, avgmu = self.__generator(path)
        references = reference_values[et_bin][eta_bin]
        model = models[et_bin][eta_bin]
        model['thresholds'] = {}

        # Get the predictions
        outputs = model['model'].predict(data, batch_size=1024, verbose=verbose).flatten()

        # Get all limits using the output
        xmin = self.__xmin if self.__xmin else int(np.percentile(outputs, self.__xmin_percentage))
        xmax = self.__xmax if self.__xmax else int(np.percentile(outputs, self.__xmax_percentage))
        MSG_DEBUG(self, 'Setting xmin to %1.2f and xmax to %1.2f', xmin, xmax)
        xbins = int((xmax - xmin) / self.__x_bin_size)

        # Fill 2D histograms
        from ROOT import TH2F
        import array
        if type(self.__y_bin_size) is float:
            ybins = int((self.__ymax - self.__ymin) / self.__y_bin_size)
            th2_signal = TH2F('th2_signal_et%d_eta%d' % (et_bin, eta_bin), '',
                              xbins, xmin, xmax, ybins, self.__ymin, self.__ymax)
            th2_background = TH2F('th2_background_et%d_eta%d' % (et_bin, eta_bin), '',
                                  xbins, xmin, xmax, ybins, self.__ymin, self.__ymax)
        else:
            y_bins_edges = self.__y_bin_size[et_bin][eta_bin]
            th2_signal = TH2F('th2_signal_et%d_eta%d' % (et_bin, eta_bin), '',
                              xbins, xmin, xmax,
                              len(y_bins_edges) - 1, array.array('d', y_bins_edges))
            th2_background = TH2F('th2_background_et%d_eta%d' % (et_bin, eta_bin), '',
                                  xbins, xmin, xmax,
                                  len(y_bins_edges) - 1, array.array('d', y_bins_edges))

        # fill hists
        w = array.array('d', np.ones_like(outputs[target == 1]))
        th2_signal.FillN(len(outputs[target == 1]),
                         array.array('d', outputs[target == 1].tolist()),
                         array.array('d', avgmu[target == 1].tolist()), w)
        w = array.array('d', np.ones_like(outputs[target == 0]))
        th2_background.FillN(len(outputs[target == 0]),
                             array.array('d', outputs[target != 1].tolist()),
                             array.array('d', avgmu[target != 1].tolist()), w)

        MSG_DEBUG(self, 'Applying linear correction to et%d_eta%d bin.', et_bin, eta_bin)

        for name, ref in references.items():

            if ref['pd_epsilon'] == 0.0:
                ref_value = ref['pd']
            else:
                add_fac = (1 - ref['pd']) * ref['pd_epsilon']
                ref_value = ref['pd'] + add_fac
                MSG_INFO(self, 'Add %1.2f %% in reference pd -> new reference pd: %1.2f',
                         ref['pd_epsilon'], add_fac)

            false_alarm = 1.0
            while false_alarm > self.__false_alarm_limit:

                # Get the threshold when we do not apply any linear correction
                threshold, _ = self.find_threshold(th2_signal.ProjectionX(), ref_value)

                # Get the efficiency without linear adjustment
                #signal_eff, signal_num, signal_den = self.calculate_num_and_den_from_hist(th2_signal, 0.0, threshold)
                signal_eff, signal_num, signal_den = self.calculate_num_and_den_from_output(
                    outputs[target == 1], avgmu[target == 1], 0.0, threshold)
                #background_eff, background_num, background_den = self.calculate_num_and_den_from_hist(th2_background, 0.0, threshold)
                background_eff, background_num, background_den = self.calculate_num_and_den_from_output(
                    outputs[target != 1], avgmu[target != 1], 0.0, threshold)

                # Apply the linear adjustment and fix it in case of positive slope
                slope, offset, x_points, y_points, error_points = self.fit(th2_signal, ref_value)

                apply_fit = True
                # case 1: the user asked for this bin not to be corrected
                for (this_et_bin, this_eta_bin) in except_these_bins:
                    if et_bin == this_et_bin and eta_bin == this_eta_bin:
                        apply_fit = False
                # case 2: positive slope
                if slope > 0:
                    MSG_WARNING(self, "Retrieved positive angular factor of the linear correction, setting to 0!")
                    apply_fit = False

                slope = slope if apply_fit else 0
                offset = offset if apply_fit else threshold

                # Get the efficiency with linear adjustment
                #signal_corrected_eff, signal_corrected_num, signal_corrected_den = self.calculate_num_and_den_from_hist(th2_signal, slope, offset)
                signal_corrected_eff, signal_corrected_num, signal_corrected_den = \
                    self.calculate_num_and_den_from_output(outputs[target == 1],
                                                           avgmu[target == 1], slope, offset)
                #background_corrected_eff, background_corrected_num, background_corrected_den = self.calculate_num_and_den_from_hist(th2_background, slope, offset)
                background_corrected_eff, background_corrected_num, background_corrected_den = \
                    self.calculate_num_and_den_from_output(outputs[target != 1],
                                                           avgmu[target != 1], slope, offset)

                # get the passed/total
                false_alarm = background_corrected_num / background_corrected_den
                if false_alarm > self.__false_alarm_limit:
                    # Reduce the reference value by hand
                    ref_value -= 0.0025

            MSG_DEBUG(self, 'Reference name: %s, target: %1.2f%%', name, ref['pd'] * 100)
            MSG_DEBUG(self, 'Signal with correction is: %1.2f%%',
                      signal_corrected_num / signal_corrected_den * 100)
            MSG_DEBUG(self, 'Background with correction is: %1.2f%%',
                      background_corrected_num / background_corrected_den * 100)

            # decorate the model array
            model['thresholds'][name] = {
                'offset': offset,
                'slope': slope,
                'threshold': threshold,
                'reference_pd': ref['pd'],
                'reference_fa': ref['fa'],
            }

            paths = []
            # prepare 2D histograms
            info = models[et_bin][eta_bin]['thresholds'][name]
            outname = localpath + '/th2_signal_%s_et%d_eta%d' % (name, et_bin, eta_bin)
            output = self.plot_2d_hist(th2_signal, slope, offset, x_points, y_points,
                                       error_points, outname, xlabel='<#mu>',
                                       etBinIdx=et_bin, etaBinIdx=eta_bin,
                                       etBins=self.__etbins, etaBins=self.__etabins,
                                       plot_stage=self.__plot_stage)
            paths.append(output)
            outname = localpath + '/th2_background_%s_et%d_eta%d' % (name, et_bin, eta_bin)
            output = self.plot_2d_hist(th2_background, slope, offset, x_points, y_points,
                                       error_points, outname, xlabel='<#mu>',
                                       etBinIdx=et_bin, etaBinIdx=eta_bin,
                                       etBins=self.__etbins, etaBins=self.__etabins,
                                       plot_stage=self.__plot_stage)
            paths.append(output)
            model['thresholds'][name]['figures'] = paths

            # et/eta bin information
            add('name', name)
            add('et_bin', et_bin)
            add('eta_bin', eta_bin)
            # reference values
            add('reference_signal_passed', int(ref['pd'] * signal_den))
            add('reference_signal_total', signal_den)
            add('reference_signal_eff', ref['pd'])
            add('reference_background_passed', int(ref['fa'] * background_den))
            add('reference_background_total', background_den)
            add('reference_background_eff', ref['fa'])
            # non-corrected values
            add('signal_passed', signal_num)
            add('signal_total', signal_den)
            add('signal_eff', signal_num / signal_den)
            add('background_passed', background_num)
            add('background_total', background_den)
            add('background_eff', background_num / background_den)
            # corrected values
            add('signal_corrected_passed', signal_corrected_num)
            add('signal_corrected_total', signal_corrected_den)
            add('signal_corrected_eff', signal_corrected_num / signal_corrected_den)
            add('background_corrected_passed', background_corrected_num)
            add('background_corrected_total', background_corrected_den)
            add('background_corrected_eff', background_corrected_num / background_corrected_den)

    # convert to pandas dataframe
    self.__table = pandas.DataFrame(dataframe)
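# After fill() the summary lives in a pandas DataFrame (self.__table); a quick
# inspection sketch, assuming some accessor exposes it (the 'table()' name is
# an assumption):
#
#   table = corrector.table()
#   print(table[['name', 'et_bin', 'eta_bin',
#                'signal_corrected_eff', 'background_corrected_eff']].head())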
def plot(self, dirnames, pdfoutputs, pdftitles, runLabel='', doPDF=True):

    SetAtlasStyle()
    beamer_plots = {}
    global tobject_collector
    basepath = self.getProperty("Basepath")
    etBins = self.getProperty("EtBinningValues")
    etaBins = self.getProperty("EtaBinningValues")

    for idx, feat in enumerate(self.__selectionFeatures):

        dirname = os.getcwd() + '/' + dirnames[idx]
        mkdir_p(dirname)
        # hold selection name
        selection_name = feat.name_a() + '_Vs_' + feat.name_b()
        # For beamer...
        if not selection_name in beamer_plots.keys():
            beamer_plots[selection_name] = {}
            beamer_plots[selection_name]['integrated'] = {}

        ### Plot binning plots
        if (len(etBins) * len(etaBins)) > 1:
            for etBinIdx, etaBinIdx in progressbar(product(range(len(etBins) - 1),
                                                           range(len(etaBins) - 1)),
                                                   (len(etBins) - 1) * (len(etaBins) - 1),
                                                   prefix="Plotting... ",
                                                   logger=self._logger):
                # hold binning name
                binning_name = ('et%d_eta%d') % (etBinIdx, etaBinIdx)
                # for beamer...
                if not binning_name in beamer_plots[selection_name].keys():
                    beamer_plots[selection_name][binning_name] = {}

                ### loop over standard quantities
                for key in standardQuantitiesNBins.keys():
                    outname = dirname + '/' + selection_name.replace('_Vs_', '_') + '_' + key + '_' + binning_name
                    out = PlotQuantities(basepath + '/' + selection_name + '/' + binning_name,
                                         key, outname, etidx=etBinIdx, etaidx=etaBinIdx,
                                         xlabel=electronQuantities[key], divide='b',
                                         runLabel=runLabel)
                    beamer_plots[selection_name][binning_name][key] = out
                    #del tobject_collector[:]

                ### loop over info quantities
                for key in basicInfoQuantities.keys():
                    outname = dirname + '/' + selection_name.replace('_Vs_', '_') + '_' + key + '_' + binning_name
                    out = PlotQuantities(basepath + '/' + selection_name + '/' + binning_name,
                                         key, outname, etidx=etBinIdx, etaidx=etaBinIdx,
                                         xlabel=basicInfoQuantities[key], divide='b',
                                         runLabel=runLabel)
                    beamer_plots[selection_name][binning_name][key] = out
                    #del tobject_collector[:]

                beamer_plots[selection_name][binning_name]['statistics'] = \
                    GetStatistics(basepath + '/' + selection_name + '/' + binning_name,
                                  'avgmu', etidx=etBinIdx, etaidx=etaBinIdx)

        #### Plot integrated histograms
        ### loop over standard quantities
        for key in standardQuantitiesNBins.keys():
            outname = dirname + '/' + selection_name.replace('_Vs_', '_') + '_' + key
            out = PlotQuantities(basepath + '/' + selection_name, key, outname,
                                 xlabel=electronQuantities[key], divide='b',
                                 runLabel=runLabel, addbinlines=True)
            beamer_plots[selection_name]['integrated'][key] = out
            tobject_collector = []
            gc.collect()

        ### loop over info quantities
        for key in basicInfoQuantities.keys():
            outname = dirname + '/' + selection_name.replace('_Vs_', '_') + '_' + key
            out = PlotQuantities(basepath + '/' + selection_name, key, outname,
                                 xlabel=basicInfoQuantities[key], divide='b',
                                 runLabel=runLabel, addbinlines=True)
            beamer_plots[selection_name]['integrated'][key] = out
            tobject_collector = []
            gc.collect()

        beamer_plots[selection_name]['integrated']['statistics'] = \
            GetStatistics(basepath + '/' + selection_name, 'avgmu')
def plot(self, dirnames, pdfoutputs, pdftitles, runLabel='', doPDF=True, legends=None):

    legends = ['Both Approved', 'Ringer Rejected',
               'Ringer Approved', 'Both Rejected'] if legends is None else legends
    SetAtlasStyle()
    beamer_plots = {}
    global tobject_collector
    basepath = self.getProperty("Basepath")
    etBins = self.getProperty("EtBinningValues")
    etaBins = self.getProperty("EtaBinningValues")
    sg = self.getStoreGateSvc()

    for idx, feat in enumerate(self.__quadrantFeatures):

        dirname = os.getcwd() + '/' + dirnames[idx]
        mkdir_p(dirname)
        # hold quadrant name
        quadrant_name = feat.name_a() + '_Vs_' + feat.name_b()
        # For beamer...
        if not quadrant_name in beamer_plots.keys():
            beamer_plots[quadrant_name] = {}
            beamer_plots[quadrant_name]['integrated'] = {}

        ### Plot binning plots
        if (len(etBins) * len(etaBins)) > 1:
            for etBinIdx, etaBinIdx in progressbar(product(range(len(etBins) - 1),
                                                           range(len(etaBins) - 1)),
                                                   (len(etBins) - 1) * (len(etaBins) - 1),
                                                   prefix="Plotting... ",
                                                   logger=self._logger):
                # hold binning name
                binning_name = ('et%d_eta%d') % (etBinIdx, etaBinIdx)
                # for beamer...
                if not binning_name in beamer_plots[quadrant_name].keys():
                    beamer_plots[quadrant_name][binning_name] = {}

                ### loop over standard quantities
                for key in standardQuantitiesNBins.keys():
                    outname = dirname + '/' + quadrant_name.replace('_Vs_', '_') + '_' + key + '_' + binning_name
                    out = PlotQuantities(sg, basepath + '/' + quadrant_name + '/' + binning_name,
                                         key, outname, legends, etBins=etBins, etaBins=etaBins,
                                         etidx=etBinIdx, etaidx=etaBinIdx,
                                         xlabel=electronQuantities[key], divide='b',
                                         runLabel=runLabel)
                    beamer_plots[quadrant_name][binning_name][key] = out
                    #del tobject_collector[:]

                ### loop over info quantities
                for key in basicInfoQuantities.keys():
                    outname = dirname + '/' + quadrant_name.replace('_Vs_', '_') + '_' + key + '_' + binning_name
                    out = PlotQuantities(sg, basepath + '/' + quadrant_name + '/' + binning_name,
                                         key, outname, legends, etBins=etBins, etaBins=etaBins,
                                         etidx=etBinIdx, etaidx=etaBinIdx,
                                         xlabel=basicInfoQuantities[key], divide='b',
                                         runLabel=runLabel)
                    beamer_plots[quadrant_name][binning_name][key] = out
                    #del tobject_collector[:]

                beamer_plots[quadrant_name][binning_name]['statistics'] = \
                    GetStatistics(sg, basepath + '/' + quadrant_name + '/' + binning_name,
                                  'avgmu', etidx=etBinIdx, etaidx=etaBinIdx,
                                  etBins=etBins, etaBins=etaBins)

        #### Plot integrated histograms
        ### loop over standard quantities
        for key in standardQuantitiesNBins.keys():
            outname = dirname + '/' + quadrant_name.replace('_Vs_', '_') + '_' + key
            out = PlotQuantities(sg, basepath + '/' + quadrant_name, key, outname, legends,
                                 xlabel=electronQuantities[key], divide='b',
                                 runLabel=runLabel, addbinlines=True,
                                 etBins=etBins, etaBins=etaBins)
            beamer_plots[quadrant_name]['integrated'][key] = out
            tobject_collector = []
            gc.collect()

        ### loop over info quantities
        for key in basicInfoQuantities.keys():
            outname = dirname + '/' + quadrant_name.replace('_Vs_', '_') + '_' + key
            out = PlotQuantities(sg, basepath + '/' + quadrant_name, key, outname, legends,
                                 xlabel=basicInfoQuantities[key], divide='b',
                                 runLabel=runLabel, addbinlines=True,
                                 etBins=etBins, etaBins=etaBins)
            beamer_plots[quadrant_name]['integrated'][key] = out
            tobject_collector = []
            gc.collect()

        beamer_plots[quadrant_name]['integrated']['statistics'] = \
            GetStatistics(sg, basepath + '/' + quadrant_name, 'avgmu',
                          etBins=etBins, etaBins=etaBins)

    if doPDF:

        ### Make LaTeX et/eta labels
        etbins_str = []
        etabins_str = []
        for etBinIdx in range(len(etBins) - 1):
            etbin = (etBins[etBinIdx], etBins[etBinIdx + 1])
            if etbin[1] > 100:
                etbins_str.append(r'$E_{T}\text{[GeV]} > %d$' % etbin[0])
            else:
                etbins_str.append(r'$%d < E_{T} \text{[GeV]} < %d$' % etbin)
        for etaBinIdx in range(len(etaBins) - 1):
            etabin = (etaBins[etaBinIdx], etaBins[etaBinIdx + 1])
            etabins_str.append(r'$%.2f<\eta<%.2f$' % etabin)

        for slideIdx, feat in enumerate(self.__quadrantFeatures):
            with BeamerTexReportTemplate1(theme='Berlin', _toPDF=True,
                                          title=pdftitles[slideIdx],
                                          outputFile=pdfoutputs[slideIdx],
                                          font='structurebold'):
                # hold quadrant name
                quadrant_name = feat.name_a() + '_Vs_' + feat.name_b()
                section_name = feat.name_a() + ' Vs ' + feat.name_b()

                #with BeamerSection( name = 'x' ):
                with BeamerSection(name='Integrated Quantities'):
                    # prepare files for basic quantities
                    figures = []
                    for key in ['et', 'eta', 'phi', 'avgmu', 'nvtx']:
                        figures.append(beamer_plots[quadrant_name]['integrated'][key])
                    BeamerMultiFigureSlide(title='Basic Quantities',
                                           paths=figures,
                                           nDivWidth=3,   # x
                                           nDivHeight=2,  # y
                                           texts=None,
                                           fortran=False,
                                           usedHeight=0.6,
                                           usedWidth=0.9)
                    # prepare files for standard calo quantities
                    figures = []
                    for key in ['eratio', 'rhad', 'reta', 'rphi', 'f1', 'f3',
                                'wtots1', 'weta2']:
                        figures.append(beamer_plots[quadrant_name]['integrated'][key])
                    BeamerMultiFigureSlide(title='Standard Calo Quantities',
                                           paths=figures,
                                           nDivWidth=4,   # x
                                           nDivHeight=2,  # y
                                           texts=None,
                                           fortran=False,
                                           usedHeight=0.6,
                                           usedWidth=0.9)
                    # prepare files for standard track quantities
                    figures = []
                    for key in ['d0significance', 'trackd0pvunbiased',
                                'deltaPhiRescaled2', 'eProbabilityHT', 'TRT_PID',
                                'deltaEta1', 'DeltaPOverP']:
                        figures.append(beamer_plots[quadrant_name]['integrated'][key])
                    BeamerMultiFigureSlide(title='Standard Track Quantities',
                                           paths=figures,
                                           nDivWidth=4,   # x
                                           nDivHeight=2,  # y
                                           texts=None,
                                           fortran=False,
                                           usedHeight=0.6,
                                           usedWidth=0.9)

                section = ['Basic Quantity'] * 2
                section.extend(['Standard Calo Quantity'] * 8)
                section.extend(['Standard Track Quantity'] * 7)
                section.extend(['Likelihood Discriminant', 'Ringer Neural Discriminant'])

                for idx, key in enumerate(['avgmu', 'nvtx', 'eratio', 'rhad', 'reta',
                                           'rphi', 'f1', 'f3', 'wtots1', 'weta2',
                                           'd0significance', 'trackd0pvunbiased',
                                           'deltaPhiRescaled2', 'eProbabilityHT',
                                           'TRT_PID', 'deltaEta1', 'DeltaPOverP',
                                           #'lhOutput', 'nnOutput'
                                           ]):
                    with BeamerSection(name=key.replace('_', '\_')):
                        figures = []
                        binning_name_list = []
                        for etBinIdx, etaBinIdx in product(range(len(etBins) - 1),
                                                           range(len(etaBins) - 1)):
                            binning_name_list.append(('et%d_eta%d') % (etBinIdx, etaBinIdx))
                        while len(binning_name_list) > 0:
                            figures = []
                            if len(binning_name_list) > 9:
                                for _ in range(9):
                                    binning_name = binning_name_list.pop(0)
                                    figures.append(beamer_plots[quadrant_name][binning_name][key])
                            else:
                                for _ in range(len(binning_name_list)):
                                    binning_name = binning_name_list.pop(0)
                                    figures.append(beamer_plots[quadrant_name][binning_name][key])
                            BeamerMultiFigureSlide(title=section[idx] + ' (' + key.replace('_', '\_') + ')',
                                                   paths=figures,
                                                   nDivWidth=4,   # x
                                                   nDivHeight=3,  # y
                                                   texts=None,
                                                   fortran=False,
                                                   usedHeight=0.7,
                                                   usedWidth=0.8)

                with BeamerSection(name='Statistics'):
                    ### Prepare tables
                    lines1 = []
                    lines1 += [HLine(_contextManaged=False)]
                    lines1 += [HLine(_contextManaged=False)]
                    #lines1 += [ TableLine( columns = ['kinematic region'] + reduce(lambda x,y: x+y,[ [r'\multicol{4}{*}{'+s+'}','','',''] for s in etbins_str]), _contextManaged = False ) ]
                    lines1 += [TableLine(columns=['kinematic region'] +
                                         reduce(lambda x, y: x + y, [[s, '', '', ''] for s in etbins_str]),
                                         _contextManaged=False)]
                    lines1 += [HLine(_contextManaged=False)]
                    #lines1 += [ TableLine( columns = ['Det. Region'] + reduce(lambda x,y: x+y,[['a','b','c','d'] for _ in etbins_str]), _contextManaged = False ) ]
                    lines1 += [TableLine(columns=['Det. Region'] +
                                         reduce(lambda x, y: x + y,
                                                [[r'$Q_{ij}$', r'$\rho_{ij}$', r'$\kappa_{P}$', r'$dis_{ij}$']
                                                 for _ in etbins_str]),
                                         _contextManaged=False)]
                    lines1 += [HLine(_contextManaged=False)]
                    for etaBinIdx in range(len(etaBins) - 1):
                        str_values = []
                        for etBinIdx in range(len(etBins) - 1):
                            binning_name = ('et%d_eta%d') % (etBinIdx, etaBinIdx)
                            stats = beamer_plots[quadrant_name][binning_name]['statistics']
                            str_values += ['%1.2f' % stats['Qij'],
                                           '%1.2f' % stats['Pij'],
                                           '%1.2f' % stats['Kp'],
                                           '%1.2f' % stats['dis_ij']]
                        lines1 += [TableLine(columns=[etabins_str[etaBinIdx]] + str_values,
                                             _contextManaged=False)]
                    lines1 += [HLine(_contextManaged=False)]
                    lines1 += [HLine(_contextManaged=False)]

                    with BeamerSlide(title="The General Statistics"):
                        with Table(caption='The statistics pair-wise values.') as table:
                            with ResizeBox(size=0.9) as rb:
                                with Tabular(columns='|l|' + 'cccc|' * len(etbins_str)) as tabular:
                                    tabular = tabular
                                    for line in lines1:
                                        if isinstance(line, TableLine):
                                            tabular += line
                                        else:
                                            TableLine(line, rounding=None)
def fill(self, path, tag):
    '''
    This method fills the information dictionary and converts it into a pandas DataFrame.

    Arguments:
    - path: the path to the tuned files;
    - tag: the training tag used.
    '''
    paths = expand_folders(path)
    MSG_INFO(self, "Reading files for tag %s from %s", tag, path)

    # Creating the dataframe
    dataframe = collections.OrderedDict({
        'train_tag': [],
        'et_bin': [],
        'eta_bin': [],
        'model_idx': [],
        'sort': [],
        'init': [],
        'file_name': [],
        'tuned_idx': [],
        'op_name': [],
    })

    MSG_INFO(self, 'There are %i files for this task...' % (len(paths)))
    MSG_INFO(self, 'Filling the table... ')

    for ituned_file_name in progressbar(paths, 'Reading %s...' % tag):
        #for ituned_file_name in paths:
        try:
            gfile = load(ituned_file_name)
        except:
            #MSG_WARNING(self, "File %s not open. skip.", ituned_file_name)
            continue
        tuned_file = gfile['tunedData']

        for idx, ituned in enumerate(tuned_file):
            history = ituned['history']
            for op, config_dict in self.__config_dict.items():
                # get the basics from the model
                dataframe['train_tag'].append(tag)
                dataframe['model_idx'].append(ituned['imodel'])
                dataframe['sort'].append(ituned['sort'])
                dataframe['init'].append(ituned['init'])
                dataframe['et_bin'].append(self.get_etbin(ituned_file_name))
                dataframe['eta_bin'].append(self.get_etabin(ituned_file_name))
                dataframe['file_name'].append(ituned_file_name)
                dataframe['tuned_idx'].append(idx)
                dataframe['op_name'].append(op)
                # Get the value for each key requested by the user in the constructor args.
                for key, local in config_dict.items():
                    if not key in dataframe.keys():
                        dataframe[key] = [self.__get_value(history, local)]
                    else:
                        dataframe[key].append(self.__get_value(history, local))

    # append tables if needed,
    # ignoring the index to avoid duplicated entries in the dataframe
    self.__table = self.__table.append(pd.DataFrame(dataframe), ignore_index=True) \
        if not self.__table is None else pd.DataFrame(dataframe)
    MSG_INFO(self, 'End of the fill step, a pandas DataFrame was created...')
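# Usage sketch for fill() above (the class name and paths are assumptions):
# each call appends one row per (tuned file, model, sort, init, operation
# point) to the internal table, so several tags can be stacked for comparison.
#
#   table = CrossValTable(config_dict)
#   table.fill('tunings/user.jodafons.v1.r1_et0_eta0', tag='v1.r1')
#   table.fill('tunings/user.jodafons.v2.r1_et0_eta0', tag='v2.r1')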
def fill(self, data_paths, models, reference_values, verbose=False):

    # make template dataframe
    dataframe = collections.OrderedDict({
        'name': [],
        'et_bin': [],
        'eta_bin': [],
        'reference_signal_passed': [],
        'reference_signal_total': [],
        'reference_signal_eff': [],
        'reference_background_passed': [],
        'reference_background_total': [],
        'reference_background_eff': [],
        'signal_passed': [],
        'signal_total': [],
        'signal_eff': [],
        'background_passed': [],
        'background_total': [],
        'background_eff': [],
        'signal_corrected_passed': [],
        'signal_corrected_total': [],
        'signal_corrected_eff': [],
        'background_corrected_passed': [],
        'background_corrected_total': [],
        'background_corrected_eff': [],
    })

    # reduce verbosity
    def add(key, value):
        dataframe[key].append(value)

    # Loop over all et/eta bins
    for et_bin, eta_bin in progressbar(product(range(len(self.__etbins) - 1),
                                               range(len(self.__etabins) - 1)),
                                       (len(self.__etbins) - 1) * (len(self.__etabins) - 1),
                                       prefix="Fitting... "):
        path = data_paths[et_bin][eta_bin]
        data, target, avgmu = self.__generator(path)
        references = reference_values[et_bin][eta_bin]
        model = models[et_bin][eta_bin]
        model['thresholds'] = {}

        # Get the predictions
        outputs = model['model'].predict(data, batch_size=1024, verbose=verbose).flatten()

        # Get all limits using the output
        xmin = int(np.percentile(outputs, self.__xmin_percentage))
        xmax = int(np.percentile(outputs, self.__xmax_percentage))
        MSG_DEBUG(self, 'Setting xmin to %1.2f and xmax to %1.2f', xmin, xmax)
        xbins = int((xmax - xmin) / self.__x_bin_size)
        ybins = int((self.__ymax - self.__ymin) / self.__y_bin_size)

        # Fill 2D histograms
        from ROOT import TH2F
        import array
        th2_signal = TH2F('th2_signal_et%d_eta%d' % (et_bin, eta_bin), '',
                          xbins, xmin, xmax, ybins, self.__ymin, self.__ymax)
        w = array.array('d', np.ones_like(outputs[target == 1]))
        th2_signal.FillN(len(outputs[target == 1]),
                         array.array('d', outputs[target == 1].tolist()),
                         array.array('d', avgmu[target == 1].tolist()), w)
        th2_background = TH2F('th2_background_et%d_eta%d' % (et_bin, eta_bin), '',
                              xbins, xmin, xmax, ybins, self.__ymin, self.__ymax)
        w = array.array('d', np.ones_like(outputs[target == 0]))
        th2_background.FillN(len(outputs[target == 0]),
                             array.array('d', outputs[target != 1].tolist()),
                             array.array('d', avgmu[target != 1].tolist()), w)

        MSG_DEBUG(self, 'Applying linear correction to et%d_eta%d bin.', et_bin, eta_bin)

        for name, ref in references.items():

            # local copy of the target efficiency; relaxed in steps of 0.0025
            # below until the false-alarm limit is respected
            ref_value = ref['pd']
            false_alarm = 1.0
            while false_alarm > self.__false_alarm_limit:

                # Get the threshold when we do not apply any linear correction
                threshold, _ = self.find_threshold(th2_signal.ProjectionX(), ref_value)

                # Get the efficiency without linear adjustment
                #signal_eff, signal_num, signal_den = self.calculate_num_and_den_from_hist(th2_signal, 0.0, threshold)
                signal_eff, signal_num, signal_den = self.calculate_num_and_den_from_output(
                    outputs[target == 1], avgmu[target == 1], 0.0, threshold)
                #background_eff, background_num, background_den = self.calculate_num_and_den_from_hist(th2_background, 0.0, threshold)
                background_eff, background_num, background_den = self.calculate_num_and_den_from_output(
                    outputs[target != 1], avgmu[target != 1], 0.0, threshold)

                # Apply the linear adjustment and neutralize it in case of a
                # positive slope (check before overwriting slope, otherwise the
                # offset would never be reset)
                slope, offset, x_points, y_points, error_points = self.fit(th2_signal, ref_value)
                if slope > 0:
                    MSG_WARNING(self, "Retrieved positive angular factor of the linear correction, setting to 0!")
                    slope = 0
                    offset = threshold

                # Get the efficiency with linear adjustment
                #signal_corrected_eff, signal_corrected_num, signal_corrected_den = self.calculate_num_and_den_from_hist(th2_signal, slope, offset)
                signal_corrected_eff, signal_corrected_num, signal_corrected_den = \
                    self.calculate_num_and_den_from_output(outputs[target == 1],
                                                           avgmu[target == 1], slope, offset)
                #background_corrected_eff, background_corrected_num, background_corrected_den = self.calculate_num_and_den_from_hist(th2_background, slope, offset)
                background_corrected_eff, background_corrected_num, background_corrected_den = \
                    self.calculate_num_and_den_from_output(outputs[target != 1],
                                                           avgmu[target != 1], slope, offset)

                # get the passed/total
                false_alarm = background_corrected_num / background_corrected_den
                if false_alarm > self.__false_alarm_limit:
                    # Reduce the reference value by hand
                    ref_value -= 0.0025

            MSG_DEBUG(self, 'Reference name: %s, target: %1.2f%%', name, ref['pd'] * 100)
            MSG_DEBUG(self, 'Signal with correction is: %1.2f%%',
                      signal_corrected_num / signal_corrected_den * 100)
            MSG_DEBUG(self, 'Background with correction is: %1.2f%%',
                      background_corrected_num / background_corrected_den * 100)

            # decorate the model array
            model['thresholds'][name] = {'offset': offset,
                                         'slope': slope,
                                         'threshold': threshold,
                                         'th2_signal': th2_signal,
                                         'th2_background': th2_background,
                                         'x_points': x_points,
                                         'y_points': y_points,
                                         'error_points': error_points,
                                         'reference_pd': ref['pd'],
                                         'reference_fa': ref['fa']}

            # et/eta bin information
            add('name', name)
            add('et_bin', et_bin)
            add('eta_bin', eta_bin)
            # reference values
            add('reference_signal_passed', int(ref['pd'] * signal_den))
            add('reference_signal_total', signal_den)
            add('reference_signal_eff', ref['pd'])
            add('reference_background_passed', int(ref['fa'] * background_den))
            add('reference_background_total', background_den)
            add('reference_background_eff', ref['fa'])
            # non-corrected values
            add('signal_passed', signal_num)
            add('signal_total', signal_den)
            add('signal_eff', signal_num / signal_den)
            add('background_passed', background_num)
            add('background_total', background_den)
            add('background_eff', background_num / background_den)
            # corrected values
            add('signal_corrected_passed', signal_corrected_num)
            add('signal_corrected_total', signal_corrected_den)
            add('signal_corrected_eff', signal_corrected_num / signal_corrected_den)
            add('background_corrected_passed', background_corrected_num)
            add('background_corrected_total', background_corrected_den)
            add('background_corrected_eff', background_corrected_num / background_corrected_den)

    # convert to pandas dataframe
    self.__table = pandas.DataFrame(dataframe)
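# The stored (slope, offset) pair defines a pileup-dependent decision: an event
# passes when the network output exceeds slope*avgmu + offset, so slope == 0
# falls back to a flat cut at 'offset'. A runnable NumPy sketch of that
# convention (assumed to match calculate_num_and_den_from_output; not taken
# verbatim from this codebase):
import numpy as np

def apply_linear_threshold(outputs, avgmu, slope, offset):
    # boolean mask of accepted events; efficiency = mask.sum() / mask.size
    return outputs > (slope * avgmu + offset)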
mainLogger.info('Get histograms from files....')
objects = []
summary = []
for idx, files_ref in enumerate(files):
    paths_ref, keys = GetHistogramRootPaths(triggerList,
                                            removeInnefBefore=args.removeInnefBefore,
                                            is_emulation=is_emulated_trigger[idx],
                                            logger=mainLogger)
    objects.append(GetHistogramFromMany(files_ref, paths_ref, keys,
                                        prefix='Getting reference...',
                                        logger=mainLogger))
    s = {}
    for trigger in triggerList:
        s[trigger] = {'L1Calo': 0.0, 'L2Calo': 0.0, 'L2': 0.0,
                      'EFCalo': 0.0, 'HLT': 0.0}
    summary.append(s)

### Plotting
entries = len(triggerList)
step = int(entries / 100) if int(entries / 100) > 0 else 1
from EfficiencyTools.drawers import PlotProfiles
for trigItem in progressbar(triggerList, entries, step=step, logger=mainLogger,
                            prefix='Plotting...'):
    isL1 = True if trigItem.startswith('L1_') else False
    these_level_names = ['L1Calo'] if isL1 else level_names
    ### Plot all profiles here!
    for idx, histname in enumerate(plot_names):
        # resize <mu> range
        resize = [12, 20, 80] if 'mu' in histname else None
        #doFitting = True if 'mu' in histname and args.doNonLinearityTest else False
        for level in these_level_names:
            #try:
            outname = localpath + '/' + dirpath + '/' + level + '_' + trigItem.replace('HLT_', '') + '_' + histname + '.pdf'
            legends = []
            curves = []
            # loop over each turn-on inside of the plot
            for jdx, objects_ref in enumerate(objects):
                summary[jdx][trigItem][level] = (objects_ref[trigItem + '_' + level + '_match_' + histname].GetEntries() /
def GetHistogramFromMany(basepath, paths, keys, prefix='Loading...', logger=None):

    from Gaugi import progressbar, expandFolders
    from copy import deepcopy

    # internal open function
    def Open(path):
        from ROOT import TFile
        f = TFile(path, 'read')
        if len(f.GetListOfKeys()) > 0:
            run_numbers = [key.GetName() for key in f.GetListOfKeys()]
            return f, run_numbers
        else:
            return f, None

    # internal close function
    def Close(f):
        f.Close()
        del f

    # internal histogram retrieval
    def GetHistogram(f, run_number, path, logger=None):
        try:
            hist = f.Get(run_number + '/' + path)
            hist.GetEntries()
            return hist
        except:
            return None

    # internal integration
    def SumHists(histList):
        totalHist = None
        for hist in histList:
            if hist is None:
                continue
            if totalHist is None:
                totalHist = deepcopy(hist.Clone())
            else:
                totalHist.Add(hist)
        return totalHist

    files = expandFolders(basepath)
    hists = {}
    for f in progressbar(files, len(files), prefix=prefix, logger=logger):
        try:
            _f, _run_numbers = Open(f)
        except:
            continue
        if _run_numbers is None:
            continue
        for idx, _path in enumerate(paths):
            for _run_number in _run_numbers:
                hist = GetHistogram(_f, _run_number, _path)
                if hist is not None:
                    if not keys[idx] in hists.keys():
                        hists[keys[idx]] = [deepcopy(hist.Clone())]
                    else:
                        hists[keys[idx]].append(deepcopy(hist.Clone()))
        Close(_f)

    for key in hists.keys():
        hists[key] = SumHists(hists[key])
    #from pprint import pprint
    #pprint(hists)
    return hists
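# Example use of GetHistogramFromMany together with GetHistogramRootPaths
# (file and trigger names are illustrative): the returned dict maps each key
# to the histogram summed over every run-number directory found in every ROOT
# file under 'basepath'.
#
#   paths, keys = GetHistogramRootPaths(['HLT_e28_lhtight_nod0_ivarloose'])
#   hists = GetHistogramFromMany('rootfiles/', paths, keys, prefix='Reading...')
#   total = hists['HLT_e28_lhtight_nod0_ivarloose_HLT_match_et']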