def runFill(opts):
    batchMode = opts.batch
    inputFakeDir = opts.input_fake
    inputGenDir = opts.input_other
    outputDir = opts.output_dir
    verbose = opts.verbose
    debug = opts.debug
    blinded = not opts.unblind
    tightight = opts.require_tight_tight
    if debug:
        dataset.Dataset.verbose_parsing = True
    groups = dataset.DatasetGroup.build_groups_from_files_in_dir(opts.samples_dir)
    # 'skip_charge_flip' was an undefined name here; it is assumed to be an
    # attribute of opts, like the other flags read at the top of this function
    if not opts.skip_charge_flip:
        groups.append(dataset.DatasetGroup.build_qflip_from_simulated_samples(groups))
    groups.append(first([g for g in groups if g.is_data]).clone_data_as_fake())
    groups = parse_group_option(opts, groups)
    if verbose:
        print '\n'.join("group {0} : {1} samples".format(g.name, len(g.datasets))
                        for g in groups)
    if debug:
        print '\n'.join("group {0} : {1} samples: {2}"
                        .format(g.name, len(g.datasets),
                                '\n\t'+'\n\t'.join(d.name for d in g.datasets))
                        for g in groups)
    if verbose:
        print "filling histos"
    # eval will take care of aborting on typos
    onthefly_tight_def = eval(opts.tight_def) if opts.tight_def else None
    mkdirIfNeeded(outputDir)
    systematics = get_list_of_syst_to_fill(opts)
    regions = regions_to_plot(opts.include_regions, opts.exclude_regions, opts.regions)
    if verbose:
        print "about to loop over these systematics:\n %s" % str(systematics)
        print "about to loop over these regions:\n %s" % str(regions)
    if batchMode:
        for group in groups:
            for systematic in systematics:
                if systUtils.Group(group.name).isNeededForSys(systematic):
                    opts.syst = systematic
                    for selection in regions:
                        submit_batch_fill_job_per_group_per_selection(group=group,
                                                                      selection=selection,
                                                                      opts=opts)
    else:
        for group in groups:
            # filter per group without overwriting the master list (clobbering
            # 'systematics' here would also affect the groups that follow)
            group_systematics = [s for s in systematics
                                 if systUtils.Group(group.name).isNeededForSys(s)]
            if not group_systematics:
                print "warning: empty systematics list; expected at least the nominal"
            for systematic in group_systematics:
                # note to self: here you will want to use a modified Sample.setHftInputDir
                # for now we just have the fake syst that are in the nominal tree
                tree_name = 'hlfv_tuple'
                chain = IndexedChain(tree_name)
                input_dir = inputFakeDir if group.name == 'fake' else inputGenDir
                for ds in group.datasets:
                    chain.Add(os.path.join(input_dir,
                                           systUtils.Sample(ds.name, group.name)
                                           .setSyst(systematic).filename))
                if verbose:
                    print "{0} : {1} entries from {2} samples".format(group.name,
                                                                      chain.GetEntries(),
                                                                      len(group.datasets))
                chain.cache_directory = os.path.abspath('./selection_cache/'+group.name+'/')
                tcuts = [r.TCut(reg, selection_formulas()[reg]) for reg in regions]
                chain.retrieve_entrylists(tcuts)
                counters_pre, histos_pre = dict(), dict()
                counters_npre, histos_npre = dict(), dict()
                cached_tcuts = [] if opts.disable_cache else chain.tcuts_with_existing_list()
                uncached_tcuts = tcuts if opts.disable_cache else chain.tcuts_without_existing_list()
                if verbose:
                    print 'filling cached cuts: ', ' '.join([c.GetName() for c in cached_tcuts])
                for cut in cached_tcuts:
                    chain.preselect(cut)
                    c_pre, h_pre = count_and_fill(chain=chain, sample=group.name,
                                                  syst=systematic, verbose=verbose,
                                                  debug=debug, blinded=blinded,
                                                  onthefly_tight_def=onthefly_tight_def,
                                                  tightight=tightight,
                                                  quicktest=opts.quick_test,
                                                  cached_cut=cut)
                    out_filename = (systUtils.Group(group.name)
                                    .setSyst(systematic)
                                    .setHistosDir(outputDir)
                                    .setCurrentSelection(cut.GetName())).filenameHisto
                    writeObjectsToFile(out_filename, h_pre, verbose)
                    counters_pre = dictSum(counters_pre, c_pre)
                    histos_pre = dictSum(histos_pre, h_pre)
                if uncached_tcuts:
                    if verbose:
                        print 'filling uncached cuts: ', ' '.join([c.GetName() for c in uncached_tcuts])
                    counters_npre, histos_npre = count_and_fill(chain=chain, sample=group.name,
                                                                syst=systematic, verbose=verbose,
                                                                debug=debug, blinded=blinded,
                                                                onthefly_tight_def=onthefly_tight_def,
                                                                tightight=tightight,
                                                                quicktest=opts.quick_test,
                                                                noncached_cuts=uncached_tcuts)
                    for sel, histos in histos_npre.iteritems():
                        out_filename = (systUtils.Group(group.name)
                                        .setSyst(systematic)
                                        .setHistosDir(outputDir)
                                        .setCurrentSelection(sel)).filenameHisto
                        writeObjectsToFile(out_filename, histos, verbose)
                    chain.save_lists()
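# The merging of per-cut counters and histograms above relies on 'dictSum',
# which is defined elsewhere in this package. A minimal sketch consistent with
# how it is used here (non-destructive union of two dicts) could look like this:
def dictSum(d1, d2):
    "Return a new dict with the combined (key, value) pairs of d1 and d2."
    merged = dict(d1)
    merged.update(d2)
    return merged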
def runFill(opts):
    lepton = opts.lepton
    batchMode = opts.batch
    inputDir = opts.input_dir
    outputDir = opts.output_dir
    verbose = opts.verbose
    debug = opts.debug
    dataset.Dataset.verbose_parsing = True if debug else False
    groups = dataset.DatasetGroup.build_groups_from_files_in_dir(opts.samples_dir)
    if opts.group:
        groups = [g for g in groups if g.name == opts.group]
    if verbose:
        print '\n'.join("group {0} : {1} samples".format(g.name, len(g.datasets))
                        for g in groups)
    if debug:
        print '\n'.join("group {0} : {1} samples: {2}"
                        .format(g.name, len(g.datasets),
                                '\n\t'+'\n\t'.join(d.name for d in g.datasets))
                        for g in groups)
    if verbose:
        print "filling histos"
    outputDir = outputDir+'/'+lepton+'/histos'
    mkdirIfNeeded(outputDir)
    if batchMode:
        for group in groups:
            submit_batch_fill_job_per_group(group, opts)
    else:
        for group in groups:
            tree_name = 'ss3l_tuple'
            chain = IndexedChain(tree_name)
            for ds in group.datasets:
                chain.Add(os.path.join(inputDir, ds.name+'.root'))
            if verbose:
                print "{0} : {1} entries from {2} samples".format(group.name,
                                                                  chain.GetEntries(),
                                                                  len(group.datasets))
            chain.cache_directory = os.path.abspath('./selection_cache/'+group.name+'/')
            tcuts = [r.TCut(reg, selection_formulas()[reg])
                     for reg in regions_to_plot(opts.include_regions,
                                                opts.exclude_regions,
                                                opts.regions)]
            chain.retrieve_entrylists(tcuts)
            counters_pre, histos_pre = dict(), dict()
            counters_npre, histos_npre = dict(), dict()
            cached_tcuts = [] if opts.disable_cache else chain.tcuts_with_existing_list()
            uncached_tcuts = tcuts if opts.disable_cache else chain.tcuts_without_existing_list()
            # todo: skip cuts for which the histo files are already there
            if verbose:
                print 'filling cached cuts: ', ' '.join([c.GetName() for c in cached_tcuts])
            for cut in cached_tcuts:
                chain.preselect(cut)
                c_pre, h_pre = count_and_fill(chain=chain, opts=opts, group=group, cached_cut=cut)
                counters_pre = dictSum(counters_pre, c_pre)
                histos_pre = dictSum(histos_pre, h_pre)
            if verbose:
                print 'filling uncached cuts: ', ' '.join([c.GetName() for c in uncached_tcuts])
            if uncached_tcuts:
                counters_npre, histos_npre = count_and_fill(chain=chain, opts=opts, group=group,
                                                            noncached_cuts=uncached_tcuts)
                chain.save_lists()
            all_histos = dictSum(histos_pre, histos_npre)
            for sel, histos in all_histos.iteritems():
                # write histos for each sel to a separate file
                # (finer granularity, better caching)
                out_filename = os.path.join(outputDir, group.name+'_'+sel+'.root')
                if verbose:
                    print 'saving to ', out_filename
                writeObjectsToFile(out_filename, histos, verbose)
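# 'regions_to_plot' is imported from elsewhere in this package; both runFill
# variants use it to turn the command-line region options into a list of names.
# A plausible sketch, assuming include/exclude are fnmatch-style patterns and
# 'regions' is an explicit comma-separated override (these semantics are an
# assumption, not taken from this file):
import fnmatch

def regions_to_plot(include='*', exclude='', regions=None):
    "Select region names from selection_formulas(), honouring include/exclude patterns."
    all_regions = sorted(selection_formulas().keys())
    if regions:  # an explicit list wins over the patterns
        return [reg for reg in regions.split(',') if reg in all_regions]
    selected = [reg for reg in all_regions if fnmatch.fnmatch(reg, include)]
    return [reg for reg in selected
            if not (exclude and fnmatch.fnmatch(reg, exclude))]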
def main():
    parser = optparse.OptionParser(usage=usage)
    parser.add_option('-g', '--group', help='group to be processed (used only in fill mode)')
    parser.add_option('-i', '--input-dir', default='./out/fakerate')
    parser.add_option('-o', '--output-dir', default='./out/fake_scale_factor')
    parser.add_option('-l', '--lepton', default='el', help='either el or mu')
    parser.add_option('-r', '--region', help='one of the regions for which we saved the fake ntuples')
    parser.add_option('--samples-dir', default='samples/',
                      help='directory with the list of samples; default ./samples/')
    parser.add_option('-T', '--tight-def',
                      help='on-the-fly tight def, one of defs in fakeUtils.py: fakeu.lepIsTight_std, etc.')
    parser.add_option('-f', '--fill-histos', action='store_true', default=False,
                      help='force fill (default only if needed)')
    parser.add_option('--keep-real', action='store_true', default=False,
                      help='do not subtract real (to get real lep efficiency)')
    parser.add_option('--debug', action='store_true')
    parser.add_option('--verbose', action='store_true')
    parser.add_option('--disable-cache', action='store_true', help='disable the entry cache')
    (options, args) = parser.parse_args()
    inputDir = options.input_dir
    outputDir = options.output_dir
    lepton = options.lepton
    region = options.region
    keepreal = options.keep_real
    debug = options.debug
    verbose = options.verbose
    if lepton not in ['el', 'mu']:
        parser.error("invalid lepton '%s'" % lepton)
    regions = kin.selection_formulas().keys()
    assert region in regions, \
        "invalid region '%s', must be one of %s" % (region, str(sorted(regions)))
    regions = [region]
    dataset.Dataset.verbose_parsing = True if debug else False
    groups = dataset.DatasetGroup.build_groups_from_files_in_dir(options.samples_dir)
    if options.group:
        groups = [g for g in groups if g.name == options.group]
    group_names = [g.name for g in groups]
    # split the output in subdirectories, so we don't overwrite things
    outputDir = outputDir+'/'+region+'/'+lepton
    mkdirIfNeeded(outputDir)
    templateOutputFilename = "scale_factor_{0}.root".format(lepton)
    outputFileName = os.path.join(outputDir, templateOutputFilename)
    cacheFileName = outputFileName.replace('.root', '_cache.root')
    doFillHistograms = options.fill_histos or not os.path.exists(cacheFileName)
    # eval will take care of aborting on typos
    onthefly_tight_def = eval(options.tight_def) if options.tight_def else None
    if verbose:
        utils.print_running_conditions(parser, options)
    vars = ['mt0', 'mt1', 'pt0', 'pt1', 'eta1', 'pt1_eta1']
    # fill histos
    if doFillHistograms:
        start_time = time.clock()
        num_processed_entries = 0
        histosPerGroup = bookHistos(vars, group_names, region=region)
        histosPerSource = bookHistosPerSource(vars, leptonSources, region=region)
        histosPerGroupPerSource = bookHistosPerSamplePerSource(vars, group_names,
                                                               leptonSources, region=region)
        for group in groups:
            tree_name = 'hlfv_tuple'
            chain = IndexedChain(tree_name)
            for ds in group.datasets:
                fname = os.path.join(inputDir, ds.name+'.root')
                if os.path.exists(fname):
                    chain.Add(fname)
            if verbose:
                print "{0} : {1} entries from {2} samples".format(group.name,
                                                                  chain.GetEntries(),
                                                                  len(group.datasets))
            chain.cache_directory = os.path.abspath('./selection_cache/'+group.name+'/')
            tcuts = [r.TCut(reg, selection_formulas()[reg]) for reg in regions]
            chain.retrieve_entrylists(tcuts)
            counters_pre, histos_pre = dict(), dict()
            counters_npre, histos_npre = dict(), dict()
            cached_tcuts = [] if options.disable_cache else chain.tcuts_with_existing_list()
            uncached_tcuts = tcuts if options.disable_cache else chain.tcuts_without_existing_list()
            if debug:
                print 'tcuts ', [c.GetName() for c in tcuts]
                print 'tcuts_with_existing_list ', str([c.GetName() for c in chain.tcuts_with_existing_list()])
                print 'tcuts_without_existing_list ', str([c.GetName() for c in chain.tcuts_without_existing_list()])
                print 'cached_tcuts ', [c.GetName() for c in cached_tcuts]
            # todo: skip cuts for which the histo files are already there
            if verbose:
                print " --- group : {0} ---".format(group.name)
                print '\n\t'.join(chain.filenames)
                print "%s : %d entries" % (group.name, chain.GetEntries())
                print 'filling cached cuts: ', ' '.join([c.GetName() for c in cached_tcuts])
            histosThisGroup = histosPerGroup[group.name]
            histosThisGroupPerSource = dict((v, histosPerGroupPerSource[v][group.name])
                                            for v in histosPerGroupPerSource.keys())
            for cut in cached_tcuts:
                if debug:
                    print 'cached_tcut ', cut
                chain.preselect(cut)
                num_processed_entries += fillHistos(chain, histosThisGroup, histosPerSource,
                                                    histosThisGroupPerSource, lepton, group, cut,
                                                    cut_is_cached=True,
                                                    onthefly_tight_def=onthefly_tight_def,
                                                    verbose=verbose)
            if verbose:
                print 'filling uncached cuts: ', ' '.join([c.GetName() for c in uncached_tcuts])
            if uncached_tcuts:
                assert len(uncached_tcuts) == 1, \
                    "expecting only one cut, got {}".format(len(uncached_tcuts))
                cut = uncached_tcuts[0]
                chain.preselect(None)
                num_processed_entries += fillHistos(chain, histosThisGroup, histosPerSource,
                                                    histosThisGroupPerSource, lepton, group, cut,
                                                    cut_is_cached=False,
                                                    onthefly_tight_def=onthefly_tight_def,
                                                    verbose=verbose)
                chain.save_lists()
        writeHistos(cacheFileName, histosPerGroup, histosPerSource,
                    histosPerGroupPerSource, verbose)
        end_time = time.clock()
        delta_time = end_time - start_time
        if verbose:
            print ("processed {0:d} entries ".format(num_processed_entries)
                   + "in " + ("{0:d} min ".format(int(delta_time/60)) if delta_time > 60
                              else "{0:.1f} s ".format(delta_time))
                   + "({0:.1f} kHz)".format(num_processed_entries/delta_time/1000.0))  # entries/s -> kHz
    # compute scale factors
    histosPerGroup = fetchHistos(cacheFileName, histoNames(vars, group_names, region), verbose)
    histosPerSource = fetchHistos(cacheFileName,
                                  histoNamesPerSource(vars, leptonSources, region), verbose)
    histosPerSamplePerSource = fetchHistos(cacheFileName,
                                           histoNamesPerSamplePerSource(vars, group_names,
                                                                        leptonSources, region),
                                           verbose)
    plotStackedHistos(histosPerGroup, outputDir+'/by_group', region, verbose)
    plotStackedHistosSources(histosPerSource, outputDir+'/by_source', region, verbose)
    plotPerSourceEff(histosPerVar=histosPerSource, outputDir=outputDir+'/by_source',
                     lepton=lepton, region=region, verbose=verbose)
    for g in group_names:
        hps = dict((v, histosPerSamplePerSource[v][g]) for v in vars)
        plotPerSourceEff(histosPerVar=hps, outputDir=outputDir, lepton=lepton,
                         region=region, sample=g, verbose=verbose)
    hn_sf_eta = histoname_sf_vs_eta(lepton)
    hn_sf_pt = histoname_sf_vs_pt(lepton)
    hn_da_eta = histoname_data_fake_eff_vs_eta(lepton)
    hn_da_pt = histoname_data_fake_eff_vs_pt(lepton)
    subtractReal = not keepreal
    objs_eta = subtractRealAndComputeScaleFactor(histosPerGroup, 'eta1', hn_sf_eta, hn_da_eta,
                                                 outputDir, region, subtractReal, verbose)
    objs_pt = subtractRealAndComputeScaleFactor(histosPerGroup, 'pt1', hn_sf_pt, hn_da_pt,
                                                outputDir, region, subtractReal, verbose)
    objs_pt_eta = subtractRealAndComputeScaleFactor(histosPerGroup, 'pt1_eta1',
                                                    histoname_sf_vs_pt_eta(lepton),
                                                    histoname_data_fake_eff_vs_pt_eta(lepton),
                                                    outputDir, region, subtractReal, verbose)
    rootUtils.writeObjectsToFile(outputFileName,
                                 dictSum(dictSum(objs_eta, objs_pt), objs_pt_eta),
                                 verbose)
    if verbose:
        print "saved scale factors to %s" % outputFileName
def runFill(opts):
    lepton = opts.lepton
    batchMode = opts.batch
    inputDir = opts.input_dir
    outputDir = opts.output_dir
    verbose = opts.verbose
    debug = opts.debug
    dataset.Dataset.verbose_parsing = True if debug else False
    groups = dataset.DatasetGroup.build_groups_from_files_in_dir(opts.samples_dir)
    if opts.group:
        groups = [g for g in groups if g.name == opts.group]
    if verbose:
        print '\n'.join("group {0} : {1} samples".format(g.name, len(g.datasets))
                        for g in groups)
    if debug:
        print '\n'.join("group {0} : {1} samples: {2}"
                        .format(g.name, len(g.datasets),
                                '\n\t'+'\n\t'.join(d.name for d in g.datasets))
                        for g in groups)
    if verbose:
        print "filling histos"
    outputDir = outputDir+'/'+lepton+'/histos'
    mkdirIfNeeded(outputDir)
    if batchMode:
        for group in groups:
            submit_batch_fill_job_per_group(group, opts)
    else:
        for group in groups:
            tree_name = 'hlfv_tuple'
            chain = IndexedChain(tree_name)
            for ds in group.datasets:
                chain.Add(os.path.join(inputDir, ds.name+'.root'))
            if verbose:
                print "{0} : {1} entries from {2} samples".format(group.name,
                                                                  chain.GetEntries(),
                                                                  len(group.datasets))
            chain.cache_directory = os.path.abspath('./selection_cache/'+group.name+'/')
            tcuts = [r.TCut(reg, selection_formulas()[reg])
                     for reg in regions_to_plot(opts.include_regions,
                                                opts.exclude_regions,
                                                opts.regions)]
            chain.retrieve_entrylists(tcuts)
            counters_pre, histos_pre = dict(), dict()
            counters_npre, histos_npre = dict(), dict()
            cached_tcuts = [] if opts.disable_cache else chain.tcuts_with_existing_list()
            uncached_tcuts = tcuts if opts.disable_cache else chain.tcuts_without_existing_list()
            # todo: skip cuts for which the histo files are already there
            if verbose:
                print 'filling cached cuts: ', ' '.join([c.GetName() for c in cached_tcuts])
            for cut in cached_tcuts:
                chain.preselect(cut)
                c_pre, h_pre = count_and_fill(chain=chain, opts=opts, group=group, cached_cut=cut)
                counters_pre = dictSum(counters_pre, c_pre)
                histos_pre = dictSum(histos_pre, h_pre)
            if verbose:
                print 'filling uncached cuts: ', ' '.join([c.GetName() for c in uncached_tcuts])
            if uncached_tcuts:
                counters_npre, histos_npre = count_and_fill(chain=chain, opts=opts, group=group,
                                                            noncached_cuts=uncached_tcuts)
                chain.save_lists()
            all_histos = dictSum(histos_pre, histos_npre)
            for sel, histos in all_histos.iteritems():
                # write histos for each sel to a separate file
                # (finer granularity, better caching)
                out_filename = os.path.join(outputDir, group.name+'_'+sel+'.root')
                if verbose:
                    print 'saving to ', out_filename
                writeObjectsToFile(out_filename, histos, verbose)
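# 'mkdirIfNeeded' is a small utility imported from elsewhere; every function
# above calls it before writing output. A sketch of the assumed behaviour:
import os

def mkdirIfNeeded(dirname):
    "Create 'dirname' (including parents) if it does not exist; return its path."
    if dirname and not os.path.isdir(dirname):
        os.makedirs(dirname)
    return dirname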
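# Typical entry point: these scripts are meant to be run from the command line.
# The guard below is an assumption about how main() is invoked; it is not part
# of the original snippets.
if __name__ == '__main__':
    main()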