Esempi in Python per ROOT, esempi in Python per tthAnalysis.HiggsToTauTau.safe_root.ROOT

Esempio n. 1

0

Mostra file

File: skim_root.py Progetto: saswatinandan/tth-htt

def skim(in_filename, out_filename, entry_list, tree_name = "Events"):
  '''Copies the selected entries of a TTree into a new ROOT file

  Args:
    in_filename:  string,    Path to the input ROOT file
    out_filename: string,    Path to the output ROOT file
    entry_list:   int array, List of entries (not RLE numbers!) which are used to select the event
    tree_name:    string,    TTree name (default: Events)

  Returns:
    True,  if every entry number is a valid 0-based index into the input tree
           (an empty entry_list is valid and produces an empty output tree)
    False, otherwise; no output file is created in that case
  '''
  logging.debug("Processing file {in_filename}".format(in_filename = in_filename))
  f_in = ROOT.TFile(in_filename, "read")
  t_in = f_in.Get(tree_name)

  nof_entries = t_in.GetEntries()
  # len() instead of truthiness so that array-like inputs are handled as well
  if len(entry_list) > 0:
    min_entry = min(entry_list)
    max_entry = max(entry_list)
    if min_entry < 0:
      logging.error("Negative entry ID requested from {root_filename}: {min_entry}".format(
        root_filename = in_filename,
        min_entry     = min_entry,
      ))
      f_in.Close()
      return False
    # Entries are 0-based: the largest valid index is nof_entries - 1
    if max_entry >= nof_entries:
      logging.error("Max entry ID exceeds the number of entries in {root_filename}: {max_entry} >= {nof_entries}".format(
        root_filename = in_filename,
        max_entry     = max_entry,
        nof_entries   = nof_entries,
      ))
      f_in.Close()
      return False

  f_out = ROOT.TFile(out_filename, "recreate")
  # Clone only the tree structure (0 entries); the selected entries are copied below
  t_out = t_in.CloneTree(0)

  for i in entry_list:
    t_in.GetEntry(i)
    t_out.Fill()

  t_out.AutoSave()
  # Close both files explicitly instead of relying on garbage collection
  f_out.Close()
  f_in.Close()
  logging.debug("Saved events to {out_filename}".format(out_filename = out_filename))
  return True

Esempio n. 2

0

Mostra file

def dump_rle(input_file,
             output_file,
             tree_name='Events',
             run_br='run',
             lumi_br='luminosityBlock',
             event_br='event'):
    """Write one ``run:lumi:event`` line per tree entry of a ROOT file.

    Args:
      input_file:  path of the ROOT file added to the chain
      output_file: path of the text file that is (over)written
      tree_name:   name of the tree the chain is built for
      run_br:      name of the run number branch
      lumi_br:     name of the luminosity block branch
      event_br:    name of the event number branch

    Returns:
      None. The output always ends with a newline, so a tree without
      entries yields a file that contains a single newline.
    """
    with open(output_file, 'w') as out_handle:
        chain = ROOT.TChain(tree_name)
        chain.AddFile(input_file)

        # One-element buffers that the chain fills on every GetEntry() call
        branch_buffers = [
            (run_br, array.array('I', [0])),
            (lumi_br, array.array('I', [0])),
            (event_br, array.array('L', [0])),
        ]
        for branch_name, buf in branch_buffers:
            chain.SetBranchAddress(branch_name, buf)

        entry_count = chain.GetEntries()
        rle_lines = []
        for entry_idx in range(entry_count):
            chain.GetEntry(entry_idx)
            rle_lines.append(
                ':'.join(str(buf[0]) for _, buf in branch_buffers))

        out_handle.write("{rle_lines}\n".format(
            rle_lines='\n'.join(rle_lines)))

    size_in_kb = os.path.getsize(output_file) / 1000
    logging.debug("Wrote {nof_bytes} kB to {filename}".format(
        nof_bytes=size_in_kb,
        filename=output_file,
    ))
    return

Esempio n. 3

0

Mostra file

File: project_stitch.py Progetto: kartikmaurya/tth-htt

def project(input_file, output_file, binnings):
    """Project branches of the 'Events' tree into 1D histograms.

    Args:
      input_file:  path of the ROOT file to read
      output_file: path of the ROOT file that is recreated with the histograms
      binnings:    dict mapping a branch name to its list of bin edges

    Returns:
      True if the histograms were written, False if the input file could
      not be opened by ROOT.

    Raises:
      RuntimeError: if input_file is not an existing regular file
    """
    if not os.path.isfile(input_file):
        raise RuntimeError('No such file: %s' % input_file)

    source = ROOT.TFile.Open(input_file, 'read')
    if not source:
        print('Unable to read file %s' % input_file)
        return False

    tree = source.Get('Events')
    assert (tree)

    filled = []
    for name, edges in binnings.items():
        bin_edges = array.array('f', edges)
        nof_bins = len(bin_edges) - 1
        hist = ROOT.TH1F(name, name, nof_bins, bin_edges)
        assert (hist)
        # The histogram is named after the branch, so it is the projection target
        tree.Project(name, name)
        filled.append(hist)

    sink = ROOT.TFile.Open(output_file, 'recreate')
    sink.cd()
    for hist in filled:
        hist.Write()

    sink.Close()
    source.Close()
    return True

Esempio n. 4

0

Mostra file

File: skim_root.py Progetto: saswatinandan/tth-htt

def get_rle(in_filename, tree_name = "tree"):
  '''Collects the RLE numbers of every entry in a ROOT file
  Args:
    in_filename: string, Path to the ROOT file from which the RLE numbers are fetched
    tree_name:   string, TTree name (default: tree)

  Returns:
    string array, One join_rle() result per tree entry, in entry order

  The input file must already exist in the file system.
  '''
  input_file = ROOT.TFile(in_filename, "read")
  tree = input_file.Get(tree_name)

  # One-element buffers that are refreshed by every GetEntry() call
  run  = array.array('I', [0])
  lumi = array.array('I', [0])
  evt  = array.array('L', [0])

  for branch_name, buf in (("run", run), ("luminosityBlock", lumi), ("event", evt)):
    tree.SetBranchAddress(branch_name, buf)

  collected = []
  entry_count = tree.GetEntries()
  for entry_idx in range(entry_count):
    tree.GetEntry(entry_idx)
    # join_rle() receives the buffers themselves, not the values they hold
    collected.append(join_rle(run, lumi, evt))

  return collected

Esempio n. 5

0

Mostra file

 def plot2d(histogram, plot_fn_base, width=1200, height=900):
     """Draw a histogram with the 'col text' option and save it as PNG and PDF.

     Args:
       histogram:    object drawn via its Draw() method
       plot_fn_base: output path without extension
       width:        canvas width
       height:       canvas height
     """
     ROOT_canvas = ROOT.TCanvas('c1', 'c1', width, height)
     ROOT.gStyle.SetOptStat(0)
     histogram.Draw('col text')
     ROOT_canvas.SetLogy()
     # Same base name for both output formats, PNG first
     for name_pattern in ('%s.png', '%s.pdf'):
         ROOT_canvas.SaveAs(name_pattern % plot_fn_base)
     del ROOT_canvas

Esempio n. 6

0

Mostra file

def check_that_histogram_is_not_zombie(input_file):
    """Exit the process with status 1 if ROOT reports the file as a zombie.

    Args:
      input_file: path of the ROOT file to open in read mode
    """
    banner = "<check_that_histogram_is_not_zombie>: input file = '%s'" % input_file
    print(banner)

    opened_file = ROOT.TFile(input_file, "read")

    # NOTE(review): the original author marked this zombie check as not working
    if not opened_file.IsZombie():
        return

    print("ERROR: Input file '%s' is zombie !!" % input_file)
    sys.exit(1)

Esempio n. 7

0

Mostra file

def plot(input_files, output_files, title, legend):
    """Histogram the invariant mass of the two final-state LHE leptons.

    Args:
      input_files:  iterable of event files to loop over
      output_files: iterable of file names the canvas is saved to
      title:        histogram title
      legend:       used as both name and initial title of the histogram

    Raises:
      RuntimeError: if an event does not hold exactly two status-1 particles
                    whose |PDG id| is 11, 13 or 15
    """
    lhe_handle = Handle('LHEEventProduct')
    lhe_label = 'externalLHEProducer'

    # 200 unit-width bins spanning [0, 200]
    edges = array.array('f', [float(edge) for edge in range(201)])
    mll_hist = ROOT.TH1F(legend, legend, len(edges) - 1, edges)
    lepton_pdg_ids = (11, 13, 15)

    for input_file in input_files:
        for event in Events(input_file):
            event.getByLabel(lhe_label, lhe_handle)
            lhe = lhe_handle.product()

            leptons = [
                ROOT.TLorentzVector(mom.x[0], mom.x[1], mom.x[2], mom.x[3])
                for status, pdg, mom in zip(lhe.hepeup().ISTUP,
                                            lhe.hepeup().IDUP,
                                            lhe.hepeup().PUP)
                if status == 1 and abs(pdg) in lepton_pdg_ids
            ]

            if len(leptons) != 2:
                raise RuntimeError(
                    'Did not find exactly 2 but %d LHE leptons in %s' %
                    (len(leptons), input_file))
            mll_hist.Fill((leptons[0] + leptons[1]).M())

    canvas = ROOT.TCanvas('c', 'c')
    canvas.SetLogy()
    canvas.SetGrid()
    mll_hist.SetTitle(title)
    mll_hist.GetXaxis().SetTitle('m_ll [GeV]')
    mll_hist.GetYaxis().SetTitle('# events')
    mll_hist.Draw()
    for output_file in output_files:
        canvas.SaveAs(output_file)
    del mll_hist
    del canvas

Esempio n. 8

0

Mostra file

 def create_histogram(key, title):
     """Book a labelled histogram and store it in `histograms` under `key`.

     A TH2D is booked when `is2D` is truthy, a TH1D otherwise. Every bin
     label has the form '<low> <= <variable> < <high>'.
     """
     def label_axis(axis, edges, var_name):
         # ROOT bin numbers start at 1; bin i spans edges[i - 1] .. edges[i]
         for edge_idx in range(len(edges) - 1):
             axis.SetBinLabel(
                 edge_idx + 1, '%d <= %s < %d' %
                 (edges[edge_idx], var_name, edges[edge_idx + 1]))

     if not is2D:
         histograms[key] = ROOT.TH1D(key, title, len(bins_x) - 1, bins_x)
         label_axis(histograms[key].GetXaxis(), bins_x, x_var)
         histograms[key].SetXTitle(x_var)
         return

     histograms[key] = ROOT.TH2D(key, title,
                                 len(bins_x) - 1, bins_x,
                                 len(bins_y) - 1, bins_y)
     label_axis(histograms[key].GetXaxis(), bins_x, x_var)
     label_axis(histograms[key].GetYaxis(), bins_y, y_var)
     histograms[key].SetXTitle(x_var)
     histograms[key].SetYTitle(y_var)

Esempio n. 9

0

Mostra file

def is_file_ok(output_file_name, validate_outputs=True, min_file_size=20000):
    """Decide whether an existing output file can be kept.

    Files that do not end in '.root' are accepted as soon as they exist.
    ROOT files are accepted only if they pass the requested checks; a ROOT
    file that is not accepted is deleted via ``run_cmd("rm ...")``.

    Args:
      output_file_name: path of the file to check (may be empty or None)
      validate_outputs: if True, open the file with ROOT and make sure it is
                        neither a zombie nor reported as being recovered
      min_file_size:    a ROOT file must be strictly larger than this many
                        bytes; the size check is skipped if the value is <= 0

    Returns:
      True if the file exists and is considered fine, False otherwise.
    """
    if not (output_file_name and os.path.exists(output_file_name)):
        return False

    logging.info("Output file %s already exists" % output_file_name)

    if not output_file_name.lower().endswith('.root'):
        return True

    command = "rm %s" % output_file_name
    ret_value = False
    size_ok = True
    if min_file_size > 0:
        output_file_size = os.stat(output_file_name).st_size
        if output_file_size > min_file_size:
            if not validate_outputs:
                ret_value = True
        else:
            # A too-small file is rejected outright; previously it could still
            # be accepted by the validation below, contradicting this message
            size_ok = False
            logging.info(
                "Deleting output file and resubmitting job because it has size smaller than %d bytes"
                % min_file_size)

    # NOTE(review): with min_file_size <= 0 and validate_outputs False no check
    # runs and the file is deleted -- confirm that this is intended
    if validate_outputs and size_ok:
        root_tfile = ROOT.TFile(output_file_name, "read")
        if not root_tfile:
            logging.info("Not a valid ROOT file, deleting it")
        else:
            if root_tfile.IsZombie():
                logging.info(
                    "Output file is corrupted, deleting file and resubmitting job"
                )
            else:
                # Let's open the file via bash as well to see if ROOT tries to recover the file
                open_cmd = "root -b -l -q %s 2>&1 > /dev/null | grep 'trying to recover' | wc -l" % output_file_name
                open_out = run_cmd(open_cmd)
                if open_out.rstrip('\n') != '0':
                    logging.info(
                        "Output file is probably corrupted, deleting file and resubmitting job"
                    )
                else:
                    ret_value = True
            root_tfile.Close()

    if not ret_value:
        run_cmd(command)

    return ret_value

Esempio n. 10

0

Mostra file

def save_weights(fp, sample_names, stitching_dict):
    """Write one histogram per stitching_dict key into <sample>/<SPLITVAR>.

    Args:
      fp:             object whose mkdir() creates the per-sample directory
      sample_names:   iterable of directory names to create under fp
      stitching_dict: maps a histogram name to a container indexed by the
                      values of the module-level `bin_keys`
    """
    for sample_name in sample_names:
        target_dir = fp.mkdir(sample_name).mkdir(SPLITVAR)
        target_dir.cd()
        for count_name in stitching_dict:
            # Unit-width bins with edges 0, 1, ..., max(bin_keys) + 1
            edges = array.array('f', list(range(max(bin_keys) + 2)))
            nof_bins = len(edges) - 1
            weight_hist = ROOT.TH1D(count_name, count_name, nof_bins, edges)
            weight_hist.SetDirectory(target_dir)
            weight_hist.SetTitle(count_name)
            weight_hist.SetXTitle(SPLITVAR)
            for bin_key in bin_keys:
                # ROOT bin numbers are 1-based, hence the +1 shift
                root_bin = bin_key + 1
                weight_hist.SetBinContent(root_bin,
                                          stitching_dict[count_name][bin_key])
                weight_hist.GetXaxis().SetBinLabel(
                    root_bin,
                    "%d <= %s < %d" % (bin_key, SPLITVAR, bin_key + 1))
            weight_hist.Write()

Esempio n. 11

0

Mostra file

File: dump_rle_parallel.py Progetto: saswatinandan/tth-htt

def validate(output_dir, verbose=False):
    '''Validates the job execution carried out by dump_rle_parallel()

    Args:
      output_dir: string, The directory where all RLE files are stored
      verbose:    bool,   Enable verbose output

    Returns:
      None

    The validation is quite basic: the program loops over the samples listed in the
    module-level `samples` dictionary, matches them against the subdirectories of
    output_dir and counts the number of lines in each RLE file. If the number of lines
    doesn't match the number of entries in the corresponding ROOT file, the user is
    notified about the discrepancy.

    RLE files are classified as
      excess    -- the corresponding ROOT file is blacklisted,
      missing   -- the RLE file does not exist,
      corrupted -- the line count differs from the number of tree entries.

    A KeyboardInterrupt stops the scan early; the summary of what has been found so far
    is still printed.

    In principle, the script could also print relevant commands to fix the issues (and
    dump them to an easily executable file) but let's leave it for another time.
    '''

    if verbose:
        logging.getLogger().setLevel(logging.DEBUG)

    # Raw string with an escaped dot: only names of the form tree_<digits>.root match
    root_file_regex = re.compile(r'^tree_(\d+)\.root$')
    file_dict = {k: [] for k in ['excess', 'missing', 'corrupted']}

    try:

        # values() is available on both Python 2 and 3 (iteritems() is Python 2 only)
        for s_value in samples.values():
            sample_name = s_value['process_name_specific']
            sample_dir = os.path.join(output_dir, sample_name)
            if not os.path.isdir(sample_dir):
                continue
            logging.debug("Found sample directory {sample_dir}".format(
                sample_dir=sample_dir))

            #NB! assume that there are no secondary paths in the dictionary (hence index 0!)
            sample_path_dict = s_value['local_paths'][0]
            sample_path = sample_path_dict['path']
            blacklist = sample_path_dict['blacklist']
            for sample_subdir in os.listdir(sample_path):
                # only subdirectories with a purely numeric name are considered
                try:
                    sample_subpath_idx = int(sample_subdir)
                except ValueError:
                    continue
                if sample_subpath_idx < 0:
                    raise ValueError("Internal error")
                sample_subpath = os.path.join(sample_path, sample_subdir)
                logging.debug(
                    "Processing sample subdirectory {sample_subpath}".
                    format(sample_subpath=sample_subpath))

                for sample_file in os.listdir(sample_subpath):
                    sample_file_fullpath = os.path.join(
                        sample_subpath, sample_file)
                    if not sample_file.endswith('.root') or \
                       not os.path.isfile(sample_file_fullpath):
                        continue

                    root_file_regex_match = root_file_regex.search(
                        sample_file)
                    if not root_file_regex_match:
                        continue

                    root_file_idx = int(root_file_regex_match.group(1))
                    expected_rle_file_basename = '{root_file_idx}.txt'.format(
                        root_file_idx=root_file_idx)
                    expected_rle_file = os.path.join(
                        sample_dir, expected_rle_file_basename)
                    file_dict_entry = (expected_rle_file,
                                       sample_file_fullpath)
                    if root_file_idx in blacklist:
                        if os.path.isfile(expected_rle_file):
                            logging.warning(
                                'Found RLE file {rle_file} (corresponding to blacklisted {root_file}) '
                                'which you ought to delete'.format(
                                    rle_file=expected_rle_file,
                                    root_file=sample_file_fullpath,
                                ))
                        file_dict['excess'].append(file_dict_entry)
                        continue

                    if not os.path.isfile(expected_rle_file):
                        logging.warning(
                            'Missing RLE file {rle_file} (corresponding to {root_file})'
                            .format(
                                rle_file=expected_rle_file,
                                root_file=sample_file_fullpath,
                            ))
                        file_dict['missing'].append(file_dict_entry)
                        continue
                    nof_rle_events = raw_linecount(expected_rle_file)
                    if nof_rle_events == 1 and os.path.getsize(
                            expected_rle_file) == 1:
                        # the RLE file contains only a newline, hence no events
                        nof_rle_events = 0

                    root_file = ROOT.TFile(sample_file_fullpath, 'read')
                    try:
                        nof_entries = root_file.Get('tree').GetEntries()
                    finally:
                        # many files are opened in this loop; release each one right away
                        root_file.Close()

                    nof_events_diff = nof_rle_events - nof_entries
                    if nof_events_diff < 0:
                        logging.error(
                            'Missing {nof_events} events in {rle_filename} (corresponding to {sample_file}): '
                            'expected {expected}, got {actual}'.format(
                                nof_events=abs(nof_events_diff),
                                rle_filename=expected_rle_file,
                                sample_file=sample_file_fullpath,
                                expected=nof_entries,
                                actual=nof_rle_events,
                            ))
                        file_dict['corrupted'].append(file_dict_entry)
                    elif nof_events_diff > 0:
                        logging.error(
                            'Got {nof_events} more event than expected in {rle_filename} (corresponding '
                            'to {sample_file}): expected {expected}, got {actual}'
                            .format(
                                nof_events=nof_events_diff,
                                rle_filename=expected_rle_file,
                                sample_file=sample_file_fullpath,
                                expected=nof_entries,
                                actual=nof_rle_events,
                            ))
                        file_dict['corrupted'].append(file_dict_entry)
                    else:
                        logging.debug(
                            'File {rle_filename} (corresponding to {sample_file}) looks OK'
                            .format(
                                rle_filename=expected_rle_file,
                                sample_file=sample_file_fullpath,
                            ))

    except KeyboardInterrupt:
        # deliberate: stop scanning but still report what has been collected so far
        pass

    if any(map(bool, file_dict.values())):
        logging.info('Validation finished with errors')
        for key, entries in file_dict.items():
            if not entries:
                continue
            logging.info('Number of {key} RLE files: {nof_key}'.format(
                key=key, nof_key=len(entries)))
            for entry in entries:
                logging.info('{rle_file} <=> {sample_file}'.format(
                    rle_file=entry[0], sample_file=entry[1]))
    else:
        logging.info('Validation finished successfully')
    return

Esempio n. 12

0

Mostra file

File: project_stitch.py Progetto: kartikmaurya/tth-htt

            # Project every non-blacklisted input tree (file indices start at 1)
            # into its own histogram file.
            for idx in range(1, nof_files + 1):
                if idx in blacklist:
                    continue
                # The input tree sits in a subdirectory named after idx // 1000,
                # zero-padded to four digits
                input_file = os.path.join(base_path, '%04d' % (idx // 1000),
                                          'tree_%d.root' % idx)
                output_file = os.path.join(current_subfolder,
                                           'histogram_%d.root' % idx)
                # Only outputs for which project() returned a truthy value are kept
                file_exists = project(input_file, output_file, binning)
                if file_exists:
                    output_files.append(output_file)

            # presumably merges the per-file histograms into final_output_file -- TODO confirm
            hadd(output_files, final_output_file)

            # Draw one plot per projected variable from the combined file
            root_file = ROOT.TFile.Open(final_output_file, 'read')
            for binning_key in binning:
                canvas = ROOT.TCanvas('c', 'c', 1200, 900)

                histogram = root_file.Get(binning_key)
                canvas.SetLogy()
                # LHE_HT is the only variable that also gets a logarithmic x axis
                if binning_key == 'LHE_HT':
                    canvas.SetLogx()
                histogram.SetTitle(sample_name)
                histogram.SetXTitle(binning_key)
                histogram.Draw()
                for extension in ['pdf', 'png']:
                    plot_file = os.path.join(
                        output_plot_dir,
                        '%s_%s.%s' % (sample_name, binning_key, extension))
                    canvas.SaveAs(plot_file)
                # Drop the references before the next iteration reuses the canvas name 'c'
                del histogram
                del canvas

Esempio n. 13

0

Mostra file

    # Disable every branch and re-enable only the ones that are read below
    input_tree.SetBranchStatus('*', 0)
    input_tree.SetBranchStatus(collection_sz_name, 1)
    input_tree.SetBranchStatus(collection_pt_name, 1)
    input_tree.SetBranchStatus(collection_eta_name, 1)
    input_tree.SetBranchStatus(collection_phi_name, 1)
    input_tree.SetBranchStatus(collection_mass_name, 1)
    input_tree.SetBranchStatus(collection_pdgId_name, 1)
    if use_genweight:
        input_tree.SetBranchStatus(GENWEIGHT_BR_NAME, 1)

    for idx in range(nof_events):
        input_tree.GetEntry(idx)
        # Collect the four-momenta of all particles with PDG id 25
        higgses = []
        for partIdx in range(sz_br[0]):
            if pdgId_br[partIdx] == 25:
                p4 = ROOT.TLorentzVector()
                p4.SetPtEtaPhiM(pt_br[partIdx], eta_br[partIdx],
                                phi_br[partIdx], mass_br[partIdx])
                higgses.append(p4)
        if len(higgses) != 2:
            logging.warning(
                "Event #{} did not contain two LHE parton-level Higgses!".
                format(idx))
            continue

        # Order the pair by energy; both sides of the comparison must be energies
        # (the second operand used to be the TLorentzVector itself)
        leading_idx = 0 if higgses[0].E() > higgses[1].E() else 1
        leading_higgs = higgses[leading_idx]
        subleading_higgs = higgses[1 - leading_idx]
        higgs_sum = leading_higgs + subleading_higgs

        # Invariant mass of the di-Higgs system
        mhh = higgs_sum.M()

Esempio n. 14

0

Mostra file

File: rle_matcher.py Progetto: saswatinandan/tth-htt

                        sample_path, '000%d' % (file_idx / 1000),
                        'tree_{i}.root'.format(i=file_idx))
                    rles[rle].append(grep_result)
        else:
            # instead of forming a list of files let's loop over the subfolders and the files therein instead
            logging.debug('Looping over the files in {sample_path}'.format(
                sample_path=sample_path))
            for subdir in hdfs.listdir(sample_path):
                logging.debug(
                    'Found subdirectory {subdir}'.format(subdir=subdir))
                # NOTE(review): subdir is passed to listdir() as is, so hdfs.listdir()
                # presumably returns full paths -- confirm
                for rootfile in hdfs.listdir(subdir):
                    logging.debug("Processing file '{rootfile}'".format(
                        rootfile=rootfile, ))

                    # open the file
                    ch_root = ROOT.TChain("Events")
                    ch_root.AddFile(rootfile)

                    # one-element buffers that are refreshed by every GetEntry() call
                    run_a = array.array('I', [0])
                    lumi_a = array.array('I', [0])
                    evt_a = array.array('L', [0])

                    ch_root.SetBranchAddress("run", run_a)
                    ch_root.SetBranchAddress("luminosityBlock", lumi_a)
                    ch_root.SetBranchAddress("event", evt_a)

                    nof_entries = ch_root.GetEntries()
                    for i in range(nof_entries):
                        ch_root.GetEntry(i)
                        # build the 'run:lumi:event' string of the current entry
                        rle_i = ':'.join(
                            map(str, [run_a[0], lumi_a[0], evt_a[0]]))

Esempio n. 15

0

Mostra file

def comp_weights_1(f, samples, samples_to_stitch, split_var, apply_sf=True):
    """Compute per-bin stitching weights in one variable and write them as histograms.

    For every inclusive and every binned (exclusive) sample a set of TH1D
    histograms named '<event type>_<index>' is written to the directory
    '<sample name>/<split_var>' of `f`; missing directories are created.

    Args:
      f:                 object providing GetListOfKeys(), mkdir() and Get();
                         presumably an open, writable ROOT file -- TODO confirm
      samples:           dict of sample entries; the key 'sum_events' is
                         skipped, every other entry is read for
                         'process_name_specific', 'xsection' and 'nof_events'
      samples_to_stitch: dict with an 'inclusive' entry (keys 'samples' and
                         split_var) and an 'exclusive' entry whose split_var
                         item is a list of dicts with keys 'samples' and 'value'
      split_var:         name of the variable the samples are binned in
      apply_sf:          selects which of the two weight formulas is used for
                         the binned samples (see the second histogram loop)

    Returns:
      None

    Side effects:
      Every dict in samples_to_stitch['exclusive'][split_var] gains the keys
      'xsection', 'nof_events', 'lumis' and 'idx'.
    """
    inclusive_samples = samples_to_stitch['inclusive']['samples']
    inclusive_binning = samples_to_stitch['inclusive'][split_var]

    split_dict = samples_to_stitch['exclusive'][split_var]
    split_binning = [sample['value'] for sample in split_dict]
    # Sorted, de-duplicated union of the inclusive bin edges and all exclusive bin edges
    complete_binning = list(
        sorted(
            list(
                map(
                    float,
                    set(inclusive_binning) | set(
                        list(itertools.chain.from_iterable(split_binning)))))))

    # The cross section is taken from the first inclusive sample
    inclusive_xs = -1
    for sample_key, sample_entry in samples.items():
        if sample_key == 'sum_events': continue
        if sample_entry['process_name_specific'] == inclusive_samples[0]:
            inclusive_xs = sample_entry['xsection']
    assert (inclusive_xs > 0)

    # sum the inclusive nof events
    inclusive_nof_events = {}
    for sample_key, sample_entry in samples.items():
        if sample_key == 'sum_events': continue
        if sample_entry['process_name_specific'] in inclusive_samples:
            if not inclusive_nof_events:
                # first matching sample: start from a copy of its event counts
                inclusive_nof_events = copy_nof_events(sample_entry)
            else:
                # further samples: same event types required, counts are added element-wise
                nof_events_keys = set(nof_key
                                      for nof_key in sample_entry['nof_events']
                                      if is_valid_event_type(nof_key))
                assert (nof_events_keys == set(inclusive_nof_events))
                for nof_key, nof_arr in sample_entry['nof_events'].items():
                    if not is_valid_event_type(nof_key):
                        continue
                    assert (len(nof_arr) == len(inclusive_nof_events[nof_key]))
                    for idx, nof in enumerate(nof_arr):
                        assert (nof > 0)
                        inclusive_nof_events[nof_key][idx] += nof

    # sum the binned nof events
    for binned_sample in split_dict:
        nof_events = {}
        xs = -1
        for sample_key, sample_entry in samples.items():
            if sample_key == 'sum_events': continue
            if sample_entry['process_name_specific'] in binned_sample[
                    'samples']:
                if not nof_events:
                    nof_events = copy_nof_events(sample_entry)
                    # the binned sample must provide the same event types as the inclusive one
                    inclusive_nof_events_type = set(
                        event_type
                        for event_type in inclusive_nof_events.keys()
                        if is_valid_event_type(event_type))
                    nof_events_type = set(event_type
                                          for event_type in nof_events.keys()
                                          if is_valid_event_type(event_type))
                    assert (inclusive_nof_events_type == nof_events_type)
                else:
                    nof_events_keys = set(
                        nof_key for nof_key in sample_entry['nof_events']
                        if is_valid_event_type(nof_key))
                    assert (nof_events_keys == set(nof_events.keys()))
                    for nof_key, nof_arr in sample_entry['nof_events'].items():
                        if not is_valid_event_type(nof_key):
                            continue
                        assert (len(nof_arr) == len(nof_events[nof_key]))
                        for idx, nof in enumerate(nof_arr):
                            assert (nof > 0)
                            nof_events[nof_key][idx] += nof
                # the cross section of the first matching sample is used
                if xs < 0:
                    xs = sample_entry['xsection']
        assert (xs > 0)
        # the results are stored on the binned sample dict itself (mutates the caller's data)
        binned_sample['xsection'] = xs
        binned_sample['nof_events'] = nof_events

        # per event type: event counts divided by the cross section
        lumis = {}
        for nof_key, nof_arr in binned_sample['nof_events'].items():
            lumis[nof_key] = list(
                map(lambda nof: nof / binned_sample['xsection'], nof_arr))
        binned_sample['lumis'] = lumis

    # compute integrated luminosities for the inclusive sample
    inclusive_lumis = {}
    for nof_key, nof_arr in inclusive_nof_events.items():
        inclusive_lumis[nof_key] = list(
            map(lambda nof: nof / inclusive_xs, nof_arr))

    # decide on the bin indices
    idxs_split_sample = []
    for binned_sample in split_dict:
        # 1-based index of the bin whose lower edge is the first 'value' of the binned sample
        binned_idx = complete_binning.index(binned_sample['value'][0]) + 1
        binned_sample['idx'] = binned_idx
        idxs_split_sample.append(binned_idx)

    # Weights for the inclusive samples
    for inclusive_sample in inclusive_samples:
        # reuse the '<sample>/<split_var>' directory if it exists, create it otherwise
        if inclusive_sample not in [
                key.GetName() for key in f.GetListOfKeys()
        ]:
            histogram_dir_root = f.mkdir(inclusive_sample)
        else:
            histogram_dir_root = f.Get(inclusive_sample)
        if split_var not in [
                key.GetName() for key in histogram_dir_root.GetListOfKeys()
        ]:
            histogram_dir = histogram_dir_root.mkdir(split_var)
        else:
            histogram_dir = histogram_dir_root.Get(split_var)
        histogram_dir.cd()

        for nof_key, lumi_arr in inclusive_lumis.items():
            if not is_valid_event_type(nof_key):
                continue
            for idx, lumi_incl in enumerate(lumi_arr):
                histogram_name = '%s_%d' % (nof_key, idx)
                binning = array.array('f', complete_binning)

                histogram = ROOT.TH1D(histogram_name, histogram_name,
                                      len(binning) - 1, binning)
                histogram.SetDirectory(histogram_dir)
                histogram.SetXTitle(split_var)

                for split_idx in range(1, len(binning)):
                    if split_idx in idxs_split_sample:
                        # bin covered by a binned sample: share the weight with it
                        for binned_sample in split_dict:
                            if split_idx == binned_sample['idx']:
                                lumi_split = binned_sample['lumis'][nof_key][
                                    idx]
                                # binning[split_idx] is the upper edge of this bin; if it lies
                                # outside the inclusive binning the inclusive term is dropped
                                if binning[split_idx] > inclusive_binning[-1] or \
                                   binning[split_idx] < inclusive_binning[0]:
                                    lumi_incl_calc = 0.
                                else:
                                    lumi_incl_calc = lumi_incl
                                weight = lumi_incl_calc / (lumi_incl_calc +
                                                           lumi_split)
                                histogram.SetBinContent(split_idx, weight)
                    else:
                        # bin without a binned sample: weight 1
                        histogram.SetBinContent(split_idx, 1.)
                    histogram.GetXaxis().SetBinLabel(
                        split_idx, '%.0f <= %s < %.0f' %
                        (complete_binning[split_idx - 1], split_var,
                         complete_binning[split_idx]))

                histogram.Write()

    # Weights for the binned samples
    for binned_sample in split_dict:
        for sample_name in binned_sample['samples']:
            # reuse the '<sample>/<split_var>' directory if it exists, create it otherwise
            if sample_name not in [key.GetName() for key in f.GetListOfKeys()]:
                histogram_dir_root = f.mkdir(sample_name)
            else:
                histogram_dir_root = f.Get(sample_name)
            if split_var not in [
                    key.GetName()
                    for key in histogram_dir_root.GetListOfKeys()
            ]:
                histogram_dir = histogram_dir_root.mkdir(split_var)
            else:
                histogram_dir = histogram_dir_root.Get(split_var)
            histogram_dir.cd()

            for nof_key, lumi_arr in inclusive_lumis.items():
                if not is_valid_event_type(nof_key):
                    continue
                for idx, lumi_incl in enumerate(lumi_arr):

                    histogram_name = '%s_%d' % (nof_key, idx)
                    binning = array.array('f', complete_binning)

                    histogram = ROOT.TH1D(histogram_name, histogram_name,
                                          len(binning) - 1, binning)
                    histogram.SetDirectory(histogram_dir)
                    histogram.SetXTitle(split_var)

                    for split_idx in range(1, len(binning)):
                        if split_idx == binned_sample['idx']:
                            lumi_split = binned_sample['lumis'][nof_key][idx]
                            # upper bin edge outside the inclusive binning: weight 1
                            if binning[split_idx] > inclusive_binning[-1] or \
                               binning[split_idx] < inclusive_binning[0]:
                                weight = 1.
                            else:
                                # apply_sf picks the binned-sample share of the summed
                                # luminosity, otherwise the inclusive share is used
                                if apply_sf:
                                    weight = lumi_split / (lumi_incl +
                                                           lumi_split)
                                else:
                                    weight = lumi_incl / (lumi_incl +
                                                          lumi_split)
                            assert (weight >= 0.)
                            histogram.SetBinContent(split_idx, weight)
                        else:
                            # every bin other than the sample's own one gets weight 0
                            histogram.SetBinContent(split_idx, 0.)
                        histogram.GetXaxis().SetBinLabel(
                            split_idx, '%.0f <= %s < %.0f' %
                            (complete_binning[split_idx - 1], split_var,
                             complete_binning[split_idx]))

                    histogram.Write()

Esempio n. 16

0

Mostra file

def dump_yields(fn, fn_out):
    """Histogram weighted event counts of the 'Events' tree and save them.

    Reads the tree 'Events' from the ROOT file ``fn``, fills one histogram
    per event-count flavour (binned in ``x_var`` and, if ``is2D`` is set,
    also in ``y_var``) and writes all histograms to the ROOT file ``fn_out``.
    If ``plot_dir`` is set, every histogram is additionally drawn to a
    PNG and a PDF file in that directory.

    The names ``x_var``, ``y_var``, ``is2D``, ``apply_mll_cut``, ``plot_dir``
    and ``get_type`` are not defined in this function; they are taken from
    the enclosing scope.

    Args:
      fn:     path of the input ROOT file
      fn_out: path of the output ROOT file (opened in 'recreate' mode)
    """

    print('Dumping event yields from %s to %s' % (fn, fn_out))

    f = ROOT.TFile.Open(fn)
    t = f.Get('Events')

    # Buffers for the binning variables
    # NOTE(review): get_type() presumably returns (typecode, initializer) -- confirm
    x_branch = array.array(*get_type(x_var))
    y_branch = array.array(*get_type(y_var)) if is2D else None

    # Number of LHE scale weights every event is required to have
    req_nLHESW = 9

    # Single-element arrays serve as writable buffers for scalar branches
    run = array.array('I', [0])
    luminosityBlock = array.array('I', [0])
    event = array.array('L', [0])

    genWeight = array.array('f', [0.])
    puWeight = array.array('f', [0.])
    puWeightUp = array.array('f', [0.])
    puWeightDown = array.array('f', [0.])
    nLHEScaleWeight = array.array('I', [0])
    LHEScaleWeight = array.array('f', [0.] * req_nLHESW)

    # Fixed capacity of the per-particle LHE buffers
    max_objs = 32
    nLHEPart = array.array('B', [0])
    LHEPart_pt = array.array('f', [0.] * max_objs)
    LHEPart_eta = array.array('f', [0.] * max_objs)
    LHEPart_phi = array.array('f', [0.] * max_objs)
    LHEPart_mass = array.array('f', [0.] * max_objs)
    LHEPart_pdgId = array.array('i', [0] * max_objs)

    # Bind the buffers to the tree; t.GetEntry() refreshes them in place
    t.SetBranchAddress('run', run)
    t.SetBranchAddress('luminosityBlock', luminosityBlock)
    t.SetBranchAddress('event', event)

    t.SetBranchAddress(x_var, x_branch)
    if is2D:
        t.SetBranchAddress(y_var, y_branch)

    t.SetBranchAddress('genWeight', genWeight)
    t.SetBranchAddress('puWeight', puWeight)
    t.SetBranchAddress('puWeightUp', puWeightUp)
    t.SetBranchAddress('puWeightDown', puWeightDown)
    t.SetBranchAddress('nLHEScaleWeight', nLHEScaleWeight)
    t.SetBranchAddress('LHEScaleWeight', LHEScaleWeight)

    # The LHE particle branches are needed only for the dilepton mass
    if apply_mll_cut:
        t.SetBranchAddress('nLHEPart', nLHEPart)
        t.SetBranchAddress('LHEPart_pt', LHEPart_pt)
        t.SetBranchAddress('LHEPart_eta', LHEPart_eta)
        t.SetBranchAddress('LHEPart_phi', LHEPart_phi)
        t.SetBranchAddress('LHEPart_mass', LHEPart_mass)
        t.SetBranchAddress('LHEPart_pdgId', LHEPart_pdgId)

    # Bin edges per binning variable; x_var (and y_var) must be one of the keys
    bins = {
        'LHE_Njets': list(map(float, range(6))),
        'LHE_HT': [0., 100., 200., 400., 600., 800., 1200., 2500., 1.e5]
    }

    bins_arr = {key: array.array('d', bins[key]) for key in bins}
    # Dilepton mass thresholds that define three regions: below, between, above
    mll_bins = [10., 50.]

    def get_mll_str(val):
        # Map a dilepton mass to the label of its region ('lt10', '10to50', 'gt50')
        assert (len(mll_bins) == 2)
        if val < mll_bins[0]:
            return 'lt%s' % str(int(mll_bins[0]))
        elif mll_bins[0] <= val < mll_bins[1]:
            return '%sto%s' % (str(int(mll_bins[0])), str(int(mll_bins[1])))
        elif val >= mll_bins[1]:
            return 'gt%s' % (str(int(mll_bins[1])))
        else:
            # Reached only if every comparison above is false (e.g. NaN)
            raise RuntimeError('Unexpected value: %f (%s)' % (val, mll_bins))

    # Histograms keyed by name, kept in creation order
    histograms = collections.OrderedDict()

    bins_x = bins_arr[x_var]
    bins_y = bins_arr[y_var] if is2D else None

    def create_histogram(key, title):
        # Book a TH2D (if is2D) or a TH1D under `key`, label each bin with its
        # edges and store the histogram in `histograms`
        if is2D:
            histograms[key] = ROOT.TH2D(key, title,
                                        len(bins_x) - 1, bins_x,
                                        len(bins_y) - 1, bins_y)
            for bin_idx in range(len(bins_x) - 1):
                histograms[key].GetXaxis().SetBinLabel(
                    bin_idx + 1, '%d <= %s < %d' %
                    (bins_x[bin_idx], x_var, bins_x[bin_idx + 1]))
            for bin_idx in range(len(bins_y) - 1):
                histograms[key].GetYaxis().SetBinLabel(
                    bin_idx + 1, '%d <= %s < %d' %
                    (bins_y[bin_idx], y_var, bins_y[bin_idx + 1]))
            histograms[key].SetXTitle(x_var)
            histograms[key].SetYTitle(y_var)
        else:
            histograms[key] = ROOT.TH1D(key, title, len(bins_x) - 1, bins_x)
            for bin_idx in range(len(bins_x) - 1):
                histograms[key].GetXaxis().SetBinLabel(
                    bin_idx + 1, '%d <= %s < %d' %
                    (bins_x[bin_idx], x_var, bins_x[bin_idx + 1]))
            histograms[key].SetXTitle(x_var)

    # Count flavours: 'nbins' is the number of histograms booked per flavour
    # (suffixes _0 .. _{nbins-1}), 'title' is the base histogram title
    count_keys = collections.OrderedDict([
        ('Count', {
            'nbins': 1,
            'title': 'sum(1)',
        }),
        ('CountFullWeighted', {
            'nbins': 3,
            'title': 'sum(gen * PU(central,up,down))',
        }),
        ('CountWeighted', {
            'nbins': 3,
            'title': 'sum(sgn(gen) * PU(central,up,down))',
        }),
        ('CountFullWeightedNoPU', {
            'nbins': 1,
            'title': 'sum(gen)',
        }),
        ('CountPosWeight', {
            'nbins': 1,
            'title': 'sum(gen > 0)'
        }),
        ('CountNegWeight', {
            'nbins': 1,
            'title': 'sum(gen < 0)'
        }),
        ('CountWeightedNoPU', {
            'nbins': 1,
            'title': 'sum(sgn(gen))',
        }),
        ('CountWeightedLHEWeightScale', {
            'nbins': req_nLHESW,
            'title': 'sum(sgn(gen) * PU(central) * LHE(scale))',
        }),
        ('CountWeightedLHEWeightScaleNoPU', {
            'nbins': req_nLHESW,
            'title': 'sum(sgn(gen) * LHE(scale))',
        }),
        ('CountFullWeightedLHEWeightScale', {
            'nbins': req_nLHESW,
            'title': 'sum(gen * PU(central) * LHE(scale))',
        }),
        ('CountFullWeightedLHEWeightScaleNoPU', {
            'nbins': req_nLHESW,
            'title': 'sum(gen * LHE(scale))',
        }),
    ])

    if apply_mll_cut:
        # One representative mass per region, used only to generate the
        # region label; every histogram is booked once per region
        for val in [
                mll_bins[0] - 1., (mll_bins[0] + mll_bins[1]) / 2.,
                mll_bins[1] + 1
        ]:
            for count_key, count_settings in count_keys.items():
                for histogram_idx in range(count_settings['nbins']):
                    mll_str = get_mll_str(val)
                    key = '%s_%d_%s' % (count_key, histogram_idx, mll_str)
                    title = count_settings['title']
                    if count_settings['nbins'] > 1:
                        title += ' [bin = %d]' % histogram_idx
                    title += ' (mll %s)' % mll_str
                    create_histogram(key, title)
    else:
        for count_key, count_settings in count_keys.items():
            for histogram_idx in range(count_settings['nbins']):
                key = '%s_%d' % (count_key, histogram_idx)
                title = count_settings['title']
                if count_settings['nbins'] > 1:
                    title += ' [bin = %d]' % histogram_idx
                create_histogram(key, title)

    def clip(value, min_val=-10., max_val=10.):
        # Restrict `value` to the interval [min_val, max_val]
        return min(max(value, min_val), max_val)

    def plot2d(histogram, plot_fn_base, width=1200, height=900):
        # Draw a 2D histogram and save it as <plot_fn_base>.png and .pdf
        canvas = ROOT.TCanvas('c1', 'c1', width, height)
        ROOT.gStyle.SetOptStat(0)
        histogram.Draw('col text')
        canvas.SetLogy()
        canvas.SaveAs('%s.png' % plot_fn_base)
        canvas.SaveAs('%s.pdf' % plot_fn_base)
        del canvas

    def plot1d(histogram, plot_fn_base, width=1200, height=900):
        # Draw a 1D histogram (log-log axes, with grid) and save it as
        # <plot_fn_base>.png and .pdf
        canvas = ROOT.TCanvas('c1', 'c1', width, height)
        ROOT.gStyle.SetOptStat(0)
        histogram.SetLineWidth(2)
        histogram.Draw('hist')
        canvas.SetLogx()
        canvas.SetLogy()
        canvas.SetGrid()
        canvas.SaveAs('%s.png' % plot_fn_base)
        canvas.SaveAs('%s.pdf' % plot_fn_base)
        del canvas

    n = t.GetEntries()
    printEvery = 100000

    for i in range(n):
        t.GetEntry(i)
        if i % printEvery == 0:
            # Progress report with the run:lumi:event numbers of the entry
            rle = ':'.join(
                map(lambda x: str(x[0]), [run, luminosityBlock, event]))
            print('Processing event %d: %s' % (i, rle))

        if apply_mll_cut:
            # Collect the four-momenta of the LHE particles with
            # |pdgId| in {11, 13, 15}
            invmass = []
            for j in range(nLHEPart[0]):
                if abs(LHEPart_pdgId[j]) in [11, 13, 15]:
                    lv = ROOT.TLorentzVector()
                    lv.SetPtEtaPhiM(LHEPart_pt[j], LHEPart_eta[j],
                                    LHEPart_phi[j], LHEPart_mass[j])
                    invmass.append(lv)
            # Events without exactly two such particles are skipped silently
            if len(invmass) != 2:
                continue
            mll = (invmass[0] + invmass[1]).M()
            suffix = '_%s' % get_mll_str(mll)
        else:
            suffix = ''

        genWeight_sign = np.sign(genWeight[0])
        # Event weight per histogram (without the mll suffix)
        counts = {
            'Count_0': 1.,
            'CountWeighted_0': genWeight_sign * puWeight[0],
            'CountWeighted_1': genWeight_sign * puWeightUp[0],
            'CountWeighted_2': genWeight_sign * puWeightDown[0],
            'CountFullWeighted_0': genWeight[0] * puWeight[0],
            'CountFullWeighted_1': genWeight[0] * puWeightUp[0],
            'CountFullWeighted_2': genWeight[0] * puWeightDown[0],
            'CountWeightedNoPU_0': genWeight_sign,
            'CountFullWeightedNoPU_0': genWeight[0],
            # The boolean factor zeroes the weight when the sign does not match
            'CountPosWeight_0': genWeight[0] * (genWeight_sign > 0),
            'CountNegWeight_0': genWeight[0] * (genWeight_sign < 0),
        }
        # An event with an unexpected number of scale weights is reported and
        # dropped entirely: none of the histograms is filled for it
        if nLHEScaleWeight[0] != req_nLHESW:
            print('Error: event #%d' % i)
            continue
        for j in range(nLHEScaleWeight[0]):
            # Scale weights are clipped to [-10, 10] before use
            LHEScaleWeight_clipped = clip(LHEScaleWeight[j])
            counts['CountWeightedLHEWeightScale_%d' %
                   j] = genWeight_sign * puWeight[0] * LHEScaleWeight_clipped
            counts['CountWeightedLHEWeightScaleNoPU_%d' %
                   j] = genWeight_sign * LHEScaleWeight_clipped
            counts['CountFullWeightedLHEWeightScale_%d' %
                   j] = genWeight[0] * puWeight[0] * LHEScaleWeight_clipped
            counts['CountFullWeightedLHEWeightScaleNoPU_%d' %
                   j] = genWeight[0] * LHEScaleWeight_clipped
        for count_key in counts:
            key = count_key + suffix
            evtWeight = counts[count_key]
            if is2D:
                histograms[key].Fill(x_branch[0], y_branch[0], evtWeight)
            else:
                histograms[key].Fill(x_branch[0], evtWeight)

    # Persist all histograms and, if requested, plot each of them
    f_out = ROOT.TFile.Open(fn_out, 'recreate')
    f_out.cd()
    for histogram in histograms.values():
        histogram.Write()
        if plot_dir:
            if is2D:
                plot2d(histogram, os.path.join(plot_dir, histogram.GetName()))
            else:
                plot1d(histogram, os.path.join(plot_dir, histogram.GetName()))

    f.Close()
    f_out.Close()

Esempio n. 17

0

Mostra file

File: addMEMConfig.py Progetto: kartikmaurya/tth-htt

    def memJobList(self, inputFileList):
        '''
        Splits every fileset into consecutive event ranges ("jobs") such that the
        number of MEM integrations per job does not exceed
        self.mem_integrations_per_job.

        Args:
          inputFileList:{ int, array of strings }; i.e. fileset* ID and the list of files

        * if the script were to generate configuration files, this number would correspond to job ID

        Returns:
          { int : { str : int, str : [str, str, ...], str : [int, int], ... } }
            |        |          |                      |
         job id  "fileset_id" "input_fileset"     "event_range"

          Each job entry also holds "nof_entries" (entries in the whole fileset),
          "nof_int" (integrations in the range), "nof_int_pass" (integrations of
          events with at least one integration), "nof_events_pass" (events with at
          least one integration) and "nof_zero" (events without integrations).
          Event ranges are half-open: [first, last). Job IDs start at 1. A fileset
          without entries yields a single job with the event range [0, 0].

        Raises:
          ValueError: if a single event requires more integrations than
                      self.mem_integrations_per_job

        The function reads a given set of files and determines the event range
        '''
        memJobDict = {}
        jobId = 0
        # items() is available in both Python 2 and 3, unlike iteritems()
        for filesetId, inputFileSet in inputFileList.items():
            memJobDict_common = {
                'fileset_id': filesetId,
                'input_fileset': inputFileSet
            }
            ch = ROOT.TChain(self.treeName)
            for fn in inputFileSet:
                # chaining a file
                logging.debug("Processing file {fileName}".format(fileName=fn))
                ch.AddFile(fn)

            nof_entries = ch.GetEntries()

            memJobDict_common['nof_entries'] = nof_entries
            if nof_entries == 0:
                # an empty fileset still gets one (empty) job
                jobId += 1
                memJobDict[jobId] = dict(
                    {
                        'event_range': [0, 0],
                        'nof_int': 0,
                        'nof_int_pass': 0,
                        'nof_events_pass': 0,
                        'nof_zero': 0,
                    }, **memJobDict_common)
                continue

            current_pos = 0
            evt_ranges = []

            # running tallies of the job currently being filled, and the
            # per-job lists they are flushed to whenever a job is closed
            counter, counter_arr = 0, []
            nof_events_pass_counter, nof_events_pass = 0, []
            nof_int_pass_counter, nof_int_pass = 0, []
            nof_zero_integrations, nof_events_zero = 0, []

            maxPermutations_addMEM = array.array('i', [0])
            ch.SetBranchAddress(self.maxPermutations_branchName,
                                maxPermutations_addMEM)

            for i in range(nof_entries):
                ch.GetEntry(i)
                if i > 0 and i % 10000 == 0:
                    logging.debug("Processing event %i/%i" % (i, nof_entries))

                # negative values are treated as "no integrations"
                nof_integrations = max(maxPermutations_addMEM[0], 0)

                if nof_integrations > self.mem_integrations_per_job:
                    raise ValueError(
                        "Too many nof_integrations = %d in file(s) %s at %d:%d:%d"
                        % (nof_integrations, ', '.join(inputFileSet), ch.run,
                           ch.lumi, ch.evt))

                if (counter +
                        nof_integrations) > self.mem_integrations_per_job:
                    # the current event does not fit anymore: close the job
                    # right before it, i.e. with the range [start, current_pos)
                    range_start = evt_ranges[-1][1] if evt_ranges else 0
                    evt_ranges.append([range_start, current_pos])
                    counter_arr.append(counter)
                    counter = 0

                    nof_events_pass.append(nof_events_pass_counter)
                    nof_events_pass_counter = 0

                    nof_int_pass.append(nof_int_pass_counter)
                    nof_int_pass_counter = 0

                    nof_events_zero.append(nof_zero_integrations)
                    nof_zero_integrations = 0

                # Tally the event only after a possible split so that it is
                # accounted for in the job whose event range contains it.
                if nof_integrations >= 1:
                    nof_events_pass_counter += 1
                    nof_int_pass_counter += nof_integrations
                else:
                    nof_zero_integrations += 1
                counter += nof_integrations
                current_pos += 1

            # close the last job, which extends to the end of the fileset
            if counter <= self.mem_integrations_per_job and counter >= 0:
                range_start = evt_ranges[-1][1] if evt_ranges else 0
                evt_ranges.append([range_start, int(nof_entries)])
                counter_arr.append(counter)
                nof_events_pass.append(nof_events_pass_counter)
                nof_int_pass.append(nof_int_pass_counter)
                nof_events_zero.append(nof_zero_integrations)

            # ensure that the event ranges won't overlap (i.e. there won't be any double-processing of any event)
            evt_ranges_cat = []
            for evt_range in evt_ranges:
                evt_ranges_cat.extend(range(evt_range[0], evt_range[1]))
            # list() is needed on Python 3, where range() is not a list
            assert (evt_ranges_cat == list(range(nof_entries)))
            assert (bool(evt_ranges))

            # we now have all event ranges per one file, let's add them to the dictionary
            for job_idx, evt_range in enumerate(evt_ranges):
                jobId += 1
                memJobDict[jobId] = dict(
                    {
                        'event_range': evt_range,
                        'nof_int': counter_arr[job_idx],
                        'nof_int_pass': nof_int_pass[job_idx],
                        'nof_events_pass': nof_events_pass[job_idx],
                        'nof_zero': nof_events_zero[job_idx],
                    }, **memJobDict_common)

        return memJobDict

Esempio n. 18

0

Mostra file

File: graph_gen_relations.py Progetto: huiling110/tth-htt

 def __init__(self):
     """Instantiate a ROOT.TDatabasePDG and keep it as self.pdgTable."""
     self.pdgTable = ROOT.TDatabasePDG()

Esempio n. 19

0

Mostra file

File: merge_htxs.py Progetto: huiling110/tth-htt

      histogram_name_base = 'ttH_{}'.format(ptbin_name)

    histogram_names = [ key.GetName() for key in htxs_dir.GetListOfKeys() if key.GetName().endswith(fitvar) ]
    if not histogram_names:
      raise RuntimeError(
        "Unable to find any histograms ending with '%s' in directory %s/%s of file %s" % \
        (fitvar, evt_dirname, htxs_dirname, input_hadd2_fn)
      )

    for histogram_name in histogram_names:
      histogram_name_new = histogram_name_base
      if histogram_name != fitvar:
        sysname = histogram_name[:-fitvar_len_plus_1]
        if not sysname.endswith(('Down', 'Up')):
          raise RuntimeError("Invalid systematics in histogram name %s" % histogram_name)
        histogram_name_new += '_{}'.format(sysname)
      assert(histogram_name_new not in histograms)
      histograms[histogram_name_new] = htxs_dir.Get(histogram_name)

  output.cd()

  for histogram_name in histograms:
    histogram = ROOT.TH1F()
    histograms[histogram_name].Copy(histogram)
    histogram.SetName(histogram_name)
    histogram.SetTitle(histogram_name)
    histogram.Write()

  output.Close()
  input_hadd2.Close()

Esempio n. 20

0

Mostra file

            for sample_idx, sample_name in enumerate(results.keys()):
                logging.info("Creating plots for sample {} ({}/{})".format(
                    sample_name, sample_idx + 1, nof_samples))
                plot_output(pdf)

# Save the ratios to the output file: one directory per sample and, inside it,
# one histogram per systematic uncertainty.
out_fptr = ROOT.TFile.Open(output, 'recreate')
for sample_key in results:
    logging.info("Saving histograms for process {}".format(sample_key))
    sample_dir = out_fptr.mkdir(sample_key)
    sample_dir.cd()
    sample_results = results[sample_key]
    assert ('central' in sample_results)
    for sys_key in sample_results:
        # Skip non-central entries if only the central value was requested,
        # and always skip unnamed entries and envelopes.
        is_excluded = (central_only and sys_key != 'central') or \
                      not sys_key or sys_key.startswith('envelope')
        if is_excluded:
            continue
        ratio_entry = sample_results[sys_key]['ratio']
        ratios = ratio_entry['count']
        ratio_errs = ratio_entry['err']
        nbins = len(ratios)
        # Bin edges 0, 1, ..., nbins - 1
        xbins = array.array('f', range(nbins))
        # NOTE(review): the axis has nbins - 1 bins but bins 1..nbins are set
        # below, so the last ratio presumably ends up in the overflow bin --
        # confirm that this is intended.
        histogram = ROOT.TH1D(sys_key, sys_key, nbins - 1, xbins)
        histogram.SetDirectory(sample_dir)
        histogram.SetXTitle('# preselected jets')
        histogram.SetTitle('{} ({})'.format(sample_key, sys_key))
        for root_bin in range(1, nbins + 1):
            histogram.SetBinContent(root_bin, ratios[root_bin - 1])
            histogram.SetBinError(root_bin, ratio_errs[root_bin - 1])
        histogram.Write()
        del histogram
out_fptr.Close()

Esempio n. 21

0

Mostra file

File: inspect_rle_numbers.py Progetto: abbywarden/tth-htt

def build_rle_file(rles, output):
    """Store run:lumi:event numbers and their region membership in a ROOT file.

    For every channel a directory is created in the output file, and for
    every sample of the channel a TTree named after the sample. Each tree
    entry corresponds to one unique event and holds the branches 'run',
    'luminosityBlock' and 'event', plus one flag branch per
    '<region>_<central_or_shift>' combination of the channel. The flag is 1
    if the event was selected in that combination and 0 otherwise.

    Args:
      rles:   nested mapping
              channel -> region -> sample name -> central_or_shift
              -> iterable of 'run:lumi:event' strings
      output: path of the ROOT file to create (opened in 'recreate' mode)
    """
    rles_aggregated = collections.OrderedDict()
    region_plus_sys_arr = collections.OrderedDict()
    for channel in rles:
        rles_aggregated[channel] = collections.OrderedDict()
        region_plus_sys_arr[channel] = []
        for region in rles[channel]:
            for sample_name in rles[channel][region]:
                if sample_name not in rles_aggregated[channel]:
                    rles_aggregated[channel][
                        sample_name] = collections.OrderedDict()
                for central_or_shift in rles[channel][region][sample_name]:
                    region_plus_sys = region + '_' + central_or_shift
                    # The check must look at the list of this channel; the
                    # outer dictionary is keyed by channel names and would
                    # never match, letting duplicates pile up for every sample.
                    if region_plus_sys not in region_plus_sys_arr[channel]:
                        region_plus_sys_arr[channel].append(region_plus_sys)
                    for rle in rles[channel][region][sample_name][
                            central_or_shift]:
                        assert (REGEX_RLE.match(rle))
                        if rle not in rles_aggregated[channel][sample_name]:
                            rles_aggregated[channel][sample_name][rle] = []
                        rles_aggregated[channel][sample_name][rle].append(
                            region_plus_sys)
    output_file = ROOT.TFile(output, 'recreate')
    for channel in rles_aggregated:
        channel_dir = output_file.mkdir(channel)
        channel_dir.cd()
        event_count = 0
        for sample_name in rles_aggregated[channel]:
            tree = ROOT.TTree(sample_name, sample_name)
            # single-element arrays serve as the branch buffers
            run = array.array('I', [0])
            luminosityBlock = array.array('I', [0])
            event = array.array('L', [0])
            region_plus_sys_brs = collections.OrderedDict()
            for region_plus_sys in region_plus_sys_arr[channel]:
                region_plus_sys_brs[region_plus_sys] = array.array('I', [0])
            tree.Branch('run', run, 'run/i')
            tree.Branch('luminosityBlock', luminosityBlock,
                        'luminosityBlock/i')
            tree.Branch('event', event, 'event/l')
            for region_plus_sys in region_plus_sys_brs:
                tree.Branch(region_plus_sys,
                            region_plus_sys_brs[region_plus_sys],
                            '%s/i' % region_plus_sys)
            event_count_sample = len(rles_aggregated[channel][sample_name])
            logging.info(
                'Found a total of {} events in sample {} and channel {}'.
                format(event_count_sample, sample_name, channel))
            event_count += event_count_sample
            for rle in rles_aggregated[channel][sample_name]:
                rle_split = rle.split(':')
                run[0] = int(rle_split[0])
                luminosityBlock[0] = int(rle_split[1])
                event[0] = int(rle_split[2])
                # a set makes the per-branch membership test O(1)
                regions_of_event = set(
                    rles_aggregated[channel][sample_name][rle])
                for region_plus_sys in region_plus_sys_brs:
                    region_plus_sys_brs[region_plus_sys][0] = int(
                        region_plus_sys in regions_of_event)
                tree.Fill()
            output_file.Write()
        logging.info('Found a total of {} events in channel {}'.format(
            event_count, channel))
    # Close the file explicitly instead of relying on garbage collection, and
    # report it as written only once it is complete.
    output_file.Close()
    logging.info('Wrote file {}'.format(output))

Esempio n. 22

0

Mostra file

File: validate_lumiscale.py Progetto: saswatinandan/tth-htt

def plot(input_files, output_files, title, expected_neff, mode):
  '''
  Sums the lumiScale histograms over all input files and plots the content of
  their first bin (the effective event count) for every systematic option.

  Args:
    input_files:   dict,   Maps a sample name to an entry that provides the path to the
                           ROOT file ('input') and the expected number of entries ('nentries')
    output_files:  list,   Paths the canvas is saved to
    title:         string, Title of the plot
    expected_neff: float,  Expected effective event count; drawn only if mode is 'unbiased'
    mode:          string, Name of the subdirectory the histograms are read from; the
                           value 'unbiased' also enables the consistency checks against
                           'nentries' and expected_neff

  Returns:
    None; if no histogram could be found, an error is logged and nothing is plotted

  Raises:
    RuntimeError, if an input file does not have the expected directory layout, if the
                  number of entries in a histogram is not the expected one (in 'unbiased'
                  mode), or if the number of files or entries differs between the histograms
  '''
  histogram_dict = {}
  for sample_name, sample_entry in input_files.items():
    if not hdfs.isfile(sample_entry['input']):
      logging.error('Could not find file {}'.format(sample_entry['input']))
      continue
    root_file = ROOT.TFile.Open(sample_entry['input'], 'read')
    logging.debug('Opened file {}'.format(sample_entry['input']))
    # NOTE(review): the comparison with None is kept as '!=' on purpose; a missing object
    # is presumably returned as a null pointer proxy, which is not the None singleton
    root_directories = list(filter(
      lambda root_dir: root_dir != None, [
        root_file.Get(os.path.join(key.GetName(), mode, 'genEvt')) \
        for key in root_file.GetListOfKeys() if key.GetClassName() == 'TDirectoryFile'
      ]
    ))
    if len(root_directories) != 1:
      raise RuntimeError('Expected single directory in %s' % sample_entry['input'])
    root_dir = root_directories[0]
    histogram_dirs = [
      root_dir.Get(key.GetName()) \
      for key in root_dir.GetListOfKeys() if key.GetClassName() == 'TDirectoryFile'
    ]
    if len(histogram_dirs) != 1:
      raise RuntimeError(
        'Expected single directory containing lumiScale histograms in %s' % sample_entry['input']
      )
    histogram_dir = histogram_dirs[0]
    histograms = [
      key.GetName() for key in histogram_dir.GetListOfKeys() \
      if key.GetClassName().startswith('TH1') and 'lumiScale' in key.GetName()
    ]
    for histogram_name_actual in histograms:
      # the nominal histogram is called 'lumiScale'; the others are keyed by what is left
      # of their name after removing the common prefix and suffix
      histogram_name = histogram_name_actual.replace('_lumiScale', '').replace('CMS_ttHl_', '') \
                       if histogram_name_actual != 'lumiScale' else 'central'
      histogram = histogram_dir.Get(histogram_name_actual).Clone()
      # detach the clone from the file so that it survives root_file.Close()
      histogram.SetDirectory(0)
      if histogram.GetEntries() != sample_entry['nentries'] and mode == 'unbiased':
        raise RuntimeError('Expected {} entries from {} in file {}, but got {} entries'.format(
          sample_entry['nentries'], histogram_name, sample_entry['input'], histogram.GetEntries(),
        ))
      if histogram_name not in histogram_dict:
        histogram_dict[histogram_name] = {
          'histogram' : histogram,
          'nentries'  : histogram.GetEntries(),
          'nfiles'    : 1,
        }
      else:
        histogram_dict[histogram_name]['histogram'].Add(histogram)
        histogram_dict[histogram_name]['nentries'] += histogram.GetEntries()
        histogram_dict[histogram_name]['nfiles'] += 1

    root_file.Close()

  if not histogram_dict:
    logging.error('Could not find histograms for samples {}'.format(', '.join(list(input_files.keys()))))
    return

  if len(set(histogram_dict[histogram_name]['nfiles'] for histogram_name in histogram_dict)) != 1:
    raise RuntimeError(
      'Inconsistent number of files found for samples %s' % ', '.join(list(input_files.keys()))
    )
  if len(set(histogram_dict[histogram_name]['nentries'] for histogram_name in histogram_dict)) != 1:
    raise RuntimeError(
      'Inconsistent number of entries found in samples %s' % ', '.join(list(input_files.keys()))
    )

  # Range of the y-axis: spans content +- error of all histograms. None marks "not set
  # yet"; a negative number cannot be used as the marker because the lower edge
  # (content - error) may legitimately be negative.
  min_y = None
  max_y = None
  nentries = -1
  for histograms in histogram_dict.values():
    histogram = histograms['histogram']
    y_content = histogram.GetBinContent(1)
    y_error   = histogram.GetBinError(1)

    y_down = y_content - y_error
    y_up   = y_content + y_error

    min_y = y_down if min_y is None else min(min_y, y_down)
    max_y = y_up   if max_y is None else max(max_y, y_up)

    if nentries < 0:
      nentries = histograms['nentries']
    else:
      assert(nentries == histograms['nentries'])

    if not (y_down < expected_neff < y_up) and mode == 'unbiased':
      logging.warning(
        "Effective event count {} not within {} +- {}".format(expected_neff, y_content, y_error)
      )

  if mode == 'unbiased':
    min_y = min(min_y, expected_neff)
    max_y = max(max_y, expected_neff)
  # pad the range by 20% of its extent on both sides
  diff = 0.2 * (max_y - min_y)
  min_y -= diff
  max_y += diff

  canvas = ROOT.TCanvas('c', 'c', 1200, 900)
  canvas.SetGrid()
  ROOT.gStyle.SetOptStat(0)

  legend = ROOT.TLegend(0.1, 0.7, 0.48, 0.9)
  legend.SetHeader('N_{eff} (%d entries)' % nentries)

  expected_histogram = None

  line_width = 3
  marker_style = 20
  fill_style = 4000

  # keeps the references to the drawn lines until the canvas has been saved
  lines = []

  for idx, histogram_name in enumerate(sorted(histogram_dict.keys())):
    histogram = histogram_dict[histogram_name]['histogram']
    color = 2 + idx

    histogram.SetTitle(title)
    histogram.SetAxisRange(min_y, max_y, "Y")
    histogram.SetLineColor(color)
    histogram.SetMarkerColor(color)
    histogram.SetLineWidth(line_width)
    histogram.SetMarkerStyle(marker_style)
    histogram.SetFillStyle(fill_style)
    histogram.Draw("l e1%s" % (" same" if idx > 0 else ""))

    y_content = histogram.GetBinContent(1)
    y_error   = histogram.GetBinError(1)
    y_up      = y_content + y_error
    y_down    = y_content - y_error

    # horizontal caps at content +- error, half a bin wide and centered in the bin
    bin_width  = histogram.GetBinWidth(1)
    bin_center = histogram.GetBinCenter(1)
    line_min_x = bin_center - bin_width / 4
    line_max_x = bin_center + bin_width / 4

    line_down = ROOT.TLine(line_min_x, y_down, line_max_x, y_down)
    line_down.SetLineColor(color)
    line_down.SetLineWidth(line_width)
    line_down.Draw()
    lines.append(line_down)

    line_up = ROOT.TLine(line_min_x, y_up, line_max_x, y_up)
    line_up.SetLineColor(color)
    line_up.SetLineWidth(line_width)
    line_up.Draw()
    lines.append(line_up)

    # number of decimals in the legend: fewer decimals for larger counts, at least one
    sig_digits = max(8 - int(math.ceil(math.log10(y_content))), 1) if y_content > 0. else 1
    leg_pattern = '%s (%.{}f #pm %.{}f)'.format(sig_digits, sig_digits)
    leg_name = leg_pattern % (histogram_name, y_content, y_error)
    legend.AddEntry(histogram, leg_name)

    logging.debug(
      'Effective event count for the sys unc option {} is {} +- {}'.format(
        histogram_name, y_content, y_error
      )
    )

    # the expectation is built once, from a clone of the first histogram drawn
    if not expected_histogram and mode == 'unbiased':
      expected_histogram = histogram.Clone()
      expected_histogram.Reset()
      expected_histogram.SetBinContent(1, expected_neff)
      expected_histogram.SetBinError(1, 0)
      expected_histogram.SetLineColor(ROOT.kBlack)
      expected_histogram.SetMarkerColor(ROOT.kBlack)
      expected_histogram.SetLineWidth(line_width)
      expected_histogram.SetMarkerStyle(marker_style)
      expected_histogram.SetLineStyle(9)
      expected_histogram.SetFillStyle(fill_style)

  if expected_histogram:
    logging.debug('Expecting {} events'.format(expected_neff))
    expected_histogram.Draw("e2 same")
    legend.AddEntry(expected_histogram, 'expected (%.1f)' % expected_neff)

  legend.Draw()

  for output_file in output_files:
    canvas.SaveAs(output_file)

  # release the ROOT objects created in this function
  canvas.Close()
  legend.Delete()
  if expected_histogram:
    expected_histogram.Delete()
  for histogram_name in histogram_dict:
    histogram_dict[histogram_name]['histogram'].Delete()
  for line in lines:
    line.Delete()

Esempio n. 23

0

Mostra file

File: getRefGenWeight.py Progetto: huiling110/tth-htt

    "The most frequent LHE weight is: {:.6e}".format(most_frequent_weight))
# Store the reference weight in the output file
with open(output_file, 'w') as output_file_ptr:
    most_frequent_weight_str = '{:.6e}'.format(most_frequent_weight)
    output_file_ptr.write(most_frequent_weight_str)
logging.info("Wrote output file: {}".format(output_file))

if plot_files:
    # The axis extends to MAX_CUTOFF times the reference weight on both sides;
    # the negative side is dropped if none of the weights is negative.
    binning_max = MAX_CUTOFF * most_frequent_weight
    binning_min = -MAX_CUTOFF * most_frequent_weight
    has_neg_weights = any(entry[0] < 0 for entry in weights_by_frequency)
    if not has_neg_weights:
        binning_min = max(0., binning_min)
    bin_edges = list(np.linspace(binning_min, binning_max, PLOT_BINS + 1))
    binning = array.array('f', bin_edges)

    # Each entry provides the weight (the value to fill) and its frequency
    # (the weight of the fill)
    histogram = ROOT.TH1D(GENWEIGHT_NAME, GENWEIGHT_NAME, PLOT_BINS, binning)
    for entry in weights_by_frequency:
        histogram.Fill(entry[0], entry[1])
    # extend the displayed range to the bins 0 and nbins + 1
    last_bin = histogram.GetNbinsX() + 1
    histogram.GetXaxis().SetRange(0, last_bin)

    for plot_file in plot_files:
        # the plot is titled after the file name, without directory and extension
        plot_basename = os.path.basename(plot_file)
        title = os.path.splitext(plot_basename)[0]
        histogram.SetTitle(title)

        canvas = ROOT.TCanvas()
        canvas.SetCanvasSize(1000, 800)
        canvas.SetLogy(True)
        histogram.Draw('hist')
        canvas.SaveAs(plot_file)
        canvas.Close()
        del canvas

Esempio n. 24

0

Mostra file

def _accumulate_nof_events(total_nof_events, sample_entry):
    """Add the valid event counts of ``sample_entry`` to ``total_nof_events`` in place."""
    nof_events_keys = set(nof_key for nof_key in sample_entry['nof_events']
                          if is_valid_event_type(nof_key))
    assert (nof_events_keys == set(total_nof_events))
    for nof_key, nof_arr in sample_entry['nof_events'].items():
        if not is_valid_event_type(nof_key):
            continue
        assert (len(nof_arr) == len(total_nof_events[nof_key]))
        for idx, nof in enumerate(nof_arr):
            assert (nof > 0)
            total_nof_events[nof_key][idx] += nof


def _attach_binned_lumis(samples, binned_sample, inclusive_nof_events):
    """Store summed event counts, cross section and luminosities on one exclusive bin entry."""
    nof_events = {}
    xs = -1
    for sample_key, sample_entry in samples.items():
        if sample_key == 'sum_events':
            continue
        if sample_entry['process_name_specific'] not in binned_sample['samples']:
            continue
        if not nof_events:
            nof_events = copy_nof_events(sample_entry)
            # the binned sample must provide the same event types as the
            # inclusive one, otherwise the per-type weights cannot be paired
            inclusive_nof_events_type = set(
                event_type for event_type in inclusive_nof_events
                if is_valid_event_type(event_type))
            nof_events_type = set(event_type for event_type in nof_events
                                  if is_valid_event_type(event_type))
            assert (inclusive_nof_events_type == nof_events_type)
        else:
            _accumulate_nof_events(nof_events, sample_entry)
        if xs < 0:
            # the cross section is taken from the first matching sample
            xs = sample_entry['xsection']
    assert (xs > 0)
    binned_sample['xsection'] = xs
    binned_sample['nof_events'] = nof_events
    binned_sample['lumis'] = dict(
        (nof_key, [nof / xs for nof in nof_arr])
        for nof_key, nof_arr in nof_events.items())


def _get_or_make_dir(parent_dir, dir_name):
    """Return sub-directory ``dir_name`` of ``parent_dir``, creating it if no key has that name."""
    existing_names = [key.GetName() for key in parent_dir.GetListOfKeys()]
    if dir_name not in existing_names:
        return parent_dir.mkdir(dir_name)
    return parent_dir.Get(dir_name)


def _lumi_of_bin(split_dict, bin_idx, nof_key, idx):
    """Return the luminosity of the first sample in ``split_dict`` assigned to ``bin_idx``, else 0."""
    for binned_sample in split_dict:
        if bin_idx == binned_sample['idx']:
            return binned_sample['lumis'][nof_key][idx]
    return 0.


def comp_weights_2_wo_inclusive(f, samples, samples_to_stitch, split_var_1,
                                split_var_2):
    """Write 2D stitching-weight histograms for samples binned in two variables.

    For every sample listed under ``samples_to_stitch['exclusive']`` one
    ``ROOT.TH2D`` per (event type, index) pair is written to the directory
    ``<sample name>/<split_var_1>_v_<split_var_2>_wo_inclusive`` of ``f``.
    Bins outside the row/column the sample is assigned to are set to 0. In
    its own row/column the content is ``own / (own + other)``, where ``other``
    is the luminosity of the sample of the other split variable assigned to
    the crossing bin (0 if there is none); it is set to 1 without dividing
    when the bin's upper edges lie outside the inclusive binning and
    ``other`` is 0.

    Args:
      f:                 ROOT directory/file providing ``GetListOfKeys``,
                         ``mkdir`` and ``Get``.
      samples:           dict of sample entries; the key ``'sum_events'`` is
                         skipped, every other entry is read for
                         ``'process_name_specific'``, ``'xsection'`` and
                         ``'nof_events'``.
      samples_to_stitch: dict with an ``'inclusive'`` entry (``'samples'``
                         plus one binning per split variable) and an
                         ``'exclusive'`` entry (per split variable a list of
                         dicts with ``'value'`` and ``'samples'``).
      split_var_1:       name of the variable on the x-axis.
      split_var_2:       name of the variable on the y-axis.

    Side effects:
      Every dict in ``samples_to_stitch['exclusive'][split_var_*]`` gains the
      keys ``'xsection'``, ``'nof_events'``, ``'lumis'`` and ``'idx'``.

    Raises:
      AssertionError: if no positive cross section is found for the first
        inclusive sample or for a binned sample, if event types or array
        lengths of summed samples disagree, if an added event count is not
        positive, or if a computed weight is negative.
    """
    inclusive_samples = samples_to_stitch['inclusive']['samples']
    inclusive_binning_1 = samples_to_stitch['inclusive'][split_var_1]
    inclusive_binning_2 = samples_to_stitch['inclusive'][split_var_2]

    split_dict_1 = samples_to_stitch['exclusive'][split_var_1]
    split_dict_2 = samples_to_stitch['exclusive'][split_var_2]
    split_binning_1 = [sample['value'] for sample in split_dict_1]
    split_binning_2 = [sample['value'] for sample in split_dict_2]
    # union of the inclusive and all exclusive bin edges, in ascending order
    complete_binning_1 = sorted(
        map(float,
            set(inclusive_binning_1)
            | set(itertools.chain.from_iterable(split_binning_1))))
    complete_binning_2 = sorted(
        map(float,
            set(inclusive_binning_2)
            | set(itertools.chain.from_iterable(split_binning_2))))

    inclusive_xs = -1
    for sample_key, sample_entry in samples.items():
        if sample_key == 'sum_events':
            continue
        if sample_entry['process_name_specific'] == inclusive_samples[0]:
            inclusive_xs = sample_entry['xsection']
    assert (inclusive_xs > 0)

    # sum the inclusive nof events
    inclusive_nof_events = {}
    for sample_key, sample_entry in samples.items():
        if sample_key == 'sum_events':
            continue
        if sample_entry['process_name_specific'] in inclusive_samples:
            if not inclusive_nof_events:
                inclusive_nof_events = copy_nof_events(sample_entry)
            else:
                _accumulate_nof_events(inclusive_nof_events, sample_entry)

    # sum the binned nof events (first split variable, then the second one)
    for binned_sample in split_dict_1:
        _attach_binned_lumis(samples, binned_sample, inclusive_nof_events)
    for binned_sample in split_dict_2:
        _attach_binned_lumis(samples, binned_sample, inclusive_nof_events)

    # compute integrated luminosities for the inclusive sample
    inclusive_lumis = {}
    for nof_key, nof_arr in inclusive_nof_events.items():
        inclusive_lumis[nof_key] = [nof / inclusive_xs for nof in nof_arr]

    # decide on the bin indices (1-based, matching the ROOT bin numbering)
    for binned_sample in split_dict_1:
        binned_sample['idx'] = complete_binning_1.index(
            binned_sample['value'][0]) + 1
    for binned_sample in split_dict_2:
        binned_sample['idx'] = complete_binning_2.index(
            binned_sample['value'][0]) + 1

    # loop-invariant pieces shared by all histograms
    binning_1 = array.array('f', complete_binning_1)
    binning_2 = array.array('f', complete_binning_2)
    subdir_name = '%s_v_%s_wo_inclusive' % (split_var_1, split_var_2)

    # The two passes are mirror images of each other: in the first one the
    # sample owns a row along the x-axis, in the second one along the y-axis.
    passes = (
        (split_dict_1, split_dict_2, True),
        (split_dict_2, split_dict_1, False),
    )
    for own_split, other_split, own_is_first in passes:
        for own_sample in own_split:
            for sample_name in own_sample['samples']:
                histogram_dir_root = _get_or_make_dir(f, sample_name)
                histogram_dir = _get_or_make_dir(histogram_dir_root,
                                                 subdir_name)
                histogram_dir.cd()

                for nof_key, lumi_arr in inclusive_lumis.items():
                    if not is_valid_event_type(nof_key):
                        continue
                    for idx in range(len(lumi_arr)):
                        histogram_name = '%s_%d' % (nof_key, idx)
                        histogram = ROOT.TH2D(histogram_name, histogram_name,
                                              len(binning_1) - 1, binning_1,
                                              len(binning_2) - 1, binning_2)
                        histogram.SetDirectory(histogram_dir)
                        histogram.SetXTitle(split_var_1)
                        histogram.SetYTitle(split_var_2)

                        for split_idx_1 in range(1, len(binning_1)):
                            for split_idx_2 in range(1, len(binning_2)):
                                if own_is_first:
                                    own_idx = split_idx_1
                                    other_idx = split_idx_2
                                else:
                                    own_idx = split_idx_2
                                    other_idx = split_idx_1

                                if own_idx != own_sample['idx']:
                                    # the sample does not populate this bin
                                    histogram.SetBinContent(
                                        split_idx_1, split_idx_2, 0.)
                                    continue

                                lumi_own = own_sample['lumis'][nof_key][idx]
                                lumi_other = _lumi_of_bin(
                                    other_split, other_idx, nof_key, idx)
                                outside_inclusive = (
                                    binning_1[split_idx_1] > inclusive_binning_1[-1] or
                                    binning_1[split_idx_1] < inclusive_binning_1[0] or
                                    binning_2[split_idx_2] > inclusive_binning_2[-1] or
                                    binning_2[split_idx_2] < inclusive_binning_2[0])
                                if outside_inclusive and lumi_other == 0.:
                                    weight = 1.
                                else:
                                    weight = lumi_own / (lumi_own + lumi_other)
                                # a negative stitching weight means corrupt
                                # input; checked in both passes
                                assert (weight >= 0.)
                                histogram.SetBinContent(
                                    split_idx_1, split_idx_2, weight)

                        for split_idx_1 in range(1, len(binning_1)):
                            histogram.GetXaxis().SetBinLabel(
                                split_idx_1, '%.0f <= %s < %.0f' %
                                (complete_binning_1[split_idx_1 - 1],
                                 split_var_1,
                                 complete_binning_1[split_idx_1]))
                        for split_idx_2 in range(1, len(binning_2)):
                            histogram.GetYaxis().SetBinLabel(
                                split_idx_2, '%.0f <= %s < %.0f' %
                                (complete_binning_2[split_idx_2 - 1],
                                 split_var_2,
                                 complete_binning_2[split_idx_2]))

                        histogram.Write()

Esempio n. 25

0

Mostra file

    def memJobList(self, inputFileList, rle_whitelist):
        '''
        Splits every fileset into consecutive event ranges such that the number
        of MEM integrations in one range does not exceed self.mem_integrations_per_job

        Args:
          inputFileList: { int : array of strings }; i.e. fileset* ID and the list of files
          rle_whitelist: collection of "run:luminosityBlock:event" strings; if self.rle_filter_file
                         is set, only the whitelisted events are counted (as one integration each)

        * if the script were to generate configuration files, this number would correspond to job ID

        Returns:
          { int : { str : int, str : [str, str, ...], str : [int, int], ... } }
            |        |          |                      |
         job id  "fileset_id" "input_fileset"     "event_range"

          Each job additionally carries "nof_entries", "nof_int", "nof_int_pass",
          "nof_events_pass" and "nof_zero"; jobs of non-empty filesets also carry "whitelist".

        Raises:
          ValueError: if a single event requires more integrations than
                      self.mem_integrations_per_job allows

        The function reads a given set of files and determines the event range
        '''
        memJobDict = {}
        jobId = 0
        apply_rle_filter = bool(self.rle_filter_file)
        # the whitelist is probed for every event: hash it once instead of scanning it each time
        rle_whitelist_lookup = frozenset(rle_whitelist) if rle_whitelist else frozenset()
        for filesetId, inputFileSet in inputFileList.items():
            memJobDict_common = { 'fileset_id' : filesetId, 'input_fileset' : inputFileSet }
            print("Processing file %s" % inputFileSet)
            ch = ROOT.TChain(self.treeName)
            for fn in inputFileSet:
                # chaining a file
                logging.debug("Processing file {fileName}".format(fileName = fn))
                ch.AddFile(fn)

            nof_entries = ch.GetEntries()

            memJobDict_common['nof_entries'] = nof_entries
            if nof_entries == 0:
                # an empty fileset still gets a (trivial) job entry
                jobId += 1
                memJobDict[jobId] = dict({
                    'event_range'     : [0, 0],
                    'nof_int'         : 0,
                    'nof_int_pass'    : 0,
                    'nof_events_pass' : 0,
                    'nof_zero'        : 0,
                }, **memJobDict_common)
                continue

            current_pos = 0
            evt_ranges = []

            # running counters for the current range and the per-range totals collected so far
            counter, counter_arr = 0, []
            nof_events_pass_counter, nof_events_pass   = 0, []
            nof_int_pass_counter,    nof_int_pass      = 0, []
            nof_zero_integrations,   nof_events_zero   = 0, []
            whitelist_all,           whitelist_running = [], []

            # single-element buffers which ROOT fills on every GetEntry() call
            run                    = array.array('I', [0])
            luminosityBlock        = array.array('I', [0])
            event                  = array.array('L', [0])
            maxPermutations_addMEM = array.array('i', [0])
            ch.SetBranchAddress("run",             run)
            ch.SetBranchAddress("luminosityBlock", luminosityBlock)
            ch.SetBranchAddress("event",           event)
            if self.maxPermutations_branchName is not None and self.maxPermutations_branchName != "":
                ch.SetBranchAddress(self.maxPermutations_branchName, maxPermutations_addMEM)
            else:
                # without a dedicated branch every event counts as one integration
                maxPermutations_addMEM[0] = 1

            for i in range(nof_entries):
                ch.GetEntry(i)
                if i > 0 and i % 10000 == 0:
                    print(" Processing event %i/%i" % (i, nof_entries))
                    logging.debug("Processing event %i/%i" % (i, nof_entries))

                rle = ':'.join(map(lambda nr: str(nr[0]), [ run, luminosityBlock, event ]))
                is_whitelisted = rle in rle_whitelist_lookup

                nof_integrations = maxPermutations_addMEM[0]
                if apply_rle_filter:
                    if is_whitelisted:
                        if not (nof_integrations > 0):
                            logging.error("Expected non-zero # integrations in event {}, but got {}".format(rle, nof_integrations))
                        nof_integrations = 1
                    else:
                        nof_integrations = 0

                if nof_integrations < 0:
                    nof_integrations = 0

                if nof_integrations >= 1:
                    nof_events_pass_counter += 1
                    nof_int_pass_counter += nof_integrations
                else:
                    nof_zero_integrations += 1

                if nof_integrations > self.mem_integrations_per_job:
                    # report the event via the branch buffers bound above
                    raise ValueError("Too many nof_integrations = %d in file(s) %s at %d:%d:%d" %
                                     (nof_integrations, ', '.join(inputFileSet),
                                      run[0], luminosityBlock[0], event[0]))

                if (counter + nof_integrations) > self.mem_integrations_per_job:
                    # the current event would overflow the job: close the range right before it
                    if evt_ranges:
                        evt_ranges.append([evt_ranges[-1][1], current_pos])
                    else:
                        evt_ranges.append([0, current_pos])
                    counter_arr.append(counter)
                    counter = 0

                    nof_events_pass.append(nof_events_pass_counter)
                    nof_events_pass_counter = 0

                    nof_int_pass.append(nof_int_pass_counter)
                    nof_int_pass_counter = 0

                    nof_events_zero.append(nof_zero_integrations)
                    nof_zero_integrations = 0

                    if apply_rle_filter:
                        whitelist_all.append(whitelist_running)
                        whitelist_running = []

                if is_whitelisted:
                    whitelist_running.append(rle)

                counter += nof_integrations
                current_pos += 1

            # close the last (possibly only) range, which runs up to the end of the chain
            if 0 <= counter <= self.mem_integrations_per_job:
                if evt_ranges:
                    evt_ranges.append([evt_ranges[-1][1], int(nof_entries)])
                else:
                    evt_ranges.append([0, int(nof_entries)])
                counter_arr.append(counter)
                nof_events_pass.append(nof_events_pass_counter)
                nof_int_pass.append(nof_int_pass_counter)
                nof_events_zero.append(nof_zero_integrations)
                if apply_rle_filter:
                    whitelist_all.append(whitelist_running)

            # ensure that the event ranges won't overlap (i.e. there won't be any double-processing of any event)
            evt_ranges_cat = []
            for evt_range in evt_ranges:
                evt_ranges_cat += range(evt_range[0], evt_range[1])
            # list(...) keeps the comparison meaningful when range() is lazy
            assert(evt_ranges_cat == list(range(nof_entries)))
            assert(bool(evt_ranges))

            for range_idx, evt_range in enumerate(evt_ranges):
                if self.max_jobs_per_sample == -1 or jobId < self.max_jobs_per_sample:
                    jobId += 1
                    memJobDict[jobId] = dict({
                        'event_range'     : evt_range,
                        'nof_int'         : counter_arr[range_idx],
                        'nof_int_pass'    : nof_int_pass[range_idx],
                        'nof_events_pass' : nof_events_pass[range_idx],
                        'nof_zero'        : nof_events_zero[range_idx],
                        'whitelist'       : whitelist_all[range_idx] if apply_rle_filter else [],
                    }, **memJobDict_common)
                    # we now have all event ranges per one file, let's add them to the dictionary

            del ch
        return memJobDict