Exemple #1
0
def skim(in_filename, out_filename, entry_list, tree_name = "Events"):
  '''Copies the selected entries of a TTree from one ROOT file into a new ROOT file

  Args:
    in_filename:  string,    Path to the input ROOT file
    out_filename: string,    Path to the output ROOT file
    entry_list:   int array, List of entries (not RLE numbers!) which are used to select the event
    tree_name:    string,    TTree name (default: Events)

  Returns:
    True,  if every requested entry number is smaller than the number of entries in the input tree
    False, otherwise (the output file is not created in that case)
  '''
  logging.debug("Processing file {in_filename}".format(in_filename = in_filename))
  f_in = ROOT.TFile(in_filename, "read")
  t_in = f_in.Get(tree_name)

  nof_entries = t_in.GetEntries()
  # An empty selection is valid and simply produces an empty output tree
  max_entry = max(entry_list) if len(entry_list) > 0 else -1
  # Entries are counted from 0, hence the largest valid entry number is nof_entries - 1
  if max_entry >= nof_entries:
    logging.error("Max entry ID is out of range in {root_filename}: {max_entry} >= {nof_entries}".format(
      root_filename = in_filename,
      max_entry     = max_entry,
      nof_entries   = nof_entries,
    ))
    f_in.Close()
    return False

  f_out = ROOT.TFile(out_filename, "recreate")
  # Clone only the structure of the input tree (0 entries); the entries are filled below
  t_out = t_in.CloneTree(0)

  for i in entry_list:
    t_in.GetEntry(i)
    t_out.Fill()

  t_out.AutoSave()
  # Close the files explicitly instead of relying on the garbage collector
  f_out.Close()
  f_in.Close()
  logging.debug("Saved events to {out_filename}".format(out_filename = out_filename))
  return True
def validate_regions(rles):
    '''Checks that no event shows up in more than one region of the same channel

    Args:
      rles: nested mapping, channel -> region -> sample name -> systematics -> iterable of events

    Returns:
      True,  if at least one event was found in multiple regions of a channel
      False, otherwise

    Regions with 'Fakeable_mcClosure' in their name are ignored. In channels
    containing 'hh', an overlap is tolerated if the region names become
    identical once their '_hadTau', '_lep' and '_sum' charge suffixes are
    removed for some combination of 'SS'/'OS' labels.
    '''
    charge_labels = ['SS', 'OS']

    def differ_only_by_charge(region_names):
        # Try every combination of charge labels and see whether dropping the
        # corresponding suffixes collapses all names into a single one
        for had_tau_label in charge_labels:
            for lep_label in charge_labels:
                for sum_label in charge_labels:
                    stripped_names = set()
                    for region_name in region_names:
                        region_name = region_name.replace(
                            '_hadTau{}'.format(had_tau_label), '')
                        region_name = region_name.replace(
                            '_lep{}'.format(lep_label), '')
                        region_name = region_name.replace(
                            '_sum{}'.format(sum_label), '')
                        stripped_names.add(region_name)
                    if len(stripped_names) == 1:
                        return True
        return False

    found_overlap = False
    for channel in rles:
        # sample name -> systematics -> event -> list of regions holding it
        seen = collections.OrderedDict()
        for region in rles[channel]:
            if 'Fakeable_mcClosure' in region:
                continue
            region_content = rles[channel][region]
            for sample_name in region_content:
                by_shift = seen.setdefault(sample_name,
                                           collections.OrderedDict())
                for central_or_shift in region_content[sample_name]:
                    by_event = by_shift.setdefault(central_or_shift,
                                                   collections.OrderedDict())
                    for rle in region_content[sample_name][central_or_shift]:
                        holders = by_event.setdefault(rle, [])
                        if region not in holders:
                            holders.append(region)

        for sample_name in seen:
            sample_has_overlap = False
            for central_or_shift in seen[sample_name]:
                for rle in seen[sample_name][central_or_shift]:
                    regions = seen[sample_name][central_or_shift][rle]
                    if len(regions) <= 1:
                        continue
                    if 'hh' in channel and differ_only_by_charge(regions):
                        continue
                    logging.error(
                        "Found duplicates in channel {} and sample {} for event {}: regions {}"
                        .format(channel, sample_name, rle, ', '.join(regions)))
                    sample_has_overlap = True
            if not sample_has_overlap:
                logging.info(
                    'No overlapping events found between regions for sample {} in channel {}'
                    .format(sample_name, channel))
            found_overlap = found_overlap or sample_has_overlap
    return found_overlap
Exemple #3
0
def load_dict(path, name):
  '''Executes a Python source file and returns one of its module-level attributes

  Args:
    path: string, Path to the Python file that defines the dictionary
    name: string, Name of the module-level variable to be returned

  Returns:
    The object bound to `name` in the loaded file

  The script exits via sys.exit(1) if the file does not exist or if it does
  not define `name`.
  '''
  if not os.path.isfile(path):
    logging.error("No such dictionary file: {dict_path}".format(dict_path = path))
    sys.exit(1)
  try:
    import imp
  except ImportError:
    # The imp module is gone as of Python 3.12; do the same with importlib
    import importlib.machinery
    import importlib.util
    loader = importlib.machinery.SourceFileLoader('', path)
    spec = importlib.util.spec_from_loader('', loader)
    imp_dict = importlib.util.module_from_spec(spec)
    loader.exec_module(imp_dict)
  else:
    imp_dict = imp.load_source('', path)
  if not hasattr(imp_dict, name):
    logging.error("No such dictionary in the file '{dict_path}': {dict_name}".format(
      dict_path = path, dict_name = name,
    ))
    sys.exit(1)
  return getattr(imp_dict, name)
def validate_channels(rles):
    '''Checks that no event is selected in the 'Tight' regions of more than one channel

    Args:
      rles: nested mapping, channel -> region -> sample name -> systematics -> iterable of events

    Returns:
      True,  if at least one event was found in the 'Tight' regions of multiple channels
      False, otherwise

    Only regions with 'Tight' in their name are considered. An event shared by
    region 'Tight' of channel '2los_1tau' and region 'Tight_OS_OS' of channel
    '2lss_1tau' is not reported.
    '''
    # sample name -> systematics -> event -> (channel -> region)
    seen = collections.OrderedDict()
    for channel in rles:
        for region in rles[channel]:
            if 'Tight' not in region:
                continue
            region_content = rles[channel][region]
            for sample_name in region_content:
                by_shift = seen.setdefault(sample_name,
                                           collections.OrderedDict())
                for central_or_shift in region_content[sample_name]:
                    by_event = by_shift.setdefault(central_or_shift,
                                                   collections.OrderedDict())
                    for rle in region_content[sample_name][central_or_shift]:
                        by_event.setdefault(
                            rle, collections.OrderedDict())[channel] = region

    found_overlap = False
    for sample_name in seen:
        for central_or_shift in seen[sample_name]:
            for rle in seen[sample_name][central_or_shift]:
                region_of = seen[sample_name][central_or_shift][rle]
                if len(region_of) <= 1:
                    continue
                is_tolerated = (
                    region_of.get('2los_1tau') == 'Tight'
                    and region_of.get('2lss_1tau') == 'Tight_OS_OS')
                if is_tolerated:
                    continue
                details = []
                for overlapping_channel in region_of:
                    details.append('%s (region %s, systematics %s)' % (
                        overlapping_channel,
                        region_of[overlapping_channel],
                        central_or_shift,
                    ))
                logging.error(
                    "Found the same event {} from sample {} in multiple channels: {}"
                    .format(rle, sample_name, ', '.join(details)))
                found_overlap = True

    if not found_overlap:
        logging.info(
            "No overlaps found between the signal regions of channels: {}".
            format(', '.join(rles.keys())))
    return found_overlap
Exemple #5
0
def cleanup(dir_name, exit = False):
  '''Removes a directory and might exit hard from the script
  Args:
    dir_name: string, Path to directory which is to be removed
    exit:     bool,   If True, exits the script via sys.exit(1) (default: False)

  Returns:
    None

  A failure to remove the directory is only logged; it does not prevent the
  optional exit from happening.
  '''
  logging.debug("Removing directory {dir_name}".format(dir_name = dir_name))
  try:
    shutil.rmtree(dir_name)
  except (IOError, OSError):
    # shutil.rmtree() reports failures with OSError, which is a class distinct
    # from IOError in Python 2 (they are aliases only in Python 3)
    logging.error("Managed to get an error while removing directory {dir_name}".format(dir_name = dir_name))
  if exit:
    sys.exit(1)
  def run(self, clean):
    '''Records the software state and runs `make` on the makefile of this object

    Args:
      clean: bool, If True, runs the 'clean' target instead of 'all'; the script
                   exits via sys.exit(1) if the makefile is missing in that case

    Returns:
      None
    '''
    record_software_state(self.sw_ver_file_cfg, self.sw_ver_file_out, DEPENDENCIES)

    if clean and not os.path.isfile(self.makefile_path):
      logging.error(
        "The makefile %s is missing and therefore it's not possible to clean anything; "
        "run sync Ntuple production first!" % self.makefile_path
      )
      sys.exit(1)
    target = 'clean' if clean else 'all'

    # one parallel make job per entry in channel_info
    make_args = (
      self.makefile_path,
      len(self.channel_info),
      target,
      self.stderr_file_path,
      self.stdout_file_path,
    )
    make_cmd = "make -f %s -j %d %s 2>%s 1>%s" % make_args
    logging.info("Running the make command: %s" % make_cmd)
    run_cmd(make_cmd)
    logging.info("All done")
def create_dir_if_not_exist(d):
    '''Creates the directory (and its missing parents) if it doesn't exist

    Args:
      d: string, the directory to be created

    Returns:
      True,  if the directory exists after the call (it was already there or it was created)
      False, if the directory could not be created
    '''
    if os.path.isdir(d):
        return True
    logging.debug(
        "Directory '{dir_name}' doesn't exist, attempting to create it".
        format(dir_name=d, ))
    try:
        os.makedirs(d)
    except (IOError, OSError):
        # os.makedirs() reports failures with OSError, which is a class
        # distinct from IOError in Python 2
        if os.path.isdir(d):
            # somebody else created the directory in the meantime
            return True
        logging.error("Could not create the directory")
        return False
    return True
def validate_pu(output_file, samples):
    '''Compares the number of entries of per-sample histograms to the expected event counts

    Args:
      output_file: string, Path to the ROOT file that holds one histogram per MC sample
      samples:     dict,   Sample dictionary; entries need the keys 'type',
                           'process_name_specific' and 'nof_tree_events'

    Returns:
      0, if all histograms of the MC samples have the expected number of entries
      1, if the file does not exist
      2, if the file cannot be opened as a ROOT file
      3 or 4, if a histogram is missing (3) or has an unexpected number of
              entries (4); the code of the last failing sample is returned
    '''
    if not os.path.isfile(output_file):
        logging.error('File {} does not exist'.format(output_file))
        return 1
    histogram_file = ROOT.TFile.Open(output_file, 'read')
    if not histogram_file:
        logging.error('Not a valid ROOT file: {}'.format(output_file))
        return 2

    status = 0
    for sample_name, sample_info in samples.items():
        # only the MC samples are validated
        if not (sample_info["type"] == "mc"):
            continue
        process_name = sample_info["process_name_specific"]
        expected_nof_events = sample_info["nof_tree_events"]
        logging.info('Validating {} (expecting {} events)'.format(
            process_name, expected_nof_events))

        histogram = histogram_file.Get(process_name)
        if not histogram:
            logging.error("Could not find histogram '{}' in file {}".format(
                process_name, output_file))
            status = 3
            continue

        nof_events = int(histogram.GetEntries())
        if nof_events == expected_nof_events:
            logging.info(
                'Validation successful for sample {}'.format(process_name))
            continue
        logging.error(
            'Histogram {} in file {} has {} events, but expected {} events'
            .format(process_name, output_file, nof_events,
                    expected_nof_events))
        status = 4
    histogram_file.Close()

    if status == 0:
        logging.info("Validation successful!")
    else:
        logging.error("Validation failed!")
    return status
def validate_regions(rles):
    '''Checks that no event shows up in more than one region of the same channel

    Args:
      rles: nested mapping, channel -> region -> sample name -> systematics -> iterable of events

    Returns:
      True,  if at least one event was found in multiple regions of a channel
      False, otherwise

    Regions with 'Fakeable_mcClosure' in their name are ignored.
    '''
    overlap_found = False
    for channel in rles:
        # sample name -> systematics -> event -> list of regions holding it
        regions_of = collections.OrderedDict()
        for region in rles[channel]:
            if 'Fakeable_mcClosure' in region:
                continue
            region_content = rles[channel][region]
            for sample_name in region_content:
                sample_entry = regions_of.setdefault(
                    sample_name, collections.OrderedDict())
                for central_or_shift in region_content[sample_name]:
                    shift_entry = sample_entry.setdefault(
                        central_or_shift, collections.OrderedDict())
                    for rle in region_content[sample_name][central_or_shift]:
                        hits = shift_entry.setdefault(rle, [])
                        if region not in hits:
                            hits.append(region)

        for sample_name, sample_entry in regions_of.items():
            sample_is_clean = True
            for shift_entry in sample_entry.values():
                for rle, hits in shift_entry.items():
                    if len(hits) > 1:
                        logging.error(
                            "Found duplicates in channel {} and sample {} for event {}: regions {}"
                            .format(channel, sample_name, rle,
                                    ', '.join(hits)))
                        sample_is_clean = False
            if sample_is_clean:
                logging.info(
                    'No overlapping events found between regions for sample {} in channel {}'
                    .format(sample_name, channel))
            overlap_found = overlap_found or not sample_is_clean
    return overlap_found
Exemple #10
0
def check_dir(dirname, use_force):
    '''Makes sure that a directory exists, creating it only if allowed to

    Args:
      dirname:   string, Path to the directory
      use_force: bool,   If True, a missing directory (and its parents) is created

    Returns:
      True,  if the directory exists after the call
      False, if it is missing and either use_force is False or its creation failed
    '''
    if os.path.isdir(dirname):
        return True
    if not use_force:
        logging.error("Directory '{output_dir}' does not exist".format(
            output_dir=dirname, ))
        return False

    logging.debug(
        "Creating directory '{output_dir}' since it's missing".format(
            output_dir=dirname, ))
    try:
        os.makedirs(dirname)
    except (IOError, OSError) as err:
        # os.makedirs() reports failures with OSError, which is a class
        # distinct from IOError in Python 2
        logging.error(
            "Caught an error while creating directory '{output_dir}': {reason}"
            .format(
                output_dir=dirname,
                reason=err,
            ))
        return False
    return True
Exemple #11
0
def skim_debug(out_filename, rle_list, tree_name = "tree"):
  '''Verifies the outcome of skimming by comparing the RLE numbers of a file to a reference list

  Args:
    out_filename: string,       Path to the file the RLE numbers of which are compared to the reference
    rle_list:     string array, Reference list of RLE numbers as strings
    tree_name:    string,       TTree name (default: tree)

  Returns:
    True,  if the file and the reference list contain the same set of RLE numbers
    False, otherwise (also if the file does not exist)
  '''
  logging.debug("Checking if {out_filename} contains exactly the same events as provided by the RLE file".format(
    out_filename = out_filename,
  ))
  if not hdfs.isfile(out_filename):
    return False

  found    = set(get_rle(out_filename, tree_name))
  expected = set(rle_list)

  # the comparison is set-based, i.e. repeated RLE numbers are not detected
  missing_from_file = list(expected - found)
  excess_in_file    = list(found - expected)

  if missing_from_file:
    logging.error("There are {nof_missing} events missing from {out_filename}: {list_of_missing_events}".format(
      nof_missing            = len(missing_from_file),
      out_filename           = out_filename,
      list_of_missing_events = ', '.join(missing_from_file),
    ))
  if excess_in_file:
    logging.error("There are {nof_excess} event in excess in the file {out_filename}: {list_of_excess_events}".format(
      nof_excess            = len(excess_in_file),
      out_filename          = out_filename,
      list_of_excess_events = ', '.join(excess_in_file),
    ))

  return not (missing_from_file or excess_in_file)
Exemple #12
0
    def run(self, clean):
        '''Records the software state and runs `make` on the makefile of this object

        Args:
          clean: bool, If True, runs the 'clean' target instead of 'all'; the
                       script exits via sys.exit(1) if the makefile is missing
                       in that case

        Returns:
          None

        If the running method is 'makefile' (case-insensitive), make is run
        with -C pointing to the config directory, in which a leading '/home'
        is replaced by '/scratch'.
        '''
        record_software_state(self.sw_ver_file_cfg, self.sw_ver_file_out,
                              DEPENDENCIES)

        if clean and not os.path.isfile(self.makefile_path):
            logging.error(
                "The makefile %s is missing and therefore it's not possible to clean anything; "
                "run sync Ntuple production first!" % self.makefile_path)
            sys.exit(1)
        target = 'clean' if clean else 'all'

        # one parallel make job per entry in channel_info
        make_args = (
            self.makefile_path,
            len(self.channel_info),
            target,
            self.stderr_file_path,
            self.stdout_file_path,
        )
        make_cmd = "make -f %s -j %d %s 2>%s 1>%s" % make_args

        if self.running_method.lower() == "makefile":
            run_dir = re.sub('^/home', '/scratch', self.config_dir)
            create_if_not_exists(run_dir)
            make_cmd = re.sub('^make', 'make -C {}'.format(run_dir), make_cmd)

        logging.info("Running the make command: %s" % make_cmd)
        run_cmd(make_cmd)
        logging.info("All done")
def validate_data(rles):
    '''Checks that no data event is shared between different data samples within a region

    Args:
      rles: nested mapping, channel -> region -> sample name -> systematics -> iterable of events

    Returns:
      True,  if at least one event was found in more than one data sample of the same region
      False, otherwise

    Only samples with 'Run201' in their name are considered.
    '''
    found_overlap = False
    for channel in rles:
        channel_has_overlap = False
        for region in rles[channel]:
            # event -> (sample name -> list of systematics)
            owners_of = collections.OrderedDict()
            region_content = rles[channel][region]
            for sample_name in region_content:
                if 'Run201' not in sample_name:
                    continue
                for central_or_shift in region_content[sample_name]:
                    for rle in region_content[sample_name][central_or_shift]:
                        owners = owners_of.setdefault(
                            rle, collections.OrderedDict())
                        if sample_name in owners:
                            # same sample, just another systematics
                            owners[sample_name].append(central_or_shift)
                            continue
                        if owners:
                            # the event is already claimed by other samples
                            logging.error(
                                "Found duplicates in channel {} and region {} for event {}: samples {} and {}"
                                .format(channel, region, rle, sample_name,
                                        ', '.join(owners.keys())))
                            channel_has_overlap = True
                        owners[sample_name] = [central_or_shift]
            # NOTE(review): the flag is per channel, so after the first overlap
            # this message is suppressed for the remaining regions of the
            # channel as well -- confirm that this is intended
            if not channel_has_overlap:
                logging.info(
                    'No overlapping data events found in channel {} and region {}'
                    .format(channel, region))
            found_overlap = found_overlap or channel_has_overlap
    return found_overlap
def validate(output_dir, verbose=False):
    '''Validates the job execution carried out by dump_rle_parallel()

    Args:
      output_dir: string, The directory where all RLE files are stored
      verbose:    bool,   Enable verbose output

    Returns:
      None

    The validation is quite basic: the program loops over the entries of the
    module-level `samples` dictionary, picks the ones that have a subdirectory
    in output_dir and counts the number of lines in each RLE file. If the
    number of lines doesn't match the number of entries in the corresponding
    ROOT file, the user will be notified about such discrepancies. RLE files
    that are missing, or that exist for blacklisted ROOT files, are reported
    as well. The validation can be interrupted with Ctrl+C, in which case the
    results gathered so far are still summarized.

    In principle, the script could also print relevant commands to fix the issues (and dump them
    to an easily executable file) but let's leave it for another time.
    '''

    if verbose:
        logging.getLogger().setLevel(logging.DEBUG)

    root_file_regex = re.compile(r'^tree_(\d+)\.root$')
    file_dict = {k: [] for k in ['excess', 'missing', 'corrupted']}

    try:

        for s_value in samples.values():
            sample_name = s_value['process_name_specific']
            sample_dir = os.path.join(output_dir, sample_name)
            if not os.path.isdir(sample_dir):
                continue
            logging.debug("Found sample directory {sample_dir}".format(
                sample_dir=sample_dir))

            #NB! assume that there are no secondary paths in the dictionary (hence index 0!)
            sample_path_dict = s_value['local_paths'][0]
            sample_path = sample_path_dict['path']
            blacklist = sample_path_dict['blacklist']
            for sample_subdir in os.listdir(sample_path):
                # only the subdirectories named after an integer are of interest
                try:
                    sample_subpath_idx = int(sample_subdir)
                except ValueError:
                    continue
                if sample_subpath_idx < 0:
                    raise ValueError("Internal error")
                sample_subpath = os.path.join(sample_path, sample_subdir)
                logging.debug(
                    "Processing sample subdirectory {sample_subpath}".
                    format(sample_subpath=sample_subpath))

                for sample_file in os.listdir(sample_subpath):
                    sample_file_fullpath = os.path.join(
                        sample_subpath, sample_file)
                    if not sample_file.endswith('.root') or \
                       not os.path.isfile(sample_file_fullpath):
                        continue

                    root_file_regex_match = root_file_regex.search(
                        sample_file)
                    if not root_file_regex_match:
                        continue

                    root_file_idx = int(root_file_regex_match.group(1))
                    expected_rle_file_basename = '{root_file_idx}.txt'.format(
                        root_file_idx=root_file_idx)
                    expected_rle_file = os.path.join(
                        sample_dir, expected_rle_file_basename)
                    file_dict_entry = (expected_rle_file,
                                       sample_file_fullpath)
                    if root_file_idx in blacklist:
                        if os.path.isfile(expected_rle_file):
                            logging.warning(
                                'Found RLE file {rle_file} (corresponding to blacklisted {root_file}) '
                                'which you ought to delete'.format(
                                    rle_file=expected_rle_file,
                                    root_file=sample_file_fullpath,
                                ))
                        file_dict['excess'].append(file_dict_entry)
                        continue

                    if not os.path.isfile(expected_rle_file):
                        logging.warning(
                            'Missing RLE file {rle_file} (corresponding to {root_file})'
                            .format(
                                rle_file=expected_rle_file,
                                root_file=sample_file_fullpath,
                            ))
                        file_dict['missing'].append(file_dict_entry)
                        continue
                    nof_rle_events = raw_linecount(expected_rle_file)
                    if nof_rle_events == 1 and os.path.getsize(
                            expected_rle_file) == 1:
                        # the RLE file contains only a newline, hence no events
                        nof_rle_events = 0

                    root_file = ROOT.TFile(sample_file_fullpath, 'read')
                    root_tree = root_file.Get('tree')
                    nof_entries = root_tree.GetEntries()
                    # many files are opened in this loop, so release each of
                    # them as soon as the number of entries is known
                    root_file.Close()

                    nof_events_diff = nof_rle_events - nof_entries
                    if nof_events_diff < 0:
                        logging.error(
                            'Missing {nof_events} events in {rle_filename} (corresponding to {sample_file}): '
                            'expected {expected}, got {actual}'.format(
                                nof_events=abs(nof_events_diff),
                                rle_filename=expected_rle_file,
                                sample_file=sample_file_fullpath,
                                expected=nof_entries,
                                actual=nof_rle_events,
                            ))
                        file_dict['corrupted'].append(file_dict_entry)
                    elif nof_events_diff > 0:
                        logging.error(
                            'Got {nof_events} more event than expected in {rle_filename} (corresponding '
                            'to {sample_file}): expected {expected}, got {actual}'
                            .format(
                                nof_events=nof_events_diff,
                                rle_filename=expected_rle_file,
                                sample_file=sample_file_fullpath,
                                expected=nof_entries,
                                actual=nof_rle_events,
                            ))
                        file_dict['corrupted'].append(file_dict_entry)
                    else:
                        logging.debug(
                            'File {rle_filename} (corresponding to {sample_file}) looks OK'
                            .format(
                                rle_filename=expected_rle_file,
                                sample_file=sample_file_fullpath,
                            ))

    except KeyboardInterrupt:
        # deliberate: an interrupted validation still prints the summary below
        pass

    if any(map(bool, file_dict.values())):
        logging.info('Validation finished with errors')
        for key in file_dict:
            if file_dict[key]:
                logging.info('Number of {key} RLE files: {nof_key}'.format(
                    key=key, nof_key=len(file_dict[key])))
                for entry in file_dict[key]:
                    logging.info('{rle_file} <=> {sample_file}'.format(
                        rle_file=entry[0], sample_file=entry[1]))
    else:
        logging.info('Validation finished successfully')
def dump_rle_parallel(output_dir,
                      rle_branchNames,
                      treeName,
                      nof_files=100,
                      force=False,
                      test=False,
                      verbose=False,
                      sample='',
                      tmp_dir=''):
    '''Dumps RLE numbers "in parallel"

    Args:
      output_dir:      string,                   Path to the directory where the RLE files will be stored
      rle_branchNames: dict { string : string }, Specifies the run, lumi and event branch names
      treeName:        string,                   Name of the TTree
      nof_files:       int,                      Number of files to be processed by one sbatch jobs
      force:           bool,                     If True, creates `output_dir` if it's not there
      test:            bool,                     If True, create jobs scripts but do not submit them to SLURM
      verbose:         bool,                     If True, prints lots of information to standard output
      sample:          string,                   (optional) sample name; if the sample name is not specified,
                                                 all samples will be processed
      tmp_dir:         string,                   (optional) directory for the job scripts and logs; if not
                                                 specified, `output_dir`/tmp is used

    Returns:
      int list, List of sbatch job IDs that were submitted to SLURM
                This list can be used in checking if the jobs that were submitted in this routine are finished or not

    The method does the following things:
      1) loops over the entries of the module-level `samples` dictionary or selects only one sample
         (specified by `sample`)
      2) loops over all tree_<number>.root files under sample directory and arranges them into chunks
         specified by `nof_files`
      3) creates a Python script and a Bash script which loops over the entries in the file
      4) submits each job to SLURM, unless `test` is True
      5) returns a list of sbatch job IDs that were assigned to each job

    The script exits via sys.exit(1) if a directory is missing and cannot be
    created, or if `sample` is given but not found in the sample dictionary.
    '''
    if verbose:
        logging.getLogger().setLevel(logging.DEBUG)

    if not os.path.isdir(output_dir):
        if not force:
            logging.error("Directory '{output_dir}' does not exist".format(
                output_dir=output_dir, ))
            sys.exit(1)
        logging.debug(
            "Creating directory '{output_dir}' since it's missing".format(
                output_dir=output_dir, ))
        if not create_dir_if_not_exist(output_dir):
            sys.exit(1)

    # let's make a temporary directories
    output_dir_tmp = tmp_dir if tmp_dir else os.path.join(output_dir, "tmp")
    output_dir_tmp_sh = os.path.join(output_dir_tmp, "sh")
    output_dir_tmp_py = os.path.join(output_dir_tmp, "py")
    output_dir_tmp_log = os.path.join(output_dir_tmp, "log")
    for required_dir in (output_dir_tmp, output_dir_tmp_sh,
                         output_dir_tmp_py, output_dir_tmp_log):
        if not create_dir_if_not_exist(required_dir):
            sys.exit(1)
    scratch_dir = "/scratch/{user_name}/dump_rle".format(
        user_name=getpass.getuser())

    # anchored at both ends so that names such as tree_1.root.bak are skipped
    # instead of breaking the extraction of the file index
    tree_pattern = re.compile(r"^tree_(\d+)\.root$")

    sbatch_job_ids = []

    def submit_job(job_id, job_files):
        # Creates the scripts of one chunk of files and hands them over to
        # bake_job(), which submits the job unless `test` is set
        sh_path = os.path.join(output_dir_tmp_sh, "{i}.sh".format(i=job_id))
        py_path = os.path.join(output_dir_tmp_py, "{i}.py".format(i=job_id))
        log_path = os.path.join(output_dir_tmp_log,
                                "{i}.log".format(i=job_id))
        scratch_job_dir = os.path.join(scratch_dir, str(job_id))
        sbatch_job_id = bake_job(
            sh_path,
            rle_branchNames,
            treeName,
            py_path,
            scratch_job_dir,
            job_files,
            log_path,
            not test,
        )
        if sbatch_job_id:
            sbatch_job_ids.append(sbatch_job_id)
        logging.debug("Creating job {jobId}".format(jobId=job_id))

    jobId = 0
    root_files, remote_output, local_output = [], [], []

    found_sample_name = False
    for s_value in samples.values():
        sample_name = s_value['process_name_specific']
        if sample and sample_name != sample:
            continue
        found_sample_name = True

        sample_path = s_value['local_paths'][0]['path']
        logging.debug("Processing sample '{sample_name}'".format(
            sample_name=sample_name, ))

        output_dir_parent = os.path.join(output_dir, sample_name)
        if not os.path.isdir(output_dir_parent):
            os.makedirs(output_dir_parent)

        for sample_subdir_basename in os.listdir(sample_path):
            sample_subdir = os.path.join(sample_path, sample_subdir_basename)
            if not os.path.isdir(sample_subdir):
                # stray files next to the subdirectories cannot be listed
                continue

            for rootfile_basename in os.listdir(sample_subdir):
                tree_match = tree_pattern.match(rootfile_basename)
                if not tree_match:
                    continue

                rootfile_idx = int(tree_match.group(1))
                root_files.append(
                    os.path.join(sample_subdir, rootfile_basename))
                local_output.append(
                    os.path.join(output_dir_parent,
                                 "{i}.txt".format(i=rootfile_idx)))
                remote_output.append(
                    os.path.join(scratch_dir, str(jobId), sample_name,
                                 os.path.basename(local_output[-1])))

                if len(root_files) == nof_files:
                    # the chunk is full
                    submit_job(
                        jobId,
                        list(zip(root_files, remote_output, local_output)))
                    root_files, remote_output, local_output = [], [], []
                    jobId += 1

    if sample and not found_sample_name:
        logging.error(
            "Sample name '{sample_name}' does not exist in the sample dictionary"
            .format(sample_name=sample))
        sys.exit(1)

    if root_files:
        # the last chunk holds fewer than nof_files files
        submit_job(jobId, list(zip(root_files, remote_output, local_output)))

    logging.debug("Done!")
    return [int(sbatch_job_id) for sbatch_job_id in sbatch_job_ids]
Exemple #16
0
def generate_sbatch_line(
    executable,
    command_line_parameter,
    input_file_names,
    output_file_name,
    script_file_name,
    log_file_name=None,
    cvmfs_error_log=None,
    min_file_size=20000,
    job_template_file='sbatch-node.sh.template',
    validate_outputs=True,
    copy_output_file=True,
):
    """Builds the text of an 'm.submitJob(...)' call for a single batch job

    If the log file of a previous attempt exists, it is scanned for the cvmfs failure signature
    ("Transport endpoint is not connected"); if found, the problem is logged and, when the caller
    supplied cvmfs_error_log, the time read from the log is appended to the entry of the host.

    Args:
      executable:             string,         Written to the 'executable' argument of the statement
      command_line_parameter: string,         Written to the 'command_line_parameter' argument
      input_file_names:       string or list, Input file(s); a single string is wrapped into a list
      output_file_name:       string,         Output file path; split into directory and base name
      script_file_name:       string,         Written to the 'scriptFile' argument
      log_file_name:          string,         Log file that is scanned and written to 'logFile'
      cvmfs_error_log:        dict,           Maps host names to lists of times; updated in place
      min_file_size:          int,            Unused here; kept for interface compatibility
      job_template_file:      string,         Written to the 'job_template_file' argument
      validate_outputs:       bool,           Unused here; kept for interface compatibility
      copy_output_file:       bool,           Written to the 'copy_output_file' argument

    Returns:
      string, the submission statement

    Raises:
      RuntimeError, if find_duplicates() reports duplicates among the input files
    """
    # CV: The output file is deliberately not checked here (the check considerably slows down
    #     the execution of 'python tthAnalyzeRun_XXX.py'): the template scripts in
    #     https://github.com/HEP-KBFI/tth-htt/tree/master/python/templates
    #     perform this check before starting to process the batch job
    #     and skip the batch job in case the output file already exists.

    if log_file_name and os.path.exists(log_file_name):
        timestamp = None
        hostname = None
        is_cvmfs_error = False
        with open(log_file_name, 'r') as log_file:
            for line in log_file:
                # Keep everything after the first colon, so that values which contain colons themselves
                # (e.g. HH:MM:SS) are not truncated; a line that mentions the keyword but has no colon
                # is ignored instead of raising an IndexError
                if "Time" in line and ':' in line:
                    timestamp = line.split(':', 1)[1].strip()
                if "Hostname" in line and ':' in line:
                    hostname = line.split(':', 1)[1].strip()
                if "Transport endpoint is not connected" in line:
                    is_cvmfs_error = True
        if is_cvmfs_error:
            logging.error(
                "Problem with cvmfs access reported in log file = '%s':" %
                log_file_name)
            logging.error(" host = '%s': time = %s" % (hostname, timestamp))
            # Compare against None explicitly: an empty dictionary is falsy, yet it is exactly
            # what a caller passes in order to start collecting the errors
            if cvmfs_error_log is not None:
                cvmfs_error_log.setdefault(hostname, []).append(timestamp)

    if isinstance(input_file_names, str):
        input_file_names = [input_file_names]

    input_file_names_duplicates = find_duplicates(input_file_names)
    if input_file_names_duplicates:
        raise RuntimeError(
            "Found duplicate input files to produce output file %s: %s" %
            (output_file_name, ", ".join(input_file_names_duplicates))
        )

    submission_template = (
        "m.submitJob(\n"
        "  inputFiles             = {input_file_names},\n"
        "  executable             = '{executable}',\n"
        "  command_line_parameter = '{command_line_parameter}',\n"
        "  outputFilePath         = '{output_file_name}',\n"
        "  outputFiles            = {output_file_basename},\n"
        "  scriptFile             = '{script_file_name}',\n"
        "  logFile                = '{log_file_name}',\n"
        "  skipIfOutputFileExists = {skipIfOutputFileExists},\n"
        "  job_template_file      = '{job_template_file}',\n"
        "  copy_output_file       = {copy_output_file},\n"
        ")"
    )
    return submission_template.format(
        input_file_names       = input_file_names,
        executable             = executable,
        command_line_parameter = command_line_parameter,
        output_file_name       = os.path.dirname(output_file_name) if output_file_name else '',
        output_file_basename   = [ os.path.basename(output_file_name) ] if output_file_name else [],
        script_file_name       = script_file_name,
        log_file_name          = log_file_name,
        skipIfOutputFileExists = False,
        job_template_file      = job_template_file,
        copy_output_file       = copy_output_file,
    )
def validate_denom(output_file, samples):
    """Checks that the histograms in a ROOT file hold the expected number of entries

    For every enabled sample the histogram named after its 'process_name_specific' must have
    exactly 'nof_tree_events' entries; for every sample category the histogram named after the
    category must have as many entries as the enabled samples of that category have in total.

    Args:
      output_file: string, Path to the ROOT file containing the histograms
      samples:     dict,   Maps sample names to dictionaries with the keys 'use_it',
                           'process_name_specific', 'sample_category' and 'nof_tree_events'

    Returns:
      0, if all checks passed
      1, if the file does not exist
      2, if the file could not be opened as a ROOT file
      3, if a histogram is missing
      4, if a histogram has an unexpected number of entries
      (if several histograms fail, the code of the last failure is returned)
    """
    if not os.path.isfile(output_file):
        logging.error('File {} does not exist'.format(output_file))
        return 1
    histogram_file = ROOT.TFile.Open(output_file, 'read')
    if not histogram_file:
        logging.error('Not a valid ROOT file: {}'.format(output_file))
        return 2

    error_code = 0
    category_sums = {}
    try:
        for sample_name, sample_info in samples.items():
            if not sample_info["use_it"]:
                continue
            process_name = sample_info["process_name_specific"]
            category_name = sample_info["sample_category"]
            expected_nof_events = sample_info["nof_tree_events"]
            # The category total is accumulated even if the sample histogram turns out to be missing
            category_sums[category_name] = category_sums.get(category_name, 0) + expected_nof_events
            logging.info('Validating {} (expecting {} events)'.format(
                process_name, expected_nof_events))
            status = _validate_histogram_entries(
                histogram_file, process_name, expected_nof_events, output_file, 'sample')
            if status != 0:
                error_code = status

        for category_name, expected_nof_events in category_sums.items():
            # A missing category histogram is reported and skipped (previously the null object
            # was dereferenced right after the error had been logged)
            status = _validate_histogram_entries(
                histogram_file, category_name, expected_nof_events, output_file, 'category')
            if status != 0:
                error_code = status
    finally:
        # Release the file handle even if reading a histogram raises
        histogram_file.Close()

    if error_code == 0:
        logging.info("Validation successful!")
    else:
        logging.error("Validation failed!")
    return error_code


def _validate_histogram_entries(histogram_file, histogram_name, expected_nof_events, output_file, kind):
    """Compares the number of entries of one histogram to the expectation

    Args:
      histogram_file:      opened ROOT file the histogram is read from
      histogram_name:      string, Name of the histogram
      expected_nof_events: int,    Expected number of entries
      output_file:         string, Path of the ROOT file (used in the log messages only)
      kind:                string, 'sample' or 'category' (used in the success message only)

    Returns:
      0, if the number of entries matches
      3, if the histogram is missing
      4, if the number of entries differs
    """
    histogram = histogram_file.Get(histogram_name)
    if not histogram:
        logging.error("Could not find histogram '{}' in file {}".format(
            histogram_name, output_file))
        return 3
    nof_events = int(histogram.GetEntries())
    if nof_events != expected_nof_events:
        logging.error(
            'Histogram {} in file {} has {} events, but expected {} events'
            .format(
                histogram_name,
                output_file,
                nof_events,
                expected_nof_events,
            ))
        return 4
    logging.info('Validation successful for {} {}'.format(kind, histogram_name))
    return 0
Exemple #18
0
                        default=False,
                        help='R|Enable verbose printout')
    args = parser.parse_args()

    if args.verbose:
        logging.getLogger().setLevel(logging.DEBUG)

    rle_file = args.file
    sample_name = args.sample_name
    output_file = args.output
    grep_directory = args.directory
    grep_individually = args.all
    try:
        sample_name_re = re.compile(sample_name)
    except:
        logging.error(
            "Argument {arg} not a valid regex".format(arg=sample_name))
        sys.exit(1)

    if grep_individually and not grep_directory:
        logging.warning(
            'Option -a/--all has no effect unless you specify -d/--directory')

    if not hdfs.isfile(rle_file):
        logging.error("No such file: '{rle_filename}'".format(
            rle_filename=rle_file, ))
        sys.exit(1)

    if output_file and not hdfs.isdir(os.path.dirname(output_file)):
        logging.error(
            "Parent directory of '{output_file}' doesn't exist".format(
                output_file=output_file, ))
Exemple #19
0
    def create(self):
        """Creates all necessary config files for running the MEM -- either locally or on the batch system

        For every enabled sample whose first local path exists, the input files are split into MEM jobs
        (see memJobList()); for each job the paths of the config file, shell script, output file and
        log file are registered and the config file is generated. Afterwards the sbatch submission
        script (only if is_sbatch is set) and the Makefile are created, and the per-sample as well as
        the total integration statistics are printed.

        Returns:
          False, if max_mem_integrations is positive and the total number of integrations exceeds it
          True,  otherwise
        """

        for key in self.dirs.keys():
            if isinstance(self.dirs[key], dict):
                for dir_type in self.dirs[key].keys():
                    create_if_not_exists(self.dirs[key][dir_type])
            else:
                create_if_not_exists(self.dirs[key])

        # read the file in, sample-by-sample
        # build the dictionary recursively
        # add rle file also to generated cfg files
        # print integrations per job as well!
        # consider more than 1 file per jobs -- the jobs are splitted by MEM integration anyways

        rle_filters = self.get_filter() if self.rle_filter_file else {}
        statistics = {}
        for sample_name, sample_info in self.samples.items():
            if not sample_info["use_it"]:
                continue

            if not os.path.exists(sample_info['local_paths'][0]['path']):
                logging.warning("Skipping sample {sample_name}".format(sample_name = sample_name))
                continue

            process_name = sample_info["process_name_specific"]
            logging.info("Creating configuration files to run '%s' for sample %s" % (self.executable_addMEM, process_name))
            is_mc = (sample_info["type"] == "mc")
            if self.rle_filter_file:
                assert(process_name in rle_filters)

            inputFileList = generateInputFileList(sample_info, self.max_files_per_job)
            # typically, the analysis ends here and starts looping b/c the smallest unit of work processes
            # at least one file; we need, however, to split the file into event ranges in such a way that
            # each job performs mem_integrations_per_job MEM integrations

            # so what we are going to do is to open each set of files in inputFileList, read the variable
            # requestMEM_*l_*tau and try to gather the event ranges such that each event range
            # performs up to mem_integrations_per_job integrations per job
            memEvtRangeDict = self.memJobList(inputFileList, rle_filters[process_name] if self.rle_filter_file else [])

            for jobId in memEvtRangeDict.keys():

                key_dir = getKey(sample_name)
                key_file = getKey(sample_name, jobId)

                self.inputFiles[key_file] = memEvtRangeDict[jobId]['input_fileset']

                # there should always be at least one input file per job; the length has to be compared
                # here, because comparing the file set itself to an integer is either meaningless
                # (Python 2) or an error (Python 3)
                assert len(self.inputFiles[key_file]) > 0, \
                    "No input files for job %i of sample %s" % (jobId, process_name)

                self.cfgFiles_addMEM_modified[key_file] = os.path.join(
                    self.dirs[key_dir][DKEY_CFGS], "addMEM_%s_%s_%i_cfg.py" % (self.channel, process_name, jobId)
                )
                self.shFiles_addMEM_modified[key_file] = os.path.join(
                    self.dirs[key_dir][DKEY_CFGS], "addMEM_%s_%s_%i.sh" % (self.channel, process_name, jobId)
                )
                self.outputFiles[key_file] = os.path.join(
                    self.dirs[key_dir][DKEY_NTUPLES], "%s_%i.root" % (process_name, jobId)
                )
                self.logFiles_addMEM[key_file] = os.path.join(
                    self.dirs[key_dir][DKEY_LOGS], "addMEM_%s_%s_%i.log" % (self.channel, process_name, jobId)
                )
                self.logFiles_addMEM[key_file] = get_log_version((self.logFiles_addMEM[key_file],))[0]
                self.createCfg_addMEM(
                    self.inputFiles[key_file],
                    memEvtRangeDict[jobId]['event_range'][0],
                    memEvtRangeDict[jobId]['event_range'][1],
                    self.outputFiles[key_file],
                    self.era,
                    sample_info["sample_category"],
                    is_mc,
                    self.cfgFiles_addMEM_modified[key_file],
                    memEvtRangeDict[jobId]['whitelist'],
                )

                # associate the output file with the fileset_id: the hadd output is named after the
                # fileset ID and placed into a subdirectory that is shared by 1000 consecutive IDs
                fileset_id = memEvtRangeDict[jobId]['fileset_id']
                hadd_output_dir = os.path.join(
                    self.dirs[key_dir][DKEY_FINAL_NTUPLES],
                    '%04d' % (fileset_id // 1000)
                )
                if not os.path.exists(hadd_output_dir):
                    os.makedirs(hadd_output_dir)
                hadd_output = os.path.join(hadd_output_dir, '%s_%i.root' % ('tree', fileset_id))
                if hadd_output not in self.hadd_records:
                    self.hadd_records[hadd_output] = {}
                    self.hadd_records[hadd_output]['output_files'] = []
                self.hadd_records[hadd_output]['fileset_id'] = fileset_id
                self.hadd_records[hadd_output]['output_files'].append(self.outputFiles[key_file])
                self.hadd_records[hadd_output]['process_name'] = process_name

            # let's sum the number of integration per sample; the number of entries is counted
            # only once per fileset ID, as several jobs may refer to the same fileset
            nofEntriesMap = {}
            for v in memEvtRangeDict.values():
                if v['fileset_id'] not in nofEntriesMap:
                    nofEntriesMap[v['fileset_id']] = {
                        'nof_entries' : v['nof_entries'],
                    }
            statistics[process_name] = {
                'nof_int'         : sum([entry['nof_int']         for entry in memEvtRangeDict.values()]),
                'nof_entries'     : sum([entry['nof_entries']     for entry in nofEntriesMap.values()]),
                'nof_events_pass' : sum([entry['nof_events_pass'] for entry in memEvtRangeDict.values()]),
                'nof_int_pass'    : sum([entry['nof_int_pass']    for entry in memEvtRangeDict.values()]),
                'nof_zero'        : sum([entry['nof_zero']        for entry in memEvtRangeDict.values()]),
                'nof_jobs'        : len(memEvtRangeDict),
            }

        if self.is_sbatch:
            logging.info("Creating script for submitting '%s' jobs to batch system" % self.executable_addMEM)
            self.createScript_sbatch()

        logging.info("Creating Makefile")
        lines_makefile = []
        self.addToMakefile_addMEM(lines_makefile)
        self.addToMakefile_hadd(lines_makefile)
        self.createMakefile(lines_makefile)

        def ratio(numerator, denominator):
            # same convention as for the per-sample numbers below: an empty denominator yields zero
            return float(numerator) / denominator if denominator != 0 else 0.

        # the totals must not fail if no sample was processed or if nothing passed the selection
        ws_len = max([len(kk) + 1 for kk in statistics]) if statistics else 0
        total_nof_integrations_sum = sum(x['nof_int']         for x in statistics.values())
        total_nof_entries          = sum(x['nof_entries']     for x in statistics.values())
        total_nof_zero_int         = sum(x['nof_zero']        for x in statistics.values())
        total_nof_jobs             = sum(x['nof_jobs']        for x in statistics.values())
        total_nof_pass             = sum(x['nof_events_pass'] for x in statistics.values())
        total_nof_int_pass         = sum(x['nof_int_pass']    for x in statistics.values())
        total_nof_int_pass_avg     = ratio(total_nof_int_pass, total_nof_pass)
        total_nof_integrations_avg = ratio(total_nof_integrations_sum, total_nof_entries)
        total_nof_int_per_job      = ratio(total_nof_integrations_sum, total_nof_jobs)
        for k, v in statistics.items():
            int_per_event = ratio(v['nof_int'], v['nof_entries'])
            if v['nof_entries'] == 0:
                evt_pass = 0.
            else:
                evt_pass = (100 * float(v['nof_events_pass']) / v['nof_entries'])
            nof_int_pass = ratio(v['nof_int_pass'], v['nof_events_pass'])
            print('%s%s: %d (%d entries; %d jobs; %.2f int/evt; %d (%.2f%%) evt pass; %.2f int/evt pass; %d evt 0int)' %
              (k,
               ' ' * (ws_len - len(k)),
               v['nof_int'],
               v['nof_entries'],
               v['nof_jobs'],
               int_per_event,
               v['nof_events_pass'],
               evt_pass,
               nof_int_pass,
               v['nof_zero'],
              )
            )
        print('%s%s: %d (%d entries; %d jobs; %.2f int/evt; %d evt pass; %.2f int/evt pass; '
              '%.2f int/job pass; %d evt 0int)' %
          ('total',
           ' ' * (ws_len - len('total')),
           total_nof_integrations_sum,
           total_nof_entries,
           total_nof_jobs,
           total_nof_integrations_avg,
           total_nof_pass,
           total_nof_int_pass_avg,
           total_nof_int_per_job,
           total_nof_zero_int,
          )
        )

        if self.max_mem_integrations > 0 and total_nof_integrations_sum > self.max_mem_integrations:
            logging.error("Will not start the jobs (max nof integrations exceeded)!")
            return False
        else:
            logging.info("Done")
            return True
Exemple #20
0
    def check_job_completion(self,
                             jobsId_list,
                             default_completion=Status.completed):
        """Queries SLURM for the outcome of the given jobs

        The jobs are looked up with sacct first; if sacct prints to stderr or returns nothing, scontrol
        is tried instead. If neither command yields usable output, an error is logged and every job
        keeps the default status.

        Args:
          jobsId_list:        list of strings, IDs of the jobs to look up
          default_completion: status assigned to each job for which no record is found
                              (default: Status.completed)

        Returns:
          dict, maps every job ID of jobsId_list to a JobCompletion instance
        """
        completion = {
            k: JobCompletion(status=default_completion)
            for k in jobsId_list
        }

        # If the input list is empty, just return here (we don't want to mess up the subprocess commands here)
        if not completion:
            return completion

        # Set a delimiter, which distinguishes entries b/w different jobs
        delimiter = ','

        # First, let's try with sacct; explanation:
        # 1) sacct -X -P -n -o JobID,ExitCode,DerivedExitCode,State
        #      Shows job IDs, exit codes and comments of all submitted, running and finished jobs, one line per job
        #        a) -X -- shows cumulative statistics of each job (has no effect here, though)
        #        b) -P -- output will be '|' delimited without a '|' at the end
        #        c) -n -- omit header
        #        d) -o JobID,ExitCode,DerivedExitCode -- output format
        #        e) -S {datetime} -- look only for jobs submitted after {datetime}
        #        f) -j {jobs} -- filter out only the relevant jobs by their job ID (comma-separated list)
        # 2) sed ':a;N;$!ba;s/\\n/{delimiter}/g'
        #      Place all entries to one line, delimited by {{delimiter}} (otherwise the logs are hard to read)
        sacct_cmd = "sacct -X -P -n -o JobID,ExitCode,DerivedExitCode,State -S {datetime} -j {jobs} | " \
                    "sed ':a;N;$!ba;s/\\n/{delimiter}/g'".format(
          datetime  = self.datetime,
          jobs      = ','.join(jobsId_list),
          delimiter = delimiter,
        )
        sacct_out, sacct_err = run_cmd(sacct_cmd,
                                       do_not_log=not self.log_completion,
                                       return_stderr=True)
        if not sacct_err and sacct_out:
            # The output of sacct contains one line per job, each line has pipe-separated fields the order of which
            # is defined in the command that issued the output
            for line in sacct_out.split(delimiter):
                JobID, ExitCode, DerivedExitCode, State = line.split('|')
                if JobID in completion:
                    completion[JobID] = JobCompletion(
                        status=Status.classify_error(ExitCode, DerivedExitCode,
                                                     State),
                        exit_code=ExitCode,
                        derived_exit_code=DerivedExitCode,
                        state=State,
                    )
            return completion

        # Likely returned along the lines of (due to heavy load on the cluster since SQL DB is overloaded):
        # sacct: error: Problem talking to the database: Connection refused
        logging.info('sacct currently unavailable: %s' % sacct_err)

        # Let's try with scontrol if the sacct commands failed
        # scontrol doesn't have an option to take a list of Job IDs as an argument; thus, we have to grep the job IDs
        # Explanation:
        # 1) scontrol show -od job
        #      Prints out everything about running or recently finished jobs
        #        a) -o -- prints information one line per record
        #        b) -d -- includes more detailed information about the job
        #        c) job -- prints all jobs (it's possible to get information about other units like nodes and clusters)
        # 2) grep '{jobs}'
        #      Filter out jobs by their job ID (by concatenating the list with escaped regex OR operator '|')
        # 3) sed ':a;N;$!ba;s/\\n/{delimiter}/g'
        #      Put all the result on one line, where each record is delimited by {delimiter}
        scontrol_cmd = "scontrol show -od job | grep '{jobs}' | sed ':a;N;$!ba;s/\\n/{delimiter}/g'".format(
            jobs='\\|'.join(jobsId_list),
            delimiter=delimiter,
        )
        scontrol_out, scontrol_err = run_cmd(
            scontrol_cmd,
            do_not_log=not self.log_completion,
            return_stderr=True)
        if not scontrol_err and scontrol_out:
            # The output of scontrol contains one entry per line, each line contains a space-delimited key-value pairs,
            # whereas the keys and values are separated by an equation sign
            # Although the keys do not contain any spaces, the values might, so we have to take care of that
            for line in scontrol_out.split(delimiter):
                # Split at the equation signs first and at the whitespace second: the last token of each chunk
                # is the key of the next value, the remaining tokens belong to the value of the previous key.
                # A real list is built here, because it is indexed and measured below (the result of map()
                # supports neither in Python 3)
                chunks = [chunk.split() for chunk in line.split('=')]
                last_pair_idx = len(chunks) - 2
                line_dict = {}
                for i in range(len(chunks) - 1):
                    key_tokens = chunks[i]
                    value_tokens = chunks[i + 1]
                    # The very last chunk holds no further key, so all of its tokens belong to the value
                    if i != last_pair_idx:
                        value_tokens = value_tokens[:-1]
                    line_dict[key_tokens[-1]] = ' '.join(value_tokens)
                if 'JobId' not in line_dict:
                    print("Skipping line = '%s'" % line)
                    continue
                JobId = line_dict['JobId']
                if JobId in completion:
                    completion[JobId] = JobCompletion(
                        status=Status.classify_error(
                            line_dict['ExitCode'],
                            line_dict['DerivedExitCode'],
                            line_dict['JobState'],
                        ),
                        exit_code=line_dict['ExitCode'],
                        derived_exit_code=line_dict['DerivedExitCode'],
                        state=line_dict['JobState'])
            return completion

        # scontrol probably returned something like:
        # slurm_load_jobs error: Invalid job id specified
        # Probably because too much time has passed since the job completion and checking the exit status here
        logging.info('scontrol has errors: %s' % scontrol_err)

        # scontrol still might fail if too much time has passed since the jobs completion (the metadata about each
        # job is cached for a certain period of time, the length of which I don't know at the moment)
        # None of the SLURM commands work; let's just say that the job completed successfully
        logging.error(
            "Cannot tell if the job has completed successfully or not!")
        return completion
def generate_sbatch_line(
    executable,
    command_line_parameter,
    input_file_names,
    output_file_name,
    script_file_name,
    log_file_name=None,
    cvmfs_error_log=None,
    min_file_size=20000,
    job_template_file='sbatch-node.sh.template',
    validate_outputs=True,
):
    """Builds the text of an 'm.submitJob(...)' call for a single batch job

    Nothing is generated if is_file_ok() accepts the output file. If the log file of a previous
    attempt exists, it is scanned for the cvmfs failure signature ("Transport endpoint is not
    connected"); if found, the problem is logged and, when the caller supplied cvmfs_error_log,
    the time read from the log is appended to the entry of the host.

    Args:
      executable:             string,         Written to the 'executable' argument of the statement
      command_line_parameter: string,         Written to the 'command_line_parameter' argument
      input_file_names:       string or list, Input file(s); a single string is wrapped into a list
      output_file_name:       string,         Output file path; split into directory and base name
      script_file_name:       string,         Written to the 'scriptFile' argument
      log_file_name:          string,         Log file that is scanned and written to 'logFile'
      cvmfs_error_log:        dict,           Maps host names to lists of times; updated in place
      min_file_size:          int,            Passed on to is_file_ok()
      job_template_file:      string,         Written to the 'job_template_file' argument
      validate_outputs:       bool,           Passed on to is_file_ok()

    Returns:
      None,   if is_file_ok() accepts the output file
      string, the submission statement otherwise
    """
    if is_file_ok(output_file_name, validate_outputs, min_file_size):
        return None

    if log_file_name and os.path.exists(log_file_name):
        timestamp = None
        hostname = None
        is_cvmfs_error = False
        with open(log_file_name, 'r') as log_file:
            for line in log_file:
                # Keep everything after the first colon, so that values which contain colons themselves
                # (e.g. HH:MM:SS) are not truncated; a line that mentions the keyword but has no colon
                # is ignored instead of raising an IndexError
                if "Time" in line and ':' in line:
                    timestamp = line.split(':', 1)[1].strip()
                if "Hostname" in line and ':' in line:
                    hostname = line.split(':', 1)[1].strip()
                if "Transport endpoint is not connected" in line:
                    is_cvmfs_error = True
        if is_cvmfs_error:
            logging.error(
                "Problem with cvmfs access reported in log file = '%s':" %
                log_file_name)
            logging.error(" host = '%s': time = %s" % (hostname, timestamp))
            # Compare against None explicitly: an empty dictionary is falsy, yet it is exactly
            # what a caller passes in order to start collecting the errors
            if cvmfs_error_log is not None:
                cvmfs_error_log.setdefault(hostname, []).append(timestamp)

    if isinstance(input_file_names, str):
        input_file_names = [input_file_names]

    submission_template = (
        "m.submitJob(\n"
        "  inputFiles             = {input_file_names},\n"
        "  executable             = '{executable}',\n"
        "  command_line_parameter = '{command_line_parameter}',\n"
        "  outputFilePath         = '{output_file_name}',\n"
        "  outputFiles            = {output_file_basename},\n"
        "  scriptFile             = '{script_file_name}',\n"
        "  logFile                = '{log_file_name}',\n"
        "  skipIfOutputFileExists = {skipIfOutputFileExists},\n"
        "  job_template_file      = '{job_template_file}',\n"
        ")"
    )
    return submission_template.format(
        input_file_names       = input_file_names,
        executable             = executable,
        command_line_parameter = command_line_parameter,
        output_file_name       = os.path.dirname(output_file_name) if output_file_name else '',
        output_file_basename   = [ os.path.basename(output_file_name) ] if output_file_name else [],
        script_file_name       = script_file_name,
        log_file_name          = log_file_name,
        skipIfOutputFileExists = str(bool(output_file_name)),
        job_template_file      = job_template_file,
    )
Exemple #22
0
    def poll(self, nonBlocking):
        """Waits for all sbatch jobs submitted by this instance of sbatchManager to finish processing
        """
        text_line = '-' * 120

        # Set a delimiter, which distinguishes entries b/w different jobs
        delimiter = ','
        # Explanation (the maximum pool ID length = 256 is configurable via self.max_pool_id_length):
        # 1) squeue -h -u {{user}} -o '%i %256k'
        #      Collects the list of running jobs
        #        a) -h omits header
        #        b) -u {{user}} looks only for jobs submitted by {{user}}
        #        c) -o '%i %256k' specifies the output format
        #           i)  %i -- job ID (1st column)
        #           ii) %256k -- comment with width of 256 characters (2nd column)
        #               If the job has no comments, the entry simply reads (null)
        # 2) grep {{comment}}
        #       Filter the jobs by the comment which must be unique per sbatchManager instance at all times
        # 3) awk '{print $1}'
        #       Filter only the jobIds out
        # 4) sed ':a;N;$!ba;s/\\n/{{delimiter}}/g'
        #       Place all job IDs to one line, delimited by {{delimiter}} (otherwise the logs are hard to read)
        command_template = "squeue -h -u {{user}} -o '%i %{{ pool_id_length }}k' | grep {{comment}} | awk '{print $1}' | " \
                           "sed ':a;N;$!ba;s/\\n/{{delimiter}}/g'"
        command = jinja2.Template(command_template).render(
            user=self.user,
            pool_id_length=self.max_pool_id_length,
            comment=self.pool_id,
            delimiter=delimiter)

        # Initially, all jobs are marked as submitted so we have to go through all jobs and check their exit codes
        # even if some of them have already finished
        jobIds_set = set([
            job_id for job_id in self.submittedJobs
            if self.submittedJobs[job_id]['status'] == Status.submitted
        ])
        nofJobs_left = len(jobIds_set) + len(self.queuedJobs)
        while nofJobs_left > 0:
            # Get the list of jobs submitted to batch system and convert their jobIds to a set
            poll_result, poll_result_err = '', ''
            while True:
                poll_result, poll_result_err = run_cmd(command,
                                                       do_not_log=False,
                                                       return_stderr=True)
                if not poll_result and poll_result_err:
                    logging.warning(
                        'squeue caught an error: {squeue_error}'.format(
                            squeue_error=poll_result_err))
                else:
                    break
                # sleep a minute and then try again
                # in principle we could limit the number of retries, but hopefully that's not necessary
                logging.debug("sleeping for %i seconds." % 60)
                time.sleep(60)
            polled_ids = set()
            if poll_result != '':
                polled_ids = set(poll_result.split(delimiter))

            # Check if number of jobs submitted to batch system is below maxSubmittedJobs;
            # if it is, take jobs from queuedJobs list and submit them,
            # until a total of maxSubmittedJobs is submitted to batch system
            nofJobs_toSubmit = min(len(self.queuedJobs),
                                   self.maxSubmittedJobs - len(polled_ids))
            if nofJobs_toSubmit > 0:
                logging.debug(
                    "Jobs: submitted = {}, in queue = {} --> submitting the next {} jobs."
                    .format(len(polled_ids), len(self.queuedJobs),
                            nofJobs_toSubmit))
            else:
                logging.debug(
                    "Jobs: submitted = {}, in queue = {} --> waiting for submitted jobs to finish processing."
                    .format(len(polled_ids), len(self.queuedJobs)))
            for i in range(0, nofJobs_toSubmit):
                # randomly submit a job from the queue
                two_pow_sixteen = 65536
                random.seed((abs(hash(uuid.uuid4()))) % two_pow_sixteen)
                max_idx = len(self.queuedJobs) - 1
                random_idx = random.randint(0, max_idx)
                job = self.queuedJobs.pop(random_idx)
                job['status'] = Status.submitted
                job_id = self.submit(job['sbatch_command'])
                self.submittedJobs[job_id] = job

            # Now check status of jobs submitted to batch system:
            # Subtract the list of running jobs from the list of all submitted jobs -- the result is a list of
            # jobs that have finished already
            finished_ids = list(jobIds_set - polled_ids)

            # Do not poll anything if currently there are no finished jobs
            if finished_ids:
                # Based on job's exit code what if the job has failed or completed successfully
                # However, the sacct/scontrol commands yield too much output if too many jobs have been submitted here
                # Therefore, we want to restrict the output by grepping specific job IDs
                # There's another problem with that: the length of a bash command is limited by ARG_MAX kernel variable,
                # which is of order 2e6
                # This means that we have to split the job IDs into chunks each of which we have to check separately
                finished_ids_chunks = [
                    finished_ids[i:i + self.max_nof_greps]
                    for i in range(0, len(finished_ids), self.max_nof_greps)
                ]
                for finished_ids_chunk in finished_ids_chunks:
                    completion = self.check_job_completion(finished_ids_chunk)
                    completed_jobs, running_jobs, failed_jobs = [], [], []
                    for job_id, details in completion.iteritems():
                        if details.status == Status.completed:
                            completed_jobs.append(job_id)
                        elif details.status == Status.running:
                            running_jobs.append(job_id)
                        else:
                            failed_jobs.append(job_id)
                    # If there are any failed jobs, throw
                    if failed_jobs:

                        failed_jobs_str = ','.join(failed_jobs)
                        errors = [
                            completion[job_id].status for job_id in failed_jobs
                        ]
                        logging.error(
                            "Job(s) w/ ID(s) {jobIds} finished with errors: {reasons}"
                            .format(
                                jobIds=failed_jobs_str,
                                reasons=', '.join(map(Status.toString,
                                                      errors)),
                            ))

                        # Let's print a table where the first column corresponds to the job ID
                        # and the second column lists the exit code, the derived exit code, the status
                        # and the classification of the failed job
                        logging.error("Error table:")
                        for job_id in failed_jobs:
                            sys.stderr.write(
                                "{jobId} {exitCode} {derivedExitCode} {state} {status}\n"
                                .format(
                                    jobId=job_id,
                                    exitCode=completion[job_id].exit_code,
                                    derivedExitCode=completion[job_id].
                                    derived_exit_code,
                                    state=completion[job_id].state,
                                    status=Status.toString(
                                        completion[job_id].status),
                                ))

                        sys.stderr.write('%s\n' % text_line)
                        for failed_job in failed_jobs:
                            for log in zip(['wrapper', 'executable'],
                                           ['log_wrap', 'log_exec']):
                                logfile = self.submittedJobs[failed_job][
                                    log[1]]
                                if os.path.isfile(logfile):
                                    logfile_contents = open(logfile,
                                                            'r').read()
                                else:
                                    logfile_contents = '<file is missing>'
                                sys.stderr.write(
                                    'Job ID {id} {description} log ({path}):\n{line}\n{log}\n{line}\n'
                                    .format(
                                        id=failed_job,
                                        description=log[0],
                                        path=logfile,
                                        log=logfile_contents,
                                        line=text_line,
                                    ))

                            if self.submittedJobs[failed_job]['nof_submissions'] < self.max_resubmissions and \
                               completion[failed_job].status == Status.io_error:
                                # The job is eligible for resubmission if the job hasn't been resubmitted more
                                # than a preset limit of resubmissions AND if the job failed due to I/O errors
                                logging.warning(
                                    "Job w/ ID {id} and arguments {args} FAILED because: {reason} "
                                    "-> resubmission attempt #{attempt}".
                                    format(
                                        id=failed_job,
                                        args=self.submittedJobs[failed_job]
                                        ['args'],
                                        reason=Status.toString(
                                            completion[failed_job].status),
                                        attempt=self.submittedJobs[failed_job]
                                        ['nof_submissions'],
                                    ))
                                self.submitJob(
                                    *self.submittedJobs[failed_job]['args'])
                                # The old ID must be deleted, b/c otherwise it would be used to compare against
                                # squeue output and we would resubmit the failed job ad infinitum
                                del self.submittedJobs[failed_job]
                            else:
                                # We've exceeded the maximum number of resubmissions -> fail the workflow
                                raise Status.raiseError(
                                    completion[failed_job].status)
                    else:
                        logging.debug(
                            "Job(s) w/ ID(s) {completedIds} finished successfully {runningInfo}"
                            .format(
                                completedIds=','.join(completed_jobs),
                                runningInfo='(%s still running)' %
                                ','.join(running_jobs) if running_jobs else '',
                            ))
                    # Mark successfully finished jobs as completed so that we won't request their status code again
                    # Otherwise they will be still at ,,submitted'' state
                    for job_id in completed_jobs:
                        if not all(
                                map(
                                    lambda outputFile: is_file_ok(
                                        outputFile,
                                        validate_outputs=True,
                                        min_file_size=self.min_file_size), self
                                    .submittedJobs[job_id]['outputFiles'])):
                            if self.submittedJobs[job_id][
                                    'nof_submissions'] < self.max_resubmissions:
                                logging.warning(
                                    "Job w/ ID {id} and arguments {args} FAILED to produce a valid output file "
                                    "-> resubmission attempt #{attempt}".
                                    format(
                                        id=job_id,
                                        args=self.submittedJobs[job_id]
                                        ['args'],
                                        attempt=self.submittedJobs[job_id]
                                        ['nof_submissions'],
                                    ))
                                self.submitJob(
                                    *self.submittedJobs[job_id]['args'])
                                del self.submittedJobs[job_id]
                            else:
                                raise ValueError(
                                    "Job w/ ID {id} FAILED because it repeatedly produces bogus output "
                                    "file {output} yet the job still exits w/o any errors"
                                    .format(
                                        id=job_id,
                                        output=', '.join(
                                            self.submittedJobs[job_id]
                                            ['outputFiles']),
                                    ))
                        else:
                            # Job completed just fine
                            self.submittedJobs[job_id][
                                'status'] = Status.completed

            jobIds_set = set([
                job_id for job_id in self.submittedJobs
                if self.submittedJobs[job_id]['status'] == Status.submitted
            ])
            nofJobs_left = len(jobIds_set) + len(self.queuedJobs)
            logging.info(
                "Waiting for sbatch to finish (%d job(s) still left) ..." %
                nofJobs_left)
            if nofJobs_left > 0:
                if nonBlocking:
                    return False
                two_pow_sixteen = 65536
                random.seed((abs(hash(uuid.uuid4()))) % two_pow_sixteen)
                max_delay = 300
                random_delay = random.randint(0, max_delay)
                logging.debug("sleeping for %i seconds." % random_delay)
                time.sleep(self.poll_interval + random_delay)
            else:
                break

        return True
Exemple #23
0
  args = parser.parse_args()

  if args.verbose:
    logging.getLogger().setLevel(logging.DEBUG)

  rle_filename = args.input
  out_filename = os.path.abspath(args.output)
  grep_dir     = args.directory
  sample_name  = args.sample_name
  force        = args.force
  debug_output = args.debug
  nof_files    = args.nof_files

  # check if input RLE file exists
  if not hdfs.isfile(rle_filename):
    logging.error("File {rle_filename} does not exist or is not a file!".format(rle_filename = rle_filename))
    sys.exit(1)

  # check if the directory into which we have to write the output ROOT file already exists
  out_parent_dir = os.path.dirname(out_filename)
  if not hdfs.isdir(out_parent_dir):
    if not force:
      logging.error("Parent directory of the output file {out_filename} does not exist".format(
        out_filename = out_filename),
      )
      sys.exit(1)
    else:
      logging.debug("Output directory {out_parent_dir} does not exist, attempting to create one".format(
        out_parent_dir = out_parent_dir,
      ))
      try:
Exemple #24
0
                        action='store_true',
                        default=False,
                        help='R|Enable verbose printout')
    args = parser.parse_args()

    if args.verbose:
        logging.getLogger().setLevel(logging.DEBUG)

    use_force = args.force
    output = args.output

    if args.input:

        input_file = args.input
        if not os.path.isfile(input_file):
            logging.error("No such file: {input_filename}".format(
                input_filename=input_file))
            sys.exit(1)

        output_file = output
        parent_dir = os.path.dirname(os.path.abspath(output_file))
        if not check_dir(parent_dir, use_force):
            sys.exit(1)

        logging.debug(
            "Saving RLE numbers from {input_file} to {output_file}".format(
                input_file=input_file,
                output_file=output_file,
            ))

        dump_rle(input_file, output_file, args.tree, args.run, args.lumi,
                 args.event)
def get_rles(input_paths, whitelist, blacklist, read_all_systematics):
    '''Reads RLE strings from a channel / region / sample / systematics directory tree

    Args:
      input_paths:          forwarded to get_paths() together with whitelist and blacklist
      whitelist:            forwarded to get_paths()
      blacklist:            forwarded to get_paths()
      read_all_systematics: bool, if False then only the SYSTEMATICS_CENTRAL subdirectory
                            of each sample is read

    Returns:
      (rles, has_errors) where
        rles       is a nested OrderedDict: channel -> region -> sample -> systematics -> list of
                   unique RLE strings in the order they were first encountered
        has_errors is True if at least one duplicate RLE string was found within the same
                   channel, region, sample and systematics; False otherwise

    Raises:
      RuntimeError, if a file without the '.txt' extension is found in a systematics directory
                    or if a line in such a file does not match REGEX_RLE

    NOTE(review): the code assumes that hdfs.listdir() returns paths that can be listed/opened
    directly (only their basenames are used as names) -- confirm against the hdfs wrapper.
    '''
    has_errors = False
    rles = collections.OrderedDict()
    valid_paths = get_paths(input_paths, whitelist, blacklist)
    for channel_name, channel_dir in valid_paths.items():
        rles[channel_name] = collections.OrderedDict()
        for region_dir in sorted(hdfs.listdir(channel_dir)):
            region_name = os.path.basename(region_dir)
            # The placeholders are region first, channel second
            logging.debug('Found region {} in channel {}'.format(
                region_name, channel_name))
            rles[channel_name][region_name] = collections.OrderedDict()
            for sample_dir in sorted(hdfs.listdir(region_dir)):
                sample_name = os.path.basename(sample_dir)
                if sample_name in SAMPLES_EXCLUDE:
                    continue
                logging.debug(
                    'Found sample {} in region {} and channel {}'.format(
                        sample_name, region_name, channel_name))
                rles[channel_name][region_name][
                    sample_name] = collections.OrderedDict()
                for rle_dir in sorted(hdfs.listdir(sample_dir)):
                    central_or_shift = os.path.basename(rle_dir)
                    if central_or_shift in SYSTEMATICS_EXCLUDE:
                        continue
                    if not read_all_systematics and central_or_shift != SYSTEMATICS_CENTRAL:
                        continue
                    logging.debug(
                        'Found systematics {} for sample {} in region {} and channel {}'
                        .format(central_or_shift, sample_name, region_name,
                                channel_name))
                    # The entry is created even if the directory turns out to be empty
                    rles[channel_name][region_name][sample_name][
                        central_or_shift] = []
                    rle_filenames = sorted(hdfs.listdir(rle_dir))
                    if not rle_filenames:
                        logging.warning(
                            'Directory {} is empty'.format(rle_dir))
                        continue
                    rle_arr = []
                    # Mirrors rle_arr; gives constant-time duplicate checks while the list keeps
                    # the order in which the events were read
                    rle_seen = set()
                    for rle_filename in rle_filenames:
                        if not rle_filename.endswith('.txt'):
                            raise RuntimeError(
                                "Unexpected extension in file: %s" %
                                rle_filename)
                        with open(rle_filename, 'r') as rle_file:
                            for line in rle_file:
                                line_stripped = line.rstrip('\n')
                                if not REGEX_RLE.match(line_stripped):
                                    raise RuntimeError(
                                        "Unexpected line found in %s: %s" %
                                        (rle_filename, line_stripped))
                                rle = line_stripped
                                if rle in rle_seen:
                                    # Duplicates are reported and dropped, but do not abort the scan
                                    logging.error(
                                      "Duplicate event %s found in channel %s, region %s, sample %s, systematics %s" % \
                                      (rle, channel_name, region_name, sample_name, central_or_shift)
                                    )
                                    has_errors = True
                                    continue
                                rle_seen.add(rle)
                                rle_arr.append(rle)
                    logging.debug(
                        'Found {} events in sample {}, region {}, systematics {}, channel {}'
                        .format(len(rle_arr), sample_name, region_name,
                                central_or_shift, channel_name))
                    rles[channel_name][region_name][sample_name][
                        central_or_shift].extend(rle_arr)
    return rles, has_errors
def plot(input_files, output_files, title, expected_neff, mode):
  '''Sums the lumiScale histograms of all input files and plots their first bin with error bars

  Args:
    input_files:   dict, sample name -> dict with keys 'input' (path to the ROOT file) and
                   'nentries' (expected number of histogram entries, enforced only if mode is 'unbiased')
    output_files:  list of strings, paths the canvas is saved to
    title:         string, title assigned to every histogram that is drawn
    expected_neff: number, expected effective event count; drawn and checked only if mode is 'unbiased'
    mode:          string, name of the subdirectory that sits between the top-level directory of the
                   ROOT file and 'genEvt'; the value 'unbiased' additionally enables the consistency
                   checks against 'nentries' and expected_neff

  Returns:
    None; if no histogram could be found in any of the input files, an error is logged and
    nothing is plotted

  Raises:
    RuntimeError, if a file does not have the expected directory layout, if the number of entries
                  in a histogram differs from the expectation (mode 'unbiased' only), or if the
                  number of files or entries is inconsistent between the histograms
  '''
  histogram_dict = {}
  for sample_name, sample_entry in input_files.items():
    if not hdfs.isfile(sample_entry['input']):
      logging.error('Could not find file {}'.format(sample_entry['input']))
      continue
    root_file = ROOT.TFile.Open(sample_entry['input'], 'read')
    logging.debug('Opened file {}'.format(sample_entry['input']))
    # NOTE(review): `!= None` is kept on purpose -- presumably Get() hands back a null object
    # that compares equal to None without being the None singleton; confirm before changing it
    # to an identity test
    root_directories = list(filter(
      lambda root_dir: root_dir != None, [
        root_file.Get(os.path.join(key.GetName(), mode, 'genEvt')) \
        for key in root_file.GetListOfKeys() if key.GetClassName() == 'TDirectoryFile'
      ]
    ))
    if len(root_directories) != 1:
      raise RuntimeError('Expected single directory in %s' % sample_entry['input'])
    root_dir = root_directories[0]
    histogram_dirs = [
      root_dir.Get(key.GetName()) \
      for key in root_dir.GetListOfKeys() if key.GetClassName() == 'TDirectoryFile'
    ]
    if len(histogram_dirs) != 1:
      raise RuntimeError(
        'Expected single directory containing lumiScale histograms in %s' % sample_entry['input']
      )
    histogram_dir = histogram_dirs[0]
    histograms = [
      key.GetName() for key in histogram_dir.GetListOfKeys() \
      if key.GetClassName().startswith('TH1') and 'lumiScale' in key.GetName()
    ]
    for histogram_name_actual in histograms:
      # The plain 'lumiScale' histogram is labelled 'central'; all others are labelled by what
      # remains of their name after the common prefix and suffix have been removed
      histogram_name = histogram_name_actual.replace('_lumiScale', '').replace('CMS_ttHl_', '') \
                       if histogram_name_actual != 'lumiScale' else 'central'
      histogram = histogram_dir.Get(histogram_name_actual).Clone()
      # Detach the clone from the file so that it stays usable after the file has been closed
      histogram.SetDirectory(0)
      if histogram.GetEntries() != sample_entry['nentries'] and mode == 'unbiased':
        raise RuntimeError('Expected {} entries from {} in file {}, but got {} entries'.format(
          sample_entry['nentries'], histogram_name, sample_entry['input'], histogram.GetEntries(),
        ))
      if histogram_name not in histogram_dict:
        histogram_dict[histogram_name] = {
          'histogram' : histogram,
          'nentries'  : histogram.GetEntries(),
          'nfiles'    : 1,
        }
      else:
        histogram_dict[histogram_name]['histogram'].Add(histogram)
        histogram_dict[histogram_name]['nentries'] += histogram.GetEntries()
        histogram_dict[histogram_name]['nfiles'] += 1

    root_file.Close()

  if not histogram_dict:
    logging.error('Could not find histograms for samples {}'.format(', '.join(list(input_files.keys()))))
    return

  # Every histogram must have been summed over the same number of files and entries
  if len(set(histogram_dict[histogram_name]['nfiles'] for histogram_name in histogram_dict)) != 1:
    raise RuntimeError(
      'Inconsistent number of files found for samples %s' % ', '.join(list(input_files.keys()))
    )
  if len(set(histogram_dict[histogram_name]['nentries'] for histogram_name in histogram_dict)) != 1:
    raise RuntimeError(
      'Inconsistent number of entries found in samples %s' % ', '.join(list(input_files.keys()))
    )

  # None means "not determined yet"; a numeric sentinel would be mistaken for a real value
  # as soon as content - error (or content + error) is negative
  min_y = None
  max_y = None
  nentries = -1
  for histogram_entry in histogram_dict.values():
    histogram = histogram_entry['histogram']
    y_content = histogram.GetBinContent(1)
    y_error   = histogram.GetBinError(1)

    y_down = y_content - y_error
    y_up   = y_content + y_error

    min_y = y_down if min_y is None else min(min_y, y_down)
    max_y = y_up   if max_y is None else max(max_y, y_up)

    if nentries < 0:
      nentries = histogram_entry['nentries']
    else:
      assert(nentries == histogram_entry['nentries'])

    if not (y_down < expected_neff < y_up) and mode == 'unbiased':
      logging.warning(
        "Effective event count {} not within {} +- {}".format(expected_neff, y_content, y_error)
      )

  if mode == 'unbiased':
    # Make sure that the expectation is visible on the plot
    min_y = min(min_y, expected_neff)
    max_y = max(max_y, expected_neff)
  # Pad the y-axis by 20% of the covered range on both sides
  diff = 0.2 * (max_y - min_y)
  min_y -= diff
  max_y += diff

  canvas = ROOT.TCanvas('c', 'c', 1200, 900)
  canvas.SetGrid()
  ROOT.gStyle.SetOptStat(0)

  legend = ROOT.TLegend(0.1, 0.7, 0.48, 0.9)
  legend.SetHeader('N_{eff} (%d entries)' % nentries)

  expected_histogram = None

  line_width = 3
  marker_style = 20
  fill_style = 4000

  # Keep references to the drawn lines so that they can be deleted explicitly at the end
  lines = []

  for idx, histogram_name in enumerate(sorted(histogram_dict.keys())):
    histogram = histogram_dict[histogram_name]['histogram']
    color = 2 + idx

    histogram.SetTitle(title)
    histogram.SetAxisRange(min_y, max_y, "Y")
    histogram.SetLineColor(color)
    histogram.SetMarkerColor(color)
    histogram.SetLineWidth(line_width)
    histogram.SetMarkerStyle(marker_style)
    histogram.SetFillStyle(fill_style)
    # Only the first histogram sets up the frame; the rest are overlaid
    histogram.Draw("l e1%s" % (" same" if idx > 0 else ""))

    y_content = histogram.GetBinContent(1)
    y_error   = histogram.GetBinError(1)
    y_up      = y_content + y_error
    y_down    = y_content - y_error

    # Horizontal caps at content +- error, spanning the central half of the bin
    bin_width  = histogram.GetBinWidth(1)
    bin_center = histogram.GetBinCenter(1)
    line_min_x = bin_center - bin_width / 4
    line_max_x = bin_center + bin_width / 4

    line_down = ROOT.TLine(line_min_x, y_down, line_max_x, y_down)
    line_down.SetLineColor(color)
    line_down.SetLineWidth(line_width)
    line_down.Draw()
    lines.append(line_down)

    line_up = ROOT.TLine(line_min_x, y_up, line_max_x, y_up)
    line_up.SetLineColor(color)
    line_up.SetLineWidth(line_width)
    line_up.Draw()
    lines.append(line_up)

    # The number of decimals shrinks as the integer part of the content grows (at least 1)
    sig_digits = max(8 - int(math.ceil(math.log10(y_content))), 1) if y_content > 0. else 1
    leg_pattern = '%s (%.{}f #pm %.{}f)'.format(sig_digits, sig_digits)
    leg_name = leg_pattern % (histogram_name, y_content, y_error)
    legend.AddEntry(histogram, leg_name)

    logging.debug(
      'Effective event count for the sys unc option {} is {} +- {}'.format(
        histogram_name, y_content, y_error
      )
    )

    if not expected_histogram and mode == 'unbiased':
      # Build the expectation from a clone so that it inherits the binning of the real histograms
      expected_histogram = histogram.Clone()
      expected_histogram.Reset()
      expected_histogram.SetBinContent(1, expected_neff)
      expected_histogram.SetBinError(1, 0)
      expected_histogram.SetLineColor(ROOT.kBlack)
      expected_histogram.SetMarkerColor(ROOT.kBlack)
      expected_histogram.SetLineWidth(line_width)
      expected_histogram.SetMarkerStyle(marker_style)
      expected_histogram.SetLineStyle(9)
      expected_histogram.SetFillStyle(fill_style)

  if expected_histogram:
    logging.debug('Expecting {} events'.format(expected_neff))
    expected_histogram.Draw("e2 same")
    legend.AddEntry(expected_histogram, 'expected (%.1f)' % expected_neff)

  legend.Draw()

  for output_file in output_files:
    canvas.SaveAs(output_file)

  # Explicit cleanup of every ROOT object created or cloned by this function
  canvas.Close()
  legend.Delete()
  if expected_histogram:
    expected_histogram.Delete()
  for histogram_name in histogram_dict:
    histogram_dict[histogram_name]['histogram'].Delete()
  for line in lines:
    line.Delete()
Exemple #27
0
    def memJobList(self, inputFileList, rle_whitelist):
        '''
        Args:
          inputFileList: { int : array of strings }; i.e. fileset* ID and the list of files
          rle_whitelist: container of "run:lumi:event" strings; events found in it are recorded in the
                         'whitelist' entry of the job they fall into and, if self.rle_filter_file is set,
                         they are the only events that count (as one integration each)

        * if the script were to generate configuration files, this number would correspond to job ID

        Returns:
          { int : { str : int, str : [str, str, ...], str : [int, int], ... } }
            |        |          |                      |
         job id  "fileset_id" "input_fileset"     "event_range"

          Besides the keys shown above every job entry also holds 'nof_entries', 'nof_int',
          'nof_int_pass', 'nof_events_pass' and 'nof_zero'; jobs of non-empty filesets additionally
          hold 'whitelist'. A fileset without any entries yields a single job with event range [0, 0].

        Raises:
          ValueError, if a single event requires more integrations than self.mem_integrations_per_job

        The function reads a given set of files and determines the event range
        '''
        memJobDict = {}
        jobId = 0
        apply_rle_filter = bool(self.rle_filter_file)
        # Membership is tested once per event, so look the events up in a hashed container;
        # an empty (or otherwise falsy) whitelist is used as is
        whitelist_lookup = frozenset(rle_whitelist) if rle_whitelist else rle_whitelist
        for filesetId, inputFileSet in inputFileList.items():
            memJobDict_common = { 'fileset_id' : filesetId, 'input_fileset' : inputFileSet }
            print("Processing file %s" % inputFileSet)
            ch = ROOT.TChain(self.treeName)
            for fn in inputFileSet:
                # chaining a file
                logging.debug("Processing file {fileName}".format(fileName = fn))
                ch.AddFile(fn)

            nof_entries = ch.GetEntries()

            memJobDict_common['nof_entries'] = nof_entries
            if nof_entries == 0:
                # nothing to split: book a single empty job for this fileset
                jobId += 1
                memJobDict[jobId] = dict({
                    'event_range'     : [0, 0],
                    'nof_int'         : 0,
                    'nof_int_pass'    : 0,
                    'nof_events_pass' : 0,
                    'nof_zero'        : 0,
                }, **memJobDict_common)
                continue

            current_pos = 0
            evt_ranges = []

            # running value for the chunk that is currently being filled, list of closed chunks
            counter, counter_arr = 0, []
            nof_events_pass_counter, nof_events_pass   = 0, []
            nof_int_pass_counter,    nof_int_pass      = 0, []
            nof_zero_integrations,   nof_events_zero   = 0, []
            whitelist_all,           whitelist_running = [], []

            # buffers that GetEntry() fills for every event
            run                    = array.array('I', [0])
            luminosityBlock        = array.array('I', [0])
            event                  = array.array('L', [0])
            maxPermutations_addMEM = array.array('i', [0])
            ch.SetBranchAddress("run",             run)
            ch.SetBranchAddress("luminosityBlock", luminosityBlock)
            ch.SetBranchAddress("event",           event)
            if self.maxPermutations_branchName is not None and self.maxPermutations_branchName != "":
                ch.SetBranchAddress(self.maxPermutations_branchName, maxPermutations_addMEM)
            else:
                # no branch to read the number of integrations from: every event counts as one
                maxPermutations_addMEM[0] = 1

            for i in range(nof_entries):
                ch.GetEntry(i)
                if i > 0 and i % 10000 == 0:
                    print(" Processing event %i/%i" % (i, nof_entries))
                    logging.debug("Processing event %i/%i" % (i, nof_entries))

                rle = ':'.join(str(nr[0]) for nr in (run, luminosityBlock, event))
                is_whitelisted = rle in whitelist_lookup

                nof_integrations = maxPermutations_addMEM[0]
                if apply_rle_filter:
                    if is_whitelisted:
                        if not (nof_integrations > 0):
                            logging.error("Expected non-zero # integrations in event {}, but got {}".format(rle, nof_integrations))
                        nof_integrations = 1
                    else:
                        nof_integrations = 0

                if nof_integrations < 0:
                    nof_integrations = 0

                if nof_integrations >= 1:
                    nof_events_pass_counter += 1
                    nof_int_pass_counter += nof_integrations
                else:
                    nof_zero_integrations += 1

                if nof_integrations > self.mem_integrations_per_job:
                    # Report the event via the buffers bound above: they are what the branches
                    # "run", "luminosityBlock" and "event" were read into
                    raise ValueError("Too many nof_integrations = %d in file(s) %s at %d:%d:%d" %
                                     (nof_integrations, ', '.join(inputFileSet),
                                      run[0], luminosityBlock[0], event[0]))

                if (counter + nof_integrations) > self.mem_integrations_per_job:
                    # The current event does not fit into the running chunk anymore: close the chunk
                    # just before this event and start a new one
                    if evt_ranges:
                        evt_ranges.append([evt_ranges[-1][1], current_pos])
                    else:
                        evt_ranges.append([0, current_pos])
                    counter_arr.append(counter)
                    counter = 0

                    nof_events_pass.append(nof_events_pass_counter)
                    nof_events_pass_counter = 0

                    nof_int_pass.append(nof_int_pass_counter)
                    nof_int_pass_counter = 0

                    nof_events_zero.append(nof_zero_integrations)
                    nof_zero_integrations = 0

                    if apply_rle_filter:
                        whitelist_all.append(whitelist_running)
                        whitelist_running = []

                if is_whitelisted:
                    whitelist_running.append(rle)

                counter += nof_integrations
                current_pos += 1

            # close the last chunk, which extends to the end of the fileset
            if counter <= self.mem_integrations_per_job and counter >= 0:
                if evt_ranges:
                    evt_ranges.append([evt_ranges[-1][1], int(nof_entries)])
                else:
                    evt_ranges.append([0, int(nof_entries)])
                counter_arr.append(counter)
                nof_events_pass.append(nof_events_pass_counter)
                nof_int_pass.append(nof_int_pass_counter)
                nof_events_zero.append(nof_zero_integrations)
                if apply_rle_filter:
                    whitelist_all.append(whitelist_running)

            # ensure that the event ranges won't overlap (i.e. there won't be any double-processing of any event)
            evt_ranges_cat = []
            for evt_range in evt_ranges:
                evt_ranges_cat.extend(range(evt_range[0], evt_range[1]))
            # compare list against list so that the check means the same on Python 2 and Python 3
            assert(evt_ranges_cat == list(range(nof_entries)))
            assert(bool(evt_ranges))

            for i, evt_range in enumerate(evt_ranges):
                # jobId keeps counting across filesets, so the cap applies to all of them together
                if self.max_jobs_per_sample == -1 or jobId < self.max_jobs_per_sample:
                    jobId += 1
                    memJobDict[jobId] = dict({
                        'event_range'     : evt_range,
                        'nof_int'         : counter_arr[i],
                        'nof_int_pass'    : nof_int_pass[i],
                        'nof_events_pass' : nof_events_pass[i],
                        'nof_zero'        : nof_events_zero[i],
                        'whitelist'       : whitelist_all[i] if apply_rle_filter else [],
                    }, **memJobDict_common)
                # we now have all event ranges per one file, let's add them to the dictionary

            del ch
        return memJobDict