예제 #1
0
def deepCheckWeight(file):
    """Check that the 'weight' branch of the given ROOT file(s) is usable.

    Some root files only contain the branches kept from the beginning but
    not those added by the filler (e.g. the weight branch). For such files
    the weight, and therefore the yield, evaluates to NaN.

    The input is opened as a Sample on the tree "Events".

    Returns:
        0 if the chain has no branch named 'weight'; otherwise True when the
        yield drawn with weight string "weight" is not NaN, False when it is.
    """
    from math import isnan
    from RootTools.core.Sample import Sample

    sample = Sample.fromFiles(name="sample", treeName="Events", files=file)

    # Collect the names of all branches present in the chain.
    branches = sample.chain.GetListOfBranches()
    branch_names = [
        branches.At(idx).GetName() for idx in range(branches.GetSize())
    ]
    if 'weight' not in branch_names:
        return 0

    weighted_yield = sample.getYieldFromDraw(weightString="weight")['val']
    del sample

    # A NaN yield marks a file whose weight branch was never filled.
    if isnan(weighted_yield):
        return False
    return True
예제 #2
0
    def __init__(self,
                 heppy_samples,
                 dpm_directories,
                 cache_file,
                 multithreading=True):
        """Load the heppy-sample -> Sample map from cache, or rebuild it.

        If ``cache_file`` exists and ``overwrite`` is falsy, ``self.sample_map``
        is unpickled from it and nothing else is done (``self.cmg_directories``
        is not set in that case). Otherwise the dpm directories are walked,
        matched to the heppy samples, combined into Sample objects, and the
        resulting map is pickled to ``cache_file`` (unless it is empty).

        Args:
            heppy_samples: sequence of objects providing ``dataset``, ``name``
                and ``isData``. It is iterated twice and ``len()`` is taken.
                Each element gets a ``candidate_directories`` attribute
                assigned as a side effect.
            dpm_directories: sequence of dpm paths handed to ``walk_dpm``.
            cache_file: path of the pickle file used as cache.
            multithreading: forwarded to ``walker.combine_cmg_directories``.

        NOTE(review): ``overwrite`` and ``maxN`` are not parameters and are not
        defined in this method; presumably they are module-level names --
        confirm they exist wherever this class is used.
        """
        # Read cache file, if exists
        if os.path.exists(cache_file) and not overwrite:
            # NOTE: pickle executes arbitrary code on load; only point
            # cache_file at files written by this code.
            # Binary mode + context manager: closes the handle deterministically
            # and works on Python 2 and 3 (the `file` builtin is Python 2 only).
            with open(cache_file, 'rb') as cache_handle:
                self.sample_map = pickle.load(cache_handle)
            logger.info("Loaded cache file %s" % cache_file)
        else:
            logger.info("Cache file %s not found. Recreate map.", cache_file)
            logger.info("Check proxy.")

            # Proxy certificate
            from RootTools.core.helpers import renew_proxy
            # Make proxy in afs to allow batch jobs to run
            proxy_path = os.path.expandvars('$HOME/private/.proxy')
            proxy = renew_proxy(proxy_path)
            logger.info("Using proxy %s" % proxy)

            # Read dpm directories
            self.cmg_directories = {}
            for data_path in dpm_directories:
                logger.info("Walking dpm directory %s", data_path)
                walker = walk_dpm(data_path)
                self.cmg_directories[
                    data_path] = walker.walk_dpm_cmgdirectories('.', maxN=maxN)

            logger.info("Now mapping directories to heppy samples")
            for heppy_sample in heppy_samples:
                heppy_sample.candidate_directories = []
                # Second and third '/'-separated fields of the dataset string;
                # presumably primary dataset and era -- verify against caller.
                pd, era = heppy_sample.dataset.split('/')[1:3]
                for data_path in self.cmg_directories.keys():
                    for dpm_directory in self.cmg_directories[data_path].keys(
                    ):
                        if not ('/%s/' % pd in dpm_directory):
                            logger.debug("/%s/ not in dpm_directory %s", pd,
                                         dpm_directory)
                            continue
                        if not ('/' + era in dpm_directory):
                            logger.debug("/%s not in dpm_directory %s", era,
                                         dpm_directory)
                            continue
                        heppy_sample.candidate_directories.append(
                            [data_path, dpm_directory])
                        logger.debug("heppy sample %s in %s",
                                     heppy_sample.name, dpm_directory)
                logger.info("Found heppy sample %s in %i directories.",
                            heppy_sample.name,
                            len(heppy_sample.candidate_directories))

            # Merge
            from RootTools.core.Sample import Sample
            logger.info(
                "Now making new sample map from %i directories and for %i heppy samples to be stored in %s",
                len(dpm_directories), len(heppy_samples), cache_file)
            self.sample_map = {}
            for heppy_sample in heppy_samples:
                if len(heppy_sample.candidate_directories) == 0:
                    logger.info("No directory found for %s", heppy_sample.name)
                else:
                    # NOTE(review): `walker` is the instance left over from the
                    # last iteration of the directory scan above; it is used
                    # for all data paths here -- confirm this is intended.
                    normalization, files = walker.combine_cmg_directories(
                        cmg_directories={
                            dpm_directory:
                            self.cmg_directories[data_path][dpm_directory]
                            for data_path, dpm_directory in
                            heppy_sample.candidate_directories
                        },
                        multithreading=multithreading,
                    )
                    logger.info(
                        "Sample %s: Found a total of %i files with normalization %3.2f",
                        heppy_sample.name, len(files), normalization)
                    tmp_files = []
                    for f in files:
                        isGoodFile = False
                        try:
                            isGoodFile = checkRootFile(
                                "root://hephyse.oeaw.ac.at/" + os.path.join(f))
                        except IOError:
                            logger.info("File %s is corrupted, skipping", f)
                        if isGoodFile:
                            # Only report the file as added when it really is.
                            logger.debug("File %s got added", f)
                            tmp_files.append(f)
                    # The heppy sample object itself is the dictionary key.
                    self.sample_map[heppy_sample] = Sample.fromFiles(
                        heppy_sample.name,
                        files=[
                            'root://hephyse.oeaw.ac.at/' + f for f in tmp_files
                        ],
                        normalization=normalization,
                        treeName='tree',
                        isData=heppy_sample.isData,
                        maxN=maxN)

                    logger.info(
                        "Combined %i directories for sample %s to a total of %i files with normalization %3.2f",
                        len(heppy_sample.candidate_directories),
                        heppy_sample.name, len(files), normalization)

            # Store cache file
            dir_name = os.path.dirname(cache_file)
            if self.sample_map:
                # dir_name is '' for a bare file name; os.makedirs('') raises.
                if dir_name and not os.path.exists(dir_name):
                    os.makedirs(dir_name)
                with open(cache_file, 'wb') as cache_handle:
                    pickle.dump(self.sample_map, cache_handle)
                logger.info("Created MC sample cache %s", cache_file)
            else:
                logger.info("Skipping to write %s because map is empty.",
                            cache_file)