Example #1
    def __loadChain(self):
        ''' Load the TChain. Private.
        '''
        if len(self.files) == 0:
            raise helpers.EmptySampleError("Sample {name} has no input files! Cannot load.".format(name = self.name) )
        else:
            self._chain = ROOT.TChain(self.treeName)
            counter = 0
            for f in self.files:
                logger.debug("Now adding file %s to sample '%s'", f, self.name)
                try:
                    if helpers.checkRootFile(f, checkForObjects=[self.treeName]):
                        self._chain.Add(f)
                        counter += 1
                    else:
                        logger.error( "Check of root file failed. Skipping. File: %s", f )
                except IOError:
                    logger.error( "Could not load file %s", f )

            logger.debug( "Loaded %i files for sample '%s'.", counter, self.name )

        # Add friends
        if hasattr( self, 'friends'):  # Catch cases where cached samples have no default value for friends attribute
            for friend_sample, friend_treeName in self.friends:
                self.chain.AddFriend(friend_sample.chain, friend_treeName)
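
Since __loadChain is private, callers never invoke it directly; the friend-adding block above already relies on a public chain attribute. A minimal sketch of the lazy property such a class might expose, meant to live in the same class (the property body here is an assumption, not code from the sample):

    @property
    def chain(self):
        ''' Lazily load and return the TChain on first access.
            Illustrative sketch; the actual RootTools property may differ.
        '''
        if not hasattr(self, '_chain') or self._chain is None:
            self.__loadChain()
        return self._chain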
Example #2
 def __loadChain(self):
     ''' Load the TChain. Private.
     '''
     if len(self.files) == 0:
         raise helpers.EmptySampleError("Sample {name} has no input files! Cannot load.".format(name = self.name) )
     else:
         self._chain = ROOT.TChain(self.treeName)
         counter = 0
         for f in self.files:
             logger.debug("Now adding file %s to sample '%s'", f, self.name)
             try:
                 if helpers.checkRootFile(f, checkForObjects=[self.treeName]):
                     self._chain.Add(f)
                     counter += 1
             except IOError:
                 logger.warning( "Could not load file %s", f )
         logger.debug( "Loaded %i files for sample '%s'.", counter, self.name )
Example #3
    def __init__(self,
                 heppy_samples,
                 dpm_directories,
                 cache_file,
                 multithreading=True,
                 overwrite=False,  # referenced below; missing from the original signature
                 maxN=None):       # referenced below; missing from the original signature
        # Read cache file, if exists
        if os.path.exists(cache_file) and not overwrite:
            self.sample_map = pickle.load(open(cache_file, 'rb'))
            logger.info("Loaded cache file %s", cache_file)
        else:
            logger.info("Cache file %s not found. Recreate map.", cache_file)
            logger.info("Check proxy.")

            # Proxy certificate
            from RootTools.core.helpers import renew_proxy
            # Make proxy in afs to allow batch jobs to run
            proxy_path = os.path.expandvars('$HOME/private/.proxy')
            proxy = renew_proxy(proxy_path)
            logger.info("Using proxy %s" % proxy)

            # Read dpm directories
            self.cmg_directories = {}
            for data_path in dpm_directories:
                logger.info("Walking dpm directory %s", data_path)
                walker = walk_dpm(data_path)
                self.cmg_directories[data_path] = walker.walk_dpm_cmgdirectories('.', maxN=maxN)

                # Note: walker is reused below in combine_cmg_directories

            logger.info("Now mapping directories to heppy samples")
            for heppy_sample in heppy_samples:
                heppy_sample.candidate_directories = []
                pd, era = heppy_sample.dataset.split('/')[1:3]
                for data_path in self.cmg_directories.keys():
                    for dpm_directory in self.cmg_directories[data_path].keys():
                        if not ('/%s/' % pd in dpm_directory):
                            logger.debug("/%s/ not in dpm_directory %s", pd,
                                         dpm_directory)
                            continue
                        if not ('/' + era in dpm_directory):
                            logger.debug("/%s not in dpm_directory %s", era,
                                         dpm_directory)
                            continue
                        heppy_sample.candidate_directories.append(
                            [data_path, dpm_directory])
                        logger.debug("heppy sample %s in %s",
                                     heppy_sample.name, dpm_directory)
                logger.info("Found heppy sample %s in %i directories.",
                            heppy_sample.name,
                            len(heppy_sample.candidate_directories))

            # Merge
            from RootTools.core.Sample import Sample
            logger.info(
                "Now making new sample map from %i directories and for %i heppy samples to be stored in %s",
                len(dpm_directories), len(heppy_samples), cache_file)
            self.sample_map = {}
            for heppy_sample in heppy_samples:
                if len(heppy_sample.candidate_directories) == 0:
                    logger.info("No directory found for %s", heppy_sample.name)
                else:
                    normalization, files = walker.combine_cmg_directories(\
                            cmg_directories = {dpm_directory:self.cmg_directories[data_path][dpm_directory] for data_path, dpm_directory in heppy_sample.candidate_directories },
                            multithreading = multithreading,
                        )
                    logger.info(
                        "Sample %s: Found a total of %i files with normalization %3.2f",
                        heppy_sample.name, len(files), normalization)
                    tmp_files = []
                    for f in files:
                        isGoodFile = False
                        try:
                            isGoodFile = checkRootFile("root://hephyse.oeaw.ac.at/" + f)
                        except IOError:
                            logger.info("File %s is corrupted, skipping", f)
                        if isGoodFile:
                            logger.debug("File %s got added", f)
                            tmp_files.append(f)
                    self.sample_map[heppy_sample] = Sample.fromFiles(
                        heppy_sample.name,
                        files=[
                            'root://hephyse.oeaw.ac.at/' + f for f in tmp_files
                        ],
                        normalization=normalization,
                        treeName='tree',
                        isData=heppy_sample.isData,
                        maxN=maxN)

                    logger.info(
                        "Combined %i directories for sample %s to a total of %i files with normalization %3.2f",
                        len(heppy_sample.candidate_directories),
                        heppy_sample.name, len(files), normalization)

            # Store cache file
            dir_name = os.path.dirname(cache_file)
            if len(self.sample_map.keys()) > 0:
                if not os.path.exists(dir_name): os.makedirs(dir_name)
                pickle.dump(self.sample_map, open(cache_file, 'wb'))
                logger.info("Created MC sample cache %s", cache_file)
            else:
                logger.info("Skipping to write %s because map is empty.",
                            cache_file)
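
This __init__ evidently belongs to a class that maps heppy samples to DPM directories and caches the result. Assuming a hypothetical class name HeppySampleMap (and the overwrite/maxN arguments added to the signature above), usage would look roughly like:

# Hypothetical usage; class name and paths are illustrative assumptions.
mapper = HeppySampleMap(
    heppy_samples,
    dpm_directories = ['/dpm/oeaw.ac.at/home/cms/store/user/...'],
    cache_file      = '/afs/cern.ch/user/.../sample_map.pkl',
    multithreading  = True,
)
sample = mapper.sample_map[heppy_samples[0]]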
Example #4
    def fromDAS(cls, name, dataset, instance = 'global', prefix='root://cms-xrd-global.cern.ch/', texName = None, maxN = None, dbFile=None, overwrite=False, skipCheck = False):
        ''' Make sample from DAS. 
        '''
        # https://github.com/CERN-PH-CMG/cmg-cmssw/blob/0f1d3bf62e7ec91c2e249af1555644b7f414ab50/CMGTools/Production/python/dataset.py#L437

        maxN = maxN if maxN is not None and maxN>0 else None
        limit = maxN if maxN else 0
        DASname = dataset.rstrip('/')

        n_cache_files = 0 
        # Don't use the cache on partial queries
        if dbFile is not None and maxN is None:
            cache = Database(dbFile, "fileCache", ["name"]) 
            n_cache_files = cache.contains({'name':name})
        else:
            cache = None

        if n_cache_files and not overwrite:
            files = [ f["value"] for f in cache.getDicts({'name':name}) ]
            logger.info('Found sample %s in cache %s, return %i files.', name, dbFile, len(files))
        else:
            if overwrite and cache is not None:
                cache.removeObjects({"name":name})

            def _dasPopen(dbs):
                if 'LSB_JOBID' in os.environ:
                    raise RuntimeError("Trying to do a DAS query while in a LXBatch job (env variable LSB_JOBID defined)\nquery was: %s" % dbs)
                logger.info('DAS query\t: %s',  dbs)
                return os.popen(dbs)

            query, qwhat = DASname, "dataset"
            if "#" in DASname: qwhat = "block"

            dbs = 'dasgoclient -query="file %s=%s instance=prod/%s" --limit %i' % (qwhat, query, instance, limit)
            dbsOut = _dasPopen(dbs).readlines()
            
            files = []
            for line in dbsOut:
                if line.startswith('/store/'):
                    filename = line.rstrip()
                    try:
                        if skipCheck or helpers.checkRootFile(prefix+filename):
                            files.append(filename)
                            # Cache only files that passed the check
                            if cache is not None:
                                cache.add({"name":name}, filename, save=True)
                    except IOError:
                        logger.warning( "IOError for file %s. Skipping.", filename )

        if limit > 0: files = files[:limit]

        result = cls(name, files=[prefix+f for f in files], texName = texName)
        result.DASname = DASname
        return result
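
fromDAS is a classmethod, so it is invoked on the Sample class itself. A usage sketch (the dataset path and cache file are placeholders):

# Sketch; dataset and dbFile are placeholders.
sample = Sample.fromDAS(
    'DYJetsToLL',
    '/DYJetsToLL_M-50_TuneCP5_13TeV-madgraphMLM-pythia8/RunIIAutumn18MiniAOD-102X_upgrade2018_realistic_v15-v1/MINIAODSIM',
    dbFile = 'das_file_cache.db',
)
print("Loaded %i files" % len(sample.files))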
Example #5
    def fromCMGCrabDirectory(cls, name, baseDirectory, treeFilename = 'tree.root', treeName = 'tree', maxN = None, \
            selectionString = None, weightString = None,
            isData = False, color = 0, texName = None):
        '''Load a CMG crab output directory
        ''' 
        import tarfile
        from cmg_helpers import read_cmg_normalization

        maxN = maxN if maxN is not None and maxN>0 else None

        # Walk through all subdirectories and pick up pairs of files '..._n.root' and '..._n.tgz', where n is the job number
        treeFiles = {}
        zipFiles  = {}
        for root, subFolders, filenames in os.walk( baseDirectory ):
            for filename in filenames:
                base, ext = os.path.splitext( filename )
                try:
                    n = int(base.split('_')[-1])
                except ValueError:
                    # filename is not of the form 'xyz_n' where n is the job number
                    continue
                # Add the tgz and root files to their dicts.
                filename_ = os.path.join(root, filename)
                if ext=='.root':
                    treeFiles[n] = filename_
                if ext=='.tgz':
                    zipFiles[n] = filename_
        # Find pairs of zip and root files
        pairs = set(zipFiles.keys()) & set(treeFiles.keys())
        n_jobs = len( set(zipFiles.keys()) | set(treeFiles.keys()) )

        normalization = 0
        files = []
        failedJobs = []
        for n in pairs:
            sumW = None
            tf = tarfile.open( zipFiles[n], 'r:gz' )
            for f in tf.getmembers():
                if "SkimReport.txt" in f.name:
                    sumW = read_cmg_normalization(tf.extractfile(f))
                if sumW is not None: break
            if sumW is None:
                logger.warning( "No normalization found when reading tar file %s", zipFiles[n] )
            tf.close()

            # Check treefile for whether the tree 'treeName' can be found.
            # This is an implicit check for broken, recovered or otherwise corrupted root files.
            treeFile = treeFiles[n] if helpers.checkRootFile(treeFiles[n], checkForObjects = [treeName] ) else None

            if treeFile is None: logger.warning( "File %s looks broken. Checked for presence of tree %s.", treeFiles[n] , treeName )

            # If both normalization and tree file are OK, count the job as successful.
            if sumW and treeFile:
                files.append( treeFile )
                normalization += sumW
                logger.debug( "Successfully read job %i and incremented normalization by %7.2f",  n, sumW )
            else:
                failedJobs.append( n )

        # Don't allow empty samples
        if len(files) == 0:
            raise helpers.EmptySampleError("Could not find valid crab CMG output for sample {0}. Total number of jobs: {1}. baseDirectory: {2}"\
                  .format(name, len(pairs), baseDirectory))

        # Log statements
        eff = 100*len(failedJobs)/float( n_jobs )
        logger.debug("Loaded CMGOutput sample %s. Total number of  jobs: %i, both tgz and root: %i. Normalization: %7.2f Bad: %i. Inefficiency: %3.3f", \
                          name, len(pairs), n_jobs, normalization, len(failedJobs), eff)

        logger.debug( "Read %i chunks and total normalization of %f", len(files), normalization )
        return cls( name = name, treeName = treeName, files = files, normalization = normalization, 
                selectionString = selectionString, weightString = weightString, 
                isData = isData, color = color, texName = texName )
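
A usage sketch for the crab loader above (the base directory is a placeholder):

# Sketch; baseDirectory is a placeholder.
sample = Sample.fromCMGCrabDirectory(
    'TTJets',
    '/path/to/crab/output/TTJets',
    treeFilename = 'tree.root',
    treeName     = 'tree',
)
print("Normalization: %f" % sample.normalization)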
Example #6
    def fromCMGOutput(cls, name, baseDirectory, treeFilename = 'tree.root', chunkString = None, treeName = 'tree', maxN = None, \
            selectionString = None, weightString = None, 
            isData = False, color = 0, texName = None):
        '''Load a CMG output directory from e.g. unzipped crab output in the 'Chunks' directory structure. 
           Expects the presence of the tree root file and the SkimReport.txt
        ''' 
        from cmg_helpers import read_cmg_normalization
        maxN = maxN if maxN is not None and maxN>0 else None

        # Read all subdirectories in the base directory. If chunkString is not None, require CMG output name formatting
        chunkDirectories = []

        # FIXME: Better to loop only over subdirectories in base directory?
        for x in os.listdir(baseDirectory): 
            if os.path.isdir(os.path.join(baseDirectory, x)):
                if not chunkString or (x.startswith(chunkString) and x.endswith('_Chunk')) or x==chunkString:
                    chunkDirectories.append(os.path.join(baseDirectory, x))
                    if len(chunkDirectories) == maxN: break

        logger.debug( "Found %i chunk directories with chunkString %s in base directory %s", \
                           len(chunkDirectories), chunkString, baseDirectory )
        normalization = 0
        files = []
        failedChunks=[]
        goodChunks  =[]

        for i, chunkDirectory in enumerate( chunkDirectories ):
            success = False
            logger.debug("Reading chunk %s", chunkDirectory)

            # Find normalization
            sumW = None
            for root, subFolders, filenames in os.walk( chunkDirectory ):
                # Determine normalization constant
                if 'SkimReport.txt' in filenames:
                    skimReportFilename = os.path.join(root, 'SkimReport.txt')
                    with open(skimReportFilename, 'r') as fin:
                        sumW = read_cmg_normalization(fin)
                        if not sumW:
                            logger.warning( "Read chunk %s and found report '%s' but could not read normalization.",
                                                 chunkDirectory, skimReportFilename )
            # Find treefile
            treeFile = None
            for root, subFolders, filenames in os.walk( chunkDirectory ):
                # Load tree file 
                if treeFilename in filenames:
                    treeFile = os.path.join(root, treeFilename)
                    # Check whether the root file is OK and contains the tree
                    if not helpers.checkRootFile(treeFile, checkForObjects=[treeName] ):
                        logger.warning( "Read chunk %s and found tree file '%s' but file looks broken.",  chunkDirectory, treeFile )
                        treeFile = None

            # If both normalization and tree file are OK, count the chunk as successful.
            if sumW and treeFile:
                files.append( treeFile )
                normalization += sumW
                logger.debug( "Successfully read chunk %s and incremented normalization by %7.2f",  chunkDirectory, sumW )
                success = True
                goodChunks.append( chunkDirectory )

            if not success:
                failedChunks.append( chunkDirectory )

        # Don't allow empty samples
        if len(goodChunks) == 0:
            raise helpers.EmptySampleError("Could not find good CMGOutput chunks for sample {0}. Total number of chunks: {1}. baseDirectory: {2}"\
                  .format(name, len(chunkDirectories), baseDirectory))

        # Log statements
        eff = 100*len(failedChunks)/float( len(chunkDirectories) )
        logger.debug("Loaded CMGOutput sample %s. Total number of chunks : %i. Normalization: %7.2f Bad: %i. Inefficiency: %3.3f", \
                          name, len(chunkDirectories), normalization, len(failedChunks), eff)

        for chunk in failedChunks:
            logger.debug( "Failed to load chunk %s", chunk)
        logger.debug( "Read %i chunks and total normalization of %f", len(files), normalization )
        return cls( name = name, treeName = treeName, files = files, normalization = normalization, 
            selectionString = selectionString, weightString = weightString,
            isData = isData, color = color, texName = texName )
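
And a matching sketch for the chunk-based loader (the base directory and chunkString are placeholders):

# Sketch; baseDirectory and chunkString are placeholders.
sample = Sample.fromCMGOutput(
    'WJets',
    '/path/to/cmg/output',
    chunkString = 'WJetsToLNu',
)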
Example #7
argParser.add_argument('--logLevel',    action='store',     nargs='?', choices=['CRITICAL', 'ERROR', 'WARNING', 'INFO', 'DEBUG', 'TRACE', 'NOTSET'], default='INFO', help="Log level for logging" )

args = argParser.parse_args()

logger = logger.get_logger(args.logLevel, logFile = None)

# Check if the gridpack exists
if not os.path.exists( args.gridpack ):
    logger.error( "Gridpack %s not found. Exit", args.gridpack )
    sys.exit(0)

# Strip the '.tar.xz' suffix; str.rstrip would strip a character set, not the suffix
gp = os.path.basename(args.gridpack)
if gp.endswith('.tar.xz'): gp = gp[:-len('.tar.xz')]

# Check if the output is there
out_file = os.path.join( skim_output_directory, 'gen', args.outDir, gp, gp+'.root') 
if os.path.exists( out_file ) and checkRootFile( out_file, checkForObjects=["Events"] ):
    logger.info( "Found output file %s.", out_file )
    if args.overwrite:
        os.remove( out_file )
        logger.info( "Deleted, because I overwrite." )
    else:
        sys.exit(0)
else:
    logger.info( "Did not find output file %s. Look for gen sample. ", out_file )

# Check if the intermediate gen file is there
gen_file = os.path.join( args.genSampleDir, gp, 'events.root' ) 
if os.path.exists( gen_file ) and checkEDMRootFile( gen_file ):
    logger.info( "Found edm gen file %s.", gen_file)
    if args.overwriteGenFile:
        os.remove( gen_file )
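
The suffix handling above deserves a note: str.rstrip strips a character set, not a suffix, which is why the explicit endswith check replaces the original rstrip('.tar.xz'). A quick demonstration:

# rstrip removes any run of trailing characters drawn from the set {'.', 't', 'a', 'r', 'x', 'z'}:
print('my_gridpack_x.tar.xz'.rstrip('.tar.xz'))   # -> 'my_gridpack_', the trailing 'x' is eaten too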
Example #8
    def fromDPMDirectory(cls,
                         name,
                         directory,
                         prefix='root://hephyse.oeaw.ac.at/',
                         texName=None,
                         maxN=None,
                         dbFile=None,
                         overwrite=False,
                         skipCheck=False):

        maxN = maxN if maxN is not None and maxN > 0 else None
        limit = maxN if maxN else 0

        n_cache_files = 0
        # Don't use the cache on partial queries
        if dbFile is not None and maxN is None:
            cache = Database(dbFile, "fileCache", ["name"])
            n_cache_files = cache.contains({'name': name})
        else:
            cache = None

        if n_cache_files and not overwrite:
            files = [f["value"] for f in cache.getDicts({'name': name})]
            logger.info('Found sample %s in cache %s, return %i files.', name,
                        dbFile, len(files))
        else:
            if overwrite and cache is not None:
                cache.removeObjects({"name": name})

            def _dasPopen(dbs):
                if 'LSB_JOBID' in os.environ:
                    raise RuntimeError("Trying to do a DAS query while in a LXBatch job (env variable LSB_JOBID defined)\nquery was: %s" % dbs)
                logger.info('DAS query\t: %s', dbs)
                return os.popen(dbs)

            files = []
            dbs = 'xrdfs %s ls %s' % (prefix, directory)
            dbsOut = _dasPopen(dbs).readlines()

            for line in dbsOut:
                if line.startswith('/store/'):
                    filename = line.rstrip()
                    try:
                        if skipCheck or helpers.checkRootFile(prefix + filename):
                            files.append(filename)
                            # Cache only files that passed the check
                            if cache is not None:
                                cache.add({"name": name}, filename, save=True)
                    except IOError:
                        logger.warning("IOError for file %s. Skipping.", filename)

        if limit > 0: files = files[:limit]

        result = cls(name,
                     files=[prefix + f for f in files],
                     texName=texName)
        result.DASname = prefix + directory.rstrip("/")
        return result
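
Usage mirrors fromDAS, listing a DPM directory over xrootd instead of querying DAS (the directory is a placeholder):

# Sketch; the DPM directory is a placeholder.
sample = Sample.fromDPMDirectory(
    'data_Run2018A',
    '/cms/store/user/someuser/skims/data_Run2018A',
    dbFile = 'dpm_file_cache.db',
)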
Example #9
    def fromCMGCrabDirectory(cls, name, baseDirectory, treeFilename = 'tree.root', treeName = 'tree', maxN = None, xSection = -1,\
            selectionString = None, weightString = None,
            isData = False, color = 0, texName = None):
        '''Load a CMG crab output directory
        ''' 
        import tarfile
        from cmg_helpers import read_cmg_normalization

        maxN = maxN if maxN is not None and maxN>0 else None

        # Walk through all subdirectories and pick up pairs of files '..._n.root' and '..._n.tgz', where n is the job number
        treeFiles = {}
        zipFiles  = {}
        for root, subFolders, filenames in os.walk( baseDirectory ):
            for filename in filenames:
                base, ext = os.path.splitext( filename )
                try:
                    n = int(base.split('_')[-1])
                except ValueError:
                    # filename is not of the form 'xyz_n' where n is the job number
                    continue
                # Add the tgz and root files to their dicts.
                filename_ = os.path.join(root, filename)
                if ext=='.root':
                    treeFiles[n] = filename_
                if ext=='.tgz':
                    zipFiles[n] = filename_
        # Find pairs of zip and root files
        pairs = set(zipFiles.keys()) & set(treeFiles.keys())
        n_jobs = len( set(zipFiles.keys()) | set(treeFiles.keys()) )

        normalization = 0
        files = []
        failedJobs = []
        for n in pairs:
            sumW = None
            tf = tarfile.open( zipFiles[n], 'r:gz' )
            for f in tf.getmembers():
                if "SkimReport.txt" in f.name:
                    sumW = read_cmg_normalization(tf.extractfile(f))
                if sumW is not None: break
            if sumW is None:
                logger.warning( "No normalization found when reading tar file %s", zipFiles[n] )
            tf.close()

            # Check treefile for whether the tree 'treeName' can be found.
            # This is an implicit check for broken, recovered or otherwise corrupted root files.
            treeFile = treeFiles[n] if helpers.checkRootFile(treeFiles[n], checkForObjects = [treeName] ) else None

            if treeFile is None: logger.warning( "File %s looks broken. Checked for presence of tree %s.", treeFiles[n] , treeName )

            # If both normalization and tree file are OK, count the job as successful.
            if sumW and treeFile:
                files.append( treeFile )
                normalization += sumW
                logger.debug( "Successfully read job %i and incremented normalization by %7.2f",  n, sumW )
            else:
                failedJobs.append( n )

        # Don't allow empty samples
        if len(files) == 0:
            raise helpers.EmptySampleError("Could not find valid crab CMG output for sample {0}. Total number of jobs: {1}. baseDirectory: {2}"\
                  .format(name, len(pairs), baseDirectory))

        # Log statements
        eff = 100*len(failedJobs)/float( n_jobs )
        logger.debug("Loaded CMGOutput sample %s. Total number of  jobs: %i, both tgz and root: %i. Normalization: %7.2f Bad: %i. Inefficiency: %3.3f", \
                          name, len(pairs), n_jobs, normalization, len(failedJobs), eff)

        logger.debug( "Read %i chunks and total normalization of %f", len(files), normalization )
        return cls( name = name, treeName = treeName, files = files, normalization = normalization, xSection = xSection, 
                selectionString = selectionString, weightString = weightString, 
                isData = isData, color = color, texName = texName )
Example #10
    def fromCMGOutput(cls, name, baseDirectory, treeFilename = 'tree.root', chunkString = None, treeName = 'tree', maxN = None, \
            selectionString = None, xSection = -1, weightString = None, 
            isData = False, color = 0, texName = None):
        '''Load a CMG output directory from e.g. unzipped crab output in the 'Chunks' directory structure. 
           Expects the presence of the tree root file and the SkimReport.txt
        ''' 
        from cmg_helpers import read_cmg_normalization
        maxN = maxN if maxN is not None and maxN>0 else None

        # Read all subdirectories in the base directory. If chunkString is not None, require CMG output name formatting
        chunkDirectories = []

        # FIXME: Better to loop only over subdirectories in base directory?
        for x in os.listdir(baseDirectory): 
            if os.path.isdir(os.path.join(baseDirectory, x)):
                if not chunkString or (x.startswith(chunkString) and x.endswith('_Chunk')) or x==chunkString:
                    chunkDirectories.append(os.path.join(baseDirectory, x))
                    if len(chunkDirectories) == maxN: break

        logger.debug( "Found %i chunk directories with chunkString %s in base directory %s", \
                           len(chunkDirectories), chunkString, baseDirectory )
        normalization = 0
        files = []
        failedChunks=[]
        goodChunks  =[]

        for i, chunkDirectory in enumerate( chunkDirectories ):
            success = False
            logger.debug("Reading chunk %s", chunkDirectory)

            # Find normalization
            sumW = None
            for root, subFolders, filenames in os.walk( chunkDirectory ):
                # Determine normalization constant
                if 'SkimReport.txt' in filenames:
                    skimReportFilename = os.path.join(root, 'SkimReport.txt')
                    with open(skimReportFilename, 'r') as fin:
                        sumW = read_cmg_normalization(fin)
                        if not sumW:
                            logger.warning( "Read chunk %s and found report '%s' but could not read normalization.",
                                                 chunkDirectory, skimReportFilename )
            # Find treefile
            treeFile = None
            for root, subFolders, filenames in os.walk( chunkDirectory ):
                # Load tree file 
                if treeFilename in filenames:
                    treeFile = os.path.join(root, treeFilename)
                    # Check whether the root file is OK and contains the tree
                    if not helpers.checkRootFile(treeFile, checkForObjects=[treeName] ):
                        logger.warning( "Read chunk %s and found tree file '%s' but file looks broken.",  chunkDirectory, treeFile )
                        treeFile = None

            # If both normalization and tree file are OK, count the chunk as successful.
            if sumW and treeFile:
                files.append( treeFile )
                normalization += sumW
                logger.debug( "Successfully read chunk %s and incremented normalization by %7.2f",  chunkDirectory, sumW )
                success = True
                goodChunks.append( chunkDirectory )

            if not success:
                failedChunks.append( chunkDirectory )

        # Don't allow empty samples
        if len(goodChunks) == 0:
            raise helpers.EmptySampleError("Could not find good CMGOutput chunks for sample {0}. Total number of chunks: {1}. baseDirectory: {2}"\
                  .format(name, len(chunkDirectories), baseDirectory))

        # Log statements
        eff = 100*len(failedChunks)/float( len(chunkDirectories) )
        logger.debug("Loaded CMGOutput sample %s. Total number of chunks : %i. Normalization: %7.2f Bad: %i. Inefficiency: %3.3f", \
                          name, len(chunkDirectories), normalization, len(failedChunks), eff)

        for chunk in failedChunks:
            logger.debug( "Failed to load chunk %s", chunk)
        logger.debug( "Read %i chunks and total normalization of %f", len(files), normalization )
        return cls( name = name, treeName = treeName, files = files, normalization = normalization, 
            selectionString = selectionString, weightString = weightString, xSection = xSection,
            isData = isData, color = color, texName = texName )
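
The last two examples differ from the earlier crab/chunk loaders only by the extra xSection argument, which is forwarded to the constructor. A usage sketch (path and cross-section value are placeholders):

# Sketch; path and xSection are placeholders.
sample = Sample.fromCMGOutput(
    'TTJets',
    '/path/to/cmg/output',
    chunkString = 'TTJets',
    xSection    = 831.76,   # assumed cross-section in pb; placeholder value
)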