예제 #1
0
    def __loadChain(self):
        ''' Load the TChain. Private.

        Builds a ROOT.TChain for self.treeName from self.files, stores it in
        self._chain and then attaches the friend trees listed in self.friends
        (if that attribute exists).

        A file is added when self.skipCheck is true or when
        helpers.checkRootFile accepts it. Files that fail the check or raise
        IOError are logged and skipped.

        Raises helpers.EmptySampleError if self.files is empty or if not a
        single file could be added. In both cases self._chain is left as it
        was before the call.
        '''
        if len(self.files) == 0:
            raise helpers.EmptySampleError(
                "Sample {name} has no input files! Can not load.".format(
                    name=self.name))

        # Assemble the chain in a local first, so that a failed load does not
        # leave an empty TChain behind in self._chain.
        chain = ROOT.TChain(self.treeName)
        counter = 0
        for f in self.files:
            logger.debug("Now adding file %s to sample '%s'", f, self.name)
            try:
                if self.skipCheck or helpers.checkRootFile(
                        f, checkForObjects=[self.treeName]):
                    chain.Add(f)
                    counter += 1
                else:
                    logger.error(
                        "Check of root file failed. Skipping. File: %s", f)
            except IOError:
                # Best effort: an unreadable file is skipped, not fatal.
                logger.error("Could not load file %s", f)
        if counter == 0:
            raise helpers.EmptySampleError("No root files for sample %s." %
                                           self.name)
        logger.debug("Loaded %i files for sample '%s'.", counter,
                     self.name)
        self._chain = chain

        # Add friends
        if hasattr(
                self, 'friends'
        ):  # Catch cases where cached samples have no default value for friends attribute
            for friend_sample, friend_treeName in self.friends:
                self.chain.AddFriend(friend_sample.chain, friend_treeName)
예제 #2
0
    def reduceFiles( self, factor = 1, to = None ):
        ''' Reduce number of files in the sample

        'factor': if != 1, keep every factor-th file, starting with the first.
                  Takes precedence over 'to'.
        'to':     only used when factor == 1. Keep the first 'to' files. Nothing
                  happens if the sample does not have more than 'to' files.

        self.normalization (if not None) and self.reduce_files_factor are scaled
        by the fraction of files that was kept.

        Raises helpers.EmptySampleError if the reduction would leave no files.
        In that case the sample is not modified.
        '''
        len_before = len(self.files)
        norm_before = self.normalization

        if factor!=1:
            #self.files = self.files[:len_before/factor]
            reduced = self.files[0::factor]
            if len(reduced)==0:
                raise helpers.EmptySampleError( "No ROOT files for sample %s after reducing by factor %f" % (self.name, factor) )
        elif to is not None:
            if to>=len_before:
                return
            reduced = self.files[:to]
            # e.g. to=0: refuse to create an empty sample (this also avoids a
            # division by zero below when the sample was already empty).
            if len(reduced)==0:
                raise helpers.EmptySampleError( "No ROOT files for sample %s after reducing to %r files" % (self.name, to) )
        else:
            return

        self.files = reduced

        # Keeping track of reduceFile factors
        ratio = len(self.files)/float(len_before)
        if hasattr(self, "reduce_files_factor"):
            self.reduce_files_factor *= ratio
        else:
            self.reduce_files_factor = ratio
        self.normalization = ratio*self.normalization if self.normalization is not None else None

        logger.info("Sample %s: Reduced number of files from %i to %i. Old normalization: %r. New normalization: %r. factor: %3.3f", self.name, len_before, len(self.files), norm_before, self.normalization, ratio) 

        return
예제 #3
0
    def fromDirectory(cls, name, directory, treeName = "Events", normalization = None, \
                selectionString = None, weightString = None,
                isData = False, color = 0, texName = None, maxN = None):
        '''Load sample from directory or list of directories. If the name is "", enumerate the sample

        'directory': a single directory (string) or a list of directories. All
                     files ending in '.root' directly inside them are used.
        'treeName':  falls back to "Events" when empty or None.
        'maxN':      if a positive number, use only the first maxN files.
        All other arguments are handed to the constructor unchanged.

        Raises helpers.EmptySampleError if a directory contains no '.root' file.
        '''
        # Work with directories and list of directories
        directories = [directory] if isinstance(directory, str) else directory

        # If no name, enumerate them.
        if not name: name = new_name()

        # find all files
        files = []
        for d in directories:
            # os.listdir returns entries in arbitrary order; sort so that the
            # file order (and the maxN selection below) is reproducible.
            fileNames = [ os.path.join(d, f) for f in sorted(os.listdir(d)) if f.endswith('.root') ]
            if len(fileNames) == 0:
                raise helpers.EmptySampleError( "No root files found in directory %s." %d )
            files.extend( fileNames )
        if not treeName:
            treeName = "Events"
            logger.debug("Argument 'treeName' not provided, using 'Events'.")

        # restrict files (maxN of None or <= 0 means no restriction)
        maxN = maxN if maxN is not None and maxN>0 else None
        files = files[:maxN]

        sample =  cls(name = name, treeName = treeName, files = files, normalization = normalization, \
            selectionString = selectionString, weightString = weightString,
            isData = isData, color=color, texName = texName)
        logger.debug("Loaded sample %s from %i files.", name, len(files))
        return sample
예제 #4
0
    def fromDirectory(cls, name, directory, color=0, texName=None, maxN=None):
        '''Load sample from directory or list of directories. If the name is "", enumerate the sample

        'directory': a single directory (string) or a list of directories. All
                     files ending in '.root' directly inside them are used.
        'maxN':      if a positive number, use only the first maxN files.
        'color' and 'texName' are handed to the constructor unchanged.

        Raises helpers.EmptySampleError if a directory contains no '.root' file.
        '''
        # Work with directories and list of directories
        directories = [directory] if isinstance(directory, str) else directory

        # If no name, enumerate them.
        if not name: name = newName()

        # find all files
        files = []
        for d in directories:
            # os.listdir returns entries in arbitrary order; sort so that the
            # file order (and the maxN selection below) is reproducible.
            fileNames = [
                os.path.join(d, f) for f in sorted(os.listdir(d))
                if f.endswith('.root')
            ]
            if len(fileNames) == 0:
                raise helpers.EmptySampleError(
                    "No root files found in directory %s." % d)
            files.extend(fileNames)

        # restrict files (maxN of None or <= 0 means no restriction)
        maxN = maxN if maxN is not None and maxN > 0 else None
        files = files[:maxN]

        return cls(name=name, files=files, color=color, texName=texName)
예제 #5
0
    def fromDPMDirectory(cls, name, directory, redirector='root://hephyse.oeaw.ac.at/', treeName = "Events", normalization = None, xSection = -1, \
                selectionString = None, weightString = None,
                isData = False, color = 0, texName = None, maxN = None, noCheckProxy=False):
        '''Load sample from a DPM directory or a list of DPM directories.

        'directory':    a single directory (string) or a list of directories, each
                        starting with '/dpm'. They are listed with
                        'xrdfs <redirector> ls <directory>' (up to 10 attempts each).
        'redirector':   prepended to every file path that is found.
        'maxN':         if a positive number, use only the first maxN files.
        'noCheckProxy': if true, renew_proxy is not called.
        All other arguments are handed to the constructor unchanged.

        Raises ValueError if a directory does not start with '/dpm' and
        helpers.EmptySampleError if no '.root' file is found for a directory
        (also when every listing attempt failed).
        '''

        # Work with directories and list of directories
        directories = [directory] if isinstance(directory, str) else directory
        if not all(d.startswith("/dpm") for d in directories):
            raise ValueError("DPM directories do not start with /dpm/")

        # If no name, enumerate them.
        if not name: name = new_name()

        # Renew proxy
        from RootTools.core.helpers import renew_proxy
        proxy_path = os.path.expandvars('$HOME/private/.proxy')
        if not noCheckProxy:
            proxy = renew_proxy(proxy_path)
        else:
            proxy = proxy_path
            logger.info(
                "Not checking your proxy. Asuming you know it's still valid.")
        logger.info("Using proxy %s", proxy)

        nAttempts = 10
        files = []
        for d in directories:
            cmd = ["xrdfs", redirector, "ls", d]
            fileList = []
            for attempt in range(nAttempts):
                try:
                    # universal_newlines: get text instead of bytes on Python 3.
                    output = subprocess.check_output(cmd, universal_newlines=True)
                except (subprocess.CalledProcessError, OSError) as e:
                    # Listing is retried; only these expected failures are swallowed.
                    logger.warning("Attempt %i/%i of '%s' failed: %s",
                                   attempt + 1, nAttempts, " ".join(cmd), e)
                    continue
                fileList = output.splitlines()
                break
            rootFiles = [
                redirector + os.path.join(d, filename)
                for filename in fileList if filename.endswith(".root")
            ]
            if len(rootFiles) == 0:
                raise helpers.EmptySampleError(
                    "No root files found in directory %s." % d)
            files.extend(rootFiles)

        # restrict files. Done once at the end so that the limit also holds
        # when several directories are given.
        if maxN is not None and maxN > 0:
            files = files[:maxN]

        sample =  cls(name = name, treeName = treeName, files = files, normalization = normalization, xSection = xSection,\
            selectionString = selectionString, weightString = weightString,
            isData = isData, color=color, texName = texName)
        logger.debug("Loaded sample %s from %i files.", name, len(files))
        return sample
예제 #6
0
    def fromDirectory(cls, name, directory, redirector = None, treeName = "Events", normalization = None, xSection = -1, \
                selectionString = None, weightString = None,
                isData = False, color = 0, texName = None, maxN = None, skipCheck = False):
        '''Load sample from directory or list of directories. If the name is "", enumerate the sample

        'directory':  a single directory (string) or a list of directories.
        'redirector': if None, the directories are read from the local file
                      system. Otherwise they are listed with
                      'xrdfs <redirector> ls <directory>' and every file path is
                      prefixed with '<redirector>/'.
        'treeName':   falls back to "Events" when empty or None.
        'maxN':       if a positive number, use only the first maxN files.
        All other arguments are handed to the constructor unchanged.

        Raises helpers.EmptySampleError if no '.root' file is found for a directory.
        '''
        # Work with directories and list of directories
        directories = [directory] if isinstance(directory, str) else directory

        # If no name, enumerate them.
        if not name: name = new_name()

        # find all files
        files = []
        for d in directories:
            if redirector is None:
                # os.listdir returns entries in arbitrary order; sort so that
                # the file order (and the maxN selection) is reproducible.
                fileNames = [
                    os.path.join(d, f) for f in sorted(os.listdir(d))
                    if f.endswith('.root')
                ]
                logger.debug("Found %i files in directory %s", len(fileNames),
                             d)
            else:
                # Argument list instead of a shell string: redirector and
                # directory are passed to xrdfs verbatim, without shell parsing.
                try:
                    p = subprocess.Popen(["xrdfs", redirector, "ls", d],
                                         stdout=subprocess.PIPE,
                                         stderr=subprocess.STDOUT,
                                         universal_newlines=True)
                    # communicate() reads all output and waits for the process.
                    output = p.communicate()[0]
                except OSError as e:
                    # e.g. xrdfs not installed; reported as an empty listing below.
                    logger.error("Could not run xrdfs for directory %s: %s", d, e)
                    output = ""
                fileNames = [
                    redirector + '/' + line
                    for line in output.splitlines() if line.endswith('.root')
                ]
                logger.debug("Found %i files in directory (xrootd) %s",
                             len(fileNames), d)
            if len(fileNames) == 0:
                raise helpers.EmptySampleError(
                    "No root files found in directory %s." % d)
            files.extend(fileNames)

        if not treeName:
            treeName = "Events"
            logger.debug("Argument 'treeName' not provided, using 'Events'.")

        # restrict files (maxN of None or <= 0 means no restriction)
        maxN = maxN if maxN is not None and maxN > 0 else None
        files = files[:maxN]

        sample =  cls(name = name, treeName = treeName, files = files, normalization = normalization, xSection = xSection,\
            selectionString = selectionString, weightString = weightString,
            isData = isData, color=color, texName = texName, skipCheck = skipCheck)
        logger.debug("Loaded sample %s from %i files.", name, len(files))
        return sample
예제 #7
0
    def __init__(self, 
            name, 
            treeName , 
            files = [], 
            normalization = None, 
            selectionString = None, 
            weightString = None,
            xSection = -1,
            isData = False, 
            color = 0, 
            texName = None):
        ''' Handling of sample. Uses a TChain to handle root files with flat trees.
            'name': Name of the sample, 
            'treeName': name of the TTree in the input files
            'files': input files of the sample; must not be empty
            'normalization': can be set in order to later calculate weights, 
            e.g. to total number of events before all cuts or the sum of NLO gen weights
            'selectionString': sample specific string based selection (can be list of strings)
            'weightString': sample specific string based weight (can be list of strings)
            'xSection': cross section of the sample
            'isData': Whether the sample is real data or not (simulation)
            'color': ROOT color to be used in plot scripts
            'texName': ROOT TeX string to be used in legends etc. Defaults to 'name'.

            Raises helpers.EmptySampleError if 'files' is empty.
        '''

        self.name = name
        self.treeName = treeName
        # Keep a private copy: neither the caller's list nor the shared default
        # list may be aliased by the sample.
        self.files = list(files)
        self.xSection = xSection

        if not len(self.files)>0:
            raise helpers.EmptySampleError( "No ROOT files for sample %s! Files: %s" % (self.name, self.files) )

        self.normalization = normalization
        # No chain yet; it is created elsewhere.
        self._chain = None

        self.__selectionStrings = [] 
        self.setSelectionString( selectionString )

        self.__weightStrings = [] 
        self.setWeightString( weightString )

        self.isData = isData
        self.color = color
        self.texName = texName if texName is not None else name

        # Other samples. Add friend elements (friend, treeName)
        self.friends = []

        logger.debug("Created new sample %s with %i files, treeName %s,  selectionStrings %r and weightStrings %r.", 
            name, len(self.files), treeName, self.__selectionStrings, self.__weightStrings)
예제 #8
0
    def __init__(self, name, files = [],  color = 0, texName = None):
        ''' Base class constructor for all sample classes.
            'name': Name of the sample, 
            'files': input files of the sample; must not be empty
            'color': ROOT color to be used in plot scripts
            'texName': ROOT TeX string to be used in legends etc. Defaults to 'name'.

            Raises helpers.EmptySampleError if 'files' is empty.
        '''

        self.name  = name
        # Keep a private copy: neither the caller's list nor the shared default
        # list may be aliased by the sample.
        self.files = list(files)

        if not len(self.files)>0:
            raise helpers.EmptySampleError( "No ROOT files for sample %s! Files: %s" % (self.name, self.files) )

        self.color = color
        self.texName = texName if texName is not None else name

        logger.debug("Created new sample %s with %i files.", name, len(self.files))
예제 #9
0
    def __init__(self, name, files=[], color=0, texName=None):
        ''' Constructor of FWLiteSample.
            'name': Name of the sample, 
            'files': input files, handed to the parent constructor
            'color': ROOT color to be used in plot scripts
            'texName': ROOT TeX string to be used in legends etc.

            The parent constructor is called with normalization, xSection and
            isData set to None.
            Raises helpers.EmptySampleError if self.files is empty afterwards.
        '''

        parent_args = dict(
            name=name,
            files=files,
            normalization=None,
            xSection=None,
            isData=None,
            color=color,
            texName=texName,
        )
        super(FWLiteSample, self).__init__(**parent_args)

        # A sample without input files is not allowed.
        if len(self.files) <= 0:
            message = "No ROOT files for sample %s! Files: %s" % (
                self.name, self.files)
            raise helpers.EmptySampleError(message)

        n_files = len(self.files)
        logger.debug("Created new sample %s with %i files.", name, n_files)
예제 #10
0
    def fromCMGCrabDirectory(cls, name, baseDirectory, treeFilename = 'tree.root', treeName = 'tree', maxN = None, \
            selectionString = None, weightString = None,
            isData = False, color = 0, texName = None):
        '''Load a CMG crab output directory

        Walks through 'baseDirectory' and pairs files named '..._n.root' and
        '..._n.tgz', where n is the job number. For each pair the normalization
        is read from the 'SkimReport.txt' inside the tgz and the root file is
        checked for the tree 'treeName'. Jobs where either step fails are
        skipped. The sample normalization is the sum over all accepted jobs.

        'maxN': if a positive number, only the maxN pairs with the lowest job
                numbers are considered.
        'treeFilename': not used by this method.
        All other arguments are handed to the constructor unchanged.

        Raises helpers.EmptySampleError if no job could be loaded.
        ''' 
        import tarfile
        from cmg_helpers import read_cmg_normalization

        maxN = maxN if maxN is not None and maxN>0 else None

        # Walk through all subdirectories and pick up pairs of files '..._n.root and ..._n.tgz where n is the job number'
        treeFiles = {}
        zipFiles  = {}
        for root, subFolders, filenames in os.walk( baseDirectory ):
            for filename in filenames:
                base, ext = os.path.splitext( filename )
                try:
                    n = int(base.split('_')[-1])
                except ValueError:
                    # filename is not of the form 'xyz_n' where n is the job number
                    continue
                # add the tgz and files to the dict.   
                filename_ = os.path.join(root, filename)
                if ext=='.root':
                    treeFiles[n] = filename_
                if ext=='.tgz':
                    zipFiles[n] = filename_
        # Find pairs of zip and root files
        pairs = set(zipFiles) & set(treeFiles)
        n_jobs = len( set(zipFiles) | set(treeFiles) )

        # Process the jobs in ascending job number (set order is arbitrary) and
        # apply the maxN restriction.
        selected = sorted(pairs)[:maxN]

        normalization = 0
        files = []
        failedJobs = []
        for n in selected:
            sumW = None
            # 'with' closes the archive also if reading the report raises.
            with tarfile.open( zipFiles[n], 'r:gz' ) as tf:
                for member in tf.getmembers():
                    if "SkimReport.txt" in member.name:
                        sumW = read_cmg_normalization(tf.extractfile(member))
                    if sumW is not None: break
            if sumW is None:
                logger.warning( "No normalization found when reading tar file %s", zipFiles[n] )

            # Check treefile for whether the tree 'treeName' can be found.
            # This is an implicit check for broken, recovered or otherwise corrupted root files.
            treeFile = treeFiles[n] if helpers.checkRootFile(treeFiles[n], checkForObjects = [treeName] ) else None

            if treeFile is None: logger.warning( "File %s looks broken. Checked for presence of tree %s.", treeFiles[n] , treeName )

            # If both, normalization and treefile are OK call it successful.
            if sumW and treeFile:
                files.append( treeFile )
                normalization += sumW
                logger.debug( "Successfully read job %i and incremented normalization by %7.2f",  n, sumW )
            else:
                failedJobs.append( n )

        # Don't allow empty samples
        if len(files) == 0:
            raise helpers.EmptySampleError("Could not find valid crab CMG output for sample {0}. Total number of jobs: {1}. baseDirectory: {2}"\
                  .format(name, len(pairs), baseDirectory))

        # Log statements
        eff = 100*len(failedJobs)/float( n_jobs )
        # n_jobs counts every job number seen, len(pairs) those with both files.
        logger.debug("Loaded CMGOutput sample %s. Total number of  jobs: %i, both tgz and root: %i. Normalization: %7.2f Bad: %i. Inefficiency: %3.3f", \
                          name, n_jobs, len(pairs), normalization, len(failedJobs), eff)

        logger.debug( "Read %i chunks and total normalization of %f", len(files), normalization )
        return cls( name = name, treeName = treeName, files = files, normalization = normalization, 
                selectionString = selectionString, weightString = weightString, 
                isData = isData, color = color, texName = texName )
예제 #11
0
    def fromCMGOutput(cls, name, baseDirectory, treeFilename = 'tree.root', chunkString = None, treeName = 'tree', maxN = None, \
            selectionString = None, weightString = None, 
            isData = False, color = 0, texName = None):
        '''Load a CMG output directory from e.g. unzipped crab output in the 'Chunks' directory structure. 
           Expects the presence of the tree root file and the SkimReport.txt

           'chunkString': if given, only subdirectories that are named chunkString
                          or that start with chunkString and end with '_Chunk' are used.
           'maxN':        if a positive number, at most maxN chunk directories are read.
           A chunk is accepted if a normalization could be read from its
           SkimReport.txt and its tree file passes helpers.checkRootFile for
           'treeName'. The sample normalization is the sum over all accepted chunks.
           All other arguments are handed to the constructor unchanged.

           Raises helpers.EmptySampleError if no chunk could be loaded.
        ''' 
        from cmg_helpers import read_cmg_normalization
        maxN = maxN if maxN is not None and maxN>0 else None

        # Reading all subdirectories in base directory. If chunkString != None, require cmg output name formatting
        chunkDirectories = []

        # os.listdir returns entries in arbitrary order; sort so that the
        # maxN selection is reproducible.
        for x in sorted(os.listdir(baseDirectory)): 
            if os.path.isdir(os.path.join(baseDirectory, x)):
                if not chunkString or (x.startswith(chunkString) and x.endswith('_Chunk')) or x==chunkString:
                    chunkDirectories.append(os.path.join(baseDirectory, x))
                    if len(chunkDirectories)==maxN:break

        logger.debug( "Found %i chunk directories with chunkString %s in base directory %s", \
                           len(chunkDirectories), chunkString, baseDirectory )
        normalization = 0
        files = []
        failedChunks=[]

        for chunkDirectory in chunkDirectories:
            logger.debug("Reading chunk %s", chunkDirectory)

            sumW = None
            treeFile = None
            # One walk finds both the report and the tree file. If there are
            # several matches, the one visited last is used.
            for root, subFolders, filenames in os.walk( chunkDirectory ):
                # Determine normalization constant
                if 'SkimReport.txt' in filenames:
                    skimReportFilename = os.path.join(root, 'SkimReport.txt')
                    with open(skimReportFilename, 'r') as fin:
                        sumW = read_cmg_normalization(fin)
                    if not sumW:
                        logger.warning( "Read chunk %s and found report '%s' but could not read normalization.",
                                             chunkDirectory, skimReportFilename )
                # Load tree file 
                if treeFilename in filenames:
                    candidate = os.path.join(root, treeFilename)
                    # Checking whether root file is OK and contains a tree
                    if helpers.checkRootFile(candidate, checkForObjects=[treeName] ):
                        treeFile = candidate
                    else:
                        logger.warning( "Read chunk %s and found tree file '%s' but file looks broken.",  chunkDirectory, candidate )
                        # A broken file must not end up in the sample.
                        treeFile = None

            # If both, normalization and treefile are OK call it successful.
            if sumW and treeFile:
                files.append( treeFile )
                normalization += sumW
                logger.debug( "Successfully read chunk %s and incremented normalization by %7.2f",  chunkDirectory, sumW )
            else:
                failedChunks.append( chunkDirectory )

        # Don't allow empty samples
        if len(files) == 0:
            raise helpers.EmptySampleError("Could not find good CMGOutput chunks for sample {0}. Total number of chunks: {1}. baseDirectory: {2}"\
                  .format(name, len(chunkDirectories), baseDirectory))

        # Log statements
        eff = 100*len(failedChunks)/float( len(chunkDirectories) )
        logger.debug("Loaded CMGOutput sample %s. Total number of chunks : %i. Normalization: %7.2f Bad: %i. Inefficiency: %3.3f", \
                          name, len(chunkDirectories), normalization, len(failedChunks), eff)

        for chunk in failedChunks:
            logger.debug( "Failed to load chunk %s", chunk)
        logger.debug( "Read %i chunks and total normalization of %f", len(files), normalization )
        return cls( name = name, treeName = treeName, files = files, normalization = normalization, 
            selectionString = selectionString, weightString = weightString,
            isData = isData, color = color, texName = texName )