Code example #1
def expand_file_list(fileEntries):
    for fileEntry in fileEntries:
        if fileEntry.find("*") != -1:
            for file in castor.nslsl(clean_name(fileEntry)):
                yield "rfio:" + file['path']
        else:
            yield fileEntry
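expand_file_list lazily expands wildcard entries through the CASTOR name server and prefixes the results with the rfio: protocol. A minimal local stand-in for the same pattern, using glob instead of the CMSSW-specific castor.nslsl and clean_name helpers (which are assumed to behave as shown above):

import glob

def expand_local_file_list(file_entries):
    # Expand any entry containing a '*' via glob; pass all other entries through unchanged.
    for entry in file_entries:
        if "*" in entry:
            for path in sorted(glob.glob(entry)):
                yield "file:" + path
        else:
            yield entry

# e.g. list(expand_local_file_list(["/tmp/skims/*.root", "file:/tmp/fixed.root"]))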
Code example #2
def local_version_current(castor_file):
    ''' Check if the local copy of [castor_file] exists and is up to date '''
    local_file = local_version(castor_file)
    if not os.path.exists(local_file):
        return False
    local_stat = os.stat(local_file)
    # Get last mod time of local file
    #local_mtime = time.ctime(local_stat.st_mtime)
    local_mtime = time.localtime(local_stat.st_mtime)
    local_size = local_stat.st_size
    # This call is memoized
    castor_stat = list(castor.nslsl(castor_file))[0]
    castor_size = castor_stat["size"]
    #castor_mtime = time.mktime(
    #    unixtime_from_timestamp(castor_stat["Last modify"]))
    castor_mtime = castor_stat['time']
    #print local_mtime, castor_mtime
    # Check sizes are same
    if local_size != castor_size:
        print "Local copy of", castor_file, " is the wrong size: %i != %i" % (
            local_size, castor_size)
        return False
    # Check local file is newer
    if local_mtime < castor_mtime:
        print "Local copy of", castor_file, " is outdated!"
        print "local:", time.asctime(local_mtime), \
                "castor:", time.asctime(castor_mtime)
        return False
    return True
Code example #3
def local_version_current(castor_file, local_directory = LOCAL_DIRECTORY):
    ''' Check if the local copy of [castor_file] exists and is up to date '''
    local_file = local_version(castor_file, local_directory)
    if not os.path.exists(local_file):
        return False
    local_stat = os.stat(local_file)
    # Get last mod time of local file
    #local_mtime = time.ctime(local_stat.st_mtime)
    local_mtime = time.localtime(local_stat.st_mtime)
    local_size = local_stat.st_size
    # This call is memoized
    castor_stat = None
    if is_on_castor(castor_file):
        castor_stat = list(castor.nslsl(castor_file))[0]
    elif is_on_eos(castor_file):
        castor_stat = list(eos.lsl(castor_file))[0]
    else:
        raise ValueError("Invalid fileName = %s !!" % castor_file)
    castor_size = castor_stat["size"]
    #castor_mtime = time.mktime(
    #    unixtime_from_timestamp(castor_stat["Last modify"]))
    castor_mtime = castor_stat['time']
    #print local_mtime, castor_mtime
    # Check sizes are same
    if local_size != castor_size:
        print "Local copy of", castor_file, " is the wrong size: %i != %i"% (local_size, castor_size)
        return False
    # Check local file is newer
    if local_mtime < castor_mtime:
        print "Local copy of", castor_file, " is outdated!"
        print "local:", time.asctime(local_mtime), \
                "castor:", time.asctime(castor_mtime)
        return False
    return True
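Both variants above boil down to the same check: the local copy must exist, match the remote file size, and be at least as new as the remote modification time. A castor-free sketch of that comparison, where reference_size and reference_mtime stand in for the values read from castor.nslsl or eos.lsl:

import os
import time

def local_copy_current(local_file, reference_size, reference_mtime):
    # reference_mtime is a time.struct_time; struct_time objects compare element-wise.
    if not os.path.exists(local_file):
        return False
    local_stat = os.stat(local_file)
    if local_stat.st_size != reference_size:
        return False
    return time.localtime(local_stat.st_mtime) >= reference_mtime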
Code example #4
def castor_source(directory):
    " Build a generator that lists file in a castor directory, sorted by time "
    print "<castor_source>", directory
    # Sort the directory listing by modification time
    files = list(castor.nslsl(directory))
    files.sort(key=lambda x: x['time'])
    for file_info in files:
        if not file_info['size']:
            print "Warning <castor_source>: file %s has size 0" % \
                    file_info['path']
        yield file_info
Code example #5
File: harvesting.py  Project: aashaqshah/cmssw-1
def castor_source(directory):
    " Build a generator that lists file in a castor directory, sorted by time "
    print "<castor_source>", directory
    # Sort the directory listing by modification time
    files = list(castor.nslsl(directory))
    files.sort(key = lambda x: x['time'])
    for file_info in files:
        if not file_info['size']:
            print "Warning <castor_source>: file %s has size 0" % \
                    file_info['path']
        yield file_info
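castor_source only needs the directory listing as dicts carrying 'path', 'size' and 'time' keys; the sorting and the zero-size warning can be exercised without CASTOR access. A small stand-in, assuming the same dict layout that castor.nslsl appears to yield:

def sorted_source(file_infos):
    # file_infos: any iterable of dicts with 'path', 'size' and 'time' keys.
    for file_info in sorted(file_infos, key=lambda x: x['time']):
        if not file_info['size']:
            print("Warning <sorted_source>: file %s has size 0" % file_info['path'])
        yield file_info

# e.g. for info in sorted_source([{'path': '/a.root', 'size': 0, 'time': 2},
#                                 {'path': '/b.root', 'size': 5, 'time': 1}]):
#     ...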
Code example #6
def crabdir_source(directory):
    #print "Getting list of files from crab dir:", directory
    crab_files = list(crab.map_lfns_to_castor(crab.lfns(directory)))

    good_ids = set(get_crab_id(file) for file in crab_files)
    # Get good crab 'ids' - a tuple of the crab job, retry, and random code
    # Figure out what castor directory we are in so we can get all the
    # information.
    #print "getting all files"
    if crab_files:
        castor_dir = os.path.dirname(crab_files[0]) + '/'
        castor_files_info = castor.nslsl(castor_dir)
        for file_info in castor_files_info:
            if get_crab_id(file_info['file']) in good_ids:
                yield file_info
Code example #7
File: harvesting.py  Project: aashaqshah/cmssw-1
def crabdir_source(directory):
    #print "Getting list of files from crab dir:", directory
    crab_files = list(crab.map_lfns_to_castor(crab.lfns(directory)))

    good_ids = set(get_crab_id(file) for file in crab_files)
    # Get good crab 'ids' - a tuple of the crab job, retry, and random code
    # Figure out what castor directory we are in so we can get all the
    # information.
    #print "getting all files"
    if crab_files:
        castor_dir = os.path.dirname(crab_files[0]) + '/'
        castor_files_info = castor.nslsl(castor_dir)
        for file_info in castor_files_info:
            if get_crab_id(file_info['file']) in good_ids:
                yield file_info
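crabdir_source relies on get_crab_id to extract, per the comment above, a tuple of the CRAB job number, retry number and random code from each file name. A hypothetical stand-alone version of that extraction; the exact naming convention is an assumption based on the skim-file regexes used elsewhere in these examples:

import os
import re

# Assumed CRAB output naming: <anything>_<job>_<retry>_<code>.root
_crab_id_regex = re.compile(r"_(?P<job>\d+)_(?P<retry>\d+)_(?P<code>[a-zA-Z0-9]+)\.root$")

def get_crab_id_stub(file_name):
    # Return (job, retry, code) for names like 'skim_sample_12_3_ab12cd.root', else None.
    match = _crab_id_regex.search(os.path.basename(file_name))
    return match.groups() if match else None

# e.g. get_crab_id_stub('skim_Ztautau_12_3_ab12cd.root') -> ('12', '3', 'ab12cd')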
Code example #8
if len(samplesToAnalyze) == 0:
    samplesToAnalyze = recoSampleDefinitionsTauIdCommissioning_7TeV['SAMPLES_TO_RUN']
if len(eventSelectionsToAnalyze) == 0:
    eventSelectionsToAnalyze = eventSelections.keys()

print "samplesToAnalyze = %s" % samplesToAnalyze
print "eventSelectionsToAnalyze = %s" % eventSelectionsToAnalyze

def runCommand(commandLine):
    sys.stdout.write("%s\n" % commandLine)
    args = shlex.split(commandLine)
    retVal = subprocess.Popen(args, stdout = subprocess.PIPE)
    retVal.wait()
    return retVal

# find and delete "bad" files
files = [ file_info for file_info in castor.nslsl(harvestingFilePath) ]
for file in files:
    if file['size'] < 1000:
        runCommand("%s %s" % (executable_rfrm, file['path']))

#--------------------------------------------------------------------------------
#
# build config files for running FWLiteTauFakeRateAnalyzer macro on lxbatch
#
fileNames_FWLiteTauFakeRateAnalyzer         = {}
bsubJobNames_FWLiteTauFakeRateAnalyzer      = {}
bjobListFileNames_FWLiteTauFakeRateAnalyzer = {}
for sampleToAnalyze in samplesToAnalyze:
    fileNames_FWLiteTauFakeRateAnalyzer[sampleToAnalyze]         = {}
    bsubJobNames_FWLiteTauFakeRateAnalyzer[sampleToAnalyze]      = {}
    bjobListFileNames_FWLiteTauFakeRateAnalyzer[sampleToAnalyze] = {}
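runCommand above attaches a pipe to the child's stdout but only calls wait(); if the command prints more than the pipe buffer holds, that can stall. A variant using communicate(), which drains the pipe and also hands back the captured output, is one way around this (a sketch, not the helper used elsewhere in these scripts):

import shlex
import subprocess

def run_command(commandLine):
    args = shlex.split(commandLine)
    proc = subprocess.Popen(args, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    stdout, stderr = proc.communicate()
    return proc.returncode, stdout, stderr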
Code example #9
    return retVal.stdout.readlines()


def format_vstring(list_of_strings):
    retVal = ""
    for i, string_i in enumerate(list_of_strings):
        if i > 0:
            retVal += " "
        retVal += string_i
    return retVal


inputFileNames = []
if inputFilePath.find('/castor/') != -1:
    inputFileNames = [
        '%s' % file_info['path'] for file_info in castor.nslsl(inputFilePath)
    ]
elif inputFilePath.find("/store") != -1:
    inputFileNames = [
        file_info['path'] for file_info in eos.lsl(inputFilePath)
    ]
else:
    inputFileNames = [
        '%s' % os.path.join(inputFilePath, file_name)
        for file_name in os.listdir(inputFilePath)
    ]

#print "inputFileNames = %s" % inputFileNames

inputFileNames_matched = []
for inputFileName in inputFileNames:
if sample_type == 'Z':
    inputFilePath = '/data1/veelken/CMSSW_5_2_x/skims/genHtautauLeptonPairAcc/user/veelken/CMSSW_5_2_x/skims/'
    inputFile_regex = \
      r"[a-zA-Z0-9_/:.]*genTauLeptonsPairAccSkim_ZplusJets_%s_(?P<gridJob>\d*)(_(?P<gridTry>\d*))*_(?P<hash>[a-zA-Z0-9]*).root" % channel
elif sample_type == 'Higgs':
    inputFilePath = '/data1/veelken/CMSSW_5_2_x/skims/genHtautauLeptonPairAcc/user/v/veelken/CMSSW_5_2_x/skims/'
    inputFile_regex = \
      r"[a-zA-Z0-9_/:.]*genTauLeptonsPairAccSkim_(ggHiggs|ggPhi|vbfHiggs)%s_%s_(?P<gridJob>\d*)(_(?P<gridTry>\d*))*_(?P<hash>[a-zA-Z0-9]*).root" % (massPoint, channel)
else:
    raise ValueError("Invalid sample type = %s !!" % sample_type)

# check if name of inputFile matches regular expression
inputFileNames = []
files = None
if inputFilePath.startswith('/castor/'):
    files = [ "".join([ "rfio:", file_info['path'] ]) for file_info in castor.nslsl(inputFilePath) ]
elif inputFilePath.startswith('/store/'):
    files = [ file_info['path'] for file_info in eos.lsl(inputFilePath) ]
else:
    files = [ "".join([ "file:", inputFilePath, file ]) for file in os.listdir(inputFilePath) ]
for file in files:
    #print "file = %s" % file
    inputFile_matcher = re.compile(inputFile_regex)
    if inputFile_matcher.match(file):
        inputFileNames.append(file)
#print "inputFileNames = %s" % inputFileNames 

process.source.fileNames = cms.untracked.vstring(inputFileNames)
#--------------------------------------------------------------------------------

process.testSVfitTrackLikelihoodProductionSequence = cms.Sequence()
        #    (to avoid exception from castor that inputFilePath does not exist)
        if not evtSel in recoSampleDefinitionsTauIdCommissioning_7TeV['RECO_SAMPLES'][sample]['jobs']:
            continue
                
        inputFilePath = os.path.join(castorFilePath, evtSel, version, sample) + '/' # CV: add trailing '/'
        outputFilePath = inputFilePath
        print "harvesting files in inputFilePath = %s," % inputFilePath \
             + " copying harvested files to outputFilePath = %s..." % outputFilePath
       
        plot_regex = r"dont match anything"
        skim_regex = r"%s" % recoSampleDefinitionsTauIdCommissioning_7TeV['ROOT_FILE_NAMES'][evtSel].replace(
            ".root", "_(?P<gridJob>\d*)(_(?P<gridTry>\d*))*_(?P<hash>[a-zA-Z0-9]*).root")

        if deleteOldHarvestFiles:
            print "deleting old harvest files..."
            files = [ file_info['path'] for file_info in castor.nslsl(outputFilePath) ]
            harvest_regex = r"[a-zA-Z0-9_/:.]*skim__%s_%s_%s_chunk_(?P<jobId>\d*)_(?P<hash>[a-zA-Z0-9]*).root" % (sample, evtSel, version)
            harvest_regex_matcher = re.compile(harvest_regex)
            for file in files:
                if harvest_regex_matcher.match(file):
                    print "deleting file = %s" % file
                    os.system('rfrm %s' % file)
        
        def matches_either(files):
            # Check if the file matches either of the regexes we are interested in.
            # We do this to skip extra files in the directories before we pass them to
            # clean_by_crab_id
            skim_matcher = re.compile(skim_regex)
            for file in files:
                 #print " unmatched file: %s" % file['path']
                 if skim_matcher.match(file['file']):
bsubJobNames = {}
for sampleToAnalyze in samplesToAnalyze:

    print "checking sample %s" % sampleToAnalyze

    bsubFileNames[sampleToAnalyze] = {}
    bsubScriptFileNames[sampleToAnalyze] = {}
    bsubJobNames[sampleToAnalyze] = {}

    inputFilePath = samples[sampleToAnalyze]['skimFilePath']
    print " inputFilePath = %s" % inputFilePath

    inputFileNames = None
    if inputFilePath.find("/castor") != -1:
        inputFileNames = [
            file_info['path'] for file_info in castor.nslsl(inputFilePath)
        ]
    elif inputFilePath.find("/store") != -1:
        inputFileNames = [
            file_info['path'] for file_info in eos.lsl(inputFilePath)
        ]
    else:
        inputFileNames = [file for file in os.listdir(inputFilePath)]
    #print " inputFileNames = %s" % inputFileNames

    inputFileNames_matched = [
        os.path.basename(input_file)
        for input_file in input_mapper(inputFileNames, sampleToAnalyze)
    ]
    #print "inputFileNames_matched = %s" % inputFileNames_matched
    print "--> found %i inputFiles" % len(inputFileNames_matched)
    if metResolution is not None:
        retVal = "MEtRes%1.0f" % metResolution
        retVal = retVal.replace(".", "_")
    return retVal

# CV: fill mapping of fileName to number of events contained in file into temporary cache
#     in order to reduce castor file I/O
print "initializing mapping of fileNames to number of events contained in each file..."
numEventsMap = {}
fileNamesToMap = []
for sampleToAnalyze in samplesToAnalyze:
    for channelToAnalyze in channelsToAnalyze:
        inputFilePath_channel = os.path.join(inputFilePath, version, channelToAnalyze)
        inputFileNames = None
        if inputFilePath_channel.find('/castor/') != -1:
            inputFileNames = [ file_info['path'] for file_info in castor.nslsl(inputFilePath_channel) ]
        else:
            inputFileNames = os.listdir(inputFilePath_channel)
        for inputFileName in inputFileNames:        
            if inputFileName.find("".join(['_', sampleToAnalyze, '_'])) != -1 or \
               inputFileName.find("".join(['/', sampleToAnalyze, '_'])) != -1:
                fileNamesToMap.append(inputFileName)
                # CV: request inputFiles located on castor to be prestaged
                #     in order to speed-up computation of numbers of events contained in each file
                #     by 'buildConfigFile_SVfitEventHypothesisAnalyzer' function later
                if inputFilePath_channel.find('/castor/') != -1:
                    commandLine = '%s -M %s -U myfiles' % (executable_stager, inputFileName)
                    runCommand(commandLine)
print " done."

#--------------------------------------------------------------------------------
import subprocess

channel = 'ZtoMuTau_tauIdEff'
configFile = 'produceTauPtResPATTuple_cfg.py'
analysisFilePath = getAnalysisFilePath(channel)
jobId = '2011Aug18'

version = 'V2exp'

samplesToAnalyze = ['Ztautau_powheg']

outputFilePath = "/castor/cern.ch/user/v/veelken/CMSSW_4_2_x/PATtuples/TauPtRes/V2exp/"

# Get all the skim files from the castor directory
skimFilePath = getBatchHarvestLocation(channel)
skim_files = [file_info['path'] for file_info in castor.nslsl(skimFilePath)]

if not os.path.isdir("lxbatch_pattuple"):
    print 'Creating directory to store the lxbatch jobs: lxbatch_pattuple'
    os.mkdir('lxbatch_pattuple')

if not os.path.isdir("lxbatch_pat_log"):
    print 'Creating directory to store the lxbatch logs: lxbatch_pat_log'
    os.mkdir('lxbatch_pat_log')

inputFile_regex = \
  r"[a-zA-Z0-9_/:.]*skim_ZtoMuTau_tauIdEff_(?P<sample>\w+)_%s_chunk_(?P<gridJob>\d*)_(?P<gridTry>\d*).root" % jobId
inputFile_matcher = re.compile(inputFile_regex)


# Function that maps a sample name to its skim file
Code example #15
def submitAnalysisToLXBatch(configFile=None,
                            channel=None,
                            samples=None,
                            samplesToAnalyze=None,
                            samplesToSkip=None,
                            disableFactorization=False,
                            disableSysUncertainties=False,
                            disableZrecoilCorrections=False,
                            script_directory=None,
                            cfgdir='lxbatch',
                            inputFileMap=None,
                            outputFileMap=None,
                            outputDirectory=None,
                            queue='1nd',
                            enableEventDumps=False,
                            enableFakeRates=False,
                            processName=None,
                            changeTauId=None,
                            saveFinalEvents=False,
                            jobExtention=''):
    """
    Submit analysis job (event selection, filling of histogram)
    to local machine
    """

    # check that configFile, channel, samples and jobId
    # parameters are defined and non-empty
    for param in ["configFile", "channel", "samples", "outputDirectory"]:
        if locals()[param] is None:
            raise ValueError("Undefined '%s' parameter!!" % param)

    jobId = reg.getJobId(channel)

    # If not specified take script directory from user preferences.
    if script_directory is None:
        script_directory = reg.getHarvestScriptLocation()

    # Make sure our output file for the scripts is okay
    if not os.path.exists(script_directory):
        os.makedirs(script_directory)

    # Get all the files in our output directory that have non-zero size
    tmp_files = set(x['file'] for x in castor.nslsl(outputDirectory)
                    if x['size'])

    # Keep track of the files we care about
    relevant_files = set([])

    submit_file_name = 'submit_lxbatch_analysis_' + jobId + '.sh'
    with open(submit_file_name, 'w') as submit_file:
        # Loop over the samples to be analyzed
        for sample in samples['SAMPLES_TO_ANALYZE']:
            write_comment_header(submit_file, " Sample: " + sample)
            # Skip submitting crab job in case
            #  o list of samples for which crab jobs are to be submitted has been
            #    explicitly specified
            #  o sample has explicitly been requested to be skipped
            if samplesToAnalyze:
                if sample not in samplesToAnalyze:
                    print "Skipping", sample
                    continue
            if samplesToSkip:
                if sample in samplesToSkip:
                    print "Skipping", sample
                    continue

            sample_info = samples['RECO_SAMPLES'][sample]

            # Make job info
            jobInfo = {'channel': channel, 'sample': sample, 'id': jobId}

            # Now build the scripts to feed to bsub
            # Find the input files
            input_files = list(inputFileMap(channel, sample, jobId))

            if len(input_files) > 0:
                print("Submitting %s in %i part(s)" %
                      (sample, len(input_files)))
            else:
                print("No local input files for %s found !!" % sample)

            for job, file in enumerate(input_files):

                input_files = [file]
                # The None in the tuple indicates this file has no dependencies in
                # the batch job.
                input_files_and_jobs = [(None, file) for file in input_files]
                # Need to prepend file:, and strip off the directory since we
                # always have bsub rfcp the input files to the working
                # directory.
                input_files_for_cfgOptions = [
                    'file:' + os.path.basename(file) for file in input_files
                ]

                output_file = outputFileMap(channel, sample, jobId)
                input_file_hash = jobtools.hash_files(input_files,
                                                      add_time=False)
                # Add the hash of the input file so we know the provenance of all
                # files
                output_file = os.path.join(
                    outputDirectory,
                    output_file.replace(
                        '.root',
                        '_' + str(job) + '_' + input_file_hash + '.root'))

                relevant_files.add(os.path.basename(output_file))

                # Uncomment to skip rerunning of old jobs
                #if os.path.basename(output_file) in tmp_files:
                #print " done; skipping", output_file
                #continue

                # First, prepare the configuration file
                newConfigFile = getNewConfigFileName(configFile,
                                                     cfgdir,
                                                     sample,
                                                     jobId,
                                                     index=job,
                                                     label="@lxbatch")

                write_comment_header(submit_file, " cfg: " + newConfigFile)
                #--------------------------------------------------------------------
                # CV: temporary "hack" for producing (ED)Ntuples/skims for tau id. efficiency measurement
                jobCustomizations = []
                jobCustomizations.append(
                    "if hasattr(process, 'ntupleOutputModule'):")
                jobCustomizations.append(
                    "    process.ntupleOutputModule.fileName = '%s'" %
                    os.path.basename(output_file))
                jobCustomizations.append(
                    "if hasattr(process, 'patTupleOutputModule'):")
                jobCustomizations.append(
                    "    process.patTupleOutputModule.fileName = '%s'" %
                    os.path.basename(output_file))
                jobCustomizations.append(
                    "if hasattr(process, 'skimOutputModule'):")
                jobCustomizations.append(
                    "    process.skimOutputModule.fileName = '%s'" %
                    os.path.basename(output_file))
                HLTprocessName = 'HLT'
                if 'hlt' in samples['RECO_SAMPLES'][sample].keys():
                    HLTprocessName = samples['RECO_SAMPLES'][sample][
                        'hlt'].getProcessName()
                    jobCustomizations.append("if hasattr(process, 'hltMu'):")
                    jobCustomizations.append(
                        "    process.hltMu.selector.src = cms.InputTag('TriggerResults::%s')"
                        % HLTprocessName)
                    jobCustomizations.append(
                        "if hasattr(process, 'patTrigger'):")
                    jobCustomizations.append(
                        "    process.patTrigger.processName = '%s'" %
                        HLTprocessName)
                    jobCustomizations.append(
                        "if hasattr(process, 'patTriggerEvent'):")
                    jobCustomizations.append(
                        "    process.patTriggerEvent.processName = '%s'" %
                        HLTprocessName)
                if samples['RECO_SAMPLES'][sample]['type'] == 'Data':
                    jobCustomizations.append(
                        "if hasattr(process, 'prePatProductionSequence')" +
                        " and hasattr(process, 'prePatProductionSequenceGen'):"
                    )
                    jobCustomizations.append(
                        "    process.prePatProductionSequence.remove(process.prePatProductionSequenceGen)"
                    )
                    jobCustomizations.append(
                        "if hasattr(process, 'ntupleProducer'):")
                    jobCustomizations.append(
                        "    if hasattr(process.ntupleProducer.sources, 'tauGenJets'):"
                    )
                    jobCustomizations.append(
                        "        delattr(process.ntupleProducer.sources, 'tauGenJets')"
                    )
                    jobCustomizations.append(
                        "    if hasattr(process.ntupleProducer.sources, 'genJets'):"
                    )
                    jobCustomizations.append(
                        "        delattr(process.ntupleProducer.sources, 'genJets')"
                    )
                    jobCustomizations.append(
                        "    if hasattr(process.ntupleProducer.sources, 'genPhaseSpaceEventInfo'):"
                    )
                    jobCustomizations.append(
                        "        delattr(process.ntupleProducer.sources, 'genPhaseSpaceEventInfo')"
                    )
                    jobCustomizations.append(
                        "    if hasattr(process.ntupleProducer.sources, 'genPileUpEventInfo'):"
                    )
                    jobCustomizations.append(
                        "        delattr(process.ntupleProducer.sources, 'genPileUpEventInfo')"
                    )
                jobCustomizations.append(
                    "if hasattr(process, 'patTriggerEventSequence') and hasattr(process, 'patTriggerSequence'):"
                )
                jobCustomizations.append(
                    "    process.patDefaultSequence.replace(process.patTriggerEventSequence,"
                )
                jobCustomizations.append(
                    "                                       process.patTriggerSequence + process.patTriggerEventSequence)"
                )
                #jobCustomizations.append("print process.dumpPython()")
                #--------------------------------------------------------------------

                prepareConfigFile(
                    configFile=configFile,
                    jobInfo=jobInfo,
                    newConfigFile=newConfigFile,
                    sample_infos=samples,
                    disableFactorization=disableFactorization,
                    disableSysUncertainties=disableSysUncertainties,
                    disableZrecoilCorrections=disableZrecoilCorrections,
                    # We always copy the input files to the local directory
                    # before running cmsRun, so just take the basename
                    input_files=input_files_for_cfgOptions,
                    output_file=os.path.basename(output_file),
                    enableEventDumps=enableEventDumps,
                    enableFakeRates=enableFakeRates,
                    processName=processName,
                    saveFinalEvents=saveFinalEvents,
                    changeTauId=changeTauId,
                    customizations=jobCustomizations)

                # Build a function that constructs our log file name given the
                # job file hash.
                if not os.path.exists('lxbatch_log'):
                    os.makedirs('lxbatch_log')

                def log_file_maker(job_hash):
                    return os.path.join(
                        'lxbatch_log',
                        "_".join(['run', channel, sample, jobId, job_hash]) +
                        '.log')

                # Build our batch job
                jobname, script = jobtools.make_bsub_script(
                    output_file,
                    input_files_and_jobs,
                    log_file_maker,
                    "cmsRun %s" % newConfigFile,
                    pass_io_files=False)

                bsub_script_file = os.path.join(
                    script_directory, "_".join([
                        'analyze' + jobExtention, sample, 'job',
                        str(job), input_file_hash
                    ]) + '.sh')
                with open(bsub_script_file, 'w') as bsub_script:
                    bsub_script.write(script)
                # Add this bsub to our submission script
                submit_file.write("bsub -q %s < %s\n" %
                                  (queue, bsub_script_file))

        print len(tmp_files)
        garbage = tmp_files - relevant_files
        print len(garbage)
        if garbage:
            print "Found %i files not generated by this job!!" % len(garbage)
            print " You should really run:"
            print " cat ana_garbage.txt | xargs -n 1 -P 10 rfrm"
            with open('ana_garbage.txt', 'w') as garbage_script:
                for file in garbage:
                    garbage_script.write('%s\n' %
                                         os.path.join(outputDirectory, file))
        print "Run ./%s to submit jobs" % submit_file_name
        os.chmod(submit_file_name, 0755)

        return submit_file_name
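At the end of the submission loop the script compares what already exists in the CASTOR output directory with what this submission will produce, and writes anything unaccounted for to ana_garbage.txt. The bookkeeping is plain set arithmetic; a tiny illustration with made-up file names:

existing_files = set(['old_run_a.root', 'old_run_b.root', 'ana_sample1_job_0_abc.root'])
produced_files = set(['ana_sample1_job_0_abc.root'])

garbage = existing_files - produced_files
for file_name in sorted(garbage):
    print("would delete: %s" % file_name)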
def buildConfigFile_FWLiteTauFakeRateAnalyzer(sampleToAnalyze, evtSel, version, inputFilePath, tauIds, 
                                              tauJetCandSelection, srcTauJetCandidates, srcMET, intLumiData, hltPaths, srcWeights,
                                              configFilePath, logFilePath, outputFilePath, recoSampleDefinitions):

    """Build cfg.py file to run FWLiteTauFakeRateAnalyzer macro to run on PAT-tuples,
       and fill histograms for passed/failed samples"""

    print "inputFilePath = %s" % inputFilePath

    inputFileNames = None
    if inputFilePath.find('/castor/') != -1:
        inputFileNames = [ file_info['path'] for file_info in castor.nslsl(inputFilePath) ]
    else:
        inputFileNames = os.listdir(inputFilePath)
    #print "inputFileNames = %s" % inputFileNames

    # check if inputFile is PAT-tuple and
    # matches sampleToAnalyze, jobId
    inputFileNames_sample = []
    for inputFileName in inputFileNames:        
        if inputFileName.find("chunk") != -1 and \
           inputFileName.find("".join(['_', sampleToAnalyze, '_'])) != -1:
            # CV: assume that input file gets copied to local directory before FWLiteTauFakeRateAnalyzer macro gets started
            inputFileNames_sample.append(os.path.basename(inputFileName))

    #print(sampleToAnalyze)
    #print(inputFiles_sample)

    if len(inputFileNames_sample) == 0:
        print("Sample %s, evtSel = %s has no input files --> skipping !!" % (sampleToAnalyze, evtSel))
        return

    # find name of associated "process"
    process_matched = None
    processes = recoSampleDefinitions['MERGE_SAMPLES'].keys()
    for process in processes:
        for sample in recoSampleDefinitions['MERGE_SAMPLES'][process]['samples']:
            if sample == sampleToAnalyze:
                process_matched = process

    if not process_matched:
        print("No process associated to sample %s --> skipping !!" % sampleToAnalyze)
        return

    print("building config file(s) for sample %s, evtSel %s..." % (sampleToAnalyze, evtSel))

    processType = recoSampleDefinitions['RECO_SAMPLES'][sampleToAnalyze]['type']

    tauIds_string = make_tauIds_string(tauIds)

    hltPaths_string = None
    if isinstance(hltPaths, dict):
        hltPaths_string = make_inputFileNames_vstring(hltPaths[processType])
    else:
        hltPaths_string = make_inputFileNames_vstring(hltPaths)
    weights_string = make_inputFileNames_vstring(srcWeights[processType])

    configFileNames = []
    outputFileNames = []
    logFileNames    = []

    for inputFileName_sample in inputFileNames_sample:

        inputFileName_regex = r"[a-zA-Z0-9_./]*skim_(?P<sample>\w+?)_chunk_(?P<jobId>\d*)_(?P<hash>[a-zA-Z0-9]*).root"
        inputFileName_matcher = re.compile(inputFileName_regex)
        match = inputFileName_matcher.match(inputFileName_sample)
        if not match:
            raise ValueError("Failed to parse fileName = %s !!" % inputFileName_sample)
        jobId = match.group('jobId')

        outputFileName = 'analyzeTauFakeRateHistograms_%s_%s_%s_chunk_%s.root' % (evtSel, sampleToAnalyze, version, jobId)

        allEvents_DBS = -1
        xSection = 0.0
        if not recoSampleDefinitions['MERGE_SAMPLES'][process_matched]['type'] == 'Data':
            allEvents_DBS = recoSampleDefinitions['RECO_SAMPLES'][sampleToAnalyze]['events_processed']
            xSection = recoSampleDefinitions['RECO_SAMPLES'][sampleToAnalyze]['x_sec']

        config = \
"""
import FWCore.ParameterSet.Config as cms

process = cms.PSet()

process.fwliteInput = cms.PSet(
    fileNames   = cms.vstring('%s'),
    
    maxEvents   = cms.int32(-1),
    
    outputEvery = cms.uint32(1000)
)
    
process.fwliteOutput = cms.PSet(
    fileName  = cms.string('%s')
)

process.tauFakeRateAnalyzer = cms.PSet(
    process = cms.string('%s'),
    type = cms.string('%s'),

    evtSel = cms.string('%s'),

    regions = cms.vstring(
        'P',
        'F',
        'A'
    ),
    
    tauIds = cms.VPSet(
%s
    ),
    
    srcTauJetCandidates = cms.InputTag('%s'),
    tauJetCandSelection = cms.vstring(
%s
    ),

    srcTrigger = cms.InputTag('patTriggerEvent'),
    hltPaths = cms.vstring(%s),
    
    srcMET = cms.InputTag('%s'),

    srcVertices = cms.InputTag('selectedPrimaryVertexPosition'),

    weights = cms.VInputTag(%s),

    # CV: 'srcEventCounter' is defined in TauAnalysis/Skimming/test/skimTauIdEffSample_cfg.py
    srcEventCounter = cms.InputTag('totalEventsProcessed'),
    allEvents_DBS = cms.int32(%i),
    
    xSection = cms.double(%f),
    
    intLumiData = cms.double(%f),

    srcLumiProducer = cms.InputTag('lumiProducer')
)
""" % (inputFileName_sample, outputFileName,
       process_matched, processType, evtSel,
       tauIds_string, srcTauJetCandidates, tauJetCandSelection, hltPaths_string, srcMET, weights_string,
       allEvents_DBS, xSection, intLumiData)

        outputFileNames.append(outputFileName)

        configFileName = "analyzeTauFakeRatePATtuple_%s_%s_%s_cfg.py" % (evtSel, sampleToAnalyze, jobId)
        configFileName_full = os.path.join(configFilePath, configFileName)    
        configFile = open(configFileName_full, "w")
        configFile.write(config)
        configFile.close()
        configFileNames.append(configFileName)

        logFileName = configFileName.replace('_cfg.py', '.log')
        logFileName_full = os.path.join(logFilePath, logFileName)
        logFileNames.append(logFileName)

    retVal = {}
    retVal['inputFileNames']  = inputFileNames_sample
    retVal['configFileNames'] = configFileNames
    retVal['outputFileNames'] = outputFileNames
    retVal['logFileNames']    = logFileNames

    #print " inputFileNames = %s" % inputFileNames_sample
    #print " configFileNames = %s" % configFileNames
    #print " outputFileNames = %s" % outputFileNames
    #print " logFileNames = %s" % logFileNames

    return retVal
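The function above generates one FWLite _cfg.py file per input chunk by %-substituting values into a Python-string template and writing it out. Stripped of the analysis-specific parameters, the mechanism reduces to the following sketch (file names in the usage comment are illustrative):

import os

config_template = """
import FWCore.ParameterSet.Config as cms

process = cms.PSet()
process.fwliteInput  = cms.PSet(fileNames = cms.vstring('%s'))
process.fwliteOutput = cms.PSet(fileName  = cms.string('%s'))
"""

def write_config(input_file, output_file, config_file_path):
    # Substitute the per-chunk values into the template and write the _cfg.py file.
    config = config_template % (input_file, output_file)
    config_file = open(config_file_path, "w")
    config_file.write(config)
    config_file.close()

# e.g. write_config('skim_sample_chunk_1_abc.root', 'histograms_chunk_1.root',
#                   os.path.join('/tmp', 'analyze_chunk_1_cfg.py'))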
def buildConfigFile_FWLiteTauFakeRateAnalyzer(
        sampleToAnalyze, evtSel, version, inputFilePath, tauIds,
        tauJetCandSelection, srcTauJetCandidates, srcMET, intLumiData,
        hltPaths, srcWeights, configFilePath, logFilePath, outputFilePath,
        recoSampleDefinitions):
    """Build cfg.py file to run FWLiteTauFakeRateAnalyzer macro to run on PAT-tuples,
       and fill histograms for passed/failed samples"""

    print "inputFilePath = %s" % inputFilePath

    inputFileNames = None
    if inputFilePath.find('/castor/') != -1:
        inputFileNames = [
            file_info['path'] for file_info in castor.nslsl(inputFilePath)
        ]
    else:
        inputFileNames = os.listdir(inputFilePath)
    #print "inputFileNames = %s" % inputFileNames

    # check if inputFile is PAT-tuple and
    # matches sampleToAnalyze, jobId
    inputFileNames_sample = []
    for inputFileName in inputFileNames:
        if inputFileName.find("chunk") != -1 and \
           inputFileName.find("".join(['_', sampleToAnalyze, '_'])) != -1:
            # CV: assume that input file gets copied to local directory before FWLiteTauFakeRateAnalyzer macro gets started
            inputFileNames_sample.append(os.path.basename(inputFileName))

    #print(sampleToAnalyze)
    #print(inputFiles_sample)

    if len(inputFileNames_sample) == 0:
        print("Sample %s, evtSel = %s has no input files --> skipping !!" %
              (sampleToAnalyze, evtSel))
        return

    # find name of associated "process"
    process_matched = None
    processes = recoSampleDefinitions['MERGE_SAMPLES'].keys()
    for process in processes:
        for sample in recoSampleDefinitions['MERGE_SAMPLES'][process][
                'samples']:
            if sample == sampleToAnalyze:
                process_matched = process

    if not process_matched:
        print("No process associated to sample %s --> skipping !!" %
              sampleToAnalyze)
        return

    print("building config file(s) for sample %s, evtSel %s..." %
          (sampleToAnalyze, evtSel))

    processType = recoSampleDefinitions['RECO_SAMPLES'][sampleToAnalyze][
        'type']

    tauIds_string = make_tauIds_string(tauIds)

    hltPaths_string = None
    if isinstance(hltPaths, dict):
        hltPaths_string = make_inputFileNames_vstring(hltPaths[processType])
    else:
        hltPaths_string = make_inputFileNames_vstring(hltPaths)
    weights_string = make_inputFileNames_vstring(srcWeights[processType])

    configFileNames = []
    outputFileNames = []
    logFileNames = []

    for inputFileName_sample in inputFileNames_sample:

        inputFileName_regex = r"[a-zA-Z0-9_./]*skim_(?P<sample>\w+?)_chunk_(?P<jobId>\d*)_(?P<hash>[a-zA-Z0-9]*).root"
        inputFileName_matcher = re.compile(inputFileName_regex)
        match = inputFileName_matcher.match(inputFileName_sample)
        if not match:
            raise ValueError("Failed to parse fileName = %s !!" %
                             inputFileName_sample)
        jobId = match.group('jobId')

        outputFileName = 'analyzeTauFakeRateHistograms_%s_%s_%s_chunk_%s.root' % (
            evtSel, sampleToAnalyze, version, jobId)

        allEvents_DBS = -1
        xSection = 0.0
        if not recoSampleDefinitions['MERGE_SAMPLES'][process_matched][
                'type'] == 'Data':
            allEvents_DBS = recoSampleDefinitions['RECO_SAMPLES'][
                sampleToAnalyze]['events_processed']
            xSection = recoSampleDefinitions['RECO_SAMPLES'][sampleToAnalyze][
                'x_sec']

        config = \
"""
import FWCore.ParameterSet.Config as cms

process = cms.PSet()

process.fwliteInput = cms.PSet(
    fileNames   = cms.vstring('%s'),
    
    maxEvents   = cms.int32(-1),
    
    outputEvery = cms.uint32(1000)
)
    
process.fwliteOutput = cms.PSet(
    fileName  = cms.string('%s')
)

process.tauFakeRateAnalyzer = cms.PSet(
    process = cms.string('%s'),
    type = cms.string('%s'),

    evtSel = cms.string('%s'),

    regions = cms.vstring(
        'P',
        'F',
        'A'
    ),
    
    tauIds = cms.VPSet(
%s
    ),
    
    srcTauJetCandidates = cms.InputTag('%s'),
    tauJetCandSelection = cms.vstring(
%s
    ),

    srcTrigger = cms.InputTag('patTriggerEvent'),
    hltPaths = cms.vstring(%s),
    
    srcMET = cms.InputTag('%s'),

    srcVertices = cms.InputTag('selectedPrimaryVertexPosition'),

    weights = cms.VInputTag(%s),

    # CV: 'srcEventCounter' is defined in TauAnalysis/Skimming/test/skimTauIdEffSample_cfg.py
    srcEventCounter = cms.InputTag('totalEventsProcessed'),
    allEvents_DBS = cms.int32(%i),
    
    xSection = cms.double(%f),
    
    intLumiData = cms.double(%f),

    srcLumiProducer = cms.InputTag('lumiProducer')
)
""" % (inputFileName_sample, outputFileName,
        process_matched, processType, evtSel,
        tauIds_string, srcTauJetCandidates, tauJetCandSelection, hltPaths_string, srcMET, weights_string,
        allEvents_DBS, xSection, intLumiData)

        outputFileNames.append(outputFileName)

        configFileName = "analyzeTauFakeRatePATtuple_%s_%s_%s_cfg.py" % (
            evtSel, sampleToAnalyze, jobId)
        configFileName_full = os.path.join(configFilePath, configFileName)
        configFile = open(configFileName_full, "w")
        configFile.write(config)
        configFile.close()
        configFileNames.append(configFileName)

        logFileName = configFileName.replace('_cfg.py', '.log')
        logFileName_full = os.path.join(logFilePath, logFileName)
        logFileNames.append(logFileName)

    retVal = {}
    retVal['inputFileNames'] = inputFileNames_sample
    retVal['configFileNames'] = configFileNames
    retVal['outputFileNames'] = outputFileNames
    retVal['logFileNames'] = logFileNames

    #print " inputFileNames = %s" % inputFileNames_sample
    #print " configFileNames = %s" % configFileNames
    #print " outputFileNames = %s" % outputFileNames
    #print " logFileNames = %s" % logFileNames

    return retVal
def buildConfigFile_SVfitEventHypothesisAnalyzer(sampleToAnalyze, channelToAnalyze, metResolution, 
                                                 configFileName_template,
                                                 inputFilePath,
                                                 numInputFilesPerJob, maxEventsPerJob, 
                                                 configFilePath, logFilePath, outputFilePath, numEventsMap = None):

    """Build cfg.py file to run SVfit algorithm and fill histograms of SVfit reconstructed mass""" 

    #print "inputFilePath = %s" % inputFilePath

    inputFileNames = None
    if inputFilePath.find('/castor/') != -1:
        inputFileNames = [ file_info['path'] for file_info in castor.nslsl(inputFilePath) ]
    else:
        inputFileNames = os.listdir(inputFilePath)
    #print "inputFileNames = %s" % inputFileNames

    # check if inputFile matches sampleToAnalyze
    inputFileNames_sample = []
    for inputFileName in inputFileNames:        
        if inputFileName.find("".join(['_', sampleToAnalyze, '_'])) != -1 or \
           inputFileName.find("".join(['/', sampleToAnalyze, '_'])) != -1:
            # CV: assume that input file gets copied to local directory before cmsRun gets started
            inputFileNames_sample.append(os.path.basename(inputFileName))

    #print(sampleToAnalyze)
    #print(inputFileNames_sample)

    if len(inputFileNames_sample) == 0:
        print("Sample %s, channel = %s has no input files --> skipping !!" % (sampleToAnalyze, channelToAnalyze))
        return

    # CV: restrict the number of input files to 50 in order to balance event statistics
    #    (and computing time) for different mass-points
    if len(inputFileNames_sample) > 50:
        inputFileNames_sample = inputFileNames_sample[0:50]

    numInputFiles = len(inputFileNames_sample)
    numInputFileGroups = (numInputFiles / numInputFilesPerJob)
    if (numInputFiles % numInputFilesPerJob) != 0:
        numInputFileGroups = numInputFileGroups + 1

    inputFileNameGroups_sample = []
    skipEvents_sample = []
    for fileId in range(numInputFileGroups):
        inputFileIdx_first = fileId*numInputFilesPerJob
        inputFileIdx_last = inputFileIdx_first + numInputFilesPerJob
        if inputFileIdx_last > len(inputFileNames_sample):
            inputFileIdx_last = len(inputFileNames_sample)
        #print "inputFileIdx: first = %i, last = %i" % (inputFileIdx_first, inputFileIdx_last)
        numEvents = 0
        for inputFileIdx in range(inputFileIdx_first, inputFileIdx_last):
            inputFileName = inputFileNames_sample[inputFileIdx]
            numEvents_i = None
            if numEventsMap is not None and numEventsMap.has_key(os.path.basename(inputFileName)):
                numEvents_i = numEventsMap[inputFileName]
            else:
                numEvents_i = getNumEvents(os.path.join(inputFilePath, inputFileName))
            numEvents = numEvents + numEvents_i
        numJobsPerGroup = (numEvents / maxEventsPerJob)
        if (numEvents % maxEventsPerJob) != 0:
            numJobsPerGroup = numJobsPerGroup + 1
        print "group of inputFiles = %s contains %i events --> splitting into %i jobs." % \
          (inputFileNames_sample[inputFileIdx_first:inputFileIdx_last], numEvents, numJobsPerGroup)
        for jobId in range(numJobsPerGroup):
            inputFileNameGroups_sample.append(inputFileNames_sample[inputFileIdx_first:inputFileIdx_last])
            skipEvents_sample.append(jobId*maxEventsPerJob)

    #print "inputFileNameGroups_sample = %s" % inputFileNameGroups_sample

    configFileNames = []
    outputFileNames = []
    logFileNames    = []

    numJobs = len(inputFileNameGroups_sample)

    for jobId in range(numJobs):
        
        inputFileNames_string = "[ "
        for inputFileName_sample in inputFileNameGroups_sample[jobId]:
            inputFileNames_string += "'file:%s', " % inputFileName_sample
        inputFileNames_string += " ]"

        sample_type = None
        # use a group, not a character class, so the full sample name is matched
        sample_type_Z_regex = r"(Ztautau|ZplusJets|ZToTauTau)"
        sample_type_Z_matcher = re.compile(sample_type_Z_regex)
        sample_type_Higgs_regex = "(((gg|bb|vbf)(Higgs|Phi))|HToTauTau_M-)[0-9]+"
        sample_type_Higgs_matcher = re.compile(sample_type_Higgs_regex)
        if sample_type_Z_matcher.match(sampleToAnalyze):
            sample_type = 'Z'
        elif sample_type_Higgs_matcher.match(sampleToAnalyze):
            sample_type = 'Higgs'
        else:
            raise ValueError("Failed to determine wether sample = %s is Z or Higgs sample !!" % sampleToAnalyze)

        metResolution_string = "None"
        metResolution_label  = "pfMEtResMC"
        if metResolution is not None:
            metResolution_string = "%f" % metResolution
            metResolution_label  = "pfMEtRes%1.0f" % metResolution
            metResolution_label  = metResolution_label.replace(".", "_")
        
        outputFileName = 'svFitPerformanceAnalysisPlots_%s_%s_%s_%i.root' % \
          (sampleToAnalyze, channelToAnalyze, metResolution_label, jobId + 1)
        outputFileNames.append(outputFileName)
 
        replacements = []
        replacements.append([ 'sample',         "'%s'" % sampleToAnalyze          ])
        replacements.append([ 'sample_type',    "'%s'" % sample_type              ])
        replacements.append([ 'channel',        "'%s'" % channelToAnalyze         ])
        replacements.append([ 'metResolution',  "%s"   % metResolution_string     ])
        replacements.append([ 'skipEvents',     "%i"   % skipEvents_sample[jobId] ])
        replacements.append([ 'maxEvents',      "%i"   % maxEventsPerJob          ])
        replacements.append([ 'inputFileNames', "%s"   % inputFileNames_string    ])
        replacements.append([ 'outputFileName', "'%s'" % outputFileName           ])
                
        configFileName = "svFitPerformanceAnalysisPlots_%s_%s_%s_%i_cfg.py" % \
          (sampleToAnalyze, channelToAnalyze, metResolution_label, jobId)
        configFileName_full = os.path.join(configFilePath, configFileName)
        replaceConfigFileParam(configFileName_template, configFileName_full, replacements)
        configFileNames.append(configFileName)

        logFileName = configFileName.replace('_cfg.py', '.log')
        logFileName_full = os.path.join(logFilePath, logFileName)
        logFileNames.append(logFileName)

    retVal = {}
    retVal['inputFileNames']  = inputFileNameGroups_sample
    retVal['configFileNames'] = configFileNames
    retVal['outputFileNames'] = outputFileNames
    retVal['logFileNames']    = logFileNames

    #print " inputFileNames = %s" % inputFileNames_sample
    #print " configFileNames = %s" % configFileNames
    #print " outputFileNames = %s" % outputFileNames
    #print " logFileNames = %s" % logFileNames

    return retVal
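buildConfigFile_SVfitEventHypothesisAnalyzer splits the matched input files into groups of numInputFilesPerJob and then sub-splits each group by maxEventsPerJob, using explicit index arithmetic. The grouping step can be written more compactly; a small equivalent sketch:

def split_into_groups(items, group_size):
    # Consecutive groups of at most group_size entries (same result as the
    # fileId / inputFileIdx_first / inputFileIdx_last arithmetic above).
    return [items[i:i + group_size] for i in range(0, len(items), group_size)]

# e.g. split_into_groups(['a.root', 'b.root', 'c.root', 'd.root', 'e.root'], 2)
# -> [['a.root', 'b.root'], ['c.root', 'd.root'], ['e.root']]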
Code example #19
mode = None
if inputFilePath.find('/castor/') == 0: 
    mode = 'castor'
elif inputFilePath.find('/store/') == 0:
    mode = 'eos'    
else:
    mode = 'local'

if jobId is None:
    reg.overrideJobId(channel, '2011Oct30') # CV: need to overwrite this in order to match Mauro's filenames
    jobId = reg.getJobId(channel)
print(" jobId = %s" % jobId)

if mode == 'castor':
    files = [ file_info for file_info in castor.nslsl(inputFilePath) ]
elif mode == 'eos':
    files = [ file_info for file_info in eos.lsl(inputFilePath) ]
else:
    commandLine = '%s %s' % (options['executable_ls'][mode], inputFilePath)
    args = shlex.split(commandLine)
    retval = subprocess.Popen(args, stdout = subprocess.PIPE)
    #retval.wait()

    files = retval.stdout.read().split('\n')
    #print(" files = %s" % files)

fileName_regex = r"(?P<fileName_base>[a-zA-Z0-9_]+)_(?P<gridJob>\d*)(_(?P<gridTry>\d*))*_(?P<hash>[a-zA-Z0-9]*).root"
fileName_matcher = re.compile(fileName_regex)

fileNamesAndProperties_dict = {}
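The fileName_regex above decomposes grid output names into a base name, job and retry indices, and a hash, which fileNamesAndProperties_dict presumably keys on. A quick illustration of what the matcher extracts from a typical name (the file name below is made up for the example):

import re

fileName_regex = r"(?P<fileName_base>[a-zA-Z0-9_]+)_(?P<gridJob>\d*)(_(?P<gridTry>\d*))*_(?P<hash>[a-zA-Z0-9]*).root"
fileName_matcher = re.compile(fileName_regex)

# 'ntuple_Ztautau_12_ab12cd.root' is an illustrative name, not one from the samples above.
match = fileName_matcher.match("ntuple_Ztautau_12_ab12cd.root")
if match:
    print("base = %s, gridJob = %s, hash = %s" %
          (match.group('fileName_base'), match.group('gridJob'), match.group('hash')))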
Code example #20
File: crabSitter.py  Project: EricBAdamsUMDCP/cmssw-1
# read list of files existing in output file path
if checkJobOutputFiles:
    outputFilePath = outputFilePath_prefix
    if outputFilePath_suffix:
        if not (outputFilePath.endswith('/')
                or outputFilePath_suffix.startswith('/')):
            outputFilePath += '/'
        outputFilePath += outputFilePath_suffix
    if not outputFilePath.endswith('/'):
        outputFilePath += '/'
    if outputFilePath.find("/castor/") != -1:
        print("checking castor files in outputFilePath = %s" %
              outputFilePath)
        outputFileInfos = [
            outputFileInfo
            for outputFileInfo in castor.nslsl(outputFilePath)
        ]
    elif outputFilePath.find("/dpm/") != -1:
        if publish_data:
            datasetpath_items = datasetpath.split('/')
            for idx in range(len(datasetpath_items)):
                if len(datasetpath_items[idx]) > 0:
                    outputFilePath += datasetpath_items[idx]
                    if not outputFilePath.endswith('/'):
                        outputFilePath += '/'
                    break
            if not publish_data_name:
                raise ValueError(
                    "Invalid 'publish_data_name' = %s !!" %
                    publish_data_name)
            outputFilePath += publish_data_name
Code example #21
File: crabSitter.py  Project: aashaqshah/cmssw-1
        if not numJobs:            
            raise ValueError("Failed to read number of jobs from log file %s !!" % crabLogFileName)
        print "numJobs = %i" % numJobs

        # read list of files existing in output file path
        if checkJobOutputFiles:
            outputFilePath = outputFilePath_prefix
            if outputFilePath_suffix:
                if not (outputFilePath.endswith('/') or outputFilePath_suffix.startswith('/')):
                    outputFilePath += '/'
                outputFilePath += outputFilePath_suffix
            if not outputFilePath.endswith('/'):
                outputFilePath += '/'            
            if outputFilePath.find("/castor/") != -1:
                print("checking castor files in outputFilePath = %s" % outputFilePath)
                outputFileInfos = [ outputFileInfo for outputFileInfo in castor.nslsl(outputFilePath) ]
            elif outputFilePath.find("/dpm/") != -1:
                if publish_data:
                    datasetpath_items = datasetpath.split('/')
                    for idx in range(len(datasetpath_items)):
                        if len(datasetpath_items[idx]) > 0:
                            outputFilePath += datasetpath_items[idx]
                            if not outputFilePath.endswith('/'):
                                outputFilePath += '/'
                            break
                    if not publish_data_name:
                        raise ValueError("Invalid 'publish_data_name' = %s !!" % publish_data_name)
                    outputFilePath += publish_data_name
                    if not outputFilePath.endswith('/'):
                        outputFilePath += '/'
                    print("checking DPM files in outputFilePath = %s" % outputFilePath)
Code example #22
#!/usr/bin/env python

import TauAnalysis.Configuration.tools.castor as castor
import TauAnalysis.TauIdEfficiency.tools.castor_mirror2 as castor_mirror

import subprocess
import shlex

# Get all the skim files from the castor directory
sourceFilePath = "/castor/cern.ch/user/v/veelken/CMSSW_4_2_x/PATtuples/TauPtRes/V2exp/"
source_files = [
    file_info['path'] for file_info in castor.nslsl(sourceFilePath)
]

targetFilePath = "/data2/veelken/CMSSW_4_2_x/PATtuples/TauPtRes/V2exp/"

jobId = "2011Aug18"
version = "V2exp"

samplesToCopy = [
    # modify in case you want to submit jobs for some of the samples only...
]

files_to_copy = []

for source_file in source_files:

    if source_file.find("%s%s" % (jobId, version)) == -1:
        continue

    isSampleToCopy = False
Code example #23
process = cms.PSet()

process.fwliteInput = cms.PSet(
    fileNames = cms.vstring(),

    maxEvents = cms.int32(-1),
    
    outputEvery = cms.uint32(1000)
)

#--------------------------------------------------------------------------------
inputFilePath = '/data1/veelken/CMSSW_4_2_x/Ntuples/user/v/veelken/CMSSW_4_2_x/Ntuples/neuralMtautauTraining/v1_5'
inputFileNames = []
if inputFilePath.find('/castor/') != -1:
    inputFileNames = [ 'rfio:%s' % file_info['path'] for file_info in castor.nslsl(inputFilePath) ]
else:
    inputFileNames = [ 'file:%s' % os.path.join(inputFilePath, file_name) for file_name in os.listdir(inputFilePath) ]

inputFile_regex = \
  r"[a-zA-Z0-9_/:.]*neuralMtautauNtuple_(?P<sample>[a-zA-Z0-9_]+)_(?P<gridJob>\d*)(_(?P<gridTry>\d*))*_(?P<hash>[a-zA-Z0-9]*).root"
inputFile_matcher = re.compile(inputFile_regex)

inputFileNames_matched = []
for inputFileName in inputFileNames:
    if inputFile_matcher.match(inputFileName):
        inputFileNames_matched.append(inputFileName)

#print "inputFileNames_matched = %s" % inputFileNames_matched

setattr(process.fwliteInput, "fileNames", cms.vstring(inputFileNames_matched))
Code example #24
                        'data_Mu_Run2010B_Nov4ReReco' ]

    # If this is a list, only the items in the list will be analyzed.
    samplesToAnalyze = []
    #samplesToAnalyze = fake_rate_samples

    # Where we will send the output on castor
    outputPath = reg.getAnalysisFilePath(channel)
    jobId = reg.getJobId(channel)
    # Figure out where our root files were stored for the desired skim
    skimPath = reg.getSkimEvents(channel)

    # Get all the skim files from the castor directory
    skim_files = [os.path.join(skimPath, file) for file in
        filter(lambda x: x.startswith('skim_'), (
        file_info['file'] for file_info in castor.nslsl(skimPath)))]

    def inputFileMapper(channel, sample, jobId):
        for file in skim_files:
            if file.find('_' + sample + '_') != -1:
                yield file

    enableFakeRates = False
    enableSystematics = False
    changeTauId = None
    saveFinalEvents = False
    eventList = None

    submit.submitAnalysisToLXBatch(
        configFile=configFile,
        channel=channel,
Code example #25
#!/usr/bin/env python

import TauAnalysis.Configuration.tools.castor as castor
import TauAnalysis.TauIdEfficiency.tools.castor_mirror2 as castor_mirror

import subprocess
import shlex

# Get all the skim files from the castor directory
sourceFilePath = "/castor/cern.ch/user/v/veelken/CMSSW_4_2_x/PATtuples/TauPtRes/V2exp/"
source_files = [ file_info['path'] for file_info in castor.nslsl(sourceFilePath) ]

targetFilePath = "/data2/veelken/CMSSW_4_2_x/PATtuples/TauPtRes/V2exp/"

jobId = "2011Aug18"
version = "V2exp"

samplesToCopy = [
    # modify in case you want to submit jobs for some of the samples only...
]

files_to_copy = []

for source_file in source_files:

    if source_file.find("%s%s" % (jobId, version)) == -1:
        continue

    isSampleToCopy = False
    if len(samplesToCopy) == 0:
        isSampleToCopy = True
Code example #26
if inputFilePath.find('/castor/') == 0:
    mode = 'castor'
elif inputFilePath.find('/store/') == 0:
    mode = 'eos'
else:
    mode = 'local'

if jobId is None:
    reg.overrideJobId(
        channel, '2011Oct30'
    )  # CV: need to overwrite this in order to match Mauro's filenames
    jobId = reg.getJobId(channel)
print(" jobId = %s" % jobId)

if mode == 'castor':
    files = [file_info for file_info in castor.nslsl(inputFilePath)]
elif mode == 'eos':
    files = [file_info for file_info in eos.lsl(inputFilePath)]
else:
    commandLine = '%s %s' % (options['executable_ls'][mode], inputFilePath)
    args = shlex.split(commandLine)
    retval = subprocess.Popen(args, stdout=subprocess.PIPE)
    #retval.wait()

    files = retval.stdout.read().split('\n')
    #print(" files = %s" % files)

fileName_regex = r"(?P<fileName_base>[a-zA-Z0-9_]+)_(?P<gridJob>\d*)(_(?P<gridTry>\d*))*_(?P<hash>[a-zA-Z0-9]*).root"
fileName_matcher = re.compile(fileName_regex)

fileNamesAndProperties_dict = {}
            ]
        else:
            raise ValueError("Invalid mass-point = %i !!" % massPoint)
    else:
        raise ValueError("Invalid channel = %s !!" % channel)    
    inputFile_regex = \
      r"[a-zA-Z0-9_/:.]*genTauLeptonPairSkim_(ggHiggs|ggPhi|vbfHiggs)%i_%s_(?P<gridJob>\d*)(_(?P<gridTry>\d*))*_(?P<hash>[a-zA-Z0-9]*).root" % (massPoint, channel)
else:
    raise ValueError("Invalid sample type = %s !!" % sample_type)

# check if name of inputFile matches regular expression
inputFileNames = []
for inputFilePath in inputFilePaths:
    files = None
    if inputFilePath.startswith('/castor/'):
        files = [ "".join([ "rfio:", file_info['path'] ]) for file_info in castor.nslsl(inputFilePath) ]
    elif inputFilePath.startswith('/store/'):
        files = [ file_info['path'] for file_info in eos.lsl(inputFilePath) ]
    else:
        files = [ "".join([ "file:", inputFilePath, file ]) for file in os.listdir(inputFilePath) ]
    for file in files:
        #print "file = %s" % file
        inputFile_matcher = re.compile(inputFile_regex)
        if inputFile_matcher.match(file):
            inputFileNames.append(file)
print "inputFileNames = %s" % inputFileNames 

process.source.fileNames = cms.untracked.vstring(inputFileNames)
#--------------------------------------------------------------------------------

process.testSVfitTrackLikelihoodProductionSequence = cms.Sequence()
Code example #28
def submitAnalysisToLXBatch(configFile = None, channel = None, samples = None,
                            samplesToAnalyze = None, samplesToSkip = None,
                            disableFactorization = False,
                            disableSysUncertainties = False,
                            disableZrecoilCorrections = False,
                            script_directory=None,
                            cfgdir = 'lxbatch',
                            inputFileMap = None, outputFileMap = None,
                            outputDirectory = None,
                            queue = '1nd',
                            enableEventDumps = False,
                            enableFakeRates = False,
                            processName = None,
                            changeTauId = None,
                            saveFinalEvents = False,
                            jobExtention = ''):

    """
    Submit analysis job (event selection, filling of histogram)
    to local machine
    """

    # check that configFile, channel, samples and outputDirectory
    # parameters are defined and non-empty
    for param in ["configFile", "channel", "samples",
                  "outputDirectory"]:
        if locals()[param] is None:
            raise ValueError("Undefined '%s' parameter!!" % param)

    jobId = reg.getJobId(channel)

    # If not specified take script directory from user preferences.
    if script_directory is None:
        script_directory = reg.getHarvestScriptLocation()

    # Make sure the output directory for the scripts exists
    if not os.path.exists(script_directory):
        os.makedirs(script_directory)

    # Get all the files in our output directory that have non-zero size
    tmp_files = set(x['file'] for x in castor.nslsl(outputDirectory)
                    if x['size'])

    # Keep track of the files we care about
    relevant_files = set([])

    submit_file_name = 'submit_lxbatch_analysis_' + jobId + '.sh'
    with open(submit_file_name, 'w') as submit_file:
        # Loop over the samples to be analyzed
        for sample in samples['SAMPLES_TO_ANALYZE']:
            write_comment_header(submit_file, " Sample: " + sample)
            # Skip submitting batch job in case
            #  o list of samples for which jobs are to be submitted has been
            #    explicitly specified
            #  o sample has explicitly been requested to be skipped
            if samplesToAnalyze:
                if sample not in samplesToAnalyze:
                    print "Skipping", sample
                    continue
            if samplesToSkip:
                if sample in samplesToSkip:
                    print "Skipping", sample
                    continue

            sample_info = samples['RECO_SAMPLES'][sample]

            # Make job info
            jobInfo = {
                'channel' : channel,
                'sample' : sample,
                'id' : jobId
            }

            # Now build the scripts to feed to bsub
            # Find the input files
            input_files = list(inputFileMap(channel, sample, jobId))

            if len(input_files) > 0:
                print("Submitting %s in %i part(s)" % (sample, len(input_files)))
            else:
                print("No local input files for %s found !!" % sample)

            for job, file in enumerate(input_files):

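                # each batch job processes exactly one input file: rebind input_files to
                # the single-file list for this job (the enumerate above keeps iterating
                # over the original list object)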
                input_files = [file]
                # The None in the tuple indicates this file has no dependencies in
                # the batch job.
                input_files_and_jobs = [ (None, file) for file in input_files ]
                # Need to prepend file:, and strip off the directory since we
                # always have bsub rfcp the input files to the working
                # directory.
                input_files_for_cfgOptions = [
                    'file:' + os.path.basename(file) for file in input_files]


                output_file = outputFileMap(channel, sample, jobId)
                input_file_hash = jobtools.hash_files(
                    input_files, add_time=False)
                # Add the hash of the input file so we know the provenance of all
                # files
                output_file = os.path.join(outputDirectory, output_file.replace(
                    '.root', '_' + str(job) + '_' + input_file_hash + '.root'))

                relevant_files.add(os.path.basename(output_file))

                # Uncomment to skip rerunning of old jobs
                #if os.path.basename(output_file) in tmp_files:
                    #print " done; skipping", output_file
                    #continue

                # First, prepare the configuration file
                newConfigFile = getNewConfigFileName(
                    configFile, cfgdir, sample,
                    jobId, index = job, label = "@lxbatch")

                write_comment_header(submit_file, " cfg: " + newConfigFile)
                #--------------------------------------------------------------------
                # CV: temporary "hack" for producing (ED)Ntuples/skims for tau id. efficiency measurement
                jobCustomizations = []
                jobCustomizations.append("if hasattr(process, 'ntupleOutputModule'):")
                jobCustomizations.append("    process.ntupleOutputModule.fileName = '%s'" % os.path.basename(output_file))
                jobCustomizations.append("if hasattr(process, 'patTupleOutputModule'):")
                jobCustomizations.append("    process.patTupleOutputModule.fileName = '%s'" % os.path.basename(output_file))
                jobCustomizations.append("if hasattr(process, 'skimOutputModule'):")
                jobCustomizations.append("    process.skimOutputModule.fileName = '%s'" % os.path.basename(output_file))
                HLTprocessName = 'HLT'
                if 'hlt' in samples['RECO_SAMPLES'][sample].keys():
                    HLTprocessName = samples['RECO_SAMPLES'][sample]['hlt'].getProcessName()
                    jobCustomizations.append("if hasattr(process, 'hltMu'):")
                    jobCustomizations.append("    process.hltMu.selector.src = cms.InputTag('TriggerResults::%s')" % HLTprocessName)
                    jobCustomizations.append("if hasattr(process, 'patTrigger'):")
                    jobCustomizations.append("    process.patTrigger.processName = '%s'" % HLTprocessName)
                    jobCustomizations.append("if hasattr(process, 'patTriggerEvent'):")
                    jobCustomizations.append("    process.patTriggerEvent.processName = '%s'" % HLTprocessName)
                if samples['RECO_SAMPLES'][sample]['type'] == 'Data':
                    jobCustomizations.append("if hasattr(process, 'prePatProductionSequence')"
                                            + " and hasattr(process, 'prePatProductionSequenceGen'):")
                    jobCustomizations.append("    process.prePatProductionSequence.remove(process.prePatProductionSequenceGen)")
                    jobCustomizations.append("if hasattr(process, 'ntupleProducer'):")
                    jobCustomizations.append("    if hasattr(process.ntupleProducer.sources, 'tauGenJets'):")
                    jobCustomizations.append("        delattr(process.ntupleProducer.sources, 'tauGenJets')")
                    jobCustomizations.append("    if hasattr(process.ntupleProducer.sources, 'genJets'):")
                    jobCustomizations.append("        delattr(process.ntupleProducer.sources, 'genJets')")
                    jobCustomizations.append("    if hasattr(process.ntupleProducer.sources, 'genPhaseSpaceEventInfo'):")
                    jobCustomizations.append("        delattr(process.ntupleProducer.sources, 'genPhaseSpaceEventInfo')")
                    jobCustomizations.append("    if hasattr(process.ntupleProducer.sources, 'genPileUpEventInfo'):")
                    jobCustomizations.append("        delattr(process.ntupleProducer.sources, 'genPileUpEventInfo')")
                jobCustomizations.append("if hasattr(process, 'patTriggerEventSequence') and hasattr(process, 'patTriggerSequence'):")
                jobCustomizations.append("    process.patDefaultSequence.replace(process.patTriggerEventSequence,")
                jobCustomizations.append("                                       process.patTriggerSequence + process.patTriggerEventSequence)")    
                #jobCustomizations.append("print process.dumpPython()")
                #--------------------------------------------------------------------

                prepareConfigFile(
                    configFile = configFile, jobInfo = jobInfo,
                    newConfigFile = newConfigFile,
                    sample_infos = samples,
                    disableFactorization = disableFactorization,
                    disableSysUncertainties = disableSysUncertainties,
                    disableZrecoilCorrections = disableZrecoilCorrections, 
                    # We always copy the input files to the local directory
                    # before running cmsRun, so just take the basename
                    input_files = input_files_for_cfgOptions,
                    output_file = os.path.basename(output_file),
                    enableEventDumps = enableEventDumps, enableFakeRates = enableFakeRates,
                    processName = processName,
                    saveFinalEvents = saveFinalEvents,
                    changeTauId = changeTauId,
                    customizations = jobCustomizations)

                # Build a function that constructs our log file name given the
                # job file hash.
                if not os.path.exists('lxbatch_log'):
                    os.makedirs('lxbatch_log')
                def log_file_maker(job_hash):
                    return os.path.join(
                        'lxbatch_log', "_".join(
                        ['run', channel, sample, jobId, job_hash]) + '.log')

                # Build our batch job
                jobname, script = jobtools.make_bsub_script(
                    output_file, input_files_and_jobs, log_file_maker,
                    "cmsRun %s" % newConfigFile, pass_io_files = False)

                bsub_script_file = os.path.join(
                    script_directory, "_".join([
                        'analyze'+jobExtention, sample, 'job',
                        str(job), input_file_hash]) + '.sh')
                with open(bsub_script_file, 'w') as bsub_script:
                    bsub_script.write(script)
                # Add this bsub to our submission script
                submit_file.write("bsub -q %s < %s\n" % (queue, bsub_script_file))

        print len(tmp_files)
        garbage = tmp_files - relevant_files
        print len(garbage)
        if garbage:
            print "Found %i files not generated by this job!!" % len(garbage)
            print " You should really run:"
            print " cat ana_garbage.txt | xargs -n 1 -P 10 rfrm"
            with open('ana_garbage.txt', 'w') as garbage_script:
                for file in garbage:
                    garbage_script.write(
                        '%s\n' % os.path.join(outputDirectory, file))
        print "Run ./%s to submit jobs" % submit_file_name
        os.chmod(submit_file_name, 0755)

        return submit_file_name
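
# A minimal usage sketch; every name and path below is an illustrative placeholder,
# not taken from the original analysis configuration:
dummy_samples = {
    'SAMPLES_TO_ANALYZE' : [ 'Ztautau' ],
    'RECO_SAMPLES'       : { 'Ztautau' : { 'type' : 'MC' } }
}
submit_file = submitAnalysisToLXBatch(
    configFile = 'runTauIdEffAnalysis_cfg.py',
    channel = 'ZtoMuTau_tauIdEff',
    samples = dummy_samples,
    inputFileMap = lambda channel, sample, jobId: [ '/castor/cern.ch/user/x/dummy/skim_Ztautau_1.root' ],
    outputFileMap = lambda channel, sample, jobId: 'analysis_%s_%s.root' % (sample, jobId),
    outputDirectory = '/castor/cern.ch/user/x/dummy/analysis',
    queue = '8nh')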
bsubFileNames       = {}
bsubScriptFileNames = {}
bsubJobNames        = {}
for sampleToAnalyze in samplesToAnalyze:

    print "checking sample %s" % sampleToAnalyze

    bsubFileNames[sampleToAnalyze]       = {}
    bsubScriptFileNames[sampleToAnalyze] = {}
    bsubJobNames[sampleToAnalyze]        = {}

    inputFilePath = samples[sampleToAnalyze]['skimFilePath']
    print " inputFilePath = %s" % inputFilePath
    
    inputFileNames = None
    if inputFilePath.find("/castor") != -1:
        inputFileNames = [ file_info['path'] for file_info in castor.nslsl(inputFilePath) ]
    elif inputFilePath.find("/store") != -1:
        inputFileNames = [ file_info['path'] for file_info in eos.lsl(inputFilePath) ]
    else:
        inputFileNames = [ file for file in os.listdir(inputFilePath) ]
    #print " inputFileNames = %s" % inputFileNames
    
    inputFileNames_matched = [ os.path.basename(input_file) for input_file in input_mapper(inputFileNames, sampleToAnalyze) ]
    #print "inputFileNames_matched = %s" % inputFileNames_matched
    print "--> found %i inputFiles" % len(inputFileNames_matched)
    
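    # chunks(...) is assumed to split the matched file list into consecutive sublists of
    # at most samples[sampleToAnalyze]['numInputFilesPerJob'] entries (one batch job each)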
    for jobId, inputFileNames_chunk in enumerate(chunks(inputFileNames_matched, samples[sampleToAnalyze]['numInputFilesPerJob'])):
        # Build script for batch job submission;
        # the None in the tuple indicates that batch job has no dependencies on other batch jobs
        input_files_and_jobs = \
          [ (None, os.path.join(inputFilePath, inputFileName)) for inputFileName in inputFileNames_chunk ]
Code example #30

# CV: fill mapping of fileName to number of events contained in file into temporary cache
#     in order to reduce castor file I/O
print "initializing mapping of fileNames to number of events contained in each file..."
numEventsMap = {}
fileNamesToMap = []
for sampleToAnalyze in samplesToAnalyze:
    for channelToAnalyze in channelsToAnalyze:
        inputFilePath_channel = os.path.join(inputFilePath, version,
                                             channelToAnalyze)
        inputFileNames = None
        if inputFilePath_channel.find('/castor/') != -1:
            inputFileNames = [
                file_info['path']
                for file_info in castor.nslsl(inputFilePath_channel)
            ]
        else:
            inputFileNames = os.listdir(inputFilePath_channel)
        for inputFileName in inputFileNames:
            if inputFileName.find("".join(['_', sampleToAnalyze, '_'])) != -1 or \
               inputFileName.find("".join(['/', sampleToAnalyze, '_'])) != -1:
                fileNamesToMap.append(inputFileName)
                # CV: request inputFiles located on castor to be prestaged
                #     in order to speed-up computation of numbers of events contained in each file
                #     by 'buildConfigFile_SVfitEventHypothesisAnalyzer' function later
                if inputFilePath_channel.find('/castor/') != -1:
                    commandLine = '%s -M %s -U myfiles' % (executable_stager,
                                                           inputFileName)
                    runCommand(commandLine)
print " done."
Code example #31
def harvestTauIdEffSamples(channel = None, samples = None, inputFilePath = None,
                           outputFilePath = None, jobId = None,
                           tmpFilePath = None):

    # check that channel, samples, inputFilePath, outputFilePath, tmpFilePath and jobId
    # parameters are defined and non-empty
    if channel is None:
        raise ValueError("Undefined channel Parameter !!")
    if samples is None:
        raise ValueError("Undefined samples Parameter !!")
    if inputFilePath is None:
        raise ValueError("Undefined inputFilePath Parameter !!")
    if outputFilePath is None:
        raise ValueError("Undefined outputFilePath Parameter !!")
    if tmpFilePath is None:
        raise ValueError("Undefined tmpFilePath Parameter !!")
    if jobId is None:
        raise ValueError("Undefined jobId Parameter !!")

    if not os.path.exists(tmpFilePath):
        os.mkdir(tmpFilePath)
    if not os.path.exists(outputFilePath):
        os.mkdir(outputFilePath)

    # Use CASTOR to find the files to merge
    print "Finding CASTOR files"
    print(" inputFilePath = " + inputFilePath)
    print(" jobId = " + jobId)

    files_in_castor_info = castor.nslsl(inputFilePath)

    files_and_times = [
        (file_info['time'], file_info['path'])
        for file_info in files_in_castor_info
        if file_info['file'].find('_%s_' % jobId) != -1 ]
    # Sort files by modified time
    print "Sorting by modified time"
    files_and_times.sort()
    #print "files_and_times", files_and_times

    skim_harvest_jobs = []

    for sample in SAMPLES_TO_ANALYZE:
        print "Finding input files for", sample
        # Get final event skims that need to be merged
        event_files_to_merge = list(
            'rfio:%s' % file for time, file in files_and_times
            if file.find('tauIdEffSample_%s_%s_' %
                         (sample, jobId)) != -1)
        #print "event_files_to_merge", event_files_to_merge
        skim_output_path = os.path.join(
            outputFilePath, "tauIdEffSkim_%s_%s.root" % (sample, jobId))
        skim_harvest_jobs.append(
            (sample, skim_output_path, event_files_to_merge))

    print "Creating Makefile for skimmed event files"
    skim_MakefileName = "Makefile.mergeTauIdEffSkims_%s" % (jobId)
    buildMakefile(skim_harvest_jobs, tmpFilePath, skim_MakefileName,
                  merge_per_job = 7, harvest_tool = 'genericSkimMerger.py')

    print "Makefile built. In order to start harvesting, execute 'make -f %s -j 8 -k'" % skim_MakefileName
Code example #32
def buildConfigFile_SVfitEventHypothesisAnalyzer(sampleToAnalyze,
                                                 channelToAnalyze,
                                                 metResolution,
                                                 configFileName_template,
                                                 inputFilePath,
                                                 numInputFilesPerJob,
                                                 maxEventsPerJob,
                                                 configFilePath,
                                                 logFilePath,
                                                 outputFilePath,
                                                 numEventsMap=None):
    """Build cfg.py file to run SVfit algorithm and fill histograms of SVfit reconstructed mass"""

    #print "inputFilePath = %s" % inputFilePath

    inputFileNames = None
    if inputFilePath.find('/castor/') != -1:
        inputFileNames = [
            file_info['path'] for file_info in castor.nslsl(inputFilePath)
        ]
    else:
        inputFileNames = os.listdir(inputFilePath)
    #print "inputFileNames = %s" % inputFileNames

    # check if inputFile matches sampleToAnalyze
    inputFileNames_sample = []
    for inputFileName in inputFileNames:
        if inputFileName.find("".join(['_', sampleToAnalyze, '_'])) != -1 or \
           inputFileName.find("".join(['/', sampleToAnalyze, '_'])) != -1:
            # CV: assume that input file gets copied to local directory before cmsRun gets started
            inputFileNames_sample.append(os.path.basename(inputFileName))

    #print(sampleToAnalyze)
    #print(inputFileNames_sample)

    if len(inputFileNames_sample) == 0:
        print("Sample %s, channel = %s has no input files --> skipping !!" %
              (sampleToAnalyze, channelToAnalyze))
        return

    # CV: restrict the number of input files to 50 in order to balance event statistics
    #    (and computing time) for different mass-points
    if len(inputFileNames_sample) > 50:
        inputFileNames_sample = inputFileNames_sample[0:50]

    numInputFiles = len(inputFileNames_sample)
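    # split the input files into groups of (at most) numInputFilesPerJob files,
    # rounding up, e.g. 7 input files with numInputFilesPerJob = 3 --> 3 groups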
    numInputFileGroups = (numInputFiles / numInputFilesPerJob)
    if (numInputFiles % numInputFilesPerJob) != 0:
        numInputFileGroups = numInputFileGroups + 1

    inputFileNameGroups_sample = []
    skipEvents_sample = []
    for fileId in range(numInputFileGroups):
        inputFileIdx_first = fileId * numInputFilesPerJob
        inputFileIdx_last = inputFileIdx_first + numInputFilesPerJob
        if inputFileIdx_last > len(inputFileNames_sample):
            inputFileIdx_last = len(inputFileNames_sample)
        #print "inputFileIdx: first = %i, last = %i" % (inputFileIdx_first, inputFileIdx_last)
        numEvents = 0
        for inputFileIdx in range(inputFileIdx_first, inputFileIdx_last):
            inputFileName = inputFileNames_sample[inputFileIdx]
            numEvents_i = None
            if numEventsMap is not None and os.path.basename(inputFileName) in numEventsMap:
                numEvents_i = numEventsMap[os.path.basename(inputFileName)]
            else:
                numEvents_i = getNumEvents(
                    os.path.join(inputFilePath, inputFileName))
            numEvents = numEvents + numEvents_i
        numJobsPerGroup = (numEvents / maxEventsPerJob)
        if (numEvents % maxEventsPerJob) != 0:
            numJobsPerGroup = numJobsPerGroup + 1
        print "group of inputFiles = %s contains %i events --> splitting into %i jobs." % \
          (inputFileNames_sample[inputFileIdx_first:inputFileIdx_last], numEvents, numJobsPerGroup)
        for jobId in range(numJobsPerGroup):
            inputFileNameGroups_sample.append(
                inputFileNames_sample[inputFileIdx_first:inputFileIdx_last])
            skipEvents_sample.append(jobId * maxEventsPerJob)

    #print "inputFileNameGroups_sample = %s" % inputFileNameGroups_sample

    configFileNames = []
    outputFileNames = []
    logFileNames = []

    numJobs = len(inputFileNameGroups_sample)

    for jobId in range(numJobs):

        inputFileNames_string = "[ "
        for inputFileName_sample in inputFileNameGroups_sample[jobId]:
            inputFileNames_string += "'file:%s', " % inputFileName_sample
        inputFileNames_string += " ]"

        sample_type = None
        sample_type_Z_regex = "(Ztautau|ZplusJets|ZToTauTau)"
        sample_type_Z_matcher = re.compile(sample_type_Z_regex)
        sample_type_Higgs_regex = "(((gg|bb|vbf)(Higgs|Phi))|HToTauTau_M-)[0-9]+"
        sample_type_Higgs_matcher = re.compile(sample_type_Higgs_regex)
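        # e.g. 'ZplusJets' is classified as a Z sample, while 'ggHiggs120' or
        # 'vbfHiggs125' are classified as Higgs samples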
        if sample_type_Z_matcher.match(sampleToAnalyze):
            sample_type = 'Z'
        elif sample_type_Higgs_matcher.match(sampleToAnalyze):
            sample_type = 'Higgs'
        else:
            raise ValueError(
                "Failed to determine wether sample = %s is Z or Higgs sample !!"
                % sampleToAnalyze)

        metResolution_string = "None"
        metResolution_label = "pfMEtResMC"
        if metResolution is not None:
            metResolution_string = "%f" % metResolution
            metResolution_label = "pfMEtRes%1.0f" % metResolution
            metResolution_label = metResolution_label.replace(".", "_")

        outputFileName = 'svFitPerformanceAnalysisPlots_%s_%s_%s_%i.root' % \
          (sampleToAnalyze, channelToAnalyze, metResolution_label, jobId + 1)
        outputFileNames.append(outputFileName)

        replacements = []
        replacements.append(['sample', "'%s'" % sampleToAnalyze])
        replacements.append(['sample_type', "'%s'" % sample_type])
        replacements.append(['channel', "'%s'" % channelToAnalyze])
        replacements.append(['metResolution', "%s" % metResolution_string])
        replacements.append(['skipEvents', "%i" % skipEvents_sample[jobId]])
        replacements.append(['maxEvents', "%i" % maxEventsPerJob])
        replacements.append(['inputFileNames', "%s" % inputFileNames_string])
        replacements.append(['outputFileName', "'%s'" % outputFileName])

        configFileName = "svFitPerformanceAnalysisPlots_%s_%s_%s_%i_cfg.py" % \
          (sampleToAnalyze, channelToAnalyze, metResolution_label, jobId)
        configFileName_full = os.path.join(configFilePath, configFileName)
        replaceConfigFileParam(configFileName_template, configFileName_full,
                               replacements)
        configFileNames.append(configFileName)

        logFileName = configFileName.replace('_cfg.py', '.log')
        logFileName_full = os.path.join(logFilePath, logFileName)
        logFileNames.append(logFileName)

    retVal = {}
    retVal['inputFileNames'] = inputFileNameGroups_sample
    retVal['configFileNames'] = configFileNames
    retVal['outputFileNames'] = outputFileNames
    retVal['logFileNames'] = logFileNames

    #print " inputFileNames = %s" % inputFileNames_sample
    #print " configFileNames = %s" % configFileNames
    #print " outputFileNames = %s" % outputFileNames
    #print " logFileNames = %s" % logFileNames

    return retVal
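
# A minimal usage sketch; the sample, channel and all paths below are illustrative
# placeholders, not taken from the original driver script:
jobInfo = buildConfigFile_SVfitEventHypothesisAnalyzer(
    sampleToAnalyze = 'ZplusJets',
    channelToAnalyze = 'muTau',
    metResolution = None,
    configFileName_template = 'runSVfitPerformanceAnalysis_cfg.py',
    inputFilePath = '/castor/cern.ch/user/x/dummy/svFitStudies',
    numInputFilesPerJob = 5,
    maxEventsPerJob = 25000,
    configFilePath = 'configs',
    logFilePath = 'logs',
    outputFilePath = '/data1/dummy/svFitStudies',
    numEventsMap = None)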