Esempio n. 1
0
    def connect_by_pck_(self):
        ''' '''
        from CMGTools.RootTools.utils.getFiles import getFiles

        redict_aliases = dict(zip(self.aliases.values(), self.aliases.keys()))

        regex = re.compile(r'(?P<sample>[a-zA-Z0-9_]+[a-zA-Z])(?:[0-9]+)$')

        for alias_k, alias_v in self.mc_dict.items():
            m = regex.match(alias_k)
            if m and 'QCD' not in alias_k:
                alias_k = m.group('sample')
            if alias_k not in self.aliases.values():
                continue
            sample_pck = '*'.join([
                '', redict_aliases[alias_k].replace('/', '').replace('.', '*'),
                self.tier.replace('%', ''), self.pattern + '.pck'
            ])
            cached_sample = glob.glob('/'.join(
                [self.homedir, '.cmgdataset', sample_pck]))
            single_mc_list = [alias_v]

            if len(cached_sample) == 0:
                print 'sample not cached yet, connecting to the DB'
                from CMGTools.RootTools.utils.connect import connect
                connect(single_mc_list,
                        self.tier,
                        self.pattern,
                        self.aliases,
                        cache=self.cache,
                        verbose=self.verbose)

            elif len(cached_sample) > 1:
                print 'better specify which sample, many found'
                print cached_sample
                raise

            else:
                file = open(cached_sample[0])
                mycomp = pickle.load(file)
                single_mc_list[0].files = getFiles(
                    '/'.join([''] + mycomp.lfnDir.split('/')
                             [mycomp.lfnDir.split('/').index('CMG') + 1:]),
                    mycomp.user,
                    self.pattern,
                    useCache=self.cache)
                print 'attached files to %s' % (single_mc_list[0].name)
                print 'files %s' % (
                    '/'.join(single_mc_list[0].files[0].split('/')[:-1] +
                             [self.pattern]))
Esempio n. 2
0
    def connect_by_pck_(self):
        ''' '''
        from CMGTools.RootTools.utils.getFiles import getFiles

        redict_aliases = dict( zip(self.aliases.values(), self.aliases.keys()) )

        regex = re.compile(r'(?P<sample>[a-zA-Z0-9_]+[a-zA-Z])(?:[0-9]+)$')

        for alias_k, alias_v in self.mc_dict.items():
            m = regex.match(alias_k)
            if m and 'QCD' not in alias_k:
                alias_k = m.group('sample')
            if alias_k not in self.aliases.values():
                continue
            sample_pck = '*'.join(['',redict_aliases[alias_k].replace('/','').replace('.','*'),
                                   self.tier.replace('%',''),self.pattern+'.pck'])
            cached_sample = glob.glob('/'.join([self.homedir,'.cmgdataset',sample_pck]))
            single_mc_list = [alias_v]

            if len(cached_sample) == 0:
                print 'sample not cached yet, connecting to the DB'
                from CMGTools.RootTools.utils.connect import connect
                connect(single_mc_list, self.tier, self.pattern, self.aliases,
                         cache=self.cache, verbose=self.verbose)

            elif len(cached_sample) >1:
                print 'better specify which sample, many found'
                print cached_sample
                raise

            else:
                file = open(cached_sample[0])
                mycomp = pickle.load(file)
                single_mc_list[0].files = getFiles('/'.join( ['']+mycomp.lfnDir.split('/')[mycomp.lfnDir.split('/').index('CMG')+1:] ),
                                                              mycomp.user, self.pattern, useCache=self.cache)
                print 'attached files to %s' %(single_mc_list[0].name)
                print 'files %s' %('/'.join(single_mc_list[0].files[0].split('/')[:-1]+[self.pattern]))
Esempio n. 3
0
def _stepEfficiency(step):
    '''Return the efficiency contributed by a single processing step.

    'PFAOD' steps count as 1.0, 'PATCMG' steps use ``step['fraction']``
    (falling back to ``step['jobeff']`` when the fraction is None) and every
    other step uses ``step['jobeff']``. The result may be None.
    '''
    stepName = step['step']
    if stepName == 'PFAOD':
        # not to double count with PATCMG
        return 1.0
    if stepName == 'PATCMG':
        fraction = step['fraction']
        if fraction is not None:
            return fraction
    return step['jobeff']


def connectSample(components, row, filePattern, aliases, cache, verbose):
    '''Fill the component matching a dataset row with its files and counts.

    ``row`` provides, in order, the dataset id, its path name and the file
    owner. The path name is translated to a component name through
    ``findAlias``; the dataset history is collected with
    ``findFirstAncestor`` / ``processInfo`` and the per-step efficiencies
    are multiplied into a global efficiency.

    On the single component of ``components`` carrying that name this sets
    ``dataset_entries``, ``files`` and, unless the name starts with 'data_'
    or 'embed_', ``nGenEvents``.

    Returns False when no alias is found, or when zero or several
    components match the alias; otherwise returns nothing (None).
    '''
    datasetId = row[0]
    path_name = row[1]
    file_owner = row[2]

    compName = findAlias(path_name, aliases)
    if compName is None:
        print 'WARNING: cannot find alias for', path_name
        return False

    ancestry = []
    findFirstAncestor(datasetId, ancestry)
    dsInfo = processInfo(ancestry)
    if verbose:
        pprint.pprint(dsInfo)

    # From here on the path name is the one recorded in the first info entry.
    path_name = dsInfo[0]['path_name']
    nEvents = dsInfo.primary_dataset_entries
    taskurl = 'https://savannah.cern.ch/task/?{task_id}'.format(
        task_id=dsInfo[0]['task_id'])

    globalEff = 1.
    for step in dsInfo:
        eff = _stepEfficiency(step)
        if eff is None:
            print 'WARNING: efficiency not determined for', compName
            eff = 0.0
        try:
            globalEff *= eff
        except TypeError:
            # Dump the full information to help debugging, then propagate.
            pprint.pprint(dsInfo)
            raise

    matches = []
    for candidate in components:
        if candidate.name == compName:
            matches.append(candidate)
    if not matches:
        print 'WARNING no component found for compName', compName
        return False
    if len(matches) > 1:
        print 'WARNING find several components for compName', compName
        print map(str, matches)
        return False

    comp = matches[0]
    comp.dataset_entries = dsInfo.dataset_entries
    if not comp.name.startswith(('data_', 'embed_')):
        comp.nGenEvents = nEvents
        if comp.nGenEvents is None:
            print 'WARNING: nGenEvents is None, setting it to 1.'
            comp.nGenEvents = 1.
        if comp.nGenEvents == 1.:
            # Unknown (or unit) event count: flag it with a negative
            # efficiency and reset the number of generated events.
            globalEff = -1.
            comp.nGenEvents = 0
        else:
            comp.nGenEvents *= globalEff

    print 'LOADING:', comp.name, path_name, nEvents, globalEff, taskurl
    comp.files = getFiles(path_name, file_owner, filePattern, cache)

    if comp.name.startswith('data_'):
        if globalEff < 0.99:
            print 'ARGH! data sample is not complete.', taskurl
            print dsInfo
    elif globalEff < 0.9:
        print 'WEIRD! Efficiency is way too low ({globalEff})! you might have to edit your cfg manually.'.format(
            globalEff=globalEff)
        print dsInfo
Esempio n. 4
0
##     deltaEta = 3.5,
##     cjvPtCut = 30.,
##     )

# Analyzer configuration identified by 'PFTreeProducer'; no further options
# are passed.
treeProducer = cfg.Analyzer('PFTreeProducer')


###############################################################################


from CMGTools.ZJetsTutorial.samples.run2012.ewk import DYJets
from CMGTools.RootTools.utils.getFiles import getFiles

# Attach the input files to the DYJets component. Arguments: dataset path,
# file owner, and pattern the file names must match.
DYJets.files = getFiles(
    '/DYJetsToLL_M-50_TuneZ2Star_8TeV-madgraph-tarball/Summer12_DR53X-PU_S10_START53_V7A-v1/AODSIM/V5_B/PAT_CMG_V5_16_0',
    'cmgtools',
    '.*root',
)

###############################################################################


# Only one MC component is used here.
MC_list = [DYJets]

# Alias of MC_list: both names refer to the same list object.
allsamples = MC_list

## allsamples = [DYJets]
## for c in allsamples:
##     c.triggers = [
##         'HLT_Mu17_Mu8_v16',
##         'HLT_Mu17_Mu8_v17',
##         'HLT_Mu17_Mu8_v18',
##         'HLT_Mu17_Mu8_v19',
from CMGTools.RootTools.RootTools import * 



# Analyzer configuration identified by 'WNJetsAnalyzer', with its verbose
# option switched off.
WNJetsAna = cfg.Analyzer('WNJetsAnalyzer', verbose=False)


#########################################################################################


# MC component 'WJets'. Its files are looked up with getFiles (dataset path,
# file owner, file name pattern); nGenEvents is set to 1 and no trigger is
# listed.
WJets = cfg.MCComponent(
    name='WJets',
    files=getFiles(
        '/WJetsToLNu_TuneZ2_7TeV-madgraph-tauola/Fall11-PU_S6_START42_V14B-v1/AODSIM/V5_B/PAT_CMG_V5_6_0_B',
        'cmgtools',
        'cmgTuple.*root',
    ),
    xSection=31314.,
    nGenEvents=1,
    triggers=[],
    effCorrFactor=1,
)


#########################################################################################

# Components selected in this configuration: WJets only.
selectedComponents = [WJets]

# The sequence contains a single analyzer, WNJetsAna.
sequence = cfg.Sequence([WNJetsAna])

Esempio n. 6
0
def connectSample(components, row, filePattern, aliases, cache, verbose):
    '''Fill the component matching a dataset row with its files and counts.

    ``row`` provides, in order, the dataset id, its path name and the file
    owner. The path name is translated to a component name through
    ``findAlias``; the dataset history is collected with
    ``findFirstAncestor`` / ``processInfo`` and the per-step efficiencies
    are multiplied into a global efficiency.

    On the single component of ``components`` carrying that name this sets
    ``dataset_entries``, ``files`` and, unless the name starts with 'data_'
    or 'embed_', ``nGenEvents``.

    Returns False when no alias is found, or when zero or several
    components match the alias; otherwise returns nothing (None).
    '''
    datasetId = row[0]
    path_name = row[1]
    file_owner = row[2]

    compName = findAlias(path_name, aliases)
    if compName is None:
        print 'WARNING: cannot find alias for', path_name
        return False

    ancestry = []
    findFirstAncestor(datasetId, ancestry)
    dsInfo = processInfo(ancestry)
    if verbose:
        pprint.pprint( dsInfo )

    # From here on the path name is the one recorded in the first info entry.
    path_name = dsInfo[0]['path_name']
    nEvents = dsInfo.primary_dataset_entries
    taskurl = 'https://savannah.cern.ch/task/?{task_id}'.format(task_id=dsInfo[0]['task_id'])

    globalEff = 1.
    for step in dsInfo:
        kind = step['step']
        if kind == 'PFAOD':
            eff = 1.0 # not to double count with PATCMG
        else:
            # Only PATCMG steps provide a 'fraction'; everything else, and a
            # PATCMG step without fraction, uses the job efficiency.
            eff = step['fraction'] if kind == 'PATCMG' else None
            if eff is None:
                eff = step['jobeff']
        if eff is None:
            print 'WARNING: efficiency not determined for',compName
            eff = 0.0
        try:
            globalEff *= eff
        except TypeError:
            # Dump the full information to help debugging, then propagate.
            pprint.pprint(dsInfo)
            raise

    matches = []
    for candidate in components:
        if candidate.name == compName:
            matches.append(candidate)
    if not matches:
        print 'WARNING no component found for compName', compName
        return False
    if len(matches)>1:
        print 'WARNING find several components for compName', compName
        print map(str, matches)
        return False

    comp = matches[0]
    comp.dataset_entries = dsInfo.dataset_entries
    if not comp.name.startswith(('data_', 'embed_')):
        comp.nGenEvents = nEvents
        if comp.nGenEvents is None:
            print 'WARNING: nGenEvents is None, setting it to 1.'
            comp.nGenEvents = 1.
        if comp.nGenEvents == 1.:
            # Unknown (or unit) event count: flag it with a negative
            # efficiency and reset the number of generated events.
            globalEff = -1.
            comp.nGenEvents = 0
        else:
            comp.nGenEvents *= globalEff

    print 'LOADING:', comp.name, path_name, nEvents, globalEff, taskurl
    comp.files = getFiles(path_name, file_owner, filePattern, cache)

    if comp.name.startswith('data_'):
        if globalEff<0.99:
            print 'ARGH! data sample is not complete.', taskurl
            print dsInfo
    elif globalEff<0.9:
        print 'WEIRD! Efficiency is way too low ({globalEff})! you might have to edit your cfg manually.'.format(globalEff=globalEff)
        print dsInfo