Ejemplo n.º 1
0
def getFileList(procData,DefaultNFilesPerJob):
   """Return the input files of one process entry, grouped per job.

   Reads the 'dset', 'miniAOD', 'dbsURL', 'lumiMask' and 'split' labels of
   procData through getByLabel().  When 'dset' contains "/MINIAOD" or
   "amagitte", or a 'miniAOD' label is set, the files come from DAS/DBS or
   from storeTools.fillFromStore(), are filtered by
   storeTools.keepOnlyFilesFromGoodRun() and are packed into groups; each
   group is a single string of double-quoted file names.  Otherwise one
   "<opt.indir>/<origdtag>_<segment>.root" entry is produced per segment.

   Side effects: resets and then sets the global isLocalSample, and appends
   the dataset to the global nonLocalSamples when localTier is set and the
   dataset is not fully hosted there.

   procData            -- process description handed to getByLabel().
   DefaultNFilesPerJob -- group size used when the 'split' label is not > 0.
   Returns the list of group strings.
   """
   global nonLocalSamples
   global isLocalSample
   isLocalSample = False

   FileList = []
   dset = getByLabel(procData,'dset','')
   miniAOD = getByLabel(procData,'miniAOD','')
   isMINIAODDataset = ("/MINIAOD" in dset) or ("amagitte" in dset)

   if(isMINIAODDataset or len(miniAOD)>0):
      instance = ""
      dbsURL = getByLabel(procData,'dbsURL','')
      if(len(dbsURL)>0): instance = "instance=prod/"+dbsURL

      # The sample counts as local only if a site line mentions both localTier and a 100.00% fraction
      listSites = commands.getstatusoutput('das_client.py --query="site dataset='+dset + ' ' + instance + ' | grep site.name,site.dataset_fraction " --limit=0')[1]
      IsOnLocalTier=False
      for site in listSites.split('\n'):
         if(localTier != "" and localTier in site and '100.00%' in site):
            IsOnLocalTier=True
            print("Sample is found to be on the local grid tier (%s): %s" % (localTier, site))
            break
      isLocalSample = IsOnLocalTier

      if(localTier != "" and not IsOnLocalTier):
         nonLocalSamples += [dset]

      files = []
      if(IsOnLocalTier or isMINIAODDataset):
         if(DatasetFileDB=="DAS"):
            files = commands.getstatusoutput('das_client.py --query="file dataset='+dset + ' ' + instance + '" --limit=0')[1].split()
         elif(DatasetFileDB=="DBS"):
            curlCommand="curl -ks --key $X509_USER_PROXY --cert $X509_USER_PROXY -X GET "
            dbsPath="https://cmsweb.cern.ch/dbs/prod/global/DBSReader"
            sedTheList=' | sed \"s#logical_file_name#\\nlogical_file_name#g\" | sed \"s#logical_file_name\': \'##g\" | sed \"s#\'}, {u\'##g\" | sed \"s#\'}]##g\" | grep store '
            files = commands.getstatusoutput(initialCommand + curlCommand+'"'+dbsPath+'/files?dataset='+dset+'"'+sedTheList)[1].split()

         if IsOnLocalTier:
            # direct access path, chosen from the submission host
            if  (hostname.find("iihe.ac.be")!=-1): prefix = "dcap://maite.iihe.ac.be:/pnfs/iihe/cms/ph/sc4"
            elif(hostname.find("ucl.ac.be" )!=-1): prefix = "/storage/data/cms"
            else:                                  prefix = "root://eoscms//eos/cms"
         else:
            prefix = "root://cms-xrd-global.cern.ch/" #works worldwide
           #prefix = "root://xrootd-cms.infn.it/"     #optimal for EU side
           #prefix = "root://cmsxrootd.fnal.gov/"     #optimal for US side

         #make sure that we only consider root files
         files = [prefix+x for x in files if ".root" in x]

      else:
         # Only reachable when the 'miniAOD' label is non-empty (see the enclosing condition)
         print("Processing private local sample: " + miniAOD)
         files = storeTools.fillFromStore(miniAOD,0,-1,True)

      files = storeTools.keepOnlyFilesFromGoodRun(files, getByLabel(procData,'lumiMask',''))
      split=getByLabel(procData,'split',-1)
      if(split>0):
         NFilesPerJob = max(1,len(files)//split)
      else:
         NFilesPerJob = DefaultNFilesPerJob

      for g in range(0, len(files), NFilesPerJob):
         FileList.append(''.join('"'+f+'",\\n' for f in files[g:g+NFilesPerJob]))

   else:
      # 'split' and 'segment' are locals of this function: read the split here
      # and report each file inside the loop, where 'segment' exists.
      split=getByLabel(procData,'split',-1)
      for segment in range(0,split):
         eventsFile=opt.indir + '/' + origdtag + '_' + str(segment) + '.root'
         print("Processing a non EDM/miniAOD sample in : " + eventsFile)
         if(eventsFile.find('/store/')==0): eventsFile = commands.getstatusoutput('cmsPfn ' + eventsFile)[1]
         FileList.append('"'+eventsFile+'"')
   return FileList
Ejemplo n.º 2
0
            if (opt.onlytag != 'all'):
                itag = d['dtag']
                if (itag.find(opt.onlytag) < 0): continue

            inputdir = opt.inDir + "/" + d['dtag']
            print str(d['dtag']) + " --> " + inputdir

            if (opt.duplicates):
                dirToClean = inputdir
                if (dirToClean.find('/storage/data/cms/store/')):
                    dirToClean = dirToClean.replace(
                        '/storage/data/cms/store/',
                        '/storage_rw/data/cms/store/')  #Hack for Louvain T2
                removeDuplicates(dirToClean)

            filenames = fillFromStore(inputdir, 0, -1, False)
            nfiles = len(filenames)
            filenames = addPrefixSuffixToFileList("   '", filenames, "',")

            split = getByLabel(d, 'split', 1)
            NFilesToMerge = nfiles // split
            NFilesToMergeRemains = nfiles % split
            startFile = 0
            endFile = 0
            for segment in range(0, split):
                startFile = endFile
                endFile = endFile + NFilesToMerge
                if (NFilesToMergeRemains > 0):
                    endFile += 1
                    NFilesToMergeRemains -= 1
Ejemplo n.º 3
0
def getFileList(procData, DefaultNFilesPerJob):
    """Return the input files of one process entry, grouped per job.

    The 'miniAOD' and 'dset' labels of procData are read as lists of samples.
    'dset' entries containing "/MINIAOD" are handled together with the
    'miniAOD' entries: their files come from DAS/DBS or from
    storeTools.fillFromStore(), are filtered by
    storeTools.keepOnlyFilesFromGoodRun() and are packed into groups, each
    group being one string of double-quoted file names.  For every other
    'dset' entry, one "<opt.indir>/<origdtag>_<segment>.root" file is added
    per segment.

    Side effects: resets and then sets the global isLocalSample, and appends
    to the global nonLocalSamples every sample that is not best hosted on
    localTier (when localTier is set).

    procData            -- process description handed to getByLabel().
    DefaultNFilesPerJob -- group size used when the 'split' label is not > 0.
    Returns the list of group strings.
    """
    global nonLocalSamples
    global isLocalSample
    isLocalSample = False

    FileList = []
    nonMiniAODSamples = []
    # Copy before appending: the list returned by getByLabel() may be the one
    # stored in procData, and appending in place would alter it for later calls.
    miniAODSamples = list(getByLabel(procData, 'miniAOD', []))
    for s in getByLabel(procData, 'dset', []):
        if "/MINIAOD" in s:
            miniAODSamples.append(s)
        else:
            nonMiniAODSamples.append(s)

    # The DAS instance does not depend on the sample, so build it once.
    instance = ""
    dbsURL = getByLabel(procData, 'dbsURL', '')
    if len(dbsURL) > 0:
        instance = "instance=prod/" + dbsURL

    for sample in miniAODSamples:
        listSites = DASQuery('site dataset=' + sample + ' ' + instance +
                             ' | grep site.name,site.replica_fraction')
        MaxFraction = 0
        FractionOnLocal = -1
        if localTier != "":
            for site in listSites.split('\n'):
                # The third whitespace-separated field carries the fraction;
                # lines without a parsable number there are ignored.
                try:
                    fraction = float(
                        site.split()[2].replace('%', '').replace('"', ''))
                except (IndexError, ValueError):
                    continue
                MaxFraction = max(MaxFraction, fraction)
                if localTier in site:
                    FractionOnLocal = fraction

        # Local means that no other site holds a larger fraction.
        IsOnLocalTier = (FractionOnLocal == MaxFraction)
        if IsOnLocalTier:
            print("Sample is found to be on the local grid tier %s (%f%%) for %s"
                  % (localTier, FractionOnLocal, sample))

        isLocalSample = IsOnLocalTier

        if (localTier != "" and not IsOnLocalTier):
            nonLocalSamples += [sample]

        files = []
        if (IsOnLocalTier or "/MINIAOD" in sample):
            if (DatasetFileDB == "DAS"):
                files = DASQuery('file dataset=' + sample + ' ' +
                                 instance).split()
            elif (DatasetFileDB == "DBS"):
                curlCommand = "curl -ks --key $X509_USER_PROXY --cert $X509_USER_PROXY -X GET "
                dbsPath = "https://cmsweb.cern.ch/dbs/prod/global/DBSReader"
                sedTheList = ' | sed \"s#logical_file_name#\\nlogical_file_name#g\" | sed \"s#logical_file_name\': \'##g\" | sed \"s#\'}, {u\'##g\" | sed \"s#\'}]##g\" | grep store '
                files = commands.getstatusoutput(initialCommand + curlCommand +
                                                 '"' + dbsPath +
                                                 '/files?dataset=' + sample +
                                                 '"' + sedTheList)[1].split()

            if IsOnLocalTier:
                # direct access path, chosen from the submission host
                if (hostname.find("iihe.ac.be") != -1):
                    prefix = "dcap://maite.iihe.ac.be/pnfs/iihe/cms/ph/sc4"
                elif (hostname.find("ucl.ac.be") != -1):
                    prefix = "/storage/data/cms"
                else:
                    prefix = "root://eoscms//eos/cms"
            else:
                prefix = "root://cms-xrd-global.cern.ch/"  #works worldwide
                #prefix = "root://xrootd-cms.infn.it/"     #optimal for EU side
                #prefix = "root://cmsxrootd.fnal.gov/"     #optimal for US side

            #make sure that we only consider root files
            files = [prefix + x for x in files if ".root" in x]

        else:
            print("Processing private local sample: " + sample)
            files = storeTools.fillFromStore(sample, 0, -1, True)

        files = storeTools.keepOnlyFilesFromGoodRun(
            files, os.path.expandvars(getByLabel(procData, 'lumiMask', '')))
        split = getByLabel(procData, 'split', -1)
        if (split > 0):
            NFilesPerJob = max(1, len(files) // split)
        else:
            NFilesPerJob = DefaultNFilesPerJob
            #make sure the number of jobs isn't too big
            if ((len(files) // NFilesPerJob) > 100):
                NFilesPerJob = len(files) // 100

        for g in range(0, len(files), NFilesPerJob):
            FileList.append(''.join('"' + f + '",\\n'
                                    for f in files[g:g + NFilesPerJob]))

    for sample in nonMiniAODSamples:
        # 'split' and 'segment' are locals of this function: read the split
        # here (the loop above may not have run) and report each file inside
        # the loop, where 'segment' exists.
        split = getByLabel(procData, 'split', -1)
        for segment in range(0, split):
            eventsFile = opt.indir + '/' + origdtag + '_' + str(
                segment) + '.root'
            print("Processing a non EDM/miniAOD sample in : " + eventsFile)
            if (eventsFile.find('/store/') == 0):
                eventsFile = commands.getstatusoutput('cmsPfn ' +
                                                      eventsFile)[1]
            FileList.append('"' + eventsFile + '"')
    return FileList
Ejemplo n.º 4
0
        # For every entry of 'data': optionally clean duplicates, list its
        # files and work out the file range of each merge segment.
        for d in data :

            #tag veto
            if(opt.onlytag!='all') :
                itag=d['dtag']
                if(itag.find(opt.onlytag)<0) : continue

            inputdir = opt.inDir+"/"+d['dtag']
            print(str(d['dtag'])+" --> "+inputdir)

            if(opt.duplicates):
               # Hack for Louvain T2: clean through the read-write mount.
               # replace() is a no-op when the prefix is absent; the former
               # truthiness test on find() returned 0 (false) for paths that
               # start with the prefix and therefore skipped them.
               dirToClean = inputdir.replace('/storage/data/cms/store/', '/storage_rw/data/cms/store/')
               removeDuplicates(dirToClean)

            filenames=fillFromStore(inputdir,0,-1,False)
            nfiles=len(filenames)
            filenames=addPrefixSuffixToFileList("   '", filenames, "',")

            # every segment gets nfiles//split files; the first nfiles%split
            # segments take one extra. [startFile,endFile) is the current slice
            split=getByLabel(d,'split',1)
            NFilesToMerge = nfiles//split
            NFilesToMergeRemains = nfiles%split
            startFile = 0
            endFile = 0
            for segment in range(0,split) :
                startFile = endFile
                endFile   = endFile + NFilesToMerge
                if(NFilesToMergeRemains>0):
                    endFile+=1
                    NFilesToMergeRemains-=1
Ejemplo n.º 5
0
        # For every entry of 'data': optionally clean duplicates, list its
        # files in natural order and work out the file range of each segment.
        for d in data :

            #tag veto
            if(opt.onlytag!='all') :
                itag=d['dtag']
                if(itag.find(opt.onlytag)<0) : continue

            inputdir = opt.inDir+"/"+d['dtag']
            print(str(d['dtag'])+" --> "+inputdir)

            if(opt.duplicates):
               # Hack for Louvain T2: clean through the read-write mount.
               # replace() is a no-op when the prefix is absent; the former
               # truthiness test on find() returned 0 (false) for paths that
               # start with the prefix and therefore skipped them.
               dirToClean = inputdir.replace('/storage/data/cms/store/', '/storage_rw/data/cms/store/')
               removeDuplicates(dirToClean)

            filenames=LaunchOnCondor.natural_sort(fillFromStore(inputdir,0,-1,False))
            nfiles=len(filenames)
            filenames=addPrefixSuffixToFileList("   '", filenames, "',")

            # every segment gets nfiles//split files; the first nfiles%split
            # segments take one extra. [startFile,endFile) is the current slice
            split=getByLabel(d,'split',1)
            NFilesToMerge = nfiles//split
            NFilesToMergeRemains = nfiles%split
            startFile = 0
            endFile = 0
            for segment in range(0,split) :
                startFile = endFile
                endFile   = endFile + NFilesToMerge
                if(NFilesToMergeRemains>0):
                    endFile+=1
                    NFilesToMergeRemains-=1
Ejemplo n.º 6
0
            # Directories to process for this entry, taken from the key named
            # by opt.dirtag; entries where that lookup fails are skipped.
            # NOTE(review): the bare except also hides errors other than a
            # missing key -- confirm whether that is intended.
            alldirs = []
            try:
                alldirs = d[opt.dirtag]
            except:
                continue

            # Tag veto: when a specific tag is requested, skip entries whose
            # 'dtag' does not contain it.
            if (opt.onlytag != 'all'):
                itag = d['dtag']
                if (itag.find(opt.onlytag) < 0): continue

            idir = 0  # 1-based counter of the directory being processed
            for dir in alldirs:
                idir = idir + 1
                filenames = fillFromStore(dir, 0, -1, False)
                nfiles = len(filenames)

                # One job by default; with a positive files-per-job setting,
                # njobs is nfiles / fperjob rounded up.
                njobs = 1
                sleep = 0
                if (opt.fperjob > 0):
                    njobs = nfiles // opt.fperjob
                    if (nfiles % opt.fperjob > 0): njobs = njobs + 1
                    # NOTE(review): how 'sleep' is consumed is not visible in
                    # this fragment; '/' truncates for ints under Python 2.
                    sleep = 2 * opt.fperjob / 6

                # Special case for event generation: the job layout comes from
                # the entry itself.
                # NOTE(review): this overwrites opt.fperjob, so the new value
                # is also seen by every later iteration -- confirm intended.
                if dir == "none":
                    opt.fperjob = d['npersplit']
                    njobs = d['split']

                # Substitute some job parameters by JSON file parameters
Ejemplo n.º 7
0
def getFileList(procData,DefaultNFilesPerJob):
   """Return the input files of one process entry, grouped per job.

   The 'miniAOD' and 'dset' labels of procData are read as lists of samples.
   'dset' entries containing "/MINIAOD" are handled together with the
   'miniAOD' entries: their files come from DAS/DBS or from
   storeTools.fillFromStore(), are filtered by
   storeTools.keepOnlyFilesFromGoodRun() and are packed into groups, each
   group being one string of double-quoted file names.  For every other
   'dset' entry, one "<opt.indir>/<origdtag>_<segment>.root" file is added
   per segment.

   Side effects: resets and then sets the global isLocalSample, and appends
   to the global nonLocalSamples every sample that is not best hosted on
   localTier (when localTier is set).

   procData            -- process description handed to getByLabel().
   DefaultNFilesPerJob -- group size used when the 'split' label is not > 0.
   Returns the list of group strings.
   """
   global nonLocalSamples
   global isLocalSample
   isLocalSample = False

   FileList = []
   nonMiniAODSamples = []
   # Copy before appending: the list returned by getByLabel() may be the one
   # stored in procData, and appending in place would alter it for later calls.
   miniAODSamples = list(getByLabel(procData,'miniAOD',[]))
   for s in getByLabel(procData,'dset',[]):
      if("/MINIAOD" in s): miniAODSamples.append(s)
      else: nonMiniAODSamples.append(s)

   # The DAS instance does not depend on the sample, so build it once.
   instance = ""
   dbsURL = getByLabel(procData,'dbsURL','')
   if(len(dbsURL)>0): instance = "instance=prod/"+dbsURL

   for sample in miniAODSamples:
      listSites = DASQuery('site dataset='+sample + ' ' + instance + ' | grep site.name,site.replica_fraction')
      MaxFraction=0
      FractionOnLocal=-1
      if(localTier!=""):
         for site in listSites.split('\n'):
            # The third whitespace-separated field carries the fraction;
            # lines without a parsable number there are ignored.
            try:
               fraction = float(site.split()[2].replace('%','').replace('"',''))
            except (IndexError, ValueError):
               continue
            MaxFraction = max(MaxFraction, fraction)
            if(localTier in site):
               FractionOnLocal = fraction

      # Local means that no other site holds a larger fraction.
      IsOnLocalTier = (FractionOnLocal == MaxFraction)
      if(IsOnLocalTier):
         print("Sample is found to be on the local grid tier %s (%f%%) for %s" % (localTier, FractionOnLocal, sample))

      isLocalSample = IsOnLocalTier

      if(localTier != "" and not IsOnLocalTier):
         nonLocalSamples += [sample]

      files = []
      if(IsOnLocalTier or "/MINIAOD" in sample):
         if(DatasetFileDB=="DAS"):
            files = DASQuery('file dataset='+sample + ' ' + instance).split()
         elif(DatasetFileDB=="DBS"):
            curlCommand="curl -ks --key $X509_USER_PROXY --cert $X509_USER_PROXY -X GET "
            dbsPath="https://cmsweb.cern.ch/dbs/prod/global/DBSReader"
            sedTheList=' | sed \"s#logical_file_name#\\nlogical_file_name#g\" | sed \"s#logical_file_name\': \'##g\" | sed \"s#\'}, {u\'##g\" | sed \"s#\'}]##g\" | grep store '
            files = commands.getstatusoutput(initialCommand + curlCommand+'"'+dbsPath+'/files?dataset='+sample+'"'+sedTheList)[1].split()

         if IsOnLocalTier:
            # direct access path, chosen from the submission host
            if  (hostname.find("iihe.ac.be")!=-1): prefix = "dcap://maite.iihe.ac.be/pnfs/iihe/cms/ph/sc4"
            elif(hostname.find("ucl.ac.be" )!=-1): prefix = "/storage/data/cms"
            else:                                  prefix = "root://eoscms//eos/cms"
         else:
            prefix = "root://cms-xrd-global.cern.ch/" #works worldwide
           #prefix = "root://xrootd-cms.infn.it/"     #optimal for EU side
           #prefix = "root://cmsxrootd.fnal.gov/"     #optimal for US side

         #make sure that we only consider root files
         files = [prefix+x for x in files if ".root" in x]

      else:
         print("Processing private local sample: " + sample)
         files = storeTools.fillFromStore(sample,0,-1,True)

      files = storeTools.keepOnlyFilesFromGoodRun(files, os.path.expandvars(getByLabel(procData,'lumiMask','')))
      split=getByLabel(procData,'split',-1)
      if(split>0):
         NFilesPerJob = max(1,len(files)//split)
      else:
         NFilesPerJob = DefaultNFilesPerJob
         #make sure the number of jobs isn't too big
         if((len(files)//NFilesPerJob)>100): NFilesPerJob = len(files)//100

      for g in range(0, len(files), NFilesPerJob):
         FileList.append(''.join('"'+f+'",\\n' for f in files[g:g+NFilesPerJob]))

   for sample in nonMiniAODSamples:
      split=getByLabel(procData,'split',-1)
      for segment in range(0,split) :
         eventsFile=opt.indir + '/' + origdtag + '_' + str(segment) + '.root'
         print("Processing a non EDM/miniAOD sample in : " + eventsFile)
         if(eventsFile.find('/store/')==0)  : eventsFile = commands.getstatusoutput('cmsPfn ' + eventsFile)[1]
         FileList.append('"'+eventsFile+'"')
   return FileList
Ejemplo n.º 8
0
                itag = d['dtag']
                if (itag.find(opt.onlytag) < 0): continue

            inputdir = opt.inDir + "/" + d['dtag']
            print(str(d['dtag']) + " --> " + inputdir)

            if (opt.duplicates):
                # Hack for Louvain T2: clean through the read-write mount.
                # str.replace() leaves the path untouched when the prefix is
                # absent, so no guard is needed.  The former guard tested the
                # truthiness of str.find(), which is 0 (false) precisely when
                # the path starts with the prefix, so the rewrite was skipped.
                dirToClean = inputdir.replace('/storage/data/cms/store/',
                                              '/storage_rw/data/cms/store/')
                removeDuplicates(dirToClean)

            filenames = LaunchOnCondor.natural_sort(
                fillFromStore(inputdir, 0, -1, False))
            nfiles = len(filenames)
            filenames = addPrefixSuffixToFileList("   '", filenames, "',")

            # Spread nfiles over 'split' segments: every segment gets
            # nfiles // split files and the first nfiles % split segments
            # take one extra.  [startFile, endFile) is the current slice.
            split = getByLabel(d, 'split', 1)
            NFilesToMerge = nfiles // split
            NFilesToMergeRemains = nfiles % split
            startFile = 0
            endFile = 0
            for segment in range(0, split):
                startFile = endFile
                endFile = endFile + NFilesToMerge
                if (NFilesToMergeRemains > 0):
                    endFile += 1
                    NFilesToMergeRemains -= 1
                                
            # For simulated samples with a positive cross section, scale xsec
            # by every entry of br.
            if(xsec>0 and not isdata) :
                for ibr in br :  xsec = xsec*ibr
            split=getByLabel(d,'split',1)

            # Build the list of input files for this entry.
            FileList = [];
            # NOTE(review): miniAODSamples is not used in the visible part of
            # this fragment.
            miniAODSamples = getByLabel(d,'miniAOD','')
            if(("/MINIAOD" in getByLabel(d,'dset','')) or len(getByLabel(d,'miniAOD',''))>0):
               # Raw DAS answer listing the sites for the dataset.
               listSites = commands.getstatusoutput('das_client.py --query="site dataset='+getByLabel(d,'dset','') + '" --limit=0')[1]

               list = []
               # Case 1: localTier appears in the site listing and its name
               # contains "CERN" -> prefix the DAS file names with the EOS path.
               if(localTier in listSites and "CERN" in localTier):
                  list = commands.getstatusoutput('das_client.py --query="file dataset='+getByLabel(d,'dset','') + '" --limit=0')[1].split()
                  for i in range(0,len(list)): list[i] = "root://eoscms//eos/cms"+list[i]
               # Case 2: a 'miniAOD' label is set -> list the files from the store.
               elif(len(getByLabel(d,'miniAOD',''))>0):
                  list = storeTools.fillFromStore(getByLabel(d,'miniAOD',''),0,-1,True);                  
               # Case 3: MINIAODSIM dataset -> prefix the DAS file names with
               # the global xrootd redirector.
               elif("/MINIAODSIM" in getByLabel(d,'dset','')):

                  # One-time grid proxy initialisation, remembered in kInitDone.
                  # NOTE(review): kInitDone and initialCommand are assigned
                  # here; if this code sits inside a function they need a
                  # 'global' declaration -- not visible in this fragment.
                  if(not kInitDone):
                     print "You are going to run on a sample over grid using the AAA protocol, it is therefore needed to initialize your grid certificate"
                     os.system('mkdir -p ~/x509_user_proxy; voms-proxy-init -voms cms -valid 192:00 --out ~/x509_user_proxy/proxy')#all must be done in the same command to avoid environment problems.  Note that the first sourcing is only needed in Louvain
                     initialCommand = 'export X509_USER_PROXY=~/x509_user_proxy/proxy;voms-proxy-init --noregen;'
                     kInitDone = True


                  print("Use das_client.py to list files from : " + getByLabel(d,'dset','') )
                  list = commands.getstatusoutput('das_client.py --query="file dataset='+getByLabel(d,'dset','') + '" --limit=0')[1].split()
                  for i in range(0,len(list)): list[i] = "root://cms-xrd-global.cern.ch/"+list[i]
               # Fallback.  NOTE(review): the 'miniAOD' label is empty on this
               # path (case 2 above did not match), so fillFromStore receives
               # an empty string -- confirm this is intended.
               else:
                  list = storeTools.fillFromStore(getByLabel(d,'miniAOD',''),0,-1,True);
Ejemplo n.º 10
0
            alldirs=[]
            try :
                alldirs = d[opt.dirtag]
            except:
                continue

            #tag veto
            if(opt.onlytag!='all') :
                itag=d['dtag']
                if(itag.find(opt.onlytag)<0) : continue

            idir=0
            for dir in alldirs:
                idir=idir+1
                filenames=fillFromStore(dir,0,-1,False)
                nfiles=len(filenames)

                njobs=1
		sleep=0;
                if(opt.fperjob>0) : 
                    njobs=nfiles//opt.fperjob
                    if(nfiles%opt.fperjob>0):njobs = njobs+1
                    sleep=2*opt.fperjob/6

                #special case for event generation
                if dir=="none" :
                    opt.fperjob=d['npersplit']
                    njobs=d['split']

                #substitute some job parameters by json file parameters