Example #1
0
def getFileList(procData, DefaultNFilesPerJob):
    """Build the per-job input file groups for the samples listed in procData.

    Samples come from the 'miniAOD' entry of procData plus every 'dset' entry
    containing "/MINIAOD"; the remaining 'dset' entries are handled as
    non EDM/miniAOD samples.

    Args:
        procData: sample description, read only through getByLabel (keys used:
            'miniAOD', 'dset', 'dbsURL', 'lumiMask', 'split').
        DefaultNFilesPerJob: number of files per job used when 'split' is not
            a positive number.

    Returns:
        A list of strings.  For miniAOD samples each entry is the
        concatenation of '"<file>",\\n' for the files of one job; for the
        other samples each entry is a single quoted file path.

    Side effects:
        Sets the global isLocalSample (value for the last miniAOD sample
        processed, False if there is none) and extends the global
        nonLocalSamples with every sample not found on localTier.
    """
    global nonLocalSamples
    global isLocalSample
    isLocalSample = False

    FileList = []
    nonMiniAODSamples = []
    # Work on a copy so that appending below cannot modify a list owned by
    # procData (in case getByLabel hands it back by reference).
    miniAODSamples = list(getByLabel(procData, 'miniAOD', []))
    for s in getByLabel(procData, 'dset', []):
        if "/MINIAOD" in s:
            miniAODSamples.append(s)
        else:
            nonMiniAODSamples.append(s)

    dbsURL = getByLabel(procData, 'dbsURL', '')
    instance = ""
    if len(dbsURL) > 0:
        instance = "instance=prod/" + dbsURL

    for sample in miniAODSamples:
        listSites = DASQuery('site dataset=' + sample + ' ' + instance +
                             ' | grep site.name,site.replica_fraction')

        # The sample counts as local when the fraction reported for the local
        # tier equals the largest fraction reported for any site.
        MaxFraction = 0
        FractionOnLocal = -1
        if localTier != "":
            for site in listSites.split('\n'):
                try:
                    fraction = float(
                        site.split()[2].replace('%', '').replace('"', ''))
                except (IndexError, ValueError):
                    # Line without a parsable fraction in its third column:
                    # it cannot contribute to either value.
                    continue
                MaxFraction = max(MaxFraction, fraction)
                if localTier in site:
                    FractionOnLocal = fraction

        IsOnLocalTier = (FractionOnLocal == MaxFraction)
        if IsOnLocalTier:
            print(
                "Sample is found to be on the local grid tier %s (%f%%) for %s"
                % (localTier, FractionOnLocal, sample))

        isLocalSample = IsOnLocalTier

        if localTier != "" and not IsOnLocalTier:
            nonLocalSamples += [sample]

        files = []
        if IsOnLocalTier or "/MINIAOD" in sample:
            if DatasetFileDB == "DAS":
                files = DASQuery('file dataset=' + sample + ' ' +
                                 instance).split()
            elif DatasetFileDB == "DBS":
                curlCommand = "curl -ks --key $X509_USER_PROXY --cert $X509_USER_PROXY -X GET "
                dbsPath = "https://cmsweb.cern.ch/dbs/prod/global/DBSReader"
                sedTheList = ' | sed \"s#logical_file_name#\\nlogical_file_name#g\" | sed \"s#logical_file_name\': \'##g\" | sed \"s#\'}, {u\'##g\" | sed \"s#\'}]##g\" | grep store '
                files = commands.getstatusoutput(initialCommand + curlCommand +
                                                 '"' + dbsPath +
                                                 '/files?dataset=' + sample +
                                                 '"' + sedTheList)[1].split()

            # The access prefix is the same for every file of the sample.
            if IsOnLocalTier:
                if "iihe.ac.be" in hostname:
                    prefix = "dcap://maite.iihe.ac.be/pnfs/iihe/cms/ph/sc4"
                elif "ucl.ac.be" in hostname:
                    prefix = "/storage/data/cms"
                else:
                    prefix = "root://eoscms//eos/cms"
            else:
                prefix = "root://cms-xrd-global.cern.ch/"  # works worldwide

            # Only root files are considered.
            files = [prefix + f for f in files if ".root" in f]
        else:
            print("Processing private local sample: " + sample)
            files = storeTools.fillFromStore(sample, 0, -1, True)

        files = storeTools.keepOnlyFilesFromGoodRun(
            files, os.path.expandvars(getByLabel(procData, 'lumiMask', '')))

        nSplit = getByLabel(procData, 'split', -1)
        if nSplit > 0:
            NFilesPerJob = max(1, len(files) // nSplit)
        else:
            NFilesPerJob = DefaultNFilesPerJob
            # Make sure the number of jobs isn't too big (at most ~100).
            if (len(files) // NFilesPerJob) > 100:
                NFilesPerJob = len(files) // 100

        for start in range(0, len(files), NFilesPerJob):
            FileList.append(''.join(
                '"' + f + '",\\n' for f in files[start:start + NFilesPerJob]))

    # 'split' is read here explicitly: this loop must not depend on a value
    # left behind by the miniAOD loop above, which may not have run at all.
    nSegments = getByLabel(procData, 'split', -1)
    for sample in nonMiniAODSamples:
        for segment in range(0, nSegments):
            eventsFile = opt.indir + '/' + origdtag + '_' + str(
                segment) + '.root'
            print("Processing a non EDM/miniAOD sample in : " + eventsFile)
            if eventsFile.startswith('/store/'):
                eventsFile = commands.getstatusoutput('cmsPfn ' +
                                                      eventsFile)[1]
            FileList.append('"' + eventsFile + '"')
    return FileList
def getFileList(procData,DefaultNFilesPerJob):
   """Build the per-job input file groups for the sample described by procData.

   When 'dset' contains "/MINIAOD" or "amagitte", or 'miniAOD' is non-empty,
   the file list is looked up (DAS, DBS or storeTools.fillFromStore) and cut
   into groups; otherwise the input is taken to be the segmented files
   opt.indir/origdtag_<segment>.root.

   Args:
      procData: sample description, read only through getByLabel (keys used:
         'miniAOD', 'dset', 'dbsURL', 'lumiMask', 'split').
      DefaultNFilesPerJob: number of files per job used when 'split' is not
         a positive number.

   Returns:
      A list of strings: either the concatenation of '"<file>",\\n' for the
      files of one job, or a single quoted file path per segment.

   Side effects:
      Sets the global isLocalSample and, when the dataset is not fully
      available on localTier, extends the global nonLocalSamples.
   """
   global nonLocalSamples
   global isLocalSample
   isLocalSample = False

   FileList = []
   dset = getByLabel(procData,'dset','')
   miniAOD = getByLabel(procData,'miniAOD','')
   isMINIAODDataset = ("/MINIAOD" in dset) or ("amagitte" in dset)

   if(isMINIAODDataset or len(miniAOD)>0):
      dbsURL = getByLabel(procData,'dbsURL','')
      instance = ""
      if(len(dbsURL)>0): instance = "instance=prod/"+ dbsURL

      listSites = commands.getstatusoutput('das_client.py --query="site dataset='+dset + ' ' + instance + ' | grep site.name,site.dataset_fraction " --limit=0')[1]

      # The dataset counts as local only if a line mentioning the local tier
      # also reports 100.00%.
      IsOnLocalTier=False
      if(localTier != ""):
         for site in listSites.split('\n'):
            if(localTier in site and '100.00%' in site):
               IsOnLocalTier=True
               print("Sample is found to be on the local grid tier (%s): %s" % (localTier, site))
               break
      isLocalSample = IsOnLocalTier

      if(localTier != "" and not IsOnLocalTier):
         nonLocalSamples += [dset]

      files = []
      if(IsOnLocalTier or isMINIAODDataset):
         if(DatasetFileDB=="DAS"):
            files = commands.getstatusoutput('das_client.py --query="file dataset='+dset + ' ' + instance + '" --limit=0')[1].split()
         elif(DatasetFileDB=="DBS"):
            curlCommand="curl -ks --key $X509_USER_PROXY --cert $X509_USER_PROXY -X GET "
            dbsPath="https://cmsweb.cern.ch/dbs/prod/global/DBSReader"
            sedTheList=' | sed \"s#logical_file_name#\\nlogical_file_name#g\" | sed \"s#logical_file_name\': \'##g\" | sed \"s#\'}, {u\'##g\" | sed \"s#\'}]##g\" | grep store '
            files = commands.getstatusoutput(initialCommand + curlCommand+'"'+dbsPath+'/files?dataset='+dset+'"'+sedTheList)[1].split()

         # The access prefix is the same for every file of the dataset.
         if IsOnLocalTier:
            if  ("iihe.ac.be" in hostname): prefix = "dcap://maite.iihe.ac.be:/pnfs/iihe/cms/ph/sc4"
            elif("ucl.ac.be"  in hostname): prefix = "/storage/data/cms"
            else:                           prefix = "root://eoscms//eos/cms"
         else:
            prefix = "root://cms-xrd-global.cern.ch/" #works worldwide

         # Only root files are considered.
         files = [prefix + f for f in files if ".root" in f]

      else:
         # The enclosing condition guarantees that 'miniAOD' is non-empty
         # here, so this is the only remaining case.
         print("Processing private local sample: " + miniAOD)
         files = storeTools.fillFromStore(miniAOD,0,-1,True)

      files = storeTools.keepOnlyFilesFromGoodRun(files, getByLabel(procData,'lumiMask',''))
      nSplit = getByLabel(procData,'split',-1)
      if(nSplit>0):
         NFilesPerJob = max(1,len(files)//nSplit)
      else:
         NFilesPerJob = DefaultNFilesPerJob

      for start in range(0, len(files), NFilesPerJob):
         FileList.append(''.join('"'+f+'",\\n' for f in files[start:start+NFilesPerJob]))

   else:
      # 'split' has to be read on this path too; it is not set anywhere else
      # when the miniAOD branch above is skipped.
      nSegments = getByLabel(procData,'split',-1)
      for segment in range(0,nSegments):
         eventsFile = opt.indir + '/' + origdtag + '_' + str(segment) + '.root'
         print("Processing a non EDM/miniAOD sample in : " + eventsFile)
         if(eventsFile.startswith('/store/')): eventsFile = commands.getstatusoutput('cmsPfn ' + eventsFile)[1]
         FileList.append('"'+eventsFile+'"')
   return FileList
def getFileList(procData,DefaultNFilesPerJob):
   """Build the per-job input file groups for the samples listed in procData.

   Samples come from the 'miniAOD' entry of procData plus every 'dset' entry
   containing "/MINIAOD"; the remaining 'dset' entries are handled as
   non EDM/miniAOD samples read from opt.indir/origdtag_<segment>.root.

   Args:
      procData: sample description, read only through getByLabel (keys used:
         'miniAOD', 'dset', 'dbsURL', 'lumiMask', 'split').
      DefaultNFilesPerJob: number of files per job used when 'split' is not
         a positive number.

   Returns:
      A list of strings.  For miniAOD samples each entry is the
      concatenation of '"<file>",\\n' for the files of one job; for the
      other samples each entry is a single quoted file path.

   Side effects:
      Sets the global isLocalSample (value for the last miniAOD sample
      processed, False if there is none) and extends the global
      nonLocalSamples with every sample not found on localTier.
   """
   global nonLocalSamples
   global isLocalSample
   isLocalSample = False

   FileList = []
   nonMiniAODSamples = []
   # Work on a copy so that appending below cannot modify a list owned by
   # procData (in case getByLabel hands it back by reference).
   miniAODSamples = list(getByLabel(procData,'miniAOD',[]))
   for s in getByLabel(procData,'dset',[]):
      if("/MINIAOD" in s): miniAODSamples.append(s)
      else: nonMiniAODSamples.append(s)

   dbsURL = getByLabel(procData,'dbsURL','')
   instance = ""
   if(len(dbsURL)>0): instance = "instance=prod/"+ dbsURL

   for sample in miniAODSamples:
      listSites = DASQuery('site dataset='+sample + ' ' + instance + ' | grep site.name,site.replica_fraction')

      # The sample counts as local when the fraction reported for the local
      # tier equals the largest fraction reported for any site.
      MaxFraction=0
      FractionOnLocal=-1
      if(localTier != ""):
         for site in listSites.split('\n'):
            try:
               fraction = float(site.split()[2].replace('%','').replace('"',''))
            except (IndexError, ValueError):
               # Line without a parsable fraction in its third column:
               # it cannot contribute to either value.
               continue
            MaxFraction = max(MaxFraction, fraction)
            if(localTier in site):
               FractionOnLocal = fraction

      IsOnLocalTier = (FractionOnLocal == MaxFraction)
      if(IsOnLocalTier):
         print("Sample is found to be on the local grid tier %s (%f%%) for %s" % (localTier, FractionOnLocal, sample))

      isLocalSample = IsOnLocalTier

      if(localTier != "" and not IsOnLocalTier):
         nonLocalSamples += [sample]

      files = []
      if(IsOnLocalTier or "/MINIAOD" in sample):
         if(DatasetFileDB=="DAS"):
            files = DASQuery('file dataset='+sample + ' ' + instance).split()
         elif(DatasetFileDB=="DBS"):
            curlCommand="curl -ks --key $X509_USER_PROXY --cert $X509_USER_PROXY -X GET "
            dbsPath="https://cmsweb.cern.ch/dbs/prod/global/DBSReader"
            sedTheList=' | sed \"s#logical_file_name#\\nlogical_file_name#g\" | sed \"s#logical_file_name\': \'##g\" | sed \"s#\'}, {u\'##g\" | sed \"s#\'}]##g\" | grep store '
            files = commands.getstatusoutput(initialCommand + curlCommand+'"'+dbsPath+'/files?dataset='+sample+'"'+sedTheList)[1].split()

         # The access prefix is the same for every file of the sample.
         if IsOnLocalTier:
            if  ("iihe.ac.be" in hostname): prefix = "dcap://maite.iihe.ac.be/pnfs/iihe/cms/ph/sc4"
            elif("ucl.ac.be"  in hostname): prefix = "/storage/data/cms"
            else:                           prefix = "root://eoscms//eos/cms"
         else:
            prefix = "root://cms-xrd-global.cern.ch/" #works worldwide

         # Only root files are considered.
         files = [prefix + f for f in files if ".root" in f]

      else:
         print("Processing private local sample: " + sample)
         files = storeTools.fillFromStore(sample,0,-1,True)

      files = storeTools.keepOnlyFilesFromGoodRun(files, os.path.expandvars(getByLabel(procData,'lumiMask','')))
      nSplit = getByLabel(procData,'split',-1)
      if(nSplit>0):
         NFilesPerJob = max(1,len(files)//nSplit)
      else:
         NFilesPerJob = DefaultNFilesPerJob
         # Make sure the number of jobs isn't too big (at most ~100).
         if((len(files)//NFilesPerJob)>100): NFilesPerJob = len(files)//100

      for start in range(0, len(files), NFilesPerJob):
         FileList.append(''.join('"'+f+'",\\n' for f in files[start:start+NFilesPerJob]))

   nSegments = getByLabel(procData,'split',-1)
   for sample in nonMiniAODSamples:
      for segment in range(0,nSegments):
         eventsFile = opt.indir + '/' + origdtag + '_' + str(segment) + '.root'
         print("Processing a non EDM/miniAOD sample in : " + eventsFile)
         if(eventsFile.startswith('/store/')): eventsFile = commands.getstatusoutput('cmsPfn ' + eventsFile)[1]
         FileList.append('"'+eventsFile+'"')
   return FileList