Example #1
File: dataset.py  Project: SiewYan/cmssw
def dasquery(dasQuery, dasLimit=0):
  dasData = das_client.get_data(dasQuery, dasLimit)
  if isinstance(dasData, str):
    jsondict = json.loads( dasData )
  else:
    jsondict = dasData
  # Check whether the DAS query failed
  try:
    error = findinjson(jsondict, "data","error")
  except KeyError:
    error = None
  if error or findinjson(jsondict, "status") != 'ok' or "data" not in jsondict:
    try:
      jsonstr = findinjson(jsondict, "reason")
    except KeyError: 
      jsonstr = str(jsondict)
    if len(jsonstr) > 10000:
      jsonfile = "das_query_output_%i.txt"
      i = 0
      while os.path.lexists(jsonfile % i):
        i += 1
      jsonfile = jsonfile % i
      theFile = open( jsonfile, "w" )
      theFile.write( jsonstr )
      theFile.close()
      msg = "The DAS query returned an error.  The output is very long, and has been stored in:\n" + jsonfile
    else:
      msg = "The DAS query returned a error.  Here is the output\n" + jsonstr
    msg += "\nIt's possible that this was a server error.  If so, it may work if you try again later"
    raise DatasetError(msg)
  return findinjson(jsondict, "data")
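A minimal usage sketch for the function above (the dataset in the query is hypothetical; das_client, json, and os are assumed to be imported as in the surrounding module):

# hypothetical DAS query; any query string is passed the same way
rows = dasquery("file dataset=/ZeroBias/Run2017A-v1/RAW", dasLimit=10)
for row in rows:
    print(row)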
Example #2
def das_client(query, check_key = None):
    """
    Submit `query` to DAS client and handle possible errors.
    Further treatment of the output might be necessary.
    Arguments:
    - `query`: DAS query
    - `check_key`: optional key to be checked for; retriggers query if needed
    """

    error = True
    for i in xrange(5):         # maximum of 5 tries
        das_data = cmssw_das_client.get_data(query, limit = 0)

        if das_data["status"] == "ok":
            if das_data["nresults"] == 0 or check_key is None:
                error = False
                break

            result_count = 0
            for d in find_key(das_data["data"], check_key):
                result_count += len(d)
            if result_count == 0:
                das_data["status"] = "error"
                das_data["reason"] = ("DAS did not return required data.")
                continue
            else:
                error = False
                break

    if das_data["status"] == "error":
        print_msg("DAS query '{}' failed 5 times. "
                  "The last time for the the following reason:".format(query))
        print das_data["reason"]
        sys.exit(1)
    return das_data["data"]
Example #3
def add_rawRelVals(process):   
   query='dataset file=%s' % process.source.fileNames[0]
   dataset = cmssw_das_client.get_data(query, limit = 0)
   if not dataset:
      raise RuntimeError(
         'Das returned no dataset parent of the input file: %s \n'
         'The parenthood is needed to add RAW secondary input files' % process.source.fileNames[0]
         )
   raw_dataset = dataset['data'][0]['dataset'][0]['name'].replace('GEN-SIM-RECO','GEN-SIM-DIGI-RAW-HLTDEBUG')
   raw_files = cmssw_das_client.get_data('file dataset=%s' % raw_dataset, limit=0)['data']
   
   if not raw_files:
      raise RuntimeError('No files found belonging to the GEN-SIM-DIGI-RAW-HLTDEBUG sample!')

   #extract the file names and convert from unicode into plain str, since vstring does not pick unicode up
   raw_files = [str(i['file'][0]['name']) for i in raw_files]
   process.source.secondaryFileNames = cms.untracked.vstring(*raw_files)
   return process
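A sketch of wiring the helper into a cfg file (file path hypothetical; FWCore and cmssw_das_client assumed):

import FWCore.ParameterSet.Config as cms

process = cms.Process("DEMO")
process.source = cms.Source("PoolSource",
    # hypothetical RelVal file; must be GEN-SIM-RECO so a RAW parent exists
    fileNames=cms.untracked.vstring('/store/relval/SomeSample/GEN-SIM-RECO/file.root'))
process = add_rawRelVals(process)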
Example #4
def add_rawRelVals(process, inputName):   
   query='dataset='+inputName 
   dataset = cmssw_das_client.get_data(query, limit = 0)
   if not dataset:
      raise RuntimeError(
         'Das returned no dataset parent of the input file: %s \n'
         'The parenthood is needed to add RAW secondary input files' % process.source.fileNames[0]
         )
   for i in dataset['data']:
      try:
         n_files = i['dataset'][0]['num_file']
      except (KeyError, IndexError):
         pass
   raw_files = cmssw_das_client.get_data('file '+query, limit = 0)
   files = []
   for i in raw_files['data']:
      files.append( i['file'][0]['name'])
   
   raw_files = ['root://cms-xrd-global.cern.ch/'+str(i) for i in files]
   process.source = cms.Source("PoolSource", fileNames = cms.untracked.vstring(raw_files))
   return process
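The same idea, driven by a dataset name instead of the current source (name hypothetical; cms and cmssw_das_client assumed):

import FWCore.ParameterSet.Config as cms

process = cms.Process("DEMO")
# hypothetical dataset; the helper replaces process.source with its files
process = add_rawRelVals(process, '/RelValTTbar_13/SomeRelease-v1/GEN-SIM-DIGI-RAW-HLTDEBUG')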
Example #5
###################################################################
def getFileNames_das_client():
    """Return files for given DAS query via das_client"""
    files = []

    query = "dataset dataset=/ZeroBias/Run2*SiStripCalMinBias-*/ALCARECO site=T2_CH_CERN" 
    jsondict = das_client.get_data(query)
    status = jsondict['status']
    if status != 'ok':
        print "DAS query status: %s"%(status)
        return files

    data =  jsondict['data']
    viableDS = []
    for element in data:
        viableDS.append(element['dataset'][0]['name'])

    print "Using Dataset:",viableDS[-1]

    query = "file dataset=%s site=T2_CH_CERN | grep file.name" % viableDS[-1]
    jsondict = das_client.get_data(query)
    status = jsondict['status']
    if status != 'ok':
        print "DAS query status: %s"%(status)
        return files

    mongo_query = jsondict['mongo_query']
    filters = mongo_query['filters']
    data = jsondict['data']

    files = []
    for row in data:
        the_file = [r for r in das_client.get_value(row, filters['grep'])][0]
        if len(the_file) > 0 and not the_file in files:
            files.append(the_file)

    return files
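The query is hard-coded, so usage is parameter-free; a minimal sketch:

files = getFileNames_das_client()
print("got %d file(s)" % len(files))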
Example #6
def das_client(query):
    """
    Submit `query` to DAS client and handle possible errors.
    Further treatment of the output might be necessary.

    Arguments:
    - `query`: DAS query
    """
    for _ in xrange(3):         # maximum of 3 tries
        das_data = cmssw_das_client.get_data(query, limit = 0)
        if das_data["status"] != "error": break
    if das_data["status"] == "error":
        print_msg("DAS query '{}' failed 3 times. "
                  "The last time for the the following reason:".format(query))
        print das_data["reason"]
        sys.exit(1)
    return das_data["data"]
Example #7
def das_client(query, check_key = None):
    """
    Submit `query` to DAS client and handle possible errors.
    Further treatment of the output might be necessary.

    Arguments:
    - `query`: DAS query
    - `check_key`: optional key to be checked for; retriggers query if needed
    """

    error = True
    das_data = {"status": "error",
                "reason": "DAS client returned no data."}  # fallback if every try is skipped
    for i in range(5):         # maximum of 5 tries
        try:
            das_data = cmssw_das_client.get_data(query, limit = 0)
        except IOError as e:
            if e.errno == 14: #https://stackoverflow.com/q/36397853/5228524
                continue
            raise  # unexpected I/O errors should not be swallowed
        except ValueError as e:
            if str(e) == "No JSON object could be decoded":
                continue
            raise  # unexpected ValueErrors should not be swallowed

        if das_data["status"] == "ok":
            if das_data["nresults"] == 0 or check_key is None:
                error = False
                break

            result_count = 0
            for d in find_key(das_data["data"], [check_key]):
                result_count += len(d)
            if result_count == 0:
                das_data["status"] = "error"
                das_data["reason"] = ("DAS did not return required data.")
                continue
            else:
                error = False
                break

    if das_data["status"] == "error":
        print_msg("DAS query '{}' failed 5 times. "
                  "The last time for the the following reason:".format(query))
        print(das_data["reason"])
        sys.exit(1)
    return das_data["data"]
Example #9
def get_mc_lumi_list(inputDataset="/QCD_Pt_300to470_TuneCP5_13TeV_pythia8/RunIIFall17MiniAODv2-PU2017_12Apr2018_94X_mc2017_realistic*/MINIAODSIM"):
    """Get the LumiList object(s) for dataset(s) matching `inputDataset`

    inputDataset:
        if a str, will ask DAS to autocomplete (can contain wildcards)
        if a list/tuple/set[str], will iterate over each entry in the list, without asking DAS to autocomplete.
        This is because the user might have cached the dataset names before calling this function, and we don't want to call DAS more than necessary.

    returns: a dict with one entry per dataset the user passes in, keyed by the DAS dataset string, with a LumiList as the value

    raises RuntimeError if no valid voms proxy
    raises TypeError if inputDataset incorrect type
    """
    if not check_voms():
        raise RuntimeError("Missing voms proxy")

    if isinstance(inputDataset, str):
        inputDatasets = autocomplete_Datasets([inputDataset])
    elif not isinstance(inputDataset, (list, set, tuple)):
        raise TypeError('get_mc_lumi_list: `inputDataset` expects str or list/tuple/set[str]')
    else:
        inputDatasets = inputDataset  # already-resolved names: skip DAS autocompletion

    result = {}
    for dataset in inputDatasets:
        print(dataset)
        json_dict = get_data(host='https://cmsweb.cern.ch', query="run lumi file dataset="+dataset, idx=0, limit=0, threshold=300)
        lumi_list = LumiList.LumiList()
        try:
            n_files = len(json_dict['data'])
            printout = max(1, round(n_files / 10))  # avoid modulo by zero for < 10 files
            for i, file_info in enumerate(json_dict['data']):
                if i % printout == 0:
                    print("{}% done...".format(100 * i / n_files))
                ls = file_info['lumi'][0]['number']
                run = file_info['run'][0]['run_number']
                lumi_list += LumiList.LumiList(runsAndLumis={run: ls})
        except Exception as e:
            print('Did not find lumis for', dataset)
            print(e)
        result.update({dataset:lumi_list})
    return result
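A hedged usage sketch (wildcard pattern hypothetical; assumes a valid VOMS proxy and the same get_data/LumiList imports as above):

# hypothetical pattern, expanded by autocomplete_Datasets via DAS
lumi_lists = get_mc_lumi_list("/QCD_Pt_*_TuneCP5_13TeV_pythia8/*/MINIAODSIM")
for dataset, lumi_list in lumi_lists.items():
    # writeJSON is the standard LumiList serializer in FWCore.PythonUtilities
    lumi_list.writeJSON(dataset.strip('/').replace('/', '_') + '.json')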
Example #10
def getFileNames(event):
    files = []

    query = "file dataset=%(dataset)s run=%(run)i lumi=%(lumi)i | grep file.name" % event
    jsondict = das_client.get_data(query)
    status = jsondict['status']
    if status != 'ok':
        print "DAS query status: %s" % (status)
        return files

    mongo_query = jsondict['mongo_query']
    filters = mongo_query['filters']
    data = jsondict['data']

    files = []
    for row in data:
        file_name = [r for r in das_client.get_value(row, filters['grep'])][0]
        if len(file_name) > 0 and file_name not in files:
            files.append(file_name)

    return files
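Because the query template is filled with `% event`, the argument must be a mapping with dataset, run, and lumi keys; a sketch with made-up values:

# hypothetical event record
event = {'dataset': '/ZeroBias/Run2017A-v1/RAW', 'run': 297050, 'lumi': 100}
print(getFileNames(event))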
Example #12
    def __getData(self, dasQuery, dasLimit=0):
        dasData = das_client.get_data(
            dasQuery,
            dasLimit,
            ############################################
            #can remove this once dasgoclient is updated
            cmd="das_client" if olddas else None
            ############################################
        )
        if isinstance(dasData, str):
            jsondict = json.loads(dasData)
        else:
            jsondict = dasData

        # Check whether the DAS query failed
        try:
            error = self.__findInJson(jsondict, ["data", "error"])
        except KeyError:
            error = None
        if error or self.__findInJson(
                jsondict, "status") != 'ok' or "data" not in jsondict:
            try:
                jsonstr = self.__findInJson(jsondict, "reason")
            except KeyError:
                jsonstr = str(jsondict)
            if len(jsonstr) > 10000:
                jsonfile = "das_query_output_%i.txt"
                i = 0
                while os.path.lexists(jsonfile % i):
                    i += 1
                jsonfile = jsonfile % i
                theFile = open(jsonfile, "w")
                theFile.write(jsonstr)
                theFile.close()
                msg = "The DAS query returned an error.  The output is very long, and has been stored in:\n" + jsonfile
            else:
                msg = "The DAS query returned a error.  Here is the output\n" + jsonstr
            msg += "\nIt's possible that this was a server error.  If so, it may work if you try again later"
            raise AllInOneError(msg)
        return self.__findInJson(jsondict, "data")
Example #13
File: DasQuery.py  Project: UHH2/UHH2
def autocomplete_Datasets(data):
    result_array = []

    for element in data:
        if '*' in element:
            jsondict = get_data(host='https://cmsweb.cern.ch',query="dataset="+element,idx=0,limit=0,threshold=300)
            #print json.dumps(jsondict, indent=4, sort_keys=True)
            #print json.dumps(jsondict['data'], indent=4, sort_keys=True)
            try:
                for i in range(len(jsondict['data'])):
                    result_array.append(jsondict['data'][i]['dataset'][0]['name'])
            except (KeyError, IndexError):
                print '='*10
                print 'Not found', element
                print '='*10
        else:
            result_array.append(element)
    if len(result_array) == 0:
        print "No samples found; exiting"
        sys.exit(0)
    # Do this to remove duplicates but maintain order of insertion
    # We get duplicates because it queries ALL databases not just the main one
    # https://github.com/dmwm/DAS/issues/4287#issuecomment-390278822
    return sorted(set(result_array), key=result_array.index)
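A usage sketch (dataset patterns hypothetical); entries without a wildcard pass through unchanged:

samples = autocomplete_Datasets([
    "/ZeroBias/Run2017*SiStripCalMinBias*/ALCARECO",  # expanded via DAS
    "/SingleMuon/Run2017B-v1/MINIAOD",                # no '*', kept as-is
])
for s in samples:
    print(s)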
Example #15
    def apply( self ):
        useDAS        = self._parameters[ 'useDAS'        ].value
        cmsswVersion  = self._parameters[ 'cmsswVersion'  ].value
        formerVersion = self._parameters[ 'formerVersion' ].value
        relVal        = self._parameters[ 'relVal'        ].value
        dataTier      = self._parameters[ 'dataTier'      ].value
        condition     = self._parameters[ 'condition'     ].value # only used for GT determination in initialization, if GT not explicitly given
        globalTag     = self._parameters[ 'globalTag'     ].value
        maxVersions   = self._parameters[ 'maxVersions'   ].value
        skipFiles     = self._parameters[ 'skipFiles'     ].value
        numberOfFiles = self._parameters[ 'numberOfFiles' ].value
        debug         = self._parameters[ 'debug'         ].value

        filePaths = []

        # Determine corresponding CMSSW version for RelVals
        preId      = '_pre'
        patchId    = '_patch'    # patch releases
        hltPatchId = '_hltpatch' # HLT patch releases
        dqmPatchId = '_dqmpatch' # DQM patch releases
        slhcId     = '_SLHC'     # SLHC releases
        rootId     = '_root'     # ROOT test releases
        ibId       = '_X_'       # IBs
        if patchId in cmsswVersion:
            cmsswVersion = cmsswVersion.split( patchId )[ 0 ]
        elif hltPatchId in cmsswVersion:
            cmsswVersion = cmsswVersion.split( hltPatchId )[ 0 ]
        elif dqmPatchId in cmsswVersion:
            cmsswVersion = cmsswVersion.split( dqmPatchId )[ 0 ]
        elif rootId in cmsswVersion:
            cmsswVersion = cmsswVersion.split( rootId )[ 0 ]
        elif slhcId in cmsswVersion:
            cmsswVersion = cmsswVersion.split( slhcId )[ 0 ]
        elif ibId in cmsswVersion or formerVersion:
            outputTuple = Popen( [ 'scram', 'l -c CMSSW' ], stdout = PIPE, stderr = PIPE ).communicate()
            if len( outputTuple[ 1 ] ) != 0:
                print('%s INFO : SCRAM error'%( self._label ))
                if debug:
                    print('    from trying to determine last valid releases before \'%s\''%( cmsswVersion ))
                    print()
                    print(outputTuple[ 1 ])
                    print()
                    self.messageEmptyList()
                return filePaths
            versions = { 'last'      :''
                       , 'lastToLast':''
                       }
            for line in outputTuple[ 0 ].splitlines():
                version = line.split()[ 1 ]
                if cmsswVersion.split( ibId )[ 0 ] in version or cmsswVersion.rpartition( '_' )[ 0 ] in version:
                    if not ( patchId in version or hltPatchId in version or dqmPatchId in version or slhcId in version or ibId in version or rootId in version ):
                        versions[ 'lastToLast' ] = versions[ 'last' ]
                        versions[ 'last' ]       = version
                        if version == cmsswVersion:
                            break
            # FIXME: ordering of output problematic ('XYZ_pre10' before 'XYZ_pre2', no "formerVersion" for 'XYZ_pre1')
            if formerVersion:
                # Don't use pre-releases as "former version" for other releases than CMSSW_X_Y_0
                if preId in versions[ 'lastToLast' ] and not preId in versions[ 'last' ] and not versions[ 'last' ].endswith( '_0' ):
                    versions[ 'lastToLast' ] = versions[ 'lastToLast' ].split( preId )[ 0 ] # works only if 'CMSSW_X_Y_0' exists ;-)
                # Use pre-release as "former version" for CMSSW_X_Y_0
                elif versions[ 'last' ].endswith( '_0' ) and not ( preId in versions[ 'lastToLast' ] and versions[ 'lastToLast' ].startswith( versions[ 'last' ] ) ):
                    versions[ 'lastToLast' ] = ''
                    for line in outputTuple[ 0 ].splitlines():
                        version      = line.split()[ 1 ]
                        versionParts = version.partition( preId )
                        if versionParts[ 0 ] == versions[ 'last' ] and versionParts[ 1 ] == preId:
                            versions[ 'lastToLast' ] = version
                        elif versions[ 'lastToLast' ] != '':
                            break
                # Don't use CMSSW_X_Y_0 as "former version" for pre-releases
                elif preId in versions[ 'last' ] and not preId in versions[ 'lastToLast' ] and versions[ 'lastToLast' ].endswith( '_0' ):
                    versions[ 'lastToLast' ] = '' # no alternative :-(
                cmsswVersion = versions[ 'lastToLast' ]
            else:
                cmsswVersion = versions[ 'last' ]

        # Debugging output
        if debug:
            print('%s DEBUG: Called with...'%( self._label ))
            for key in self._parameters.keys():
               print('    %s:\t'%( key ), end=' ')
               print(self._parameters[ key ].value, end=' ')
               if self._parameters[ key ].value is self.getDefaultParameters()[ key ].value:
                   print(' (default)')
               else:
                   print()
               if key == 'cmsswVersion' and cmsswVersion != self._parameters[ key ].value:
                   if formerVersion:
                       print('    ==> modified to last to last valid release %s (s. \'formerVersion\' parameter)'%( cmsswVersion ))
                   else:
                       print('    ==> modified to last valid release %s'%( cmsswVersion ))

        # Check domain
        domain = socket.getfqdn().split( '.' )
        domainSE = ''
        if len( domain ) == 0:
            print('%s INFO : Cannot determine domain of this computer'%( self._label ))
            if debug:
                self.messageEmptyList()
            return filePaths
        elif os.uname()[0] == "Darwin":
            print('%s INFO : Running on MacOSX without direct access to RelVal files.'%( self._label ))
            if debug:
                self.messageEmptyList()
            return filePaths
        elif len( domain ) == 1:
            print('%s INFO : Running on local host \'%s\' without direct access to RelVal files'%( self._label, domain[ 0 ] ))
            if debug:
                self.messageEmptyList()
            return filePaths
        if not ( ( domain[ -2 ] == 'cern' and domain[ -1 ] == 'ch' ) or ( domain[ -2 ] == 'fnal' and domain[ -1 ] == 'gov' ) ):
            print('%s INFO : Running on site \'%s.%s\' without direct access to RelVal files'%( self._label, domain[ -2 ], domain[ -1 ] ))
            if debug:
                self.messageEmptyList()
            return filePaths
        if domain[ -2 ] == 'cern':
            domainSE = 'T2_CH_CERN'
        elif domain[ -2 ] == 'fnal':
            domainSE = 'T1_US_FNAL_MSS'
        if debug:
            print('%s DEBUG: Running at site \'%s.%s\''%( self._label, domain[ -2 ], domain[ -1 ] ))
            print('%s DEBUG: Looking for SE \'%s\''%( self._label, domainSE ))

        # Find files
        validVersion = 0
        dataset    = ''
        datasetAll = '/%s/%s-%s-v*/%s'%( relVal, cmsswVersion, globalTag, dataTier )
        if useDAS:
            if debug:
                print('%s DEBUG: Using DAS query'%( self._label ))
            dasLimit = numberOfFiles
            if dasLimit <= 0:
                dasLimit = 1
            for version in range( maxVersions, 0, -1 ):
                filePaths    = []
                filePathsTmp = []
                fileCount    = 0
                dataset = '/%s/%s-%s-v%i/%s'%( relVal, cmsswVersion, globalTag, version, dataTier )
                dasQuery = 'file dataset=%s | grep file.name'%( dataset )
                if debug:
                    print('%s DEBUG: Querying dataset \'%s\' with'%( self._label, dataset ))
                    print('    \'%s\''%( dasQuery ))
                jsondict = das_client.get_data(dasQuery,dasLimit)
                if debug:
                    print('%s DEBUG: Received DAS JSON dictionary:'%( self._label ))
                    print('    \'%s\''%( jsondict ))
                if jsondict[ 'status' ] != 'ok':
                    print('There was a problem while querying DAS with query \'%s\'. Server reply was:\n %s' % (dasQuery, jsondict))
                    exit( 1 )
                mongo_query = jsondict[ 'mongo_query' ]
                filters     = mongo_query[ 'filters' ]
                data        = jsondict[ 'data' ]
                if debug:
                    print('%s DEBUG: Query in JSON dictionary:'%( self._label ))
                    print('    \'%s\''%( mongo_query ))
                    print('%s DEBUG: Filters in query:'%( self._label ))
                    print('    \'%s\''%( filters ))
                    print('%s DEBUG: Data in JSON dictionary:'%( self._label ))
                    print('    \'%s\''%( data ))
                for row in data:
                    filePath = [ r for r in das_client.get_value( row, filters[ 'grep' ] ) ][ 0 ]
                    if debug:
                        print('%s DEBUG: Testing file entry \'%s\''%( self._label, filePath ))
                    if len( filePath ) > 0:
                        if validVersion != version:
                            jsontestdict = das_client.get_data('site dataset=%s | grep site.name' % ( dataset ),  999)
                            mongo_testquery = jsontestdict[ 'mongo_query' ]
                            testfilters = mongo_testquery[ 'filters' ]
                            testdata    = jsontestdict[ 'data' ]
                            if debug:
                                print('%s DEBUG: Received DAS JSON dictionary (site test):'%( self._label ))
                                print('    \'%s\''%( jsontestdict ))
                                print('%s DEBUG: Query in JSON dictionary (site test):'%( self._label ))
                                print('    \'%s\''%( mongo_testquery ))
                                print('%s DEBUG: Filters in query (site test):'%( self._label ))
                                print('    \'%s\''%( testfilters ))
                                print('%s DEBUG: Data in JSON dictionary (site test):'%( self._label ))
                                print('    \'%s\''%( testdata ))
                            foundSE = False
                            for testrow in testdata:
                                siteName = [ tr for tr in das_client.get_value( testrow, testfilters[ 'grep' ] ) ][ 0 ]
                                if siteName == domainSE:
                                    foundSE = True
                                    break
                            if not foundSE:
                                if debug:
                                    print('%s DEBUG: Possible version \'v%s\' not available on SE \'%s\''%( self._label, version, domainSE ))
                                break
                            validVersion = version
                            if debug:
                                print('%s DEBUG: Valid version set to \'v%i\''%( self._label, validVersion ))
                        if numberOfFiles == 0:
                            break
                        # protect from double entries ( 'unique' flag in query does not work here)
                        if not filePath in filePathsTmp:
                            filePathsTmp.append( filePath )
                            if debug:
                                print('%s DEBUG: File \'%s\' found'%( self._label, filePath ))
                            fileCount += 1
                            # needed, since and "limit" overrides "idx" in 'get_data' (==> "idx" set to '0' rather than "skipFiles")
                            if fileCount > skipFiles:
                                filePaths.append( filePath )
                        elif debug:
                            print('%s DEBUG: File \'%s\' found again'%( self._label, filePath ))
                if validVersion > 0:
                    if numberOfFiles == 0 and debug:
                        print('%s DEBUG: No files requested'%( self._label ))
                    break
        else:
            if debug:
                print('%s DEBUG: Using DBS query'%( self._label ))
            print('%s WARNING: DBS query disabled for DBS3 transition to new API'%( self._label ))
            #for version in range( maxVersions, 0, -1 ):
                #filePaths = []
                #fileCount = 0
                #dataset = '/%s/%s-%s-v%i/%s'%( relVal, cmsswVersion, globalTag, version, dataTier )
                #dbsQuery = 'find file where dataset = %s'%( dataset )
                #if debug:
                    #print '%s DEBUG: Querying dataset \'%s\' with'%( self._label, dataset )
                    #print '    \'%s\''%( dbsQuery )
                #foundSE = False
                #for line in os.popen( 'dbs search --query="%s"'%( dbsQuery ) ).readlines():
                    #if line.find( '.root' ) != -1:
                        #if validVersion != version:
                            #if not foundSE:
                                #dbsSiteQuery = 'find dataset where dataset = %s and site = %s'%( dataset, domainSE )
                                #if debug:
                                    #print '%s DEBUG: Querying site \'%s\' with'%( self._label, domainSE )
                                    #print '    \'%s\''%( dbsSiteQuery )
                                #for lineSite in os.popen( 'dbs search --query="%s"'%( dbsSiteQuery ) ).readlines():
                                    #if lineSite.find( dataset ) != -1:
                                        #foundSE = True
                                        #break
                            #if not foundSE:
                                #if debug:
                                    #print '%s DEBUG: Possible version \'v%s\' not available on SE \'%s\''%( self._label, version, domainSE )
                                #break
                            #validVersion = version
                            #if debug:
                                #print '%s DEBUG: Valid version set to \'v%i\''%( self._label, validVersion )
                        #if numberOfFiles == 0:
                            #break
                        #filePath = line.replace( '\n', '' )
                        #if debug:
                            #print '%s DEBUG: File \'%s\' found'%( self._label, filePath )
                        #fileCount += 1
                        #if fileCount > skipFiles:
                            #filePaths.append( filePath )
                        #if not numberOfFiles < 0:
                            #if numberOfFiles <= len( filePaths ):
                                #break
                #if validVersion > 0:
                    #if numberOfFiles == 0 and debug:
                        #print '%s DEBUG: No files requested'%( self._label )
                    #break

        # Check output and return
        if validVersion == 0:
            print('%s WARNING : No RelVal file(s) found at all in datasets \'%s*\' on SE \'%s\''%( self._label, datasetAll, domainSE ))
            if debug:
                self.messageEmptyList()
        elif len( filePaths ) == 0:
            print('%s WARNING : No RelVal file(s) picked up in dataset \'%s\''%( self._label, dataset ))
            if debug:
                self.messageEmptyList()
        elif len( filePaths ) < numberOfFiles:
            print('%s INFO : Only %i RelVal file(s) instead of %i picked up in dataset \'%s\''%( self._label, len( filePaths ), numberOfFiles, dataset ))

        if debug:
            print('%s DEBUG: returning %i file(s):\n%s'%( self._label, len( filePaths ), filePaths ))
        return filePaths
Example #16
def das(query):
    data = das_client.get_data(query, 0)
    if data["status"] == "error": raise RuntimeError(data["reason"])
    return data["data"]
Example #17
    # fragment from a loop over a sample table; `row`, `dataset`, and
    # `crab_cfg_path` are defined in the enclosing (not shown) scope
    name=row['name']
    weight=row['weighting on 1fb-1']
    generator=row['generator']
    isData=row['isData']
    isBoostedMiniAOD=row['boosted_dataset']
    #makeSystematicsTrees=row['makeSystematicsTrees']
    globalTag=row['globalTag']
#    if len(dataset)>r3:
#        requestname="'"+(dataset.split('/')[1])+"'"
    if dataset!='' and name!='':
        print 'checking dataset info for',dataset
        #ckey=das_client.x509()
        #cert=das_client.x509()
        #das_client.check_auth(ckey)
        #das_data=das_client.get_data("https://cmsweb.cern.ch","dataset="+dataset+" instance=phys/prod",0,0,0,300,ckey,cert)
        das_data=das_client.get_data("file dataset="+dataset+" instance=phys/prod")
        #print das_data
        #for d in das_data['data']:
        #    print d
        #    for dd in d['dataset']:
        #        print dd
        #        print dd['mcm']['nevents']


        outfilename=crab_cfg_path+'/crab_'+name+'.py'
        if os.path.exists(outfilename):
            for i in range(2,10):
                outfilename=crab_cfg_path+'/crab_'+name+'_v'+str(i)+'.py'
                if not os.path.exists(outfilename):
                    break
        crabout=open(outfilename,'w')
Example #19
def common_search(dd_tier):
    dd_tier_re = re.compile(dd_tier.replace('*', '.*'))

    if os.environ['DD_SOURCE'] == "das":

        query = "dataset instance=cms_dbs_prod_global"
        if os.environ['DD_RELEASE'] != "":
            query = query + " release=" + os.environ['DD_RELEASE']
        if os.environ['DD_SAMPLE'] != "":
            query = query + " primary_dataset=" + os.environ['DD_SAMPLE']
        if dd_tier != "":
            query = query + " tier=" + dd_tier
        if os.environ['DD_COND'] != "":
            query = query + " dataset=*" + os.environ['DD_COND'] + "*"
        if os.environ['DD_RUN'] != "":
            query = query + " run=" + os.environ['DD_RUN']
        # query = query + " | unique" # too long ??

        # data = os.popen('das_client.py --limit=0 --query "'+query+'"')
        # datalines = data.readlines()
        # data.close()
        # datasets = []
        # for line in datalines:
        #  line = line.rstrip()
        #  if line != "" and line[0] =="/":
        #    datasets.append(line)
        # dataset = datasets[0]

        data = das_client.json.loads(
            das_client.get_data('https://cmsweb.cern.ch', query, 0, 0, 0))

        if data['nresults'] == 0:
            print('[electronDataDiscovery.py] No DAS dataset for query:',
                  query)
            return []
        while data['nresults'] > 1:
            if data['data'][0]['dataset'][0]['name'] == data['data'][1][
                    'dataset'][0]['name']:
                data['data'].pop(0)
                data['nresults'] -= 1
            else:
                print(
                    '[electronDataDiscovery.py] Several DAS datasets for query:',
                    query)
                for i in range(data['nresults']):
                    print('[electronDataDiscovery.py] dataset[' + str(i) +
                          ']: ' + data['data'][i]['dataset'][0]['name'])
                return []

        dataset = data['data'][0]['dataset'][0]['name']

        query = "file instance=cms_dbs_prod_global dataset=" + dataset

        # data = os.popen('das_client.py --limit=0 --query "'+query+'"')
        # datalines = data.readlines()
        # data.close()
        # result = []
        # for line in datalines:
        #  line = line.rstrip()
        #  if line != "" and line[0] =="/":
        #    result.append(line)

        data = das_client.json.loads(
            das_client.get_data('https://cmsweb.cern.ch', query, 0, 0, 0))

        if data['nresults'] == 0:
            print('[electronDataDiscovery.py] No DAS file in dataset:',
                  dataset)
            return []
        else:
            print('there are %d results' % data['nresults'])

        result = []
        for i in range(0, data['nresults']):
            result.append(str(data['data'][i]['file'][0]['name']))

    elif os.environ['DD_SOURCE'] == "dbs":

        input = "find file"
        separator = " where "
        if os.environ['DD_RELEASE'] != "":
            input = input + separator + "release = " + os.environ['DD_RELEASE']
            separator = " and "
        if os.environ['DD_SAMPLE'] != "":
            input = input + separator + "primds = " + os.environ['DD_SAMPLE']
            separator = " and "
        if os.environ['DD_RUN'] != "":
            input = input + separator + "run = " + os.environ['DD_RUN']
            separator = " and "
        input = input + separator + "dataset like *" + os.environ[
            'DD_COND'] + "*" + dd_tier + "*"

        data = os.popen(
            'dbs search --url="http://cmsdbsprod.cern.ch/cms_dbs_prod_global/servlet/DBSServlet" --query "'
            + input + '"')
        datalines = data.readlines()
        data.close()
        result = []
        for line in datalines:
            line = line.rstrip()
            if line != "" and line[0] == "/":
                result.append(line)

    elif os.environ['DD_SOURCE'] == "http":

        input = "find file"
        separator = " where "
        if os.environ['DD_RELEASE'] != "":
            input = input + separator + "release = " + os.environ['DD_RELEASE']
            separator = " and "
        if os.environ['DD_SAMPLE'] != "":
            input = input + separator + "primds = " + os.environ['DD_SAMPLE']
            separator = " and "
        if os.environ['DD_RUN'] != "":
            input = input + separator + "run = " + os.environ['DD_RUN']
            separator = " and "
        input = input + separator + "dataset like *" + os.environ[
            'DD_COND'] + "*" + dd_tier + "*"

        url = "https://cmsweb.cern.ch:443/dbs_discovery/aSearch"
        final_input = urllib.quote(input)

        agent = "Mozilla/4.0 (compatible; MSIE 5.5; Windows NT)"
        ctypes = "text/plain"
        headers = {'User-Agent': agent, 'Accept': ctypes}
        params = {
            'dbsInst': 'cms_dbs_prod_global',
            'html': 0,
            'caseSensitive': 'on',
            '_idx': 0,
            'pagerStep': -1,
            'userInput': final_input,
            'xml': 0,
            'details': 0,
            'cff': 0,
            'method': 'dbsapi'
        }
        data = urllib.urlencode(params, doseq=True)
        req = urllib2.Request(url, data, headers)
        data = ""

        try:
            response = urllib2.urlopen(req)
            data = response.read()
        except urllib2.HTTPError as e:
            if e.code == 201:
                print(e.headers)
                print(e.msg)
                pass
            else:
                raise e

        datalines = data.splitlines()  # `data` is a str here, not a file object
        result = []
        for line in datalines:
            line = line.rstrip()
            if line != "" and line[0] == "/":
                result.append(line)

    elif os.environ['DD_SOURCE'] == "lsf":

        dbs_path = '/' + os.environ['DD_SAMPLE'] + '/' + os.environ['DD_RELEASE'] + '-' + os.environ['DD_COND'] + '/' + \
                   os.environ['DD_TIER']
        if __name__ == "__main__":
            print('dbs path:', dbs_path)
        data = os.popen('dbs lsf --path="' + dbs_path + '"')
        datalines = data.readlines()
        data.close()
        result = []
        for line in datalines:
            line = line.rstrip()
            if line != "" and line[0] == "/":
                result.append(line)

    elif os.environ['DD_SOURCE'].startswith(
            '/castor/cern.ch/cms/'):  # assumed to be a castor dir

        castor_dir = os.environ['DD_SOURCE'].replace('/castor/cern.ch/cms/',
                                                     '/', 1)
        result = []
        data = os.popen('rfdir /castor/cern.ch/cms' + castor_dir)
        subdirs = data.readlines()
        data.close()
        datalines = []
        for line in subdirs:
            line = line.rstrip()
            subdir = line.split()[8]
            data = os.popen('rfdir /castor/cern.ch/cms' + castor_dir + '/' +
                            subdir)
            datalines = data.readlines()
            for line in datalines:
                line = line.rstrip()
                file = line.split()[8]
                if file != "":
                    result.append(castor_dir + '/' + subdir + '/' + file)
            data.close()

    elif os.environ['DD_SOURCE'].startswith(
            '/eos/cms/'):  # assumed to be an eos dir

        data = os.popen('eos find -f ' + os.environ['DD_SOURCE'])
        lines = data.readlines()
        data.close()
        result = []
        for line in lines:
            line = line.strip().replace('/eos/cms/', '/', 1)
            if line == "": continue
            if dd_sample_re.search(line) is None: continue
            if dd_cond_re.search(line) is None: continue
            if dd_tier_re.search(line) is None: continue
            if dd_run_re.search(line) is None: continue
            result.append(line)

    else:  # os.environ['DD_SOURCE'] is assumed to be a file name

        result = []
        for line in open(os.environ['DD_SOURCE']).readlines():
            line = os.path.expandvars(line.strip())
            if line == "": continue
            if dd_sample_re.search(line) is None: continue
            if dd_cond_re.search(line) is None: continue
            if dd_tier_re.search(line) is None: continue
            if dd_run_re.search(line) is None: continue
            result.append(line)

        if len(result) == 0:
            diag = '[electronDataDiscovery.py] No more files after filtering with :'
            if os.environ['DD_SAMPLE'] != '':
                diag += ' ' + os.environ['DD_SAMPLE']
            if os.environ['DD_COND'] != '': diag += ' ' + os.environ['DD_COND']
            if dd_tier != '': diag += ' ' + dd_tier
            if os.environ['DD_RUN'] != '': diag += ' ' + os.environ['DD_RUN']
            print(diag)

    return result
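A sketch of driving the dispatcher above through its environment variables (all values hypothetical; the das branch also needs das_client importable):

import os
os.environ['DD_SOURCE']  = 'das'
os.environ['DD_RELEASE'] = ''
os.environ['DD_SAMPLE']  = 'RelValZEE'   # hypothetical primary dataset
os.environ['DD_COND']    = ''
os.environ['DD_RUN']     = ''
files = common_search('GEN-SIM-RECO')
print('%d file(s) found' % len(files))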