Example #1
    def __init__(self, config):
        self.br = Browser()

        self.config = config
        
        # Initialise connections
        self.phedex = PhEDEx({"endpoint": "https://cmsweb.cern.ch/phedex/datasvc/json/prod/"}, "json")
        # dbs_base_url is assumed to be defined elsewhere, e.g. "https://cmsweb.cern.ch/dbs/prod/"
        self.dbsPhys01 = DbsApi(url=dbs_base_url + "phys01/DBSReader/")
        self.dbsPhys02 = DbsApi(url=dbs_base_url + "phys02/DBSReader/")
        self.dbsPhys03 = DbsApi(url=dbs_base_url + "phys03/DBSReader/")
Example #2
    def __init__(self, config):
        self.br = Browser()

        self.config = config
        
        # Initialise connections
        self.mySiteDB = SiteDBJSON()
        self.dbsPhys01 = DbsApi(url=dbs_base_url + "phys01/DBSReader/")
        self.dbsPhys02 = DbsApi(url=dbs_base_url + "phys02/DBSReader/")
        self.dbsPhys03 = DbsApi(url=dbs_base_url + "phys03/DBSReader/")
Example #3
def getSiblings(fileName, dataset):
    import sys

    try:
        from dbs.apis.dbsClient import DbsApi
        from CRABClient.ClientUtilities import DBSURLS
    except ImportError:
        print("getSiblings() relies on CRAB. Please set up the environment for CRAB before using.")
        sys.exit(1)

    dbsurl_global = DBSURLS["reader"].get("global", "global")
    dbsurl_phys03 = DBSURLS["reader"].get("phys03", "phys03")
    dbs3api_phys03 = DbsApi(url=dbsurl_phys03)
    dbs3api_global = DbsApi(url=dbsurl_global)

    # if there is an xrootd prefix, strip it
    if "/store/" in fileName:
        i = fileName.find("/store/")
        fileName = fileName[i:]

    # first get the parents
    parents = dbs3api_phys03.listFileParents(logical_file_name=fileName)

    # for each of the parents, get the grandparents
    grandparents = []
    for parent in parents:
        for parent_file_name in parent["parent_logical_file_name"]:
            grandparents.extend(
                dbs3api_global.listFileParents(
                    logical_file_name=parent_file_name))

    # then for each of the grandparents, get their children
    children = []
    for grandparent in grandparents:
        for grandparent_file_name in grandparent["parent_logical_file_name"]:
            children.extend(
                dbs3api_global.listFileChildren(
                    logical_file_name=grandparent_file_name))

    # put the children in a set
    miniaod = set()
    for child in children:
        for child_file_name in child["child_logical_file_name"]:
            miniaod.add(child_file_name)

    # put the files of the target dataset in another set
    datasetFiles = dbs3api_global.listFiles(dataset=dataset)
    miniaodSuperset = set()
    for f in datasetFiles:
        miniaodSuperset.add(f["logical_file_name"])

    # return the intersection of the two sets
    return list(miniaodSuperset.intersection(miniaod))
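A minimal usage sketch (the LFN and the target dataset name below are hypothetical placeholders; a CRAB environment is assumed):

siblings = getSiblings(
    "/store/user/someuser/somefile.root",        # hypothetical input LFN
    "/SomePrimary/SomeCampaign-v1/MINIAODSIM")   # hypothetical target dataset
print(siblings)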
Example #4
def getFilenames(txtFile):
    # DBS python API
    dbs = DbsApi('https://cmsweb.cern.ch/dbs/prod/global/DBSReader')
    global_director = "root://cmsxrootd.fnal.gov/"  # global xrootd redirector

    # Read out input files containing data set names
    with open(txtFile) as f:
        datasets = [
            dataset for dataset in f.read().splitlines() if dataset != ""
        ]

    # Fill in file names using the DBS API
    filelist = {}

    for setname in datasets:
        if "mc" in setname:
            filelist[setname.split("/")[1]] = [
                global_director + filename['logical_file_name']
                for filename in dbs.listFiles(dataset=setname, detail=1)
            ]

        elif "user" in setname:
            filelist[setname.split("/")[5]] = [global_director + setname]

        else:
            filelist[setname.split("/")[1] + "-" + setname.split("/")[2]] = [
                global_director + filename['logical_file_name']
                for filename in dbs.listFiles(dataset=setname, detail=1)
            ]

    # print filelist

    return filelist
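A short usage sketch, assuming a hypothetical text file that lists one dataset name per line:

filelist = getFilenames("datasets.txt")  # "datasets.txt" is a placeholder
for name, files in filelist.items():
    print(name, len(files))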
Example #5
def dbs3_get_data(dataset, timestamps=1):

    #q = "/afs/cern.ch/user/s/spinoso/public/dbs3wrapper.sh /afs/cern.ch/user/c/cmst2/mc/scripts/datasetinfo.py --dataset %s --json" % dataset
    #output=os.popen(q).read()
    #s = json.loads(output)
    dbsapi = DbsApi(url=dbs3_url)  # dbs3_url is assumed to be defined at module level
    # retrieve dataset summary
    try:
        reply = dbsapi.listDatasets(dataset=dataset,
                                    dataset_access_type='*',
                                    detail=True)
        #print reply
        if len(reply):
            status = reply[0]['dataset_access_type']
            reply = dbsapi.listBlockSummaries(dataset=dataset, detail=True)
            cnt = 0
            for block in reply:
                cnt += int(block['num_event'])
            return [cnt, status, int(cnt / 100.)]
        else:
            print(dataset, "does not exist")
            return [0, '', 0]

    except Exception:
        print("crash dbs3")
        return [0, '', 0]
Example #6
def das_files(dataset):
    # rewrite the processed-dataset field to match the RunIIFall17NanoAODv4 campaign
    dataset_split = dataset.split('/')
    dataset_split[2] = 'RunIIFall17NanoAODv4*'
    datasetv4 = '/'.join(dataset_split)

    dbs = DbsApi('https://cmsweb.cern.ch/dbs/prod/global/DBSReader')
    return dbs.listDatasets(dataset=datasetv4)
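A quick call sketch (the MiniAOD dataset name is a hypothetical placeholder); the result is the list of matching NanoAODv4 dataset records:

nano_datasets = das_files('/SomePrimary/RunIIFall17MiniAODv2-v1/MINIAODSIM')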
Example #7
def uploadWorker(input, results, dbsUrl):
    """
    _uploadWorker_

    Put JSONized blocks in the input
    Get confirmation in the output
    """

    # Init DBS Stuff
    logging.debug("Creating dbsAPI with address %s" % dbsUrl)
    dbsApi = DbsApi(url=dbsUrl)


    while True:

        try:
            work = input.get()
        except (EOFError, IOError):
            crashMessage = "Hit EOF/IO in getting new work\n"
            crashMessage += "Assuming this is a graceful break attempt.\n"
            logging.error(crashMessage)
            break

        if work == 'STOP':
            # Then halt the process
            break

        name  = work.get('name', None)
        block = work.get('block', None)

        # Do stuff with DBS
        try:
            logging.debug("About to call insert block with block: %s" % block)
            dbsApi.insertBulkBlock(blockDump = block)
            results.put({'name': name, 'success': "uploaded"})
        except Exception as ex:
            exString = str(ex)
            if 'Block %s already exists' % name in exString:
                # Then this is probably a duplicate
                # Ignore this for now
                logging.error("Had duplicate entry for block %s. Ignoring for now." % name)
                logging.debug("Exception: %s" % exString)
                logging.debug("Traceback: %s" % str(traceback.format_exc()))
                results.put({'name': name, 'success': "uploaded"})
            elif 'Proxy Error' in exString:
                # This is probably a successful insertion that went bad.
                # Put it on the check list
                msg = "Got a proxy error for block (%s)." % name
                logging.error(msg)
                logging.error(str(traceback.format_exc()))
                results.put({'name': name, 'success': "check"})
            else:
                msg =  "Error trying to process block %s through DBS.\n" % name
                msg += exString
                logging.error(msg)
                logging.error(str(traceback.format_exc()))
                logging.debug("block: %s \n" % block)
                results.put({'name': name, 'success': "error", 'error': msg})

    return
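A minimal driver sketch for this worker (the multiprocessing setup is assumed; block_name and block_dump are hypothetical placeholders, and the DBSWriter URL is one plausible choice):

from multiprocessing import Process, Queue

workInput = Queue()
results = Queue()
worker = Process(target=uploadWorker,
                 args=(workInput, results, "https://cmsweb.cern.ch/dbs/prod/global/DBSWriter"))
worker.start()
workInput.put({'name': block_name, 'block': block_dump})  # hypothetical block name and JSONized block
workInput.put('STOP')                                     # tell the worker to halt
worker.join()
print(results.get())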
Example #8
def duplicateLumi(dataset, verbose=False, skipInvalid=False):
    """
    Checks if the output dataset has duplicate lumis.
    Returns True if at least one duplicate lumi was found.
    verbose: if True, print details
    skipInvalid: if True, skip invalid files; default is False because it is faster
    """
    # initialize API to DBS3
    dbsapi = DbsApi(url=dbs3_url)
    duplicated = False
    lumisChecked = {}
    # retrieve files
    reply = dbsapi.listFiles(dataset=dataset, detail=skipInvalid)
    for f in reply:
        logical_file_name = f['logical_file_name']
        #skip invalid files
        if skipInvalid and f['is_file_valid'] != 1:
            continue
        reply2 = dbsapi.listFileLumis(logical_file_name=logical_file_name)
        #retrieve lumis for each file
        lumis = reply2[0]['lumi_section_num']
        #check that each lumi is only in one file
        for lumi in lumis:
            if lumi in lumisChecked:
                #if verbose print results, if not end quickly
                if verbose:
                    print('Lumi', lumi, 'is in these files')
                    print(logical_file_name)
                    print(lumisChecked[lumi])
                    duplicated = True
                else:
                    return True
            else:
                lumisChecked[lumi] = logical_file_name
    return duplicated
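A quick usage sketch (the dataset name is a hypothetical placeholder, and dbs3_url is assumed to be defined):

if duplicateLumi('/SomePrimary/someuser-somepublish-v1/USER', verbose=True):
    print("dataset has duplicate lumis")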
Example #9
def getDatasetStatus(dataset):
    """
    Gets the dataset status (access type): VALID, INVALID, PRODUCTION, DEPRECATED
    """
    dbsapi = DbsApi(url=dbs3_url)
    reply = dbsapi.listDatasets(dataset=dataset, dataset_access_type='*', detail=True)
    return reply[0]['dataset_access_type']
Example #10
def getDatasetSize(dataset):
    # initialize API to DBS3
    dbsapi = DbsApi(url=dbs3_url)
    # retrieve the aggregated size of all blocks in the dataset
    reply = dbsapi.listBlockSummaries(dataset=dataset)
    return reply[0]['file_size']
Example #11
def get_dbs_api(instance='global'):
    """Return an API client for a CMS Dataset Bookkeeping (DBS) server instance.

    For a given DBS instance, a new DbsApi object is returned on the first call.
    Subsequent calls for the same DBS instance return the same DbsApi object to
    avoid reinitialization.

    Parameters
    ----------
    instance : string
        One of the following DBS server instances:
            * global (default)
            * phys01
            * phys02
            * phys03
            * caf

    Returns
    -------
    DbsApi
        A DbsApi object configured for the requested DBS server instance.
    """
    DBS_INSTANCES = {'global', 'phys01', 'phys02', 'phys03', 'caf'}
    if instance not in DBS_INSTANCES:
        raise ValueError('Unrecognized DBS instance: {0}'.format(instance))
    dbs_api = globals().get(instance.upper(), None)
    if dbs_api is None:
        url = 'https://cmsweb.cern.ch/dbs/prod/{0}/DBSReader'.format(instance)
        dbs_api = DbsApi(url)
        globals()[instance.upper()] = dbs_api
    return dbs_api
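A short usage sketch; a second call with the same instance name returns the cached client:

api = get_dbs_api('phys03')
assert api is get_dbs_api('phys03')  # same DbsApi object, no reinitialization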
Example #12
def getDatasetStatus(dataset):
    "Return dataset status"
    dbsapi = DbsApi(url=DBS3, verifypeer=False)  # DBS3 is assumed to be a module-level DBS reader URL
    reply = dbsapi.listDatasets(dataset=dataset,
                                dataset_access_type='*',
                                detail=True)
    return reply[0]['dataset_access_type']
Example #13
def main():
  # args = sys.argv[1:]
  # data = args[0]

  sample_group = 'signal'  # signal, background, data, all
  sample_list = get_sample_list(sample_group)
  sample_list.sort()

  url = "https://cmsweb.cern.ch/dbs/prod/global/DBSReader"
  api = DbsApi(url=url)

  for samp in sample_list:
    # print('Checking {0}'.format(samp.DAS))
    outputDataSets = api.listDatasets(dataset=samp.DAS, detail=True, dataset_access_type='VALID')

    if outputDataSets:
      for ds in outputDataSets:
        # print('{0}'.format(ds['dataset']))
        # print('{0}'.format(ds['primary_ds_name']))
        # print('{0}'.format(ds['xtcrosssection']))
        nevents = api.listBlockSummaries(dataset=ds['dataset'])
        # print(nevents[0]['num_event'])
        # this creates a table row for the paper with dataset name and number of events
        print('verb@ {0} @ & {1:.2e} & XX \\\\ '.format(ds['primary_ds_name'], nevents[0]['num_event']))
  sys.exit(0)
Example #14
    def getBlockSitesFromLocalDBS3(self, dbs_url):

        ## find the location for each block in the list
        from dbs.apis.dbsClient import DbsApi
        api = DbsApi(dbs_url)

        from NodeNameUtils import getMapOfSEHostName2PhedexNodeNameFromPhEDEx

        se2pnn = getMapOfSEHostName2PhedexNodeNameFromPhEDEx()

        blockSites = {}
        for block in self.Listfileblocks:
            blockInfo = api.listBlocks(block_name=block, detail=True)
            location = blockInfo[0]['origin_site_name']
            if location == 'UNKNOWN':
                blockSites[block] = []
            else:
                #if locationIsValidPNN:
                if location.startswith('T2_') or location.startswith('T3_'):
                    blockSites[block] = [location]
                else:
                    if location in se2pnn:
                        blockSites[block] = [se2pnn[location]]
                    else:
                        msg = "ERROR: unknown location for block: %s. Skip this block" % location
                        common.logger.info(msg)
                        blockSites[block] = []

        return blockSites
Example #15
def crabConfig(dataSet, setName, outDir, systematics, channels, era):
    isSignal = "HPlus" in setName
    isData = "Single" in setName or "JetHT" in setName or "EGamma" in setName

    outFiles = []

    for systematic in systematics:
        if systematic == "":
            outFiles.append("{}.root".format(setName))
            continue

        if isData:
            break

        for shift in ["Up", "Down"]:
            outFiles.append("{}_{}{}.root".format(setName, systematic, shift))

    # Calculate number of files per job
    url = "https://cmsweb.cern.ch/dbs/prod/{}/DBSReader".format(
        "global")  # if not isSignal else "phys03")
    api = DbsApi(url=url)
    files = api.listFiles(dataset=dataSet, detail=1)

    eventsPerFile = sum(f["event_count"] for f in files) / len(files)
    filesPerJob = int(math.ceil(300000. / eventsPerFile))

    ##Crab config
    crabConf = config()

    crabConf.General.requestName = "Skim_{}".format(era)
    crabConf.General.workArea = outDir
    crabConf.General.transferOutputs = True
    crabConf.General.transferLogs = False

    crabConf.JobType.pluginName = "Analysis"
    crabConf.JobType.psetName = "{}/src/ChargedSkimming/Skimming/python/miniskimmer.py".format(
        os.environ["CMSSW_BASE"])
    crabConf.JobType.pyCfgParams = [
        "outname={}.root".format(setName),
        "channel={}".format(",".join(channels)), "era={}".format(era)
    ]
    crabConf.JobType.outputFiles = outFiles
    crabConf.JobType.maxJobRuntimeMin = 1440
    crabConf.JobType.maxMemoryMB = 2500
    crabConf.JobType.allowUndistributedCMSSW = True

    crabConf.Data.inputDataset = dataSet
    crabConf.Data.inputDBS = "global"  # if not isSignal else "phys03"
    crabConf.Data.splitting = "FileBased"
    crabConf.Data.unitsPerJob = filesPerJob
    crabConf.Data.outLFNDirBase = "/store/user/dbrunner/skim/{}/{}".format(
        "_".join([
            str(getattr(time.localtime(), "tm_" + t))
            for t in ["mday", "mon", "year"]
        ]), era)

    crabConf.Site.storageSite = "T2_DE_DESY"
    crabConf.User.voGroup = "dcms"

    return crabConf
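A hypothetical call sketch (all argument values are placeholders; a CMSSW environment and the CRABClient config object are assumed):

crabConf = crabConfig(
    dataSet="/SomePrimary/SomeCampaign-v1/MINIAODSIM",  # placeholder dataset
    setName="SomePrimary",
    outDir="crabWorkArea",
    systematics=["", "JEC"],
    channels=["mu4j"],
    era="2017")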
Example #16
def getLumiListInValidFiles(dataset, dbsurl='phys03'):
    """
    Get the runs/lumis in the valid files of a given dataset.

    dataset: the dataset name as published in DBS
    dbsurl: the DBS URL or DBS prod instance

    Returns a LumiList object.
    """
    dbsurl = DBSURLS['reader'].get(dbsurl, dbsurl)
    dbs3api = DbsApi(url=dbsurl)
    try:
        files = dbs3api.listFileArray(dataset=dataset, validFileOnly=0, detail=True)
    except Exception as ex:
        msg  = "Got DBS client error requesting details of dataset '%s' on DBS URL '%s': %s" % (dataset, dbsurl, ex)
        msg += "\n%s" % (traceback.format_exc())
        raise ClientException(msg)
    if not files:
        msg = "Dataset '%s' not found in DBS URL '%s'." % (dataset, dbsurl)
        raise ClientException(msg)
    validFiles = [f['logical_file_name'] for f in files if f['is_file_valid']]
    blocks = set([f['block_name'] for f in files])
    runLumiPairs = []
    for blockName in blocks:
        fileLumis = dbs3api.listFileLumis(block_name=blockName)
        for f in fileLumis:
            if f['logical_file_name'] in validFiles:
                run = f['run_num']
                lumis = f['lumi_section_num']
                for lumi in lumis:
                    runLumiPairs.append((run,lumi))
    lumiList = LumiList(lumis=runLumiPairs)

    return lumiList
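A brief usage sketch, assuming a CRAB environment and a hypothetical user-published dataset; getCompactList() is the WMCore LumiList accessor for the run/lumi mapping:

lumiList = getLumiListInValidFiles('/SomePrimary/someuser-somepublish-v1/USER')
print(lumiList.getCompactList())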
Example #17
def get_filenames(bkgTXT, dataTXT, sigTXT):
    ## DBS python API
    dbs = DbsApi('https://cmsweb.cern.ch/dbs/prod/global/DBSReader')
    global_director = "root://cmsxrootd.fnal.gov/"  # global xrootd redirector

    ##Read out input files containing data set names
    if bkgTXT:
        with open(bkgTXT) as f:
            background = [
                background for background in f.read().splitlines()
                if background != ""
            ]

    else:
        background = []

    if dataTXT:
        with open(dataTXT) as f:
            data = [data for data in f.read().splitlines() if data != ""]

    else:
        data = []

    if sigTXT:
        with open(sigTXT) as f:
            signal = [
                signal for signal in f.read().splitlines() if signal != ""
            ]

    else:
        signal = []

    ## Fill in file names using the DBS API
    filelist = {}

    for setname in background + data:
        if "mc" in setname:
            key = setname.split("/")[1]

        else:
            key = setname.split("/")[1] + "-" + setname.split("/")[2]

        filelist[key] = [
            global_director + filename['logical_file_name']
            for filename in dbs.listFiles(dataset=setname, detail=1)
        ]

    ##Read out signal files with gfal-ls command
    for SEpath in signal:
        key = SEpath.split("/")[-2]
        signalFiles = subprocess.check_output(["gfal-ls",
                                               SEpath]).split("\n")[:-1]

        filelist[key] = [
            global_director + SEpath[74:] + "/" + signalFile
            for signalFile in signalFiles
        ]

    return filelist
Example #18
def uploadWorker(workInput, results, dbsUrl):
    """
    _uploadWorker_

    Put JSONized blocks in the workInput
    Get confirmation in the output
    """

    # Init DBS Stuff
    logging.debug("Creating dbsAPI with address %s", dbsUrl)
    dbsApi = DbsApi(url=dbsUrl)

    while True:

        try:
            work = workInput.get()
        except (EOFError, IOError):
            crashMessage = "Hit EOF/IO in getting new work\n"
            crashMessage += "Assuming this is a graceful break attempt.\n"
            logging.error(crashMessage)
            break

        if work == 'STOP':
            # Then halt the process
            break

        name = work.get('name', None)  # this is the block name
        block = work.get('block', None)  # this is the block data structure

        # Do stuff with DBS
        try:
            logging.debug("About to call insert block with block: %s", block)
            dbsApi.insertBulkBlock(blockDump=block)
            results.put({'name': name, 'success': "uploaded"})
        except Exception as ex:
            exString = str(ex)
            if 'Block %s already exists' % name in exString:
                # Then this is probably a duplicate
                # Ignore this for now
                logging.warning("Block %s already exists. Marking it as uploaded.", name)
                logging.debug("Exception: %s", exString)
                results.put({'name': name, 'success': "uploaded"})
            elif 'Proxy Error' in exString:
                # This is probably a successful insertion that went bad.
                # Put it on the check list
                msg = "Got a proxy error for block %s." % name
                logging.warning(msg)
                results.put({'name': name, 'success': "check"})
            elif 'Missing data when inserting to dataset_parents' in exString:
                msg = "Parent dataset is not inserted yet for block %s." % name
                logging.warning(msg)
                results.put({'name': name, 'success': "error", 'error': msg})
            else:
                msg = "Error trying to process block %s through DBS. Error: %s" % (name, exString)
                logging.exception(msg)
                logging.debug("block info: %s \n", block)
                results.put({'name': name, 'success': "error", 'error': msg})

    return
Example #19
def getDataTiers(dbsUrl):
    """
    Function to retrieve all the datatiers from DBS.
    NOTE: to be used with some caching (MemoryCacheStruct)
    :param dbsUrl: the DBS URL string
    :return: a list of strings/datatiers
    """
    dbs = DbsApi(dbsUrl)
    return [tier['data_tier_name'] for tier in dbs.listDataTiers()]
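A quick usage sketch:

tiers = getDataTiers('https://cmsweb.cern.ch/dbs/prod/global/DBSReader')
print(tiers[:5])  # e.g. tier names such as 'AOD', 'MINIAOD', ...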
Example #20
    def __init__(self, args):
        # just make sure args value complies with dbs args
        try:
            from dbs.apis.dbsClient import DbsApi
            DbsApi(args)
        except ImportError:
            # No dbsApi available, carry on
            pass
        self.args = args
Example #21
    def __init__(self, url, **contact):

        # instantiate dbs api object
        try:
            self.dbs = DbsApi(url, **contact)
        except DbsException as ex:
            msg = "Error in DBSReader with DbsApi\n"
            msg += "%s\n" % formatEx(ex)
            raise DBSReaderError(msg)
Example #22
def getSize(dataset):
    # initialize API to DBS3
    dbsapi = DbsApi(url=dbs3_url)
    # retrieve block details and add up the block sizes
    reply = dbsapi.listBlocks(dataset=dataset, detail=True)
    total = 0
    for block in reply:
        total += block['block_size']
    return total
Example #23
def getFileCount(dataset):
    # initialize API to DBS3
    dbsapi = DbsApi(url=dbs3_url)
    # retrieve per-block summaries and add up the file counts
    reply = dbsapi.listBlockSummaries(dataset=dataset, detail=True)
    cnt = 0
    for block in reply:
        cnt += int(block['num_file'])
    return cnt
Example #24
    def __init__(self, args, **contact):
        # just make sure args value complies with dbs args
        try:
            from dbs.apis.dbsClient import DbsApi
            DbsApi(args, **contact)
        except ImportError:
            # No dbsApi available, carry on
            pass
        self.args = args
        self.dbg = DataBlockGenerator3()
Example #25
    def _setDatatiersCache(ts, dbsUrl):
        """
        Set a timestamp and update the list of datatiers cached in
        the class property
        """
        dbs = DbsApi(dbsUrl)
        DBS3Reader._datatiers['ts'] = ts
        DBS3Reader._datatiers['tiers'] = [tier['data_tier_name'] for tier in dbs.listDataTiers()]

        return
Example #26
def getEventCountBlock(block):
    """
    Returns the number of events in a block using DBS3
    """
    # initialize API to DBS3
    dbsapi = DbsApi(url=dbs3_url)
    # retrieve block summary
    reply = dbsapi.listBlockSummaries(block_name=block)
    return reply[0]['num_event']
Example #27
def getNumberofFilesPerRun(das_url, dataset, run):
    """
    Count the number of files in a dataset for a given run
    """
    # initialize API to DBS3
    dbsapi = DbsApi(url=dbs3_url)

    # retrieve the file list, restricted to the given run
    reply = dbsapi.listFiles(dataset=dataset, run_num=run)
    return len(reply)
Example #28
def getDatasetEventsPerLumi(dataset):
    dbsapi = DbsApi(url='https://cmsweb.cern.ch/dbs/prod/global/DBSReader')
    all_files = dbsapi.listFileSummaries(dataset=dataset, validFileOnly=1)
    try:
        average = sum(
            [f['num_event'] / float(f['num_lumi'])
             for f in all_files]) / float(len(all_files))
    except Exception:
        average = 100
    return average
Example #29
    def __init__(self, url, logger=None, **contact):

        # instantiate dbs api object
        try:
            self.dbsURL = url.replace("cmsweb.cern.ch", "cmsweb-prod.cern.ch")
            self.dbs = DbsApi(self.dbsURL, **contact)
            self.logger = logger or logging.getLogger(self.__class__.__name__)
        except dbsClientException as ex:
            msg = "Error in DBSReader with DbsApi\n"
            msg += "%s\n" % formatEx3(ex)
            raise DBSReaderError(msg)
Example #30
def duplicateRunLumi(dataset, verbose=False, skipInvalid=False):
    """
    Checks if the output dataset has duplicate lumis
    within each run.
    Returns True if at least one duplicate lumi was found,
    i.e. if the same lumi in the same run appears in
    two different files.
    This can be used on datasets that span separate runs.
    verbose: if True, print details
    skipInvalid: if True, skip invalid files; default is False because it is faster
    """
    dbsapi = DbsApi(url=dbs3_url)
    duplicated = False
    #check each run
    runs = getRunsDataset(dataset)
    #if only one run in the list
    if len(runs) == 1:
        if verbose:
            print("only one run:", runs)
        return duplicateLumi(dataset, verbose, skipInvalid)
    #else manually
    for run in runs:
        # map each lumi to the file it was first seen in
        lumisChecked = {}
        # retrieve all files of the dataset (lumis are filtered per run below)
        reply = dbsapi.listFiles(dataset=dataset, detail=skipInvalid)
        for f in reply:
            #skip invalid files
            if skipInvalid and f['is_file_valid'] != 1:
                continue
            logical_file_name = f['logical_file_name']
            reply2 = dbsapi.listFileLumis(logical_file_name=logical_file_name,
                                          run_num=run)
            #retrieve lumis for each file
            if reply2:
                lumis = reply2[0]['lumi_section_num']
            else:
                continue
            #check that each lumi is only in one file
            for lumi in lumis:
                if lumi in lumisChecked:
                    #if verbose print results, if not end quickly
                    if verbose:
                        print('Lumi', lumi, 'in run', run, 'is in these files')
                        print(logical_file_name)
                        print(lumisChecked[lumi])
                        duplicated = True
                    else:
                        return True
                else:
                    lumisChecked[lumi] = logical_file_name

    return duplicated