Example #1
def getBlockReplicasAndSizeRucio(datasets, rucioUrl, rucioToken, scope="cms"):
    """
    Given a list of datasets, find all their blocks with replicas
    available.
    :param datasets: list of dataset names
    :param rucioUrl: a string with the Rucio URL
    :param rucioToken: a string with the user rucio token
    :param scope: a string with the Rucio scope of our data
    :return: a dictionary in the form of:
    {"dataset":
        {"block":
            {"blockSize": 111, "locations": ["x", "y"]}
        }
    }
    NOTE: Value `None` is returned in case the data-service failed to serve a given request.
    """
    dsetBlockSize = {}
    if not datasets:
        return dsetBlockSize

    headers = {"X-Rucio-Auth-Token": rucioToken}
    # first, figure out their block names
    blocksByDset = getContainerBlocksRucio(datasets, rucioUrl, rucioToken, scope=scope)
    urls = []
    for _dset, blocks in blocksByDset.items():
        for block in blocks:
            urls.append('{}/replicas/{}/{}/datasets'.format(rucioUrl, scope, quote(block)))

    # next, query the replicas API for the block location
    # this is going to be bloody expensive in terms of HTTP requests
    logging.info("Executing %d requests against Rucio replicas API for blocks", len(urls))
    data = multi_getdata(urls, ckey(), cert(), headers=headers)
    for row in data:
        block = row['url'].split("/{}/".format(scope))[1]
        block = unquote(re.sub("/datasets$", "", block, 1))
        container = block.split("#")[0]
        dsetBlockSize.setdefault(container, dict())
        if row['data'] is None:
            msg = "Failure in getBlockReplicasAndSizeRucio for container {} and block {}.".format(container, block)
            msg += " Response: {}".format(row)
            logging.error(msg)

            dsetBlockSize[container] = None
            continue
        if dsetBlockSize[container] is None:
            # then one of the block requests failed, skip the whole dataset
            continue

        thisBlockRSEs = []
        blockBytes = 0
        for item in parseNewLineJson(row['data']):
            blockBytes = item['bytes']
            if item['state'] == "AVAILABLE":
                thisBlockRSEs.append(item["rse"])
        # now we have the final block location
        if not blockBytes and not thisBlockRSEs:
            logging.warning("Block: %s has no replicas and no size", block)
        else:
            dsetBlockSize[container][block] = {"locations": thisBlockRSEs, "blockSize": blockBytes}
    return dsetBlockSize
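
All of these examples rely on shared helpers that the page does not show: multi_getdata, a concurrent HTTP fetcher (pycurl-based in WMCore), and ckey()/cert(), which return the paths to the user's X.509 key and certificate. Purely as a hedged, minimal stand-in for running the snippets standalone, the sketch below mimics the contract the call sites depend on: it yields dicts with 'url' and 'data' keys ('data' is None on failure), plus 'code', 'error', and a status line under 'headers' (used by Example #32). The real helper may differ in detail.

from concurrent.futures import ThreadPoolExecutor
import requests

def multi_getdata(urls, ckey, cert, headers=None, num_conn=10, cookie=None):
    """Concurrently GET every URL; yield {'url', 'data', 'code', 'error', 'headers'} dicts."""
    # `cookie` is accepted only for signature compatibility with the call
    # sites on this page; this sketch ignores it.
    def fetch(url):
        try:
            resp = requests.get(url, headers=headers, cert=(cert, ckey), timeout=300)
            resp.raise_for_status()
            return {'url': url, 'data': resp.text, 'code': resp.status_code,
                    'error': None,
                    'headers': 'HTTP/1.1 %s %s' % (resp.status_code, resp.reason)}
        except requests.RequestException as exc:
            code = exc.response.status_code if exc.response is not None else None
            return {'url': url, 'data': None, 'code': code,
                    'error': str(exc), 'headers': ''}
    with ThreadPoolExecutor(max_workers=num_conn) as pool:
        for row in pool.map(fetch, urls):
            yield row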
Example #2
def dbsInfo(datasets):
    "Provides DBS info about dataset blocks"
    urls = [
        '%s/blocks?detail=True&dataset=%s' % (dbsUrl(), d) for d in datasets
    ]
    data = multi_getdata(urls, ckey(), cert())
    datasetBlocks = {}
    datasetSizes = {}
    for row in data:
        dataset = row['url'].split('=')[-1]
        if row['data'] is None:
            # guard against a failed request; json.loads(None) would raise
            print("FAILURE: dbsInfo for %s. Error: %s %s" %
                  (dataset, row.get('code'), row.get('error')))
            continue
        rows = json.loads(row['data'])
        blocks = []
        size = 0
        for item in rows:
            blocks.append(item['block_name'])
            size += item['block_size']
        datasetBlocks[dataset] = blocks
        datasetSizes[dataset] = size
    return datasetBlocks, datasetSizes
Example #3
def getContainerBlocksRucio(containers, rucioUrl, rucioToken, scope="cms"):
    """
    Provided a list of containers, find all their blocks.
    :param containers: list of container names
    :param rucioUrl: a string with the Rucio URL
    :param rucioToken: a string with the user rucio token
    :param scope: a string with the Rucio scope of our data
    :return: a dictionary key'ed by the datasets with a list of blocks.
    NOTE: Value `None` is returned in case the data-service failed to serve a given request.
    """
    blocksByDset = {}
    if not containers:
        return blocksByDset

    headers = {"X-Rucio-Auth-Token": rucioToken}
    urls = ['{}/dids/{}/{}/dids'.format(rucioUrl, scope, cont) for cont in containers]
    logging.info("Executing %d requests against Rucio DIDs API for blocks in containers", len(urls))
    data = multi_getdata(urls, ckey(), cert(), headers=headers)
    for row in data:
        container = row['url'].split("/{}/".format(scope))[1]
        container = re.sub("/dids$", "", container, 1)
        blocksByDset.setdefault(container, [])
        if not row['data']:
            logging.warning("Dataset: %s has no blocks in Rucio", container)
            continue
        for item in parseNewLineJson(row['data']):
            blocksByDset[container].append(item["name"])
    return blocksByDset
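
The Rucio examples on this page iterate with parseNewLineJson(row['data']): Rucio's list APIs stream newline-delimited JSON, one record per line, rather than a single JSON document. A minimal sketch of such a parser, assuming the payload is a text blob with one JSON object per line:

import json

def parseNewLineJson(stream):
    """Yield one object per non-empty line of a newline-delimited JSON payload."""
    for line in (stream or "").splitlines():
        line = line.strip()
        if line:
            yield json.loads(line)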
Example #4
def getBlocksByDsetAndRun(datasetName, runList, dbsUrl):
    """
    Given a dataset name and a list of runs, find all the blocks
    :return: flat list of blocks
    """
    blocks = set()
    if isinstance(runList, set):
        runList = list(runList)

    urls = [
        '%s/blocks?run_num=%s&dataset=%s' %
        (dbsUrl, str(runList).replace(" ", ""), datasetName)
    ]
    data = multi_getdata(urls, ckey(), cert())

    for row in data:
        dataset = row['url'].rsplit('=')[-1]
        if row['data'] is None:
            msg = "Failure in getBlocksByDsetAndRun for %s. Error: %s %s" % (
                dataset, row.get('code'), row.get('error'))
            raise RuntimeError(msg)
        rows = json.loads(row['data'])
        for item in rows:
            blocks.add(item['block_name'])

    return list(blocks)
Example #5
def getFileLumisInBlock(blocks, dbsUrl, validFileOnly=1):
    """
    Given a list of blocks, find their file run lumi information
    in DBS for up to 10 blocks concurrently
    :param blocks: list of block names
    :param dbsUrl: string with the DBS URL
    :param validFileOnly: integer flag for valid files only or not
    :return: a dict of blocks with list of file/run/lumi info
    """
    runLumisByBlock = {}
    urls = [
        '%s/filelumis?validFileOnly=%d&block_name=%s' %
        (dbsUrl, validFileOnly, quote(b)) for b in blocks
    ]
    # limit it to 10 concurrent calls not to overload DBS
    logging.info(
        "Executing %d requests against DBS 'filelumis' API, concurrency limited to 10",
        len(urls))
    data = multi_getdata(urls, ckey(), cert(), num_conn=10)

    for row in data:
        blockName = unquote(row['url'].rsplit('=')[-1])
        if row['data'] is None:
            msg = "Failure in getFileLumisInBlock for block %s. Error: %s %s" % (
                blockName, row.get('code'), row.get('error'))
            raise RuntimeError(msg)
        rows = json.loads(row['data'])
        runLumisByBlock.setdefault(blockName, [])
        for item in rows:
            runLumisByBlock[blockName].append(item)
    return runLumisByBlock
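
Block names embed a '#' separator between the dataset name and the block hash, and '#' is not safe in a URL query string; that is why these helpers apply quote() when building URLs and unquote() when mapping a URL back to its block name. A small round-trip example:

from urllib.parse import quote, unquote

block = "/Primary/Processed-v1/AOD#abc-123"   # placeholder block name
encoded = quote(block)   # '/Primary/Processed-v1/AOD%23abc-123' ('/' stays safe by default)
assert unquote(encoded) == block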
Example #6
def getBlockReplicasAndSize(datasets, phedexUrl, group=None):
    """
    Given a list of datasets, find all their blocks with replicas
    available (thus blocks with at least 1 valid file), completed
    and subscribed.
    If PhEDEx group is provided, make sure it's subscribed under that
    same group.
    :param datasets: list of dataset names
    :param phedexUrl: a string with the PhEDEx URL
    :param group: optional PhEDEx group name
    :return: a dictionary in the form of:
    {"dataset":
        {"block":
            {"blockSize": 111, "locations": ["x", "y"]}
        }
    }
    NOTE: Value `None` is returned in case the data-service failed to serve a given request.
    """
    dsetBlockSize = {}
    if not datasets:
        return dsetBlockSize

    urls = [
        '%s/blockreplicas?dataset=%s' % (phedexUrl, dset) for dset in datasets
    ]
    data = multi_getdata(urls, ckey(), cert())

    for row in data:
        dataset = row['url'].split('=')[-1]
        if row['data'] is None:
            print(
                "Failure in getBlockReplicasAndSize for dataset %s. Error: %s %s"
                % (dataset, row.get('code'), row.get('error')))
            dsetBlockSize.setdefault(dataset, None)
            continue
        rows = json.loads(row['data'])
        dsetBlockSize.setdefault(dataset, {})
        try:
            for item in rows['phedex']['block']:
                block = {
                    item['name']: {
                        'blockSize': item['bytes'],
                        'locations': []
                    }
                }
                for repli in item['replica']:
                    # keep only replicas that are both complete and subscribed,
                    # and match the requested group when one is given
                    if repli['complete'] == 'y' and repli['subscribed'] == 'y':
                        if not group or repli['group'] == group:
                            block[item['name']]['locations'].append(repli['node'])
                dsetBlockSize[dataset].update(block)
        except Exception as exc:
            print(
                "Failure in getBlockReplicasAndSize for dataset %s. Error: %s"
                % (dataset, str(exc)))
            dsetBlockSize[dataset] = None
    return dsetBlockSize
Example #7
def findParent(datasets, dbsUrl):
    """
    Helper function to find the parent dataset.
    It returns a dictionary key'ed by the child dataset
    NOTE: Value `None` is returned in case the data-service failed to serve a given request.
    """
    parentByDset = {}
    if not datasets:
        return parentByDset

    urls = ['%s/datasetparents?dataset=%s' % (dbsUrl, d) for d in datasets]
    logging.info("Executing %d requests against DBS 'datasetparents' API",
                 len(urls))
    data = multi_getdata(urls, ckey(), cert())

    for row in data:
        dataset = row['url'].split('=')[-1]
        if row['data'] is None:
            print("Failure in findParent for dataset %s. Error: %s %s" %
                  (dataset, row.get('code'), row.get('error')))
            parentByDset.setdefault(dataset, None)
            continue
        rows = json.loads(row['data'])
        try:
            for item in rows:
                parentByDset[item['this_dataset']] = item['parent_dataset']
        except Exception as exc:
            print("Failure in findParent for dataset %s. Error: %s" %
                  (dataset, str(exc)))
            parentByDset[dataset] = None
    return parentByDset
Example #8
def getBlocksByDsetAndRun(datasetName, runList, dbsUrl):
    """
    Given a dataset name and a list of runs, find all the blocks
    :return: flat list of blocks
    """
    blocks = set()
    if isinstance(runList, set):
        runList = list(runList)

    urls = []
    for runSlice in grouper(runList, 50):
        urls.append('%s/blocks?run_num=%s&dataset=%s' %
                    (dbsUrl, str(runSlice).replace(" ", ""), datasetName))
    logging.info(
        "Executing %d requests against DBS 'blocks' API, with run_num list",
        len(urls))
    data = multi_getdata(urls, ckey(), cert())

    for row in data:
        dataset = row['url'].rsplit('=')[-1]
        if row['data'] is None:
            msg = "Failure in getBlocksByDsetAndRun for %s. Error: %s %s" % (
                dataset, row.get('code'), row.get('error'))
            raise RuntimeError(msg)
        rows = json.loads(row['data'])
        for item in rows:
            blocks.add(item['block_name'])

    return list(blocks)
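
This variant improves on Example #4 by slicing the run list into chunks of 50 via a grouper helper, keeping each DBS URL bounded. The helper itself is not shown on this page; one common itertools-based sketch (an assumption, the actual WMCore utility may differ) is:

from itertools import islice

def grouper(iterable, csize):
    """Yield successive lists with at most csize items from iterable."""
    iterator = iter(iterable)
    while True:
        chunk = list(islice(iterator, csize))
        if not chunk:
            break
        yield chunk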
Example #9
def dbsInfo(datasets, dbsUrl):
    "Provides DBS info about dataset blocks"
    datasetBlocks = {}
    datasetSizes = {}
    datasetTransfers = {}
    if not datasets:
        return datasetBlocks, datasetSizes, datasetTransfers

    urls = ['%s/blocks?detail=True&dataset=%s' % (dbsUrl, d) for d in datasets]
    logging.info(
        "Executing %d requests against DBS 'blocks' API, with details",
        len(urls))
    data = multi_getdata(urls, ckey(), cert())

    for row in data:
        dataset = row['url'].split('=')[-1]
        if row['data'] is None:
            print("FAILURE: dbsInfo for %s. Error: %s %s" %
                  (dataset, row.get('code'), row.get('error')))
            continue
        rows = json.loads(row['data'])
        blocks = []
        size = 0
        datasetTransfers.setdefault(
            dataset, {})  # flat dict in the format of blockName: blockSize
        for item in rows:
            blocks.append(item['block_name'])
            size += item['block_size']
            datasetTransfers[dataset].update(
                {item['block_name']: item['block_size']})
        datasetBlocks[dataset] = blocks
        datasetSizes[dataset] = size

    return datasetBlocks, datasetSizes, datasetTransfers
Example #10
 def _getRequestSpecs(self, requestNames):
     "Helper function to get all specs for given set of request names"
     urls = [str('%s/%s/spec' % (self.msConfig['reqmgrCacheUrl'], r)) for r in requestNames]
     data = multi_getdata(urls, ckey(), cert())
     rdict = {}
     for row in data:
         req = row['url'].split('/')[-2]
         rdict[req] = pickle.loads(row['data'])
     return rdict
Example #11
def getRequestSpecs(requestNames):
    "Helper function to get all specs for given set of request names"
    urls = [str('%s/%s/spec' % (reqmgrCacheUrl(), r)) for r in requestNames]
    data = multi_getdata(urls, ckey(), cert())
    rdict = {}
    for row in data:
        req = row['url'].split('/')[-2]
        rdict[req] = pickle.loads(row['data'])
    return rdict
Example #12
def getPileupSubscriptionsRucio(datasets, rucioUrl, rucioToken, scope="cms"):
    """
    Provided a list of datasets, find dataset level subscriptions where it's
    as complete as `percent_min`.
    :param datasets: list of dataset names
    :param rucioUrl: a string with the Rucio URL
    :param rucioToken: a string with the user rucio token
    :param scope: a string with the Rucio scope of our data
    :return: a dictionary of datasets and a list of their location.
    NOTE: Value `None` is returned in case the data-service failed to serve a given request.
    """
    # FIXME: we should definitely make a feature request to Rucio...
    # so much, just to get the final RSEs for a container!!!
    locationByDset = {}
    if not datasets:
        return locationByDset

    headers = {"X-Rucio-Auth-Token": rucioToken}
    # first, resolve the dataset into blocks
    blocksByDset = getContainerBlocksRucio(datasets, rucioUrl, rucioToken, scope)
    urls = []
    for _dset, blocks in blocksByDset.items():
        if blocks:
            for block in blocks:
                urls.append('{}/replicas/{}/{}/datasets'.format(rucioUrl, scope, quote(block)))

    # this is going to be bloody expensive in terms of HTTP requests
    logging.info("Executing %d requests against Rucio replicas API for blocks", len(urls))
    data = multi_getdata(urls, ckey(), cert(), headers=headers)
    for row in data:
        block = row['url'].split("/{}/".format(scope))[1]
        block = unquote(re.sub("/datasets$", "", block, 1))
        container = block.split("#")[0]
        locationByDset.setdefault(container, set())
        if row['data'] is None:
            msg = "Failure in getPileupSubscriptionsRucio container {} and block {}.".format(container, block)
            msg += " Response: {}".format(row)
            logging.error(msg)

            locationByDset[container] = None
            continue
        if locationByDset[container] is None:
            # then one of the block requests failed, skip the whole dataset
            continue
        thisBlockRSEs = set()
        for item in parseNewLineJson(row['data']):
            if item['state'] == "AVAILABLE":
                thisBlockRSEs.add(item["rse"])
        logging.info("Block: %s is available at: %s", block, thisBlockRSEs)
        # now we have the final block location
        if not locationByDset[container]:
            # then this is the first block of this dataset
            locationByDset[container] = thisBlockRSEs
        else:
            # otherwise, make an intersection of them
            locationByDset[container] = locationByDset[container] & thisBlockRSEs
    return locationByDset
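
The net effect of the loop above: a dataset is only reported at the RSEs that host every one of its blocks, i.e. the intersection of the per-block location sets. A tiny worked example with made-up RSE names:

# Hypothetical per-block locations for a two-block dataset
blockRSEs = [
    {"T1_US_FNAL_Disk", "T2_CH_CERN"},  # locations of block #1
    {"T2_CH_CERN", "T2_DE_DESY"},       # locations of block #2
]
common = set.intersection(*blockRSEs)
print(common)  # {'T2_CH_CERN'}: the only RSE holding the complete dataset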
Example #13
def phedexInfo(datasets):
    "Fetch PhEDEx info about nodes for all datasets"
    urls = ['%s/blockreplicasummary?dataset=%s' % (phedexUrl(), d) for d in datasets]
    data = multi_getdata(urls, ckey(), cert())
    blockNodes = {}
    for row in data:
        dataset = row['url'].rsplit('=')[-1]
        if row['data'] is None:
            # guard against a failed request; json.loads(None) would raise
            print("FAILURE: phedexInfo for %s. Error: %s %s" %
                  (dataset, row.get('code'), row.get('error')))
            continue
        rows = json.loads(row['data'])
        for item in rows['phedex']['block']:
            nodes = [r['node'] for r in item['replica'] if r['complete'] == 'y']
            blockNodes[item['name']] = nodes
    return blockNodes
Example #14
def getBlocksAndSizeRucio(containers, rucioUrl, rucioToken, scope="cms"):
    """
    Given a list of containers, find all their corresponding blocks and their sizes.
    :param containers: list of container names
    :param rucioUrl: a string with the Rucio URL
    :param rucioToken: a string with the user rucio token
    :param scope: a string with the Rucio scope of our data
    :return: a dictionary in the form of:
    {"dataset":
        {"block":
            {"blockSize": 111, "locations": ["x", "y"]}
        }
    }
    NOTE: Value `None` is returned in case the data-service failed to serve a given request.
    NOTE2: meant to return an output similar to Common.getBlockReplicasAndSize
    """
    contBlockSize = {}
    if not containers:
        return contBlockSize

    headers = {"X-Rucio-Auth-Token": rucioToken}
    urls = []
    for cont in containers:
        ### FIXME: the long attribute value type has recently changed integer to boolean
        ### see PR: https://github.com/rucio/rucio/pull/3949 , which went in in 1.23.5 series
        ### we need to make sure CMS production Rucio will be running that version once MicroServices
        ### get deployed to CMSWEB
        urls.append(
            '{}/dids/{}/dids/search?type=dataset&long=True&name={}'.format(
                rucioUrl, scope, quote(cont + "#*")))
    logging.info(
        "Executing %d requests against Rucio DIDs search API for containers",
        len(urls))
    data = multi_getdata(urls, ckey(), cert(), headers=headers)
    for row in data:
        container = row['url'].split("name=")[1]
        container = unquote(container).replace("#*", "")
        contBlockSize.setdefault(container, {})
        if row['data'] in [None, ""]:
            msg = "Failure in getBlocksAndSizeRucio function for container {}. Response: {}".format(
                container, row)
            logging.error(msg)
            contBlockSize[container] = None
            continue

        for item in parseNewLineJson(row['data']):
            # NOTE: we do not care about primary block location in Rucio
            contBlockSize[container][item['name']] = {
                "blockSize": item['bytes'],
                "locations": []
            }
    return contBlockSize
Example #15
def getRequestWorkflows(requestNames):
    "Helper function to get all specs for given set of request names"
    urls = [str('%s/data/request/%s' % (reqmgrUrl(), r)) for r in requestNames]
    data = multi_getdata(urls, ckey(), cert())
    rdict = {}
    for row in data:
        req = row['url'].split('/')[-1]
        try:
            doc = json.loads(row['data'])  # avoid rebinding the outer `data` generator name
            rdict[req] = doc['result'][0]  # we get back a {'result': [workflow]} dict
        except Exception as exp:
            print("ERROR: failed to load data as a JSON record, error=%s" % str(exp))
            print(row)
    return rdict
Example #16
def phedexInfo(datasets, phedexUrl):
    "Fetch PhEDEx info about nodes for all datasets"
    urls = [
        '%s/blockreplicasummary?dataset=%s' % (phedexUrl, d) for d in datasets
    ]
    data = multi_getdata(urls, ckey(), cert())
    blockNodes = {}
    for row in data:
        dataset = row['url'].rsplit('=')[-1]
        if row['data'] is None:
            # guard against a failed request; json.loads(None) would raise
            print("FAILURE: phedexInfo for %s. Error: %s %s" %
                  (dataset, row.get('code'), row.get('error')))
            continue
        rows = json.loads(row['data'])
        for item in rows['phedex']['block']:
            nodes = [
                r['node'] for r in item['replica'] if r['complete'] == 'y'
            ]
            blockNodes[item['name']] = nodes
    return blockNodes
Example #17
def getRequestWorkflows(requestNames):
    "Helper function to get all specs for given set of request names"
    urls = [str('%s/data/request/%s' % (reqmgrUrl(), r)) for r in requestNames]
    data = multi_getdata(urls, ckey(), cert())
    rdict = {}
    for row in data:
        req = row['url'].split('/')[-1]
        try:
            doc = json.loads(row['data'])  # avoid rebinding the outer `data` generator name
            rdict[req] = doc['result'][0]  # we get back a {'result': [workflow]} dict
        except Exception as exp:
            print("ERROR: failed to load data as a JSON record, error=%s" %
                  str(exp))
            print(row)
    return rdict
Example #18
def getPileupSubscriptions(datasets, phedexUrl, group=None, percentMin=99):
    """
    Provided a list of datasets, find dataset level subscriptions where it's
    as complete as `percent_min`.
    :param datasets: list of dataset names
    :param phedexUrl: a string with the PhEDEx URL
    :param group: optional string with the PhEDEx group
    :param percent_min: only return subscriptions that are this complete
    :return: a dictionary of datasets and a list of their location.
    NOTE: Value `None` is returned in case the data-service failed to serve a given request.
    """
    locationByDset = {}
    if not datasets:
        return locationByDset

    if group:
        url = "%s/subscriptions?group=%s" % (phedexUrl, group)
        url += "&percent_min=%s&dataset=%s"
    else:
        url = "%s/subscriptions?" % phedexUrl
        url += "percent_min=%s&dataset=%s"
    urls = [url % (percentMin, dset) for dset in datasets]

    logging.info("Executing %d requests against PhEDEx 'subscriptions' API",
                 len(urls))
    data = multi_getdata(urls, ckey(), cert())

    for row in data:
        dataset = row['url'].rsplit('=')[-1]
        if row['data'] is None:
            print(
                "Failure in getPileupSubscriptions for dataset %s. Error: %s %s"
                % (dataset, row.get('code'), row.get('error')))
            locationByDset.setdefault(dataset, None)
            continue
        rows = json.loads(row['data'])
        locationByDset.setdefault(dataset, [])
        try:
            for item in rows['phedex']['dataset']:
                for subs in item['subscription']:
                    locationByDset[dataset].append(subs['node'])
        except Exception as exc:
            print(
                "Failure in getPileupSubscriptions for dataset %s. Error: %s" %
                (dataset, str(exc)))
            locationByDset[dataset] = None
    return locationByDset
Example #19
def getPileupContainerSizesRucio(containers,
                                 rucioUrl,
                                 rucioToken,
                                 scope="cms"):
    """
    Given a list of containers, find their total size in Rucio
    :param containers: list of container names
    :param rucioUrl: a string with the Rucio URL
    :param rucioToken: a string with the user rucio token
    :param scope: a string with the Rucio scope of our data
    :return: a flat dictionary of container and their respective sizes
    NOTE: Value `None` is returned in case the data-service failed to serve a given request.
    NOTE: Rucio version of getPileupDatasetSizes()
    """
    sizeByDset = {}
    if not containers:
        return sizeByDset

    headers = {"X-Rucio-Auth-Token": rucioToken}

    urls = [
        '{}/dids/{}/{}?dynamic=anything'.format(rucioUrl, scope, cont)
        for cont in containers
    ]
    logging.info("Executing %d requests against Rucio for the container size",
                 len(urls))
    data = multi_getdata(urls, ckey(), cert(), headers=headers)

    for row in data:
        container = row['url'].split('/dids/{}/'.format(scope))[1]
        container = container.replace("?dynamic=anything", "")
        if row['data'] is None:
            msg = "Failure in getPileupContainerSizesRucio for container {}. Response: {}".format(
                container, row)
            logging.error(msg)
            sizeByDset.setdefault(container, None)
            continue
        response = json.loads(row['data'])
        try:
            sizeByDset.setdefault(container, response['bytes'])
        except KeyError:
            msg = "getPileupContainerSizesRucio function did not return a valid response for container: %s. Error: %s"
            logging.error(msg, container, response)
            sizeByDset.setdefault(container, None)
            continue
    return sizeByDset
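
A hypothetical invocation, with placeholder host, token, and container name:

containers = ["/MinBias/PremixRun3-v1/PREMIX"]  # placeholder container name
sizes = getPileupContainerSizesRucio(containers,
                                     rucioUrl="https://rucio.example.cern.ch",
                                     rucioToken="<user-token>")
for cont, nbytes in sizes.items():
    if nbytes is None:
        print("size lookup failed for %s" % cont)
    else:
        print("%s: %.1f TB" % (cont, nbytes / 1e12))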
Example #20
 def _getRequestWorkflows(self, requestNames):
     "Helper function to get all specs for given set of request names"
     urls = [str('%s/data/request/%s' % (self.msConfig['reqmgr2Url'], r)) for r in requestNames]
     self.logger.debug("getRequestWorkflows")
     for u in urls:
         self.logger.debug("url %s", u)
     data = multi_getdata(urls, ckey(), cert())
     rdict = {}
     for row in data:
         req = row['url'].split('/')[-1]
         try:
             doc = json.loads(row['data'])  # avoid rebinding the outer `data` generator name
             rdict[req] = doc['result'][0]  # we get back a {'result': [workflow]} dict
         except Exception as exp:
             self.logger.error("failed to process row %s", row)
             self.logger.exception("failed to load data as a JSON record, error=%s", str(exp))
     return rdict
Example #21
def getRunsInBlock(blocks, dbsUrl):
    """
    Provided a list of block names, find their run numbers
    :param blocks: list of block names
    :param dbsUrl: string with the DBS URL
    :return: a dictionary of block names and a list of run numbers
    """
    runsByBlock = {}
    urls = ['%s/runs?block_name=%s' % (dbsUrl, quote(b)) for b in blocks]
    data = multi_getdata(urls, ckey(), cert())
    for row in data:
        blockName = unquote(row['url'].rsplit('=')[-1])
        if row['data'] is None:
            msg = "Failure in getRunsInBlock for block %s. Error: %s %s" % (
                blockName, row.get('code'), row.get('error'))
            raise RuntimeError(msg)
        rows = json.loads(row['data'])
        runsByBlock[blockName] = rows[0]['run_num']
    return runsByBlock
Example #22
def eventsLumisInfo(inputs, dbsUrl, validFileOnly=0, sumOverLumi=0):
    "Get information about events and lumis for given set of inputs: blocks or datasets"
    what = 'dataset'
    eventsLumis = {}
    if not inputs:
        return eventsLumis
    if '#' in inputs[0]:  # inputs are list of blocks
        what = 'block_name'
    urls = ['%s/filesummaries?validFileOnly=%s&sumOverLumi=%s&%s=%s' \
            % (dbsUrl, validFileOnly, sumOverLumi, what, quote(i)) \
            for i in inputs]
    data = multi_getdata(urls, ckey(), cert())
    for row in data:
        # quote/unquote as in the other examples (urllib.parse in Python 3)
        key = row['url'].split('=')[-1]
        if what == 'block_name':
            key = unquote(key)
        if row['data'] is None:
            print("FAILURE: eventsLumisInfo for %s. Error: %s %s" %
                  (key, row.get('code'), row.get('error')))
            continue
        rows = json.loads(row['data'])
        for item in rows:
            eventsLumis[key] = item
    return eventsLumis
Example #23
def phedexInfo(datasets, phedexUrl):
    "Fetch PhEDEx info about nodes for all datasets"
    urls = [
        '%s/blockreplicasummary?dataset=%s' % (phedexUrl, d) for d in datasets
    ]
    data = multi_getdata(urls, ckey(), cert())
    blockNodes = {}
    for row in data:
        dataset = row['url'].rsplit('=')[-1]
        if row['data'] is None:
            print("FAILURE: phedexInfo for %s. Error: %s %s" %
                  (dataset, row.get('code'), row.get('error')))
            continue
        rows = json.loads(row['data'])
        for item in rows['phedex']['block']:
            nodes = [
                r['node'] for r in item['replica'] if r['complete'] == 'y'
            ]
            blockNodes[item['name']] = nodes
    return blockNodes
Example #24
def eventsLumisInfo(inputs, validFileOnly=0, sumOverLumi=0):
    "Get information about events and lumis for given set of inputs: blocks or datasets"
    what = 'dataset'
    eventsLumis = {}
    if not inputs:
        return eventsLumis
    if '#' in inputs[0]: # inputs are list of blocks
        what = 'block_name'
    urls = ['%s/filesummaries?validFileOnly=%s&sumOverLumi=%s&%s=%s' \
            % (dbsUrl(), validFileOnly, sumOverLumi, what, quote(i)) \
            for i in inputs]
    data = multi_getdata(urls, ckey(), cert())
    for row in data:
        # quote/unquote as in the other examples (urllib.parse in Python 3)
        key = row['url'].split('=')[-1]
        if what == 'block_name':
            key = unquote(key)
        if row['data'] is None:
            print("FAILURE: eventsLumisInfo for %s. Error: %s %s" %
                  (key, row.get('code'), row.get('error')))
            continue
        rows = json.loads(row['data'])
        for item in rows:
            eventsLumis[key] = item
    return eventsLumis
Example #25
def getPileupDatasetSizes(datasets, phedexUrl):
    """
    Given a list of datasets, find all their blocks with replicas
    available, i.e., blocks that have valid files to be processed,
    and calculate the total dataset size
    :param datasets: list of dataset names
    :param phedexUrl: a string with the PhEDEx URL
    :return: a dictionary of datasets and their respective sizes
    NOTE: Value `None` is returned in case the data-service failed to serve a given request.
    """
    sizeByDset = {}
    if not datasets:
        return sizeByDset

    urls = [
        '%s/blockreplicas?dataset=%s' % (phedexUrl, dset) for dset in datasets
    ]
    logging.info("Executing %d requests against PhEDEx 'blockreplicas' API",
                 len(urls))
    data = multi_getdata(urls, ckey(), cert())

    for row in data:
        dataset = row['url'].split('=')[-1]
        if row['data'] is None:
            print(
                "Failure in getPileupDatasetSizes for dataset %s. Error: %s %s"
                % (dataset, row.get('code'), row.get('error')))
            sizeByDset.setdefault(dataset, None)
            continue
        rows = json.loads(row['data'])
        sizeByDset.setdefault(dataset, 0)
        try:
            for item in rows['phedex']['block']:
                sizeByDset[dataset] += item['bytes']
        except Exception as exc:
            print(
                "Failure in getPileupDatasetSizes for dataset %s. Error: %s" %
                (dataset, str(exc)))
            sizeByDset[dataset] = None
    return sizeByDset
Example #26
def dbsInfo(datasets):
    "Provides DBS info about dataset blocks"
    urls = ['%s/blocks?detail=True&dataset=%s' % (dbsUrl(), d) for d in datasets]
    data = multi_getdata(urls, ckey(), cert())
    datasetBlocks = {}
    datasetSizes = {}
    for row in data:
        dataset = row['url'].split('=')[-1]
        if row['data'] is None:
            # guard against a failed request; json.loads(None) would raise
            print("FAILURE: dbsInfo for %s. Error: %s %s" %
                  (dataset, row.get('code'), row.get('error')))
            continue
        rows = json.loads(row['data'])
        blocks = []
        size = 0
        for item in rows:
            blocks.append(item['block_name'])
            size += item['block_size']
        datasetBlocks[dataset] = blocks
        datasetSizes[dataset] = size
    return datasetBlocks, datasetSizes
Example #27
def findBlockParents(blocks, dbsUrl):
    """
    Helper function to find block parents given a list of block names.
    Return a dictionary in the format of:
    {"child dataset name": {"child block": ["parent blocks"],
                            "child block": ["parent blocks"], ...}}
    NOTE: Value `None` is returned in case the data-service failed to serve a given request.
    """
    parentsByBlock = {}
    urls = [
        '%s/blockparents?block_name=%s' % (dbsUrl, quote(b)) for b in blocks
    ]
    logging.info("Executing %d requests against DBS 'blockparents' API",
                 len(urls))
    data = multi_getdata(urls, ckey(), cert())
    for row in data:
        blockName = unquote(row['url'].rsplit('=')[-1])
        dataset = blockName.split("#")[0]
        if row['data'] is None:
            print("Failure in findBlockParents for block %s. Error: %s %s" %
                  (blockName, row.get('code'), row.get('error')))
            parentsByBlock.setdefault(dataset, None)
            continue
        rows = json.loads(row['data'])
        try:
            if dataset in parentsByBlock and parentsByBlock[dataset] is None:
                # then one of the block calls has failed, keep it failed!
                continue
            parentsByBlock.setdefault(dataset, {})
            for item in rows:
                parentsByBlock[dataset].setdefault(item['this_block_name'],
                                                   set())
                parentsByBlock[dataset][item['this_block_name']].add(
                    item['parent_block_name'])
        except Exception as exc:
            print("Failure in findBlockParents for block %s. Error: %s" %
                  (blockName, str(exc)))
            parentsByBlock[dataset] = None
    return parentsByBlock
Example #28
def eventsLumisInfo(inputs, dbsUrl, validFileOnly=0, sumOverLumi=0):
    "Get information about events and lumis for given set of inputs: blocks or datasets"
    what = 'dataset'
    eventsLumis = {}
    if not inputs:
        return eventsLumis
    if '#' in inputs[0]:  # inputs are list of blocks
        what = 'block_name'
    urls = [
        '%s/filesummaries?validFileOnly=%s&sumOverLumi=%s&%s=%s' %
        (dbsUrl, validFileOnly, sumOverLumi, what, quote(i)) for i in inputs
    ]
    data = multi_getdata(urls, ckey(), cert())
    for row in data:
        key = unquote(row['url'].split('=')[-1])  # do not rebind the `data` generator name
        if row['data'] is None:
            print("FAILURE: eventsLumisInfo for %s. Error: %s %s" %
                  (key, row.get('code'), row.get('error')))
            continue
        rows = json.loads(row['data'])
        for item in rows:
            eventsLumis[key] = item
    return eventsLumis
Example #29
 def fetch(self):
     "Fetch information about sites from various CMS data-services"
     tfile = tempfile.NamedTemporaryFile()
     dashboardUrl = "http://dashb-ssb.cern.ch/dashboard/request.py"
     urls = [
         '%s/getplotdata?columnid=106&batch=1&lastdata=1' % dashboardUrl,
         '%s/getplotdata?columnid=107&batch=1&lastdata=1' % dashboardUrl,
         '%s/getplotdata?columnid=108&batch=1&lastdata=1' % dashboardUrl,
         '%s/getplotdata?columnid=109&batch=1&lastdata=1' % dashboardUrl,
         '%s/getplotdata?columnid=136&batch=1&lastdata=1' % dashboardUrl,
         '%s/getplotdata?columnid=158&batch=1&lastdata=1' % dashboardUrl,
         '%s/getplotdata?columnid=159&batch=1&lastdata=1' % dashboardUrl,
         '%s/getplotdata?columnid=160&batch=1&lastdata=1' % dashboardUrl,
         '%s/getplotdata?columnid=237&batch=1&lastdata=1' % dashboardUrl,
         ### FIXME: these calls to gwmsmon are failing pretty badly with
         ### "302 Found" and failing to decode, causing a huge error dump
         ### to the logs
         # 'https://cms-gwmsmon.cern.ch/totalview/json/site_summary',
         # 'https://cms-gwmsmon.cern.ch/prodview/json/site_summary',
         # 'https://cms-gwmsmon.cern.ch/poolview/json/totals',
         # 'https://cms-gwmsmon.cern.ch/prodview/json/maxusedcpus',
         'http://cmsgwms-frontend-global.cern.ch/vofrontend/stage/mcore_siteinfo.json',
         'http://t3serv001.mit.edu/~cmsprod/IntelROCCS/Detox/SitesInfo.txt',
         'http://cmsmonitoring.web.cern.ch/cmsmonitoring/storageoverview/latest/StorageOverview.json',
     ]
     cookie = {}
     ssbids = [
         '106', '107', '108', '109', '136', '158', '159', '160', '237'
     ]
     sids = ['1', '2', 'm1', 'm3', 'm4', 'm5', 'm6']
     for url in urls:
         if 'gwmsmon' in url:
             cern_sso_cookie(url, tfile.name, cert(), ckey())
             cookie.update({url: tfile.name})
     gen = multi_getdata(urls, ckey(), cert(), cookie=cookie)
     siteInfo = {}
     for row in gen:
         if 'Detox' in row['url']:
             data = row['data']
         else:
             try:
                 data = json.loads(row['data'])
             except Exception as exc:
                 self.logger.exception('error %s for row %s', str(exc), row)
                 data = {}
         if 'ssb' in row['url']:
             for ssbid in ssbids:
                 if ssbid in row['url']:
                     siteInfo['ssb_%s' % ssbid] = data
         elif 'prodview/json/site_summary' in row['url']:
             siteInfo['gwmsmon_prod_site_summary'] = data
         elif 'totalview/json/site_summary' in row['url']:
             siteInfo['gwmsmon_site_summary'] = data
         elif 'totals' in row['url']:
             siteInfo['gwmsmon_totals'] = data
         elif 'maxusedcpus' in row['url']:
             siteInfo['gwmsmon_prod_maxused'] = data
         elif 'mcore' in row['url']:
             siteInfo['mcore'] = data
         elif 'Detox' in row['url']:
             siteInfo['detox_sites'] = data
         elif 'monitoring' in row['url']:
             siteInfo['mss_usage'] = data
         elif 'stuck' in row['url']:
             for sid in sids:
                 if sid in row['url']:
                     siteInfo['stuck_%s' % sid] = data
     siteInfo['site_queues'] = getNodeQueues()  # query once, after the per-row loop
     return siteInfo
Example #30
 def fetch(self):
     "Fetch information about sites from various CMS data-services"
     tfile = tempfile.NamedTemporaryFile()
     urls = [
         '%s/getplotdata?columnid=106&batch=1&lastdata=1' % dashboardUrl(),
         '%s/getplotdata?columnid=107&batch=1&lastdata=1' % dashboardUrl(),
         '%s/getplotdata?columnid=108&batch=1&lastdata=1' % dashboardUrl(),
         '%s/getplotdata?columnid=109&batch=1&lastdata=1' % dashboardUrl(),
         '%s/getplotdata?columnid=136&batch=1&lastdata=1' % dashboardUrl(),
         '%s/getplotdata?columnid=158&batch=1&lastdata=1' % dashboardUrl(),
         '%s/getplotdata?columnid=159&batch=1&lastdata=1' % dashboardUrl(),
         '%s/getplotdata?columnid=160&batch=1&lastdata=1' % dashboardUrl(),
         '%s/getplotdata?columnid=237&batch=1&lastdata=1' % dashboardUrl(),
         'https://cms-gwmsmon.cern.ch/totalview/json/site_summary',
         'https://cms-gwmsmon.cern.ch/prodview/json/site_summary',
         'https://cms-gwmsmon.cern.ch/poolview/json/totals',
         'https://cms-gwmsmon.cern.ch/prodview/json/maxusedcpus',
         'http://cmsgwms-frontend-global.cern.ch/vofrontend/stage/mcore_siteinfo.json',
         'http://t3serv001.mit.edu/~cmsprod/IntelROCCS/Detox/SitesInfo.txt',
         '%s/storageoverview/latest/StorageOverview.json' % monitoringUrl(),
         '%s/stuck_1.json' % stucktransferUrl(),
         '%s/stuck_2.json' % stucktransferUrl(),
         '%s/stuck_m1.json' % stucktransferUrl(),
         '%s/stuck_m3.json' % stucktransferUrl(),
         '%s/stuck_m4.json' % stucktransferUrl(),
         '%s/stuck_m5.json' % stucktransferUrl(),
         '%s/stuck_m6.json' % stucktransferUrl(),
     ]
     cookie = {}
     ssbids = ['106', '107', '108', '109', '136', '158', '159', '160', '237']
     sids = ['1', '2', 'm1', 'm3', 'm4', 'm5', 'm6']
     for url in urls:
         if 'gwmsmon' in url:
             cern_sso_cookie(url, tfile.name, cert(), ckey())
             cookie.update({url: tfile.name})
     gen = multi_getdata(urls, ckey(), cert(), cookie=cookie)
     siteInfo = {}
     for row in gen:
         if 'Detox' in row['url']:
             data = row['data']
         else:
             try:
                 data = json.loads(row['data'])
             except Exception:
                 traceback.print_exc()
                 print(row)
                 data = {}
         if 'ssb' in row['url']:
             for ssbid in ssbids:
                 if ssbid in row['url']:
                     siteInfo['ssb_%s' % ssbid] = data
         elif 'prodview/json/site_summary' in row['url']:
             siteInfo['gwmsmon_prod_site_summary'] = data
         elif 'totalview/json/site_summary' in row['url']:
             siteInfo['gwmsmon_site_summary'] = data
         elif 'totals' in row['url']:
             siteInfo['gwmsmon_totals'] = data
         elif 'maxusedcpus' in row['url']:
             siteInfo['gwmsmon_prod_maxused'] = data
         elif 'mcore' in row['url']:
             siteInfo['mcore'] = data
         elif 'Detox' in row['url']:
             siteInfo['detox_sites'] = data
         elif 'monitoring' in row['url']:
             siteInfo['mss_usage'] = data
         elif 'stuck' in row['url']:
             for sid in sids:
                 if sid in row['url']:
                     siteInfo['stuck_%s' % sid] = data
     siteInfo['site_queues'] = getNodeQueues()  # query once, after the per-row loop
     siteInfo['ready_in_agent'] = agentsSites(agentInfoUrl())
     return siteInfo
Example #31
 def fetch(self):
     "Fetch information about sites from various CMS data-services"
     tfile = tempfile.NamedTemporaryFile()
     urls = [
         '%s/getplotdata?columnid=106&batch=1&lastdata=1' % dashboardUrl(),
         '%s/getplotdata?columnid=107&batch=1&lastdata=1' % dashboardUrl(),
         '%s/getplotdata?columnid=108&batch=1&lastdata=1' % dashboardUrl(),
         '%s/getplotdata?columnid=109&batch=1&lastdata=1' % dashboardUrl(),
         '%s/getplotdata?columnid=136&batch=1&lastdata=1' % dashboardUrl(),
         '%s/getplotdata?columnid=158&batch=1&lastdata=1' % dashboardUrl(),
         '%s/getplotdata?columnid=159&batch=1&lastdata=1' % dashboardUrl(),
         '%s/getplotdata?columnid=160&batch=1&lastdata=1' % dashboardUrl(),
         '%s/getplotdata?columnid=237&batch=1&lastdata=1' % dashboardUrl(),
         ### FIXME: these calls to gwmsmon are failing pretty badly with
         ### "302 Found" and failing to decode, causing a huge error dump
         ### to the logs
         # 'https://cms-gwmsmon.cern.ch/totalview/json/site_summary',
         # 'https://cms-gwmsmon.cern.ch/prodview/json/site_summary',
         # 'https://cms-gwmsmon.cern.ch/poolview/json/totals',
         # 'https://cms-gwmsmon.cern.ch/prodview/json/maxusedcpus',
         'http://cmsgwms-frontend-global.cern.ch/vofrontend/stage/mcore_siteinfo.json',
         'http://t3serv001.mit.edu/~cmsprod/IntelROCCS/Detox/SitesInfo.txt',
         '%s/storageoverview/latest/StorageOverview.json' % monitoringUrl(),
     ]
     cookie = {}
     ssbids = ['106', '107', '108', '109', '136', '158', '159', '160', '237']
     sids = ['1', '2', 'm1', 'm3', 'm4', 'm5', 'm6']
     for url in urls:
         if 'gwmsmon' in url:
             cern_sso_cookie(url, tfile.name, cert(), ckey())
             cookie.update({url: tfile.name})
     gen = multi_getdata(urls, ckey(), cert(), cookie=cookie)
     siteInfo = {}
     for row in gen:
         if 'Detox' in row['url']:
             data = row['data']
         else:
             try:
                 data = json.loads(row['data'])
             except Exception:
                 traceback.print_exc()
                 print(row)
                 data = {}
         if 'ssb' in row['url']:
             for ssbid in ssbids:
                 if ssbid in row['url']:
                     siteInfo['ssb_%s' % ssbid] = data
         elif 'prodview/json/site_summary' in row['url']:
             siteInfo['gwmsmon_prod_site_summary'] = data
         elif 'totalview/json/site_summary' in row['url']:
             siteInfo['gwmsmon_site_summary'] = data
         elif 'totals' in row['url']:
             siteInfo['gwmsmon_totals'] = data
         elif 'maxusedcpus' in row['url']:
             siteInfo['gwmsmon_prod_maxused'] = data
         elif 'mcore' in row['url']:
             siteInfo['mcore'] = data
         elif 'Detox' in row['url']:
             siteInfo['detox_sites'] = data
         elif 'monitoring' in row['url']:
             siteInfo['mss_usage'] = data
         elif 'stuck' in row['url']:
             for sid in sids:
                 if sid in row['url']:
                     siteInfo['stuck_%s' % sid] = data
     siteInfo['site_queues'] = getNodeQueues()  # query once, after the per-row loop
     siteInfo['ready_in_agent'] = agentsSites(agentInfoUrl())
     return siteInfo
Example #32
def listReplicationRules(containers, rucioAccount, grouping,
                         rucioUrl, rucioToken, scope="cms"):
    """
    List all the replication rules for the input filters provided.
    It builds a dictionary of container name and the locations where
    they have a rule locking data on, with some additional rule state
    logic in the code.
    :param containers: list of container names
    :param rucioAccount: string with the rucio account
    :param grouping: rule grouping string, only "A" or "D" are allowed
    :param rucioUrl: string with the Rucio url
    :param rucioToken: string with the Rucio token
    :param scope: string with the data scope
    :return: a flat dictionary key'ed by the container name, with a list of RSE
      expressions that still need to be resolved
    NOTE: Value `None` is returned in case the data-service failed to serve a given request.
    """
    locationByContainer = {}
    if not containers:
        return locationByContainer
    if grouping not in ["A", "D"]:
        raise RuntimeError("Replication rule grouping value provided ({}) is not allowed!".format(grouping))

    headers = {"X-Rucio-Auth-Token": rucioToken}
    urls = []
    for cont in containers:
        urls.append('{}/rules/?scope={}&account={}&grouping={}&name={}'.format(rucioUrl, scope, rucioAccount,
                                                                               grouping, quote(cont, safe="")))
    logging.info("Executing %d requests against Rucio to list replication rules", len(urls))
    data = multi_getdata(urls, ckey(), cert(), headers=headers)

    for row in data:
        container = unquote(row['url'].split("name=")[1])
        if "200 OK" not in row['headers']:
            msg = "Failure in listReplicationRules for container {}. Response: {}".format(container, row)
            logging.error(msg)
            locationByContainer.setdefault(container, None)
            continue
        try:
            locationByContainer.setdefault(container, [])
            for item in parseNewLineJson(row['data']):
                if item['state'] in ["U", "SUSPENDED"]:
                    logging.warning("Container %s has a SUSPENDED rule. Skipping rule: %s", container, item)
                    continue
                elif item['state'] in ["S", "STUCK"]:
                    if item['error'] == 'NO_SOURCES:NO_SOURCES':
                        msg = "Container {} has a STUCK rule with NO_SOURCES.".format(container)
                        msg += " Data could be lost forever... Rule info is: {}".format(item)
                        logging.warning(msg)
                        continue

                    timeDiff = item['stuck_at'] - item['created_at']
                    if int(timeDiff.days) > STUCK_LIMIT:
                        msg = "Container {} has a STUCK rule for {} days (limit set to: {}).".format(container,
                                                                                                     timeDiff.days,
                                                                                                     STUCK_LIMIT)
                        msg += " Not going to use it! Rule info: {}".format(item)
                        logging.warning(msg)
                        continue
                    else:
                        msg = "Container {} has a STUCK rule for only {} days.".format(container, timeDiff.days)
                        msg += " Considering it for the pileup location"
                        logging.info(msg)
                else:
                    logging.info("Container %s has rule ID %s in state %s, using it.",
                                 container, item['id'], item['state'])

                ### NOTE: this is not an RSE name, but an RSE expression that still needs to be resolved
                locationByContainer[container].append(item['rse_expression'])
        except Exception as exc:
            msg = "listReplicationRules function did not return a valid response for container: %s."
            msg += "Server responded with: %s\nError: %s"
            logging.exception(msg, container, str(exc), row['data'])
            locationByContainer.setdefault(container, None)
            continue
    return locationByContainer
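
Note that STUCK_LIMIT is a module-level constant not shown on this page: the maximum number of days a rule may remain STUCK before it is discarded for pileup location purposes. A placeholder definition, purely so the snippet runs standalone (the real value lives in the source module):

# Assumed constant: days a STUCK rule is tolerated before being ignored.
# Hypothetical value; the real one is defined in the module this
# function was extracted from.
STUCK_LIMIT = 7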