Example #1
0
    def getInputDataBlocks(self, workflows):
        """
        Collect block information for the primary and parent input data of
        the given workflows.

        Every data entry reported by `getDataCampaignMap()` whose type is
        "primary" or "parent" is looked up in Rucio with a single bulk call.
        Datasets whose lookup came back as None are treated as failed, and any
        workflow whose input or parent dataset is among them is handed over to
        `_workflowRemoval`, such that it gets skipped in this cycle.
        NOTE: only valid blocks (i.e., blocks with at least one replica) are
        meant to be used; presumably that is enforced by the Rucio helper,
        it is not visible in this method.
        :param workflows: a list of Workflow objects
        :return: dictionary keyed by dataset name with the block information
            returned by the Rucio helper (None for datasets that failed)
        """
        wantedTypes = ("primary", "parent")
        datasets = {dataIn['name']
                    for wflow in workflows
                    for dataIn in wflow.getDataCampaignMap()
                    if dataIn['type'] in wantedTypes}

        # one bulk query for the block names and sizes of all the containers
        rucioUrl = self.msConfig['rucioUrl']
        self.logger.info("Fetching parent/primary block info for %d containers against Rucio: %s",
                         len(datasets), rucioUrl)
        blocksByDset = getBlocksAndSizeRucio(datasets, rucioUrl, self.rucioToken)

        # a None value flags a dataset whose query did not succeed
        failedDsets = [dset for dset, info in blocksByDset.items() if info is None]
        if not failedDsets:
            return blocksByDset

        # FIXME (kept from the original): there might be a better way to do this.
        # Workflows depending on a failed dataset cannot proceed in this cycle.
        affectedWflows = [wflow for wflow in workflows
                          if wflow.getInputDataset() in failedDsets
                          or wflow.getParentDataset() in failedDsets]
        self._workflowRemoval(workflows, affectedWflows)
        return blocksByDset
Example #2
0
 def testGetBlocksAndSizeRucio(self):
     """
     Test the getBlocksAndSizeRucio function, which fetches all the blocks
     (in a container) and their sizes.

     The response is expected to be a dictionary keyed by container name;
     each valid container maps block names to a dictionary holding
     'blockSize' and 'locations', while an invalid DID maps to None.
     """
     # a fresh token is requested and stored together with its validity
     self.rucioToken, self.tokenValidity = getRucioToken(self.rucioAuthUrl, self.rucioAccount)
     # Test 1: no DIDs provided as input
     resp = getBlocksAndSizeRucio([], self.rucioUrl, self.rucioToken, self.rucioScope)
     self.assertEqual(resp, {})
     # Test 2: multiple valid/invalid DIDs provided as input
     containers = [PU_CONT, CONT3, self.badDID]
     resp = getBlocksAndSizeRucio(containers, self.rucioUrl, self.rucioToken, self.rucioScope)
     # dedicated assertions report the offending values when they fail
     self.assertEqual(len(resp), 3)
     self.assertGreater(resp[PU_CONT][PU_CONT_BLK]['blockSize'], 0)
     self.assertIsInstance(resp[PU_CONT][PU_CONT_BLK]['locations'], list)
     self.assertGreater(len(resp[CONT3]), 0)
     # the invalid DID is still reported, but without any block information
     self.assertIsNone(resp[self.badDID])
Example #3
0
    def testGetBlocksAndSizeRucio(self):
        """
        Test the getBlocksAndSizeRucio function, which fetches all the blocks
        (in a container) and their sizes.

        The response is expected to be a dictionary keyed by container name;
        each block of a valid container carries exactly the 'blockSize' and
        'locations' keys, while an invalid DID maps to None.
        """
        BLOCK = "/DMSimp_MonoZLL_NLO_Vector_TuneCP3_GQ0p25_GDM1p0_MY1-500_MXd-1/RunIIFall17NanoAODv4-PU2017_12Apr2018_Nano14Dec2018_102X_mc2017_realistic_v6-v1/NANOAODSIM#048c25e9-38bb-496d-86f7-405ffd3d3fd8"
        # no DIDs provided as input: an empty dictionary must come back
        resp = getBlocksAndSizeRucio([], self.rucioUrl, self.rucioToken,
                                     self.rucioScope)
        self.assertEqual(resp, {})

        # mix of valid and invalid DIDs
        containers = [CONT2, CONT4, self.badDID]
        resp = getBlocksAndSizeRucio(containers, self.rucioUrl,
                                     self.rucioToken, self.rucioScope)
        # dedicated assertions report the offending values when they fail
        self.assertEqual(len(resp), 3)
        self.assertIn(CONT2, resp)
        self.assertGreater(len(resp[CONT4]), 3)
        # assertItemsEqual only exists in Python 2; comparing the sorted keys
        # is an order-insensitive check that works on Python 2 and 3 alike
        self.assertEqual(sorted(resp[CONT4][BLOCK]),
                         ["blockSize", "locations"])
        self.assertIsNone(resp[self.badDID])