def testGetPileupContainerSizesRucio(self):
    """
    Test the getPileupContainerSizesRucio function, which
    fetches the container total bytes.
    """
    self.rucioToken, self.tokenValidity = getRucioToken(self.rucioAuthUrl, self.rucioAccount)
    # Test 1: no DIDs provided as input
    resp = getPileupContainerSizesRucio([], self.rucioUrl, self.rucioToken, scope=self.rucioScope)
    self.assertEqual(resp, {})
    # Test 2: multiple valid/invalid DIDs provided as input
    containers = [CONT1, PU_CONT, self.badDID]
    resp = getPileupContainerSizesRucio(containers, self.rucioUrl, self.rucioToken, scope=self.rucioScope)
    self.assertEqual(len(resp), 3)
    self.assertTrue(resp[PU_CONT] > 0)
    self.assertIsNone(resp[self.badDID])
def testGetPileupContainerSizesRucio(self):
    """
    Test the getPileupContainerSizesRucio function, which
    fetches the container total bytes.
    """
    resp = getPileupContainerSizesRucio([], self.rucioUrl, self.rucioToken, scope=self.rucioScope)
    self.assertEqual(resp, {})
    containers = [CONT1, CONT2, self.badDID]
    resp = getPileupContainerSizesRucio(containers, self.rucioUrl, self.rucioToken, scope=self.rucioScope)
    self.assertEqual(len(resp), 3)
    self.assertIn(CONT2, resp)
    self.assertTrue(resp[CONT1] > 0)
    self.assertIsNone(resp[self.badDID])
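# --- A hedged sketch, not WMCore code ------------------------------------
# The two tests above pin down the contract of getPileupContainerSizesRucio:
# an empty input yields an empty dict, every requested DID shows up as a key,
# valid containers map to a positive byte count, and invalid DIDs map to None.
# A minimal implementation along those lines could look like the function
# below. The endpoint (GET /dids/{scope}/{name}) and the 'bytes' field are
# assumptions based on the generic Rucio REST API; the function name and the
# scope default are hypothetical and do not exist in WMCore.
import requests


def getContainerSizesSketch(containers, rucioUrl, rucioToken, scope="cms"):
    """Map each container name to its total bytes, or None when the lookup fails."""
    sizesByContainer = {}
    headers = {"X-Rucio-Auth-Token": rucioToken}
    for container in containers:
        resp = requests.get("{}/dids/{}/{}".format(rucioUrl, scope, container),
                            headers=headers)
        if resp.status_code != 200:
            # e.g. an invalid DID: keep the key, flag the failure with None
            sizesByContainer[container] = None
        else:
            sizesByContainer[container] = resp.json().get("bytes")
    return sizesByContainer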
def getSecondaryDatasets(self, workflows):
    """
    Given a list of workflows, list all the pileup datasets, find their
    total dataset sizes, and find which locations host completed and
    subscribed datasets.
    NOTE: it only uses valid blocks (i.e., blocks with at least one replica!)
    :param workflows: a list of Workflow objects
    :return: two dictionaries keyed by the dataset: the first maps each
        dataset to its total size, the second to a list of locations
    """
    retryWorkflows = []
    retryDatasets = []
    datasets = set()
    for wflow in workflows:
        datasets = datasets | wflow.getPileupDatasets()

    # retrieve pileup container sizes from Rucio
    self.logger.info("Fetching pileup dataset sizes for %d datasets against Rucio: %s",
                     len(datasets), self.msConfig['rucioUrl'])
    sizesByDset = getPileupContainerSizesRucio(datasets, self.msConfig['rucioUrl'], self.rucioToken)

    # then fetch data location for locked data, under our own rucio account
    self.logger.info("Fetching pileup container location for %d containers against Rucio: %s",
                     len(datasets), self.msConfig['rucioUrl'])
    locationsByDset = listReplicationRules(datasets, self.msConfig['rucioAccount'],
                                           grouping="A", rucioUrl=self.msConfig['rucioUrl'],
                                           rucioToken=self.rucioToken)

    # now check whether any of our calls failed; if so, the workflow needs to
    # be skipped in this cycle
    # FIXME: isn't there a better way to do this?!?
    for dset, value in viewitems(sizesByDset):
        if value is None:
            retryDatasets.append(dset)
    for dset, value in viewitems(locationsByDset):
        if value is None:
            retryDatasets.append(dset)
    if retryDatasets:
        for wflow in workflows:
            for pileup in wflow.getPileupDatasets():
                if pileup in retryDatasets:
                    retryWorkflows.append(wflow)
    # remove workflows that failed one or more of the bulk queries to the data-service
    self._workflowRemoval(workflows, retryWorkflows)
    return sizesByDset, locationsByDset
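# --- A hedged sketch, not WMCore code ------------------------------------
# getSecondaryDatasets() delegates the actual clean-up to _workflowRemoval().
# Assuming its contract is simply "drop the flagged workflows from this
# cycle's list and log them", a minimal version could look like the method
# below. Note the membership guard: the retry loop above can flag the same
# workflow once per failed pileup dataset, so it may appear more than once in
# retryWorkflows. The getName() accessor is an assumption about the Workflow
# object; the real WMCore method may differ.
def _workflowRemoval(self, workflows, retryWorkflows):
    """Remove workflows that failed a bulk data-service query from this cycle."""
    for wflow in retryWorkflows:
        if wflow in workflows:
            self.logger.warning("Skipping workflow in this cycle: %s", wflow.getName())
            workflows.remove(wflow)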