Example #1
    def testGetPileupContainerSizesRucio(self):
        """
        Test the getPileupContainerSizesRucio function, which fetches the
        total size (in bytes) of each container.
        """
        self.rucioToken, self.tokenValidity = getRucioToken(self.rucioAuthUrl, self.rucioAccount)
        # Test 1: no DIDs provided as input
        resp = getPileupContainerSizesRucio([], self.rucioUrl,
                                            self.rucioToken, scope=self.rucioScope)
        self.assertEqual(resp, {})

        # Test 2: multiple valid/invalid DIDs provided as input
        containers = [CONT1, PU_CONT, self.badDID]
        resp = getPileupContainerSizesRucio(containers, self.rucioUrl,
                                            self.rucioToken, scope=self.rucioScope)
        self.assertEqual(len(resp), 3)
        self.assertGreater(resp[PU_CONT], 0)
        self.assertIsNone(resp[self.badDID])
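For reference, the return contract pinned down by the assertions above can be summarized with a minimal stub. This is a hypothetical sketch of the expected response shape only (the DID name and size below are placeholders), not the real implementation, which queries the Rucio server:

def getPileupContainerSizesRucioStub(containers, rucioUrl, rucioToken, scope="cms"):
    """Hypothetical stub mirroring the behavior the tests assert."""
    if not containers:
        # no input DIDs: an empty dictionary comes back
        return {}
    # placeholder data; the real function asks Rucio for each container's bytes
    knownSizes = {"/SomePrimary/SomeProcessed-v1/PREMIX": 123456789}
    # every requested DID shows up in the response; containers that cannot
    # be resolved map to None, valid ones map to their total size in bytes
    return {did: knownSizes.get(did) for did in containers}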
Example #2
    def testGetPileupContainerSizesRucio(self):
        """
        Test the getPileupContainerSizesRucio function, which fetches the
        total size (in bytes) of each container.
        """
        resp = getPileupContainerSizesRucio([],
                                            self.rucioUrl,
                                            self.rucioToken,
                                            scope=self.rucioScope)
        self.assertEqual(resp, {})

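        # multiple valid/invalid DIDs provided as input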
        containers = [CONT1, CONT2, self.badDID]
        resp = getPileupContainerSizesRucio(containers,
                                            self.rucioUrl,
                                            self.rucioToken,
                                            scope=self.rucioScope)
        self.assertEqual(len(resp), 3)
        self.assertIn(CONT2, resp)
        self.assertGreater(resp[CONT1], 0)
        self.assertIsNone(resp[self.badDID])
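Both tests rely on fixtures (self.rucioUrl, self.rucioToken, self.rucioScope, self.badDID, CONT1, CONT2) defined elsewhere in the test class. A plausible setUp wiring them, with placeholder endpoints and DIDs standing in for the real configuration, might look like:

import unittest

# placeholder container DIDs; the real tests point at existing CMS containers
CONT1 = "/SomePrimary1/SomeProcessed1-v1/AODSIM"
CONT2 = "/SomePrimary2/SomeProcessed2-v1/AODSIM"

class RucioUtilsTest(unittest.TestCase):
    """Hypothetical fixture setup for the tests shown above."""

    def setUp(self):
        # placeholder endpoints and account; real values come from the test config
        self.rucioUrl = "http://rucio-server.example.cern.ch"
        self.rucioAuthUrl = "https://rucio-auth.example.cern.ch"
        self.rucioAccount = "wma_test"
        self.rucioScope = "cms"
        self.badDID = "/ThisDataset/DoesNot/EXIST"
        # a real run would obtain these via getRucioToken(self.rucioAuthUrl,
        # self.rucioAccount), as Example #1 does inside the test itself
        self.rucioToken, self.tokenValidity = "dummy-token", None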
Example #3
    def getSecondaryDatasets(self, workflows):
        """
        Given a list of requests, list all the pileup datasets and, find their
        total dataset sizes and which locations host completed and subscribed datasets.
        NOTE it only uses valid blocks (i.e., blocks with at least one replica!)
        :param workflows: a list of Workflow objects
        :return: two dictionaries keyed by the dataset.
           First contains dataset size as value.
           Second contains a list of locations as value.
        """
        retryWorkflows = []
        retryDatasets = []
        datasets = set()
        for wflow in workflows:
            datasets |= wflow.getPileupDatasets()

        # retrieve pileup container size and locations from Rucio
        self.logger.info(
            "Fetching pileup dataset sizes for %d datasets against Rucio: %s",
            len(datasets), self.msConfig['rucioUrl'])
        sizesByDset = getPileupContainerSizesRucio(datasets,
                                                   self.msConfig['rucioUrl'],
                                                   self.rucioToken)

        # then fetch data location for locked data, under our own rucio account
        self.logger.info(
            "Fetching pileup container location for %d containers against Rucio: %s",
            len(datasets), self.msConfig['rucioUrl'])
        locationsByDset = listReplicationRules(
            datasets,
            self.msConfig['rucioAccount'],
            grouping="A",
            rucioUrl=self.msConfig['rucioUrl'],
            rucioToken=self.rucioToken)
        # now check whether any of our calls failed; if so, the workflow needs to be skipped in this cycle
        # FIXME: isn't there a better way to do this?!?
        for dset, value in viewitems(sizesByDset):
            if value is None:
                retryDatasets.append(dset)
        for dset, value in viewitems(locationsByDset):
            if value is None:
                retryDatasets.append(dset)
        if retryDatasets:
            for wflow in workflows:
                for pileup in wflow.getPileupDatasets():
                    if pileup in retryDatasets:
                        retryWorkflows.append(wflow)
            # remove workflows that failed one or more of the bulk queries to the data-service
            self._workflowRemoval(workflows, retryWorkflows)
        return sizesByDset, locationsByDset
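The FIXME above suggests the failure check could be tightened. One possible refactoring of that block, a sketch under the same assumption (a None value marks a failed bulk query against Rucio, and plain dict.items() stands in for viewitems), collects the failed datasets and affected workflows with comprehensions:

        # sketch: same logic as the loops above, expressed more compactly;
        # a None value means the bulk Rucio query failed for that dataset
        retryDatasets = {dset for dset, size in sizesByDset.items() if size is None}
        retryDatasets |= {dset for dset, loc in locationsByDset.items() if loc is None}
        if retryDatasets:
            # any workflow touching a failed dataset is skipped in this cycle
            retryWorkflows = [wflow for wflow in workflows
                              if wflow.getPileupDatasets() & retryDatasets]
            self._workflowRemoval(workflows, retryWorkflows)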