Esempio n. 1
0
    def getParentDatasets(self, workflows):
        """
        Given a list of requests, find which requests need to process a parent
        dataset, and discover what the parent dataset name is.

        Workflows whose input dataset came back with a `None` parent are passed,
        together with the `workflows` list, to `self._workflowRemoval` such that
        they are skipped from this cycle.
        :param workflows: list of workflow objects providing `hasParents()`,
            `getDbsUrl()` and `getInputDataset()`
        :return: dictionary with the child and the parent dataset; datasets that
            failed to be resolved are kept in it with a `None` value
        """
        # group the child datasets by DBS url, such that each url is queried only once
        datasetByDbs = {}
        for wflow in workflows:
            if wflow.hasParents():
                datasetByDbs.setdefault(wflow.getDbsUrl(), set()).add(wflow.getInputDataset())

        parentByDset = {}
        for dbsUrl, datasets in datasetByDbs.items():
            self.logger.info("Resolving %d dataset parentage against DBS: %s",
                             len(datasets), dbsUrl)
            # first find out what's the parent dataset name
            parentByDset.update(findParent(datasets, dbsUrl))

        # now check if any of our calls failed (`None` parent). Failed datasets are kept
        # in a set, making the per-workflow membership test below a constant time lookup
        retryDatasets = {dset for dset, parent in parentByDset.items() if parent is None}
        if retryDatasets:
            retryWorkflows = [wflow for wflow in workflows
                              if wflow.hasParents() and wflow.getInputDataset() in retryDatasets]
            # remove workflows that failed one or more of the bulk queries to the data-service
            self._workflowRemoval(workflows, retryWorkflows)

        return parentByDset
Esempio n. 2
0
 def setParentDatasets(self, wflow):
     """
     Used to resolve parent datasets for a workflow.

     The lookup is only performed when both wflow['InputDataset'] and
     wflow['IncludeParents'] are truthy; otherwise the workflow is returned
     untouched. On success, wflow['ParentDataset'] is set to a single-element
     list holding the parent dataset.
     :param  wflow:   A MSRuleCleaner workflow representation
     :return:         The workflow object
     :raises MSRuleCleanerResolveParentError: if findParent() provides no
                      parent (missing key or None value) for the input dataset
     """
     if not (wflow['InputDataset'] and wflow['IncludeParents']):
         return wflow

     childDataset = wflow['InputDataset']
     parentByChild = findParent([childDataset], self.msConfig['dbsUrl'])
     # NOTE: If findParent() returned None then the DBS service failed to
     #       resolve the request (it is considered an ERROR outside WMCore)
     parentDataset = parentByChild.get(childDataset)
     if parentDataset is None:
         msg = "Failed to resolve parent dataset for: %s in workflow: %s" % (childDataset, wflow['RequestName'])
         raise MSRuleCleanerResolveParentError(msg)

     # From here on the child dataset is known to map to a parent, so there is
     # no "parent not found" case left to handle. Log the parent name itself
     # rather than the whole child -> parent mapping.
     wflow['ParentDataset'] = [parentDataset]
     msg = "Found parent %s for input dataset %s in workflow: %s "
     self.logger.info(msg, parentDataset, wflow['InputDataset'], wflow['RequestName'])
     return wflow
Esempio n. 3
0
 def test_findParent(self):
     """
     Test function for findParent(): the first child dataset of the fixture
     must be mapped to the expected RAW parent dataset.
     """
     expectedParent = '/SingleElectron/Run2016B-v2/RAW'
     parentByChild = findParent(self.child, self.dbsUrl)
     firstChild = self.child[0]
     self.assertEqual(parentByChild[firstChild], expectedParent)