def execute(self, *args, **kwargs):
        """Perform DBS data discovery for the task in kwargs['task'].

        Looks up the input dataset's blocks in DBS, resolves each block's
        locations through dls, fetches per-file details, and hands the
        results to self.formatOutput(). If no location at all is found,
        the task is flagged FAILED on the REST backend and a StopHandler
        is raised to abort the handler chain.
        """
        self.logger.info("Data discovery with DBS") ## to be changed into debug
        dbs = get_dbs(self.config.Services.DBSUrl)
        # A task-specific DBS URL (tm_dbs_url) overrides the service default.
        if kwargs['task']['tm_dbs_url']:
            dbs = get_dbs(kwargs['task']['tm_dbs_url'])
        self.logger.debug("Data discovery through %s for %s" %(dbs, kwargs['task']['tm_taskname']))
        # Get the list of blocks for the locations and then call dls.
        # The WMCore DBS3 implementation makes one call to dls for each block
        # with locations = True
        blocks = [ x['Name'] for x in dbs.getFileBlocksInfo(kwargs['task']['tm_input_dataset'], locations=False)]
        #Create a map for block's locations: for each block get the list of locations
        ll = dbs.dls.getLocations(list(blocks),  showProd = True)
        if len(ll) == 0:
            msg = "No location was found for %s in %s." %(kwargs['task']['tm_input_dataset'],kwargs['task']['tm_dbs_url'])
            self.logger.error("Setting %s as failed" % str(kwargs['task']['tm_taskname']))
            # Record the failure on the REST interface before bailing out.
            configreq = {'workflow': kwargs['task']['tm_taskname'],
                         'status': "FAILED",
                         'subresource': 'failure',
                         'failure': b64encode(msg)}
            self.server.post(self.resturl, data = urllib.urlencode(configreq))
            raise StopHandler(msg)
        # Keep only the host names of each entry's locations, then pair them
        # positionally with the block names (assumes dls returns one entry per
        # block in request order -- TODO confirm).
        locations = map(lambda x: map(lambda y: y.host, x.locations), ll)
        locationsmap = dict(zip(blocks, locations))
        filedetails = dbs.listDatasetFileDetails(kwargs['task']['tm_input_dataset'], True)

        return self.formatOutput(task=kwargs['task'], requestname=kwargs['task']['tm_taskname'], datasetfiles=filedetails, locations=locationsmap)
Example #2
0
    def _getDBSBlock(self, match, wmspec):
        """Return a (blockName, blockInfo) pair for the matched work element.

        ACDC matches are resolved through the DataCollectionService and
        yield a {'Files': [...]} dict; all other matches are looked up
        directly in DBS, with parent info when the task requires it.
        """
        blockName = list(match['Inputs'].keys())[0] #TODO: Allow more than one

        if not match['ACDC']:
            # Plain DBS lookup, optionally including parentage information.
            dbs = get_dbs(match['Dbs'])
            needParents = wmspec.getTask(match['TaskName']).parentProcessingFlag()
            if needParents:
                dbsBlockDict = dbs.getFileBlockWithParents(blockName)
            else:
                dbsBlockDict = dbs.getFileBlock(blockName)
            return blockName, dbsBlockDict[blockName]

        acdcInfo = match['ACDC']
        acdc = DataCollectionService(acdcInfo["server"], acdcInfo["database"])
        # NOTE(review): the fetched collection is not used below; the call is
        # kept for any side effects it may have -- confirm before removing.
        collection = acdc.getDataCollection(acdcInfo['collection'])
        splitedBlockName = ACDCBlock.splitBlockName(blockName)
        owner = wmspec.getOwner()
        fileLists = acdc.getChunkFiles(acdcInfo['collection'],
                                       acdcInfo['fileset'],
                                       splitedBlockName['Offset'],
                                       splitedBlockName['NumOfFiles'],
                                       user = owner.get("name"),
                                       group = owner.get("group"))
        return blockName, {"Files": fileLists}
Example #3
0
 def execute(self, *args, **kwargs):
     """Run DBS data discovery for the task in kwargs['task'].

     Temporarily exports the TaskWorker host certificate/key through
     X509_USER_CERT / X509_USER_KEY (DBS3 requires them), restoring the
     caller's environment afterwards, then fetches the list of block
     names for the task's input dataset.

     Raises TaskWorkerException when the dataset is not found in DBS.
     """
     self.logger.info("Data discovery with DBS") ## to be changed into debug
     old_cert_val = os.getenv("X509_USER_CERT")
     old_key_val = os.getenv("X509_USER_KEY")
     try:
         # DBS3 requires X509_USER_CERT to be set - but we don't want to leak that to other modules
         os.environ['X509_USER_CERT'] = self.config.TaskWorker.cmscert
         os.environ['X509_USER_KEY'] = self.config.TaskWorker.cmskey
         dbsurl = self.config.Services.DBSUrl
         if kwargs['task']['tm_dbs_url']:
             dbsurl = kwargs['task']['tm_dbs_url']
         dbs = get_dbs(dbsurl)
     finally:
         # Restore the environment even if get_dbs (or config access) fails;
         # pop() (instead of del) keeps this cleanup from raising itself.
         if old_cert_val is not None:
             os.environ['X509_USER_CERT'] = old_cert_val
         else:
             os.environ.pop('X509_USER_CERT', None)
         if old_key_val is not None:
             os.environ['X509_USER_KEY'] = old_key_val
         else:
             os.environ.pop('X509_USER_KEY', None)
     self.logger.debug("Data discovery through %s for %s" %(dbs, kwargs['task']['tm_taskname']))
     try:
         # Get the list of blocks for the locations and then call dls.
         # The WMCore DBS3 implementation makes one call to dls for each block
         # with locations = True so we are using locations=False and looking up location later
         blocks = [ x['Name'] for x in dbs.getFileBlocksInfo(kwargs['task']['tm_input_dataset'], locations=False)]
     except DBSReaderError as dbsexc:
         # Dataset not found in DBS is a known use case. NOTE: this must be a
         # substring test; str.find() returns -1 (which is truthy) when the
         # substring is absent, so `if str(dbsexc).find(...)` matched nearly
         # every error.
         if 'No matching data' in str(dbsexc):
             raise TaskWorkerException("The CRAB3 server backend could not find dataset %s in this DBS instance: %s" % (kwargs['task']['tm_input_dataset'], dbsurl))
         raise
Example #4
0
    def _getDBSBlock(self, match, wmspec):
        """Get DBS info for this block.

        Returns a (blockName, blockInfo) tuple. ACDC matches are resolved
        through the DataCollectionService and return a {'Files': [...]}
        dict; other matches are looked up in DBS, with parent info when
        the task requires it. When the spec uses location data, the
        matched sites' storage elements (resolved via SiteDB) are
        attached to the block info before returning.
        """
        blockName = match['Inputs'].keys()[0] #TODO: Allow more than one

        if match['ACDC']:
            acdcInfo = match['ACDC']
            acdc = DataCollectionService(acdcInfo["server"], acdcInfo["database"])
            # NOTE(review): `collection` is unused below; presumably fetched
            # for its side effects -- confirm before removing.
            collection = acdc.getDataCollection(acdcInfo['collection'])
            splitedBlockName = ACDCBlock.splitBlockName(blockName)
            fileLists = acdc.getChunkFiles(acdcInfo['collection'],
                                           acdcInfo['fileset'],
                                           splitedBlockName['Offset'],
                                           splitedBlockName['NumOfFiles'],
                                           user = wmspec.getOwner().get("name"),
                                           group = wmspec.getOwner().get("group"))
            block = {}
            block["Files"] = fileLists
            return blockName, block
        else:
            dbs = get_dbs(match['Dbs'])
            if wmspec.getTask(match['TaskName']).parentProcessingFlag():
                dbsBlockDict = dbs.getFileBlockWithParents(blockName)
            else:
                dbsBlockDict = dbs.getFileBlock(blockName)

            if wmspec.locationDataSourceFlag():
                # blockInfo aliases dbsBlockDict[blockName], so the
                # StorageElements added here are visible in the returned dict.
                blockInfo = dbsBlockDict[blockName]
                seElements = []
                for cmsSite in match['Inputs'].values()[0]: #TODO: Allow more than one
                    ses = self.SiteDB.cmsNametoSE(cmsSite)
                    seElements.extend(ses)
                # De-duplicate SEs shared by several sites.
                seElements = list(set(seElements))
                blockInfo['StorageElements'] = seElements
        return blockName, dbsBlockDict[blockName]
Example #5
0
 def organiseByDbs(self, dataItems):
     """Group data item names by DBS instance.

     Returns a dict mapping a DBSReader (obtained via get_dbs) to the
     list of item names served by that instance. ACDC blocks are skipped:
     their location is inserted when the block is queued and is not
     supposed to change afterwards.
     """
     grouped = defaultdict(list)
     for entry in dataItems:
         name = entry['name']
         if not ACDCBlock.checkBlockName(name):
             grouped[get_dbs(entry['dbs_url'])].append(name)
     return grouped
Example #6
0
 def organiseByDbs(self, dataItems):
     """Bucket item names by the DBSReader serving them.

     ACDC blocks are excluded: their location is fixed when the block is
     queued and must not be updated here.
     """
     buckets = defaultdict(list)
     regularItems = (it for it in dataItems
                     if not ACDCBlock.checkBlockName(it['name']))
     for it in regularItems:
         buckets[get_dbs(it['dbs_url'])].append(it['name'])
     return buckets
Example #7
0
    def execute(self, *args, **kwargs):
        """Perform DBS data discovery for the task in kwargs['task'].

        Looks up the input dataset's blocks in DBS, resolves each block's
        locations through dls, fetches per-file details, and hands the
        results to self.formatOutput(). If no location at all is found,
        the task is flagged FAILED on the REST backend and a StopHandler
        is raised to abort the handler chain.
        """
        self.logger.info(
            "Data discovery with DBS")  ## to be changed into debug
        dbs = get_dbs(self.config.Services.DBSUrl)
        # A task-specific DBS URL (tm_dbs_url) overrides the service default.
        if kwargs['task']['tm_dbs_url']:
            dbs = get_dbs(kwargs['task']['tm_dbs_url'])
        self.logger.debug("Data discovery through %s for %s" %
                          (dbs, kwargs['task']['tm_taskname']))
        # Get the list of blocks for the locations and then call dls.
        # The WMCore DBS3 implementation makes one call to dls for each block
        # with locations = True
        blocks = [
            x['Name']
            for x in dbs.getFileBlocksInfo(kwargs['task']['tm_input_dataset'],
                                           locations=False)
        ]
        #Create a map for block's locations: for each block get the list of locations
        ll = dbs.dls.getLocations(list(blocks), showProd=True)
        if len(ll) == 0:
            msg = "No location was found for %s in %s." % (
                kwargs['task']['tm_input_dataset'],
                kwargs['task']['tm_dbs_url'])
            self.logger.error("Setting %s as failed" %
                              str(kwargs['task']['tm_taskname']))
            # Record the failure on the REST interface before bailing out.
            configreq = {
                'workflow': kwargs['task']['tm_taskname'],
                'status': "FAILED",
                'subresource': 'failure',
                'failure': b64encode(msg)
            }
            self.server.post(self.resturl, data=urllib.urlencode(configreq))
            raise StopHandler(msg)
        # Keep only the host names of each entry's locations, then pair them
        # positionally with the block names (assumes dls returns one entry per
        # block in request order -- TODO confirm).
        locations = map(lambda x: map(lambda y: y.host, x.locations), ll)
        locationsmap = dict(zip(blocks, locations))
        filedetails = dbs.listDatasetFileDetails(
            kwargs['task']['tm_input_dataset'], True)

        return self.formatOutput(task=kwargs['task'],
                                 requestname=kwargs['task']['tm_taskname'],
                                 datasetfiles=filedetails,
                                 locations=locationsmap)
Example #8
0
def test(dbsUrl):
    """Smoke-test block-level DBSReader calls against a reference dataset.

    Iterates the closed blocks of a small fixed dataset and exercises
    getDBSSummaryInfo, listRunLumis and listFilesInBlock for each block;
    the return values are intentionally discarded.
    """
    # super big dataset, 540 blocks and 10775 files
    #datasetPath = '/SingleElectron/Run2012D-v1/RAW'
    # smaller, only 14 blocks and 2954 files
    datasetPath = '/MinBias_TuneZ2star_8TeV-pythia6/Summer12-START50_V13-v3/GEN-SIM'

    dbs = get_dbs(dbsUrl)
    # Comprehension instead of the manual append loop.
    blocks = [str(block)
              for block in dbs.listFileBlocks(datasetPath, onlyClosedBlocks=True)]

    for blockName in blocks:
        # Calls are made for their server-side effect / timing only.
        dbs.getDBSSummaryInfo(datasetPath, block=blockName)
        dbs.listRunLumis(block=blockName)
        dbs.listFilesInBlock(fileBlockName=blockName)
Example #9
0
def test(dbsUrl):
    """Exercise per-block DBSReader queries on a small reference dataset."""
    # super big dataset, 540 blocks and 10775 files
    #datasetPath = '/SingleElectron/Run2012D-v1/RAW'
    # smaller, only 14 blocks and 2954 files
    datasetPath = '/MinBias_TuneZ2star_8TeV-pythia6/Summer12-START50_V13-v3/GEN-SIM'

    reader = get_dbs(dbsUrl)
    blockNames = [str(b)
                  for b in reader.listFileBlocks(datasetPath, onlyClosedBlocks=True)]

    for name in blockNames:
        reader.getDBSSummaryInfo(datasetPath, block=name)
        runLumis = reader.listRunLumis(block=name)
        fileInfo = reader.listFilesInBlock(fileBlockName=name)
Example #10
0
    def _getDBSBlock(self, match, wmspec):
        """Get DBS info for this block.

        Returns a (blockName, blockInfo) tuple. ACDC matches are resolved
        through the DataCollectionService and return a {'Files': [...]}
        dict; other matches are looked up in DBS, with parent info when
        the task requires it. When the spec uses location data, the
        matched sites' storage elements (resolved via SiteDB) are
        attached to the block info before returning.
        """
        blockName = match['Inputs'].keys()[0]  #TODO: Allow more than one

        if match['ACDC']:
            acdcInfo = match['ACDC']
            acdc = DataCollectionService(acdcInfo["server"],
                                         acdcInfo["database"])
            # NOTE(review): `collection` is unused below; presumably fetched
            # for its side effects -- confirm before removing.
            collection = acdc.getDataCollection(acdcInfo['collection'])
            splitedBlockName = ACDCBlock.splitBlockName(blockName)
            fileLists = acdc.getChunkFiles(
                acdcInfo['collection'],
                acdcInfo['fileset'],
                splitedBlockName['Offset'],
                splitedBlockName['NumOfFiles'],
                user=wmspec.getOwner().get("name"),
                group=wmspec.getOwner().get("group"))
            block = {}
            block["Files"] = fileLists
            return blockName, block
        else:
            dbs = get_dbs(match['Dbs'])
            if wmspec.getTask(match['TaskName']).parentProcessingFlag():
                dbsBlockDict = dbs.getFileBlockWithParents(blockName)
            else:
                dbsBlockDict = dbs.getFileBlock(blockName)

            if wmspec.locationDataSourceFlag():
                # blockInfo aliases dbsBlockDict[blockName], so the
                # StorageElements added here are visible in the returned dict.
                blockInfo = dbsBlockDict[blockName]
                seElements = []
                for cmsSite in match['Inputs'].values(
                )[0]:  #TODO: Allow more than one
                    ses = self.SiteDB.cmsNametoSE(cmsSite)
                    seElements.extend(ses)
                # De-duplicate SEs shared by several sites.
                seElements = list(set(seElements))
                blockInfo['StorageElements'] = seElements
        return blockName, dbsBlockDict[blockName]
Example #11
0
    def execute(self, *args, **kwargs):
        """Perform DBS data discovery for the task in kwargs['task'].

        Steps:
          1. Build a DBSReader (the task's tm_dbs_url overrides the service
             default), temporarily exporting the TaskWorker cert/key through
             X509_USER_CERT / X509_USER_KEY, which DBS3 requires.
          2. Check the dataset status, list its blocks and their disk-only
             locations, and fetch per-file details; when a secondary input
             dataset is given, match its files to the primary ones by lumi
             overlap and record them as 'Parents'.
          3. Build the wmcore data structures via self.formatOutput().

        Raises TaskWorkerException for every user-reportable failure
        (dataset unknown, no disk replica, DBS/PhEDEx glitch, no files).
        """
        self.logger.info("Data discovery with DBS") ## to be changed into debug
        old_cert_val = os.getenv("X509_USER_CERT")
        old_key_val = os.getenv("X509_USER_KEY")
        try:
            os.environ['X509_USER_CERT'] = self.config.TaskWorker.cmscert
            os.environ['X509_USER_KEY'] = self.config.TaskWorker.cmskey
            # DBS3 requires X509_USER_CERT to be set - but we don't want to leak that to other modules
            dbsurl = self.config.Services.DBSUrl
            if kwargs['task']['tm_dbs_url']:
                dbsurl = kwargs['task']['tm_dbs_url']
            self.dbs = get_dbs(dbsurl)
            self.dbsInstance = self.dbs.dbs.serverinfo()["dbs_instance"]
        finally:
            # Restore the caller's environment; pop() (instead of del) keeps
            # this cleanup from raising (and masking the real error) when the
            # variable was never set.
            if old_cert_val is not None:
                os.environ['X509_USER_CERT'] = old_cert_val
            else:
                os.environ.pop('X509_USER_CERT', None)
            if old_key_val is not None:
                os.environ['X509_USER_KEY'] = old_key_val
            else:
                os.environ.pop('X509_USER_KEY', None)
        self.logger.debug("Data discovery through %s for %s" %(self.dbs, kwargs['task']['tm_taskname']))
        self.checkDatasetStatus(kwargs['task']['tm_input_dataset'], kwargs)
        try:
            # Get the list of blocks for the locations and then call dls.
            # The WMCore DBS3 implementation makes one call to dls for each block
            # with locations = True so we are using locations=False and looking up location later
            blocks = [ x['Name'] for x in self.dbs.getFileBlocksInfo(kwargs['task']['tm_input_dataset'], locations=False)]
        except DBSReaderError as dbsexc:
            # Dataset not found in DBS is a known use case. NOTE: this must be
            # a substring test; str.find() returns -1 (truthy) when absent, so
            # the previous `if str(dbsexc).find(...)` matched nearly every error.
            if 'No matching data' in str(dbsexc):
                raise TaskWorkerException("The CRAB3 server backend could not find dataset %s in this DBS instance: %s" % (kwargs['task']['tm_input_dataset'], dbsurl))
            raise
        ## Create a map for block's locations: for each block get the list of locations.
        ## Note: listFileBlockLocation() gets first the locations from PhEDEx, and if no
        ## locations are found it gets the original locations from DBS. So it should
        ## never be the case at this point that some blocks have no locations.
        try:
            # Outside the global instance, ask DBS only (PhEDEx does not track
            # those datasets). Assumes dbsInstance looks like 'prod/global' --
            # TODO confirm.
            dbsOnly = self.dbsInstance.split('/')[1] != 'global'
            locationsMap = self.dbs.listFileBlockLocation(list(blocks), dbsOnly=dbsOnly)
        except Exception as ex: #TODO should we catch HttpException instead?
            self.logger.exception(ex)
            raise TaskWorkerException("The CRAB3 server backend could not get the location of the files from dbs or phedex.\n"+\
                                "This is could be a temporary phedex/dbs glitch, please try to submit a new task (resubmit will not work)"+\
                                " and contact the experts if the error persists.\nError reason: %s" % str(ex))
        # Drop TAPE-only replicas; the discarded sites are reported through
        # self.otherLocations in the error message below.
        self.keepOnlyDisks(locationsMap)
        if not locationsMap:
            msg = "Task could not be submitted because there is no DISK replica for dataset %s ." % (kwargs['task']['tm_input_dataset'])
            msg += " Please, check DAS, https://cmsweb.cern.ch/das, and make sure the dataset is accessible on DISK"
            msg += " You might want to contact your physics group if you need a disk replica."
            if self.otherLocations:
                msg += "\nN.B.: your dataset is stored at %s, but those are TAPE locations." % ','.join(sorted(self.otherLocations))
            raise TaskWorkerException(msg)
        if len(blocks) != len(locationsMap):
            self.logger.warning("The locations of some blocks have not been found: %s" % (set(blocks) - set(locationsMap)))
        try:
            filedetails = self.dbs.listDatasetFileDetails(kwargs['task']['tm_input_dataset'], getParents=True, validFileOnly=0)

            secondary = kwargs['task'].get('tm_secondary_input_dataset', None)
            if secondary:
                moredetails = self.dbs.listDatasetFileDetails(secondary, getParents=False, validFileOnly=0)

                # Pre-build a LumiList per secondary file for the matching loop.
                for secfilename, secinfos in moredetails.items():
                    secinfos['lumiobj'] = LumiList(runsAndLumis=secinfos['Lumis'])

                self.logger.info("Beginning to match files from secondary dataset")
                # A secondary file becomes a 'parent' of a primary file when
                # their lumi sets intersect.
                for dummyFilename, infos in filedetails.items():
                    infos['Parents'] = []
                    lumis = LumiList(runsAndLumis=infos['Lumis'])
                    for secfilename, secinfos in moredetails.items():
                        if (lumis & secinfos['lumiobj']):
                            infos['Parents'].append(secfilename)
                self.logger.info("Done matching files from secondary dataset")
                kwargs['task']['tm_use_parent'] = 1
        except Exception as ex: #TODO should we catch HttpException instead?
            self.logger.exception(ex)
            raise TaskWorkerException("The CRAB3 server backend could not contact DBS to get the files details (Lumis, events, etc).\n"+\
                                "This is could be a temporary DBS glitch. Please try to submit a new task (resubmit will not work)"+\
                                " and contact the experts if the error persists.\nError reason: %s" % str(ex)) #TODO addo the nodes phedex so the user can check themselves
        if not filedetails:
            raise TaskWorkerException(("Cannot find any file inside the dataset. Please, check your dataset in DAS, %s.\n"
                                      "Aborting submission. Resubmitting your task will not help.") %
                                      ("https://cmsweb.cern.ch/das/request?instance=%s&input=dataset=%s") %
                                      (self.dbsInstance, kwargs['task']['tm_input_dataset']))

        ## Format the output creating the data structures required by wmcore. Filters out invalid files,
        ## files whose block has no location, and figures out the PSN
        result = self.formatOutput(task = kwargs['task'], requestname = kwargs['task']['tm_taskname'],
                                   datasetfiles = filedetails, locations = locationsMap,
                                   tempDir = kwargs['tempDir'])

        if not result.result:
            raise TaskWorkerException(("Cannot find any valid file inside the dataset. Please, check your dataset in DAS, %s.\n"
                                      "Aborting submission. Resubmitting your task will not help.") %
                                      ("https://cmsweb.cern.ch/das/request?instance=%s&input=dataset=%s") %
                                      (self.dbsInstance, kwargs['task']['tm_input_dataset']))

        self.logger.debug("Got %s files" % len(result.result.getFiles()))
        return result
 def dbs(self, dbs_url = None):
     """Return a DBSReader for *dbs_url*, defaulting to the initial task's URL."""
     from WMCore.WorkQueue.WorkQueueUtils import get_dbs
     url = self.initialTask.dbsUrl() if dbs_url is None else dbs_url
     return get_dbs(url)
Example #13
0
 def dbs(self, dbs_url=None):
     """Get a DBSReader instance.

     Falls back to the initial task's DBS URL when no URL is supplied.
     """
     from WMCore.WorkQueue.WorkQueueUtils import get_dbs
     target = dbs_url if dbs_url is not None else self.initialTask.dbsUrl()
     return get_dbs(target)
Example #14
0
 def execute(self, *args, **kwargs):
     """Perform DBS data discovery for the task in kwargs['task'].

     Builds a DBSReader (the task's tm_dbs_url overrides the service
     default) while temporarily exporting the TaskWorker cert/key through
     X509_USER_CERT / X509_USER_KEY (required by DBS3), then checks the
     dataset status, lists its blocks, resolves their disk locations,
     fetches per-file details and returns self.formatOutput(...).

     Raises TaskWorkerException for every user-reportable failure
     (dataset unknown, no disk replica, DBS/PhEDEx glitch, no files,
     dataset not VALID without the user's explicit opt-in).
     """
     self.logger.info("Data discovery with DBS") ## to be changed into debug
     old_cert_val = os.getenv("X509_USER_CERT")
     old_key_val = os.getenv("X509_USER_KEY")
     try:
         # DBS3 requires X509_USER_CERT to be set - but we don't want to leak that to other modules
         os.environ['X509_USER_CERT'] = self.config.TaskWorker.cmscert
         os.environ['X509_USER_KEY'] = self.config.TaskWorker.cmskey
         dbsurl = self.config.Services.DBSUrl
         if kwargs['task']['tm_dbs_url']:
             dbsurl = kwargs['task']['tm_dbs_url']
         self.dbs = get_dbs(dbsurl)
     finally:
         # Restore the caller's environment even if get_dbs fails; pop()
         # (instead of del) keeps the cleanup itself from raising.
         if old_cert_val is not None:
             os.environ['X509_USER_CERT'] = old_cert_val
         else:
             os.environ.pop('X509_USER_CERT', None)
         if old_key_val is not None:
             os.environ['X509_USER_KEY'] = old_key_val
         else:
             os.environ.pop('X509_USER_KEY', None)
     self.logger.debug("Data discovery through %s for %s" %(self.dbs, kwargs['task']['tm_taskname']))
     datasetStatus = self.checkDatasetStatus(kwargs['task']['tm_input_dataset'], kwargs)
     try:
         # Get the list of blocks for the locations and then call dls.
         # The WMCore DBS3 implementation makes one call to dls for each block
         # with locations = True so we are using locations=False and looking up location later
         blocks = [ x['Name'] for x in self.dbs.getFileBlocksInfo(kwargs['task']['tm_input_dataset'], locations=False)]
     except DBSReaderError as dbsexc:
         # Dataset not found in DBS is a known use case. NOTE: this must be a
         # substring test; str.find() returns -1 (truthy) when absent, so the
         # previous `if str(dbsexc).find(...)` matched nearly every error.
         if 'No matching data' in str(dbsexc):
             raise TaskWorkerException("The CRAB3 server backend could not find dataset %s in this DBS instance: %s" % (kwargs['task']['tm_input_dataset'], dbsurl))
         raise
     #Create a map for block's locations: for each block get the list of locations
     try:
         locationsMap = self.dbs.listFileBlockLocation(list(blocks), phedexNodes=True)
     except Exception as ex: #TODO should we catch HttpException instead?
         self.logger.exception(ex)
         raise TaskWorkerException("The CRAB3 server backend could not get the location of the files from dbs or phedex.\n"+\
                             "This is could be a temporary phedex/dbs glitch, please try to submit a new task (resubmit will not work)"+\
                             " and contact the experts if the error persists.\nError reason: %s" % str(ex))
     # Drop TAPE-only replicas; the discarded sites are reported through
     # self.otherLocations in the error message below.
     self.keepOnlyDisks(locationsMap)
     if not locationsMap:
         msg = "Task could not be submitted because there is no DISK replica for dataset %s ." % (kwargs['task']['tm_input_dataset'])
         msg += " Please, check DAS, https://cmsweb.cern.ch/das, and make sure the dataset is accessible on DISK"
         msg += " You might want to contact your physics group if you need a disk replica."
         if self.otherLocations:
             msg += "\nN.B.: your dataset is stored at %s, but those are TAPE locations." % ','.join(sorted(self.otherLocations))
         raise TaskWorkerException(msg)
     if len(blocks) != len(locationsMap):
         self.logger.warning("The locations of some blocks have not been found: %s" % (set(blocks) - set(locationsMap)))
     try:
         filedetails = self.dbs.listDatasetFileDetails(kwargs['task']['tm_input_dataset'], True)
     except Exception as ex: #TODO should we catch HttpException instead?
         self.logger.exception(ex)
         # "deteails" typo fixed in the user-facing message below.
         raise TaskWorkerException("The CRAB3 server backend could not contact DBS to get the files details (Lumis, events, etc).\n"+\
                             "This is could be a temporary DBS glitch, please try to submit a new task (resubmit will not work)"+\
                             " and contact the experts if the error persists.\nError reason: %s" % str(ex)) #TODO addo the nodes phedex so the user can check themselves
     if not filedetails:
         raise TaskWorkerException("Cannot find any valid file inside the dataset. Please, check your dataset in DAS, https://cmsweb.cern.ch/das.\n"+\
                                   "Aborting submission. Resubmitting your task will not help.")
     # Refuse non-VALID datasets unless the user explicitly opted in via
     # Data.allowNonValidInputDataset (stored as tm_nonvalid_input_dataset).
     if datasetStatus != 'VALID' and kwargs['task']['tm_nonvalid_input_dataset'] != 'T':
         msg  = "CRAB refuses to run over the input dataset %s, because it is flagged as '%s' in DBS." % (kwargs['task']['tm_input_dataset'], datasetStatus)
         msg += " To allow CRAB to run over a dataset that is not flagged as 'VALID', set Data.allowNonValidInputDataset = True in the CRAB configuration."
         raise TaskWorkerException(msg)
     result = self.formatOutput(task = kwargs['task'], requestname = kwargs['task']['tm_taskname'], datasetfiles = filedetails, locations = locationsMap)
     self.logger.debug("Got %s files" % len(result.result.getFiles()))
     return result
    def execute(self, *args, **kwargs):
        """Perform DBS data discovery for the task in kwargs['task'].

        Builds a DBSReader (the task's tm_dbs_url overrides the service
        default) while temporarily exporting the TaskWorker cert/key
        through X509_USER_CERT / X509_USER_KEY (required by DBS3), then
        checks the dataset status, lists its blocks, resolves their disk
        locations, fetches per-file details (optionally matching a
        secondary dataset by lumi overlap) and returns
        self.formatOutput(...).

        Raises TaskWorkerException for every user-reportable failure.
        """
        self.logger.info(
            "Data discovery with DBS")  ## to be changed into debug
        old_cert_val = os.getenv("X509_USER_CERT")
        old_key_val = os.getenv("X509_USER_KEY")
        try:
            os.environ['X509_USER_CERT'] = self.config.TaskWorker.cmscert
            os.environ['X509_USER_KEY'] = self.config.TaskWorker.cmskey
            # DBS3 requires X509_USER_CERT to be set - but we don't want to leak that to other modules
            dbsurl = self.config.Services.DBSUrl
            if kwargs['task']['tm_dbs_url']:
                dbsurl = kwargs['task']['tm_dbs_url']
            self.dbs = get_dbs(dbsurl)
            self.dbsInstance = self.dbs.dbs.serverinfo()["dbs_instance"]
        finally:
            # Restore the caller's environment even on failure; pop()
            # (instead of del) keeps the cleanup itself from raising.
            if old_cert_val is not None:
                os.environ['X509_USER_CERT'] = old_cert_val
            else:
                os.environ.pop('X509_USER_CERT', None)
            if old_key_val is not None:
                os.environ['X509_USER_KEY'] = old_key_val
            else:
                os.environ.pop('X509_USER_KEY', None)
        self.logger.debug("Data discovery through %s for %s" %
                          (self.dbs, kwargs['task']['tm_taskname']))
        self.checkDatasetStatus(kwargs['task']['tm_input_dataset'], kwargs)
        try:
            # Get the list of blocks for the locations and then call dls.
            # The WMCore DBS3 implementation makes one call to dls for each block
            # with locations = True so we are using locations=False and looking up location later
            blocks = [
                x['Name'] for x in self.dbs.getFileBlocksInfo(
                    kwargs['task']['tm_input_dataset'], locations=False)
            ]
        except DBSReaderError as dbsexc:
            # Dataset not found in DBS is a known use case. NOTE: this must
            # be a substring test; str.find() returns -1 (truthy) when absent,
            # so the previous `if str(dbsexc).find(...)` matched nearly every
            # error.
            if 'No matching data' in str(dbsexc):
                raise TaskWorkerException(
                    "The CRAB3 server backend could not find dataset %s in this DBS instance: %s"
                    % (kwargs['task']['tm_input_dataset'], dbsurl))
            raise
        ## Create a map for block's locations: for each block get the list of locations.
        ## Note: listFileBlockLocation() gets first the locations from PhEDEx, and if no
        ## locations are found it gets the original locations from DBS. So it should
        ## never be the case at this point that some blocks have no locations.
        try:
            locationsMap = self.dbs.listFileBlockLocation(list(blocks),
                                                          phedexNodes=True)
        except Exception as ex:  #TODO should we catch HttpException instead?
            self.logger.exception(ex)
            raise TaskWorkerException("The CRAB3 server backend could not get the location of the files from dbs or phedex.\n"+\
                                "This is could be a temporary phedex/dbs glitch, please try to submit a new task (resubmit will not work)"+\
                                " and contact the experts if the error persists.\nError reason: %s" % str(ex))
        # Drop TAPE-only replicas; the discarded sites are reported through
        # self.otherLocations in the error message below.
        self.keepOnlyDisks(locationsMap)
        if not locationsMap:
            msg = "Task could not be submitted because there is no DISK replica for dataset %s ." % (
                kwargs['task']['tm_input_dataset'])
            msg += " Please, check DAS, https://cmsweb.cern.ch/das, and make sure the dataset is accessible on DISK"
            msg += " You might want to contact your physics group if you need a disk replica."
            if self.otherLocations:
                msg += "\nN.B.: your dataset is stored at %s, but those are TAPE locations." % ','.join(
                    sorted(self.otherLocations))
            raise TaskWorkerException(msg)
        if len(blocks) != len(locationsMap):
            self.logger.warning(
                "The locations of some blocks have not been found: %s" %
                (set(blocks) - set(locationsMap)))
        try:
            filedetails = self.dbs.listDatasetFileDetails(
                kwargs['task']['tm_input_dataset'],
                getParents=True,
                validFileOnly=0)

            secondary = kwargs['task'].get('tm_secondary_input_dataset', None)
            if secondary:
                moredetails = self.dbs.listDatasetFileDetails(secondary,
                                                              getParents=False,
                                                              validFileOnly=0)

                # Pre-build a LumiList per secondary file for the matching
                # loop below.
                for secfilename, secinfos in moredetails.items():
                    secinfos['lumiobj'] = LumiList(
                        runsAndLumis=secinfos['Lumis'])

                self.logger.info(
                    "Beginning to match files from secondary dataset")
                # A secondary file becomes a 'parent' of a primary file when
                # their lumi sets intersect. NOTE: the previous
                # `len(lumis and secinfos['lumiobj']) > 0` only measured the
                # second operand (`and` returns it when the first is truthy);
                # the intersection operator `&` is what is intended, matching
                # the sibling implementation of this method.
                for dummyFilename, infos in filedetails.items():
                    infos['Parents'] = []
                    lumis = LumiList(runsAndLumis=infos['Lumis'])
                    for secfilename, secinfos in moredetails.items():
                        if (lumis & secinfos['lumiobj']):
                            infos['Parents'].append(secfilename)
                self.logger.info("Done matching files from secondary dataset")
                kwargs['task']['tm_use_parent'] = 1
        except Exception as ex:  #TODO should we catch HttpException instead?
            self.logger.exception(ex)
            raise TaskWorkerException("The CRAB3 server backend could not contact DBS to get the files details (Lumis, events, etc).\n"+\
                                "This is could be a temporary DBS glitch. Please try to submit a new task (resubmit will not work)"+\
                                " and contact the experts if the error persists.\nError reason: %s" % str(ex)) #TODO addo the nodes phedex so the user can check themselves
        if not filedetails:
            raise TaskWorkerException((
                "Cannot find any file inside the dataset. Please, check your dataset in DAS, %s.\n"
                "Aborting submission. Resubmitting your task will not help."
            ) % (
                "https://cmsweb.cern.ch/das/request?instance=%s&input=dataset=%s"
            ) % (self.dbsInstance, kwargs['task']['tm_input_dataset']))

        ## Format the output creating the data structures required by wmcore. Filters out invalid files,
        ## files whose block has no location, and figures out the PSN
        result = self.formatOutput(task=kwargs['task'],
                                   requestname=kwargs['task']['tm_taskname'],
                                   datasetfiles=filedetails,
                                   locations=locationsMap)

        if not result.result:
            raise TaskWorkerException((
                "Cannot find any valid file inside the dataset. Please, check your dataset in DAS, %s.\n"
                "Aborting submission. Resubmitting your task will not help."
            ) % (
                "https://cmsweb.cern.ch/das/request?instance=%s&input=dataset=%s"
            ) % (self.dbsInstance, kwargs['task']['tm_input_dataset']))

        self.logger.debug("Got %s files" % len(result.result.getFiles()))
        return result