def execute(self, *args, **kwargs):
    """Discover the input dataset's blocks, their locations and file details.

    Queries DBS for the block names, DLS for the block locations, and DBS
    again for the per-file details, then hands everything to formatOutput().
    Marks the task FAILED server-side and raises StopHandler when no
    location at all is found.
    """
    self.logger.info("Data discovery with DBS") ## to be changed into debug
    task = kwargs['task']
    # Default to the service-wide DBS instance, overridden per task if set.
    dbs = get_dbs(self.config.Services.DBSUrl)
    if task['tm_dbs_url']:
        dbs = get_dbs(task['tm_dbs_url'])
    self.logger.debug("Data discovery through %s for %s" % (dbs, task['tm_taskname']))
    # Ask only for the block names (locations=False): the WMCore DBS3
    # implementation would otherwise issue one DLS call per block.
    blockinfo = dbs.getFileBlocksInfo(task['tm_input_dataset'], locations=False)
    blocks = [entry['Name'] for entry in blockinfo]
    # Resolve all block locations with a single DLS query instead.
    dlsresults = dbs.dls.getLocations(list(blocks), showProd=True)
    if not dlsresults:
        # Nothing is located anywhere: flag the task as FAILED on the
        # server and stop the handler chain.
        msg = "No location was found for %s in %s." % (task['tm_input_dataset'], task['tm_dbs_url'])
        self.logger.error("Setting %s as failed" % str(task['tm_taskname']))
        configreq = {'workflow': task['tm_taskname'],
                     'status': "FAILED",
                     'subresource': 'failure',
                     'failure': b64encode(msg)}
        self.server.post(self.resturl, data=urllib.urlencode(configreq))
        raise StopHandler(msg)
    # One list of SE host names per block, paired positionally with `blocks`.
    hostlists = [[se.host for se in entry.locations] for entry in dlsresults]
    locationsmap = dict(zip(blocks, hostlists))
    filedetails = dbs.listDatasetFileDetails(task['tm_input_dataset'], True)
    return self.formatOutput(task=task,
                             requestname=task['tm_taskname'],
                             datasetfiles=filedetails,
                             locations=locationsmap)
def _getDBSBlock(self, match, wmspec):
    """Get DBS info for this block.

    Returns (blockName, blockInfo): for an ACDC match the files come from
    the data collection service chunk; otherwise the block is read from DBS,
    with parents when the task requires parent processing.
    """
    blockName = match['Inputs'].keys()[0] #TODO: Allow more than one
    acdcInfo = match['ACDC']
    if not acdcInfo:
        # Plain DBS block: read it directly, with parentage if needed.
        reader = get_dbs(match['Dbs'])
        if wmspec.getTask(match['TaskName']).parentProcessingFlag():
            blockDict = reader.getFileBlockWithParents(blockName)
        else:
            blockDict = reader.getFileBlock(blockName)
        return blockName, blockDict[blockName]
    # ACDC block: fetch the chunk's file list from the collection service.
    acdc = DataCollectionService(acdcInfo["server"], acdcInfo["database"])
    collection = acdc.getDataCollection(acdcInfo['collection'])
    chunk = ACDCBlock.splitBlockName(blockName)
    fileLists = acdc.getChunkFiles(acdcInfo['collection'],
                                   acdcInfo['fileset'],
                                   chunk['Offset'],
                                   chunk['NumOfFiles'],
                                   user=wmspec.getOwner().get("name"),
                                   group=wmspec.getOwner().get("group"))
    return blockName, {"Files": fileLists}
def execute(self, *args, **kwargs):
    """Discover the blocks of the task's input dataset through DBS.

    Temporarily points X509_USER_CERT/X509_USER_KEY at the TaskWorker
    service credentials while the DBSReader is created (DBS3 requires
    them), restoring the previous environment afterwards even on failure.

    Raises TaskWorkerException when the dataset is not known to DBS.
    """
    self.logger.info("Data discovery with DBS") ## to be changed into debug
    old_cert_val = os.getenv("X509_USER_CERT")
    old_key_val = os.getenv("X509_USER_KEY")
    try:
        # DBS3 requires X509_USER_CERT to be set - but we don't want to leak that to other modules
        os.environ['X509_USER_CERT'] = self.config.TaskWorker.cmscert
        os.environ['X509_USER_KEY'] = self.config.TaskWorker.cmskey
        dbsurl = self.config.Services.DBSUrl
        if kwargs['task']['tm_dbs_url']:
            dbsurl = kwargs['task']['tm_dbs_url']
        dbs = get_dbs(dbsurl)
    finally:
        # Restore (or remove) the previous credentials even if get_dbs()
        # raised; pop() avoids a masking KeyError when the assignment above
        # never happened.
        if old_cert_val is not None:
            os.environ['X509_USER_CERT'] = old_cert_val
        else:
            os.environ.pop('X509_USER_CERT', None)
        if old_key_val is not None:
            os.environ['X509_USER_KEY'] = old_key_val
        else:
            os.environ.pop('X509_USER_KEY', None)
    self.logger.debug("Data discovery through %s for %s" % (dbs, kwargs['task']['tm_taskname']))
    try:
        # Get the list of blocks for the locations and then call dls.
        # The WMCore DBS3 implementation makes one call to dls for each block
        # with locations = True so we are using locations=False and looking up location later
        blocks = [x['Name'] for x in dbs.getFileBlocksInfo(kwargs['task']['tm_input_dataset'], locations=False)]
    except DBSReaderError as dbsexc:
        # Dataset not found in DBS is a known use case. A substring test is
        # required: str.find() returns -1 (truthy) on a miss and 0 (falsy)
        # when the pattern starts the message, so the old truth-test was
        # inverted.
        if 'No matching data' in str(dbsexc):
            raise TaskWorkerException("The CRAB3 server backend could not find dataset %s in this DBS instance: %s" % (kwargs['task']['tm_input_dataset'], dbsurl))
        raise
def _getDBSBlock(self, match, wmspec):
    """Get DBS info for this block.

    Returns a (blockName, blockInfo) tuple. ACDC matches are resolved
    through the data collection service; plain matches through DBS, with
    parent information when the task requires it, and with the matched
    sites' storage elements attached when the workqueue (rather than
    DBS/PhEDEx) is the location data source.
    """
    blockName = match['Inputs'].keys()[0] #TODO: Allow more than one
    if match['ACDC']:
        acdcInfo = match['ACDC']
        acdc = DataCollectionService(acdcInfo["server"], acdcInfo["database"])
        collection = acdc.getDataCollection(acdcInfo['collection'])
        splitedBlockName = ACDCBlock.splitBlockName(blockName)
        # Fetch only this chunk's slice of the fileset, on behalf of the
        # spec owner.
        fileLists = acdc.getChunkFiles(acdcInfo['collection'],
                                       acdcInfo['fileset'],
                                       splitedBlockName['Offset'],
                                       splitedBlockName['NumOfFiles'],
                                       user = wmspec.getOwner().get("name"),
                                       group = wmspec.getOwner().get("group"))
        block = {}
        block["Files"] = fileLists
        return blockName, block
    else:
        dbs = get_dbs(match['Dbs'])
        if wmspec.getTask(match['TaskName']).parentProcessingFlag():
            dbsBlockDict = dbs.getFileBlockWithParents(blockName)
        else:
            dbsBlockDict = dbs.getFileBlock(blockName)
        if wmspec.locationDataSourceFlag():
            # blockInfo aliases dbsBlockDict[blockName], so mutating it
            # below also updates the returned dict.
            blockInfo = dbsBlockDict[blockName]
            seElements = []
            for cmsSite in match['Inputs'].values()[0]: #TODO: Allow more than one
                # Translate each matched CMS site name into its SEs.
                ses = self.SiteDB.cmsNametoSE(cmsSite)
                seElements.extend(ses)
            # Deduplicate (ordering of the resulting list is unspecified).
            seElements = list(set(seElements))
            blockInfo['StorageElements'] = seElements
        return blockName, dbsBlockDict[blockName]
def organiseByDbs(self, dataItems):
    """Sort items by dbs instances - return dict with DBSReader as key & data items as values"""
    grouped = defaultdict(list)
    for entry in dataItems:
        # ACDC blocks keep the location recorded when they were queued and
        # are not supposed to change, so they are never grouped for update.
        if not ACDCBlock.checkBlockName(entry['name']):
            grouped[get_dbs(entry['dbs_url'])].append(entry['name'])
    return grouped
def execute(self, *args, **kwargs): self.logger.info( "Data discovery with DBS") ## to be changed into debug dbs = get_dbs(self.config.Services.DBSUrl) if kwargs['task']['tm_dbs_url']: dbs = get_dbs(kwargs['task']['tm_dbs_url']) self.logger.debug("Data discovery through %s for %s" % (dbs, kwargs['task']['tm_taskname'])) # Get the list of blocks for the locations and then call dls. # The WMCore DBS3 implementation makes one call to dls for each block # with locations = True blocks = [ x['Name'] for x in dbs.getFileBlocksInfo(kwargs['task']['tm_input_dataset'], locations=False) ] #Create a map for block's locations: for each block get the list of locations ll = dbs.dls.getLocations(list(blocks), showProd=True) if len(ll) == 0: msg = "No location was found for %s in %s." % ( kwargs['task']['tm_input_dataset'], kwargs['task']['tm_dbs_url']) self.logger.error("Setting %s as failed" % str(kwargs['task']['tm_taskname'])) configreq = { 'workflow': kwargs['task']['tm_taskname'], 'status': "FAILED", 'subresource': 'failure', 'failure': b64encode(msg) } self.server.post(self.resturl, data=urllib.urlencode(configreq)) raise StopHandler(msg) locations = map(lambda x: map(lambda y: y.host, x.locations), ll) locationsmap = dict(zip(blocks, locations)) filedetails = dbs.listDatasetFileDetails( kwargs['task']['tm_input_dataset'], True) return self.formatOutput(task=kwargs['task'], requestname=kwargs['task']['tm_taskname'], datasetfiles=filedetails, locations=locationsmap)
def test(dbsUrl):
    """Exercise the main DBSReader calls against a known dataset."""
    # super big dataset, 540 blocks and 10775 files
    #datasetPath = '/SingleElectron/Run2012D-v1/RAW'
    # smaller, only 14 blocks and 2954 files
    datasetPath = '/MinBias_TuneZ2star_8TeV-pythia6/Summer12-START50_V13-v3/GEN-SIM'
    reader = get_dbs(dbsUrl)
    closedBlocks = [str(b) for b in reader.listFileBlocks(datasetPath, onlyClosedBlocks=True)]
    for name in closedBlocks:
        # Touch the summary, run/lumi and file APIs for every closed block.
        reader.getDBSSummaryInfo(datasetPath, block=name)
        runLumis = reader.listRunLumis(block=name)
        fileInfo = reader.listFilesInBlock(fileBlockName=name)
def _getDBSBlock(self, match, wmspec):
    """Get DBS info for this block.

    Returns (blockName, blockInfo). ACDC matches are resolved through the
    data collection service; otherwise the block comes from DBS, decorated
    with the matched sites' storage elements when the workqueue supplies
    location data.
    """
    blockName = match['Inputs'].keys()[0] #TODO: Allow more than one
    acdcInfo = match['ACDC']
    if acdcInfo:
        # ACDC input: the files come from this chunk of the collection.
        acdc = DataCollectionService(acdcInfo["server"], acdcInfo["database"])
        collection = acdc.getDataCollection(acdcInfo['collection'])
        chunk = ACDCBlock.splitBlockName(blockName)
        files = acdc.getChunkFiles(acdcInfo['collection'],
                                   acdcInfo['fileset'],
                                   chunk['Offset'],
                                   chunk['NumOfFiles'],
                                   user=wmspec.getOwner().get("name"),
                                   group=wmspec.getOwner().get("group"))
        return blockName, {"Files": files}
    reader = get_dbs(match['Dbs'])
    if wmspec.getTask(match['TaskName']).parentProcessingFlag():
        blockDict = reader.getFileBlockWithParents(blockName)
    else:
        blockDict = reader.getFileBlock(blockName)
    if wmspec.locationDataSourceFlag():
        # Location comes from the matched inputs rather than DBS/PhEDEx:
        # translate each CMS site name into its storage elements.
        storage = set()
        for cmsSite in match['Inputs'].values()[0]: #TODO: Allow more than one
            storage.update(self.SiteDB.cmsNametoSE(cmsSite))
        blockDict[blockName]['StorageElements'] = list(storage)
    return blockName, blockDict[blockName]
def execute(self, *args, **kwargs):
    """Perform the data discovery through DBS.

    Looks up the blocks of the task's input dataset, their DISK locations,
    and the per-file details (lumis, events, parents). When a secondary
    input dataset is given, files of the secondary dataset whose lumi
    sections overlap a primary file become that file's parents. Returns
    the wmcore data structures built by formatOutput().

    Raises TaskWorkerException for every user/service error condition.
    """
    self.logger.info("Data discovery with DBS") ## to be changed into debug
    old_cert_val = os.getenv("X509_USER_CERT")
    old_key_val = os.getenv("X509_USER_KEY")
    try:
        # DBS3 requires X509_USER_CERT to be set - but we don't want to
        # leak that to other modules, hence the save/restore dance.
        os.environ['X509_USER_CERT'] = self.config.TaskWorker.cmscert
        os.environ['X509_USER_KEY'] = self.config.TaskWorker.cmskey
        dbsurl = self.config.Services.DBSUrl
        if kwargs['task']['tm_dbs_url']:
            dbsurl = kwargs['task']['tm_dbs_url']
        self.dbs = get_dbs(dbsurl)
        self.dbsInstance = self.dbs.dbs.serverinfo()["dbs_instance"]
    finally:
        # pop() instead of del: if an assignment above failed, del would
        # raise KeyError and mask the original exception.
        if old_cert_val is not None:
            os.environ['X509_USER_CERT'] = old_cert_val
        else:
            os.environ.pop('X509_USER_CERT', None)
        if old_key_val is not None:
            os.environ['X509_USER_KEY'] = old_key_val
        else:
            os.environ.pop('X509_USER_KEY', None)
    self.logger.debug("Data discovery through %s for %s" % (self.dbs, kwargs['task']['tm_taskname']))
    self.checkDatasetStatus(kwargs['task']['tm_input_dataset'], kwargs)
    try:
        # Get the list of blocks for the locations and then call dls.
        # The WMCore DBS3 implementation makes one call to dls for each block
        # with locations = True so we are using locations=False and looking up location later
        blocks = [x['Name'] for x in self.dbs.getFileBlocksInfo(kwargs['task']['tm_input_dataset'], locations=False)]
    except DBSReaderError as dbsexc:
        # Dataset not found in DBS is a known use case. A substring test is
        # required: str.find() returns -1 (truthy) on a miss and 0 (falsy)
        # when the pattern starts the message, so the old truth-test was
        # inverted.
        if 'No matching data' in str(dbsexc):
            raise TaskWorkerException("The CRAB3 server backend could not find dataset %s in this DBS instance: %s" % (kwargs['task']['tm_input_dataset'], dbsurl))
        raise
    ## Create a map for block's locations: for each block get the list of locations.
    ## Note: listFileBlockLocation() gets first the locations from PhEDEx, and if no
    ## locations are found it gets the original locations from DBS. So it should
    ## never be the case at this point that some blocks have no locations.
    try:
        # For non-global DBS instances fall back to the locations recorded
        # in DBS itself rather than PhEDEx.
        dbsOnly = self.dbsInstance.split('/')[1] != 'global'
        locationsMap = self.dbs.listFileBlockLocation(list(blocks), dbsOnly=dbsOnly)
    except Exception as ex: #TODO should we catch HttpException instead?
        self.logger.exception(ex)
        raise TaskWorkerException("The CRAB3 server backend could not get the location of the files from dbs or phedex.\n"+\
            "This is could be a temporary phedex/dbs glitch, please try to submit a new task (resubmit will not work)"+\
            " and contact the experts if the error persists.\nError reason: %s" % str(ex))
    self.keepOnlyDisks(locationsMap)
    if not locationsMap:
        msg = "Task could not be submitted because there is no DISK replica for dataset %s ." % (kwargs['task']['tm_input_dataset'])
        msg += " Please, check DAS, https://cmsweb.cern.ch/das, and make sure the dataset is accessible on DISK"
        msg += " You might want to contact your physics group if you need a disk replica."
        if self.otherLocations:
            msg += "\nN.B.: your dataset is stored at %s, but those are TAPE locations." % ','.join(sorted(self.otherLocations))
        raise TaskWorkerException(msg)
    if len(blocks) != len(locationsMap):
        self.logger.warning("The locations of some blocks have not been found: %s" % (set(blocks) - set(locationsMap)))
    try:
        filedetails = self.dbs.listDatasetFileDetails(kwargs['task']['tm_input_dataset'], getParents=True, validFileOnly=0)
        secondary = kwargs['task'].get('tm_secondary_input_dataset', None)
        if secondary:
            # Match every primary file against the secondary-dataset files
            # whose lumi sections overlap; the matches become its parents.
            moredetails = self.dbs.listDatasetFileDetails(secondary, getParents=False, validFileOnly=0)
            for secinfos in moredetails.values():
                secinfos['lumiobj'] = LumiList(runsAndLumis=secinfos['Lumis'])
            self.logger.info("Beginning to match files from secondary dataset")
            for infos in filedetails.values():
                infos['Parents'] = []
                lumis = LumiList(runsAndLumis=infos['Lumis'])
                for secfilename, secinfos in moredetails.items():
                    if lumis & secinfos['lumiobj']:
                        infos['Parents'].append(secfilename)
            self.logger.info("Done matching files from secondary dataset")
            kwargs['task']['tm_use_parent'] = 1
    except Exception as ex: #TODO should we catch HttpException instead?
        self.logger.exception(ex)
        raise TaskWorkerException("The CRAB3 server backend could not contact DBS to get the files details (Lumis, events, etc).\n"+\
            "This is could be a temporary DBS glitch. Please try to submit a new task (resubmit will not work)"+\
            " and contact the experts if the error persists.\nError reason: %s" % str(ex))
    #TODO addo the nodes phedex so the user can check themselves
    if not filedetails:
        # Single format call; the original chained two %-operations to the
        # same final string, which is fragile and harder to read.
        raise TaskWorkerException("Cannot find any file inside the dataset. Please, check your dataset in DAS, https://cmsweb.cern.ch/das/request?instance=%s&input=dataset=%s.\n"
                                  "Aborting submission. Resubmitting your task will not help." % (self.dbsInstance, kwargs['task']['tm_input_dataset']))
    ## Format the output creating the data structures required by wmcore. Filters out invalid files,
    ## files whose block has no location, and figures out the PSN
    result = self.formatOutput(task=kwargs['task'],
                               requestname=kwargs['task']['tm_taskname'],
                               datasetfiles=filedetails,
                               locations=locationsMap,
                               tempDir=kwargs['tempDir'])
    if not result.result:
        raise TaskWorkerException("Cannot find any valid file inside the dataset. Please, check your dataset in DAS, https://cmsweb.cern.ch/das/request?instance=%s&input=dataset=%s.\n"
                                  "Aborting submission. Resubmitting your task will not help." % (self.dbsInstance, kwargs['task']['tm_input_dataset']))
    self.logger.debug("Got %s files" % len(result.result.getFiles()))
    return result
def dbs(self, dbs_url=None):
    """Return a DBSReader for *dbs_url*, defaulting to the initial task's url."""
    from WMCore.WorkQueue.WorkQueueUtils import get_dbs
    url = self.initialTask.dbsUrl() if dbs_url is None else dbs_url
    return get_dbs(url)
def dbs(self, dbs_url=None):
    """Get DBSReader.

    :param dbs_url: DBS instance url; when None, the initial task's url is used.
    :returns: the DBSReader produced by get_dbs() for that url.
    """
    # Function-scope import — presumably to avoid an import cycle or a hard
    # dependency at module load; confirm against WorkQueueUtils.
    from WMCore.WorkQueue.WorkQueueUtils import get_dbs
    if dbs_url is None:
        dbs_url = self.initialTask.dbsUrl()
    return get_dbs(dbs_url)
def execute(self, *args, **kwargs):
    """Perform the data discovery through DBS.

    Looks up the blocks of the task's input dataset, their DISK locations
    and the per-file details, refuses non-VALID datasets unless the task
    allows them, and returns the wmcore structures from formatOutput().

    Raises TaskWorkerException for every user/service error condition.
    """
    self.logger.info("Data discovery with DBS") ## to be changed into debug
    old_cert_val = os.getenv("X509_USER_CERT")
    old_key_val = os.getenv("X509_USER_KEY")
    try:
        # DBS3 requires X509_USER_CERT to be set - but we don't want to
        # leak that to other modules, hence the save/restore dance.
        os.environ['X509_USER_CERT'] = self.config.TaskWorker.cmscert
        os.environ['X509_USER_KEY'] = self.config.TaskWorker.cmskey
        dbsurl = self.config.Services.DBSUrl
        if kwargs['task']['tm_dbs_url']:
            dbsurl = kwargs['task']['tm_dbs_url']
        self.dbs = get_dbs(dbsurl)
    finally:
        # Restore the environment even if get_dbs() raised (the original
        # code skipped the restore on failure); pop() avoids a masking
        # KeyError when the assignment above never happened.
        if old_cert_val is not None:
            os.environ['X509_USER_CERT'] = old_cert_val
        else:
            os.environ.pop('X509_USER_CERT', None)
        if old_key_val is not None:
            os.environ['X509_USER_KEY'] = old_key_val
        else:
            os.environ.pop('X509_USER_KEY', None)
    self.logger.debug("Data discovery through %s for %s" % (self.dbs, kwargs['task']['tm_taskname']))
    datasetStatus = self.checkDatasetStatus(kwargs['task']['tm_input_dataset'], kwargs)
    try:
        # Get the list of blocks for the locations and then call dls.
        # The WMCore DBS3 implementation makes one call to dls for each block
        # with locations = True so we are using locations=False and looking up location later
        blocks = [x['Name'] for x in self.dbs.getFileBlocksInfo(kwargs['task']['tm_input_dataset'], locations=False)]
    except DBSReaderError as dbsexc:
        # Dataset not found in DBS is a known use case. A substring test is
        # required: str.find() returns -1 (truthy) on a miss and 0 (falsy)
        # when the pattern starts the message, so the old truth-test was
        # inverted.
        if 'No matching data' in str(dbsexc):
            raise TaskWorkerException("The CRAB3 server backend could not find dataset %s in this DBS instance: %s" % (kwargs['task']['tm_input_dataset'], dbsurl))
        raise
    #Create a map for block's locations: for each block get the list of locations
    try:
        locationsMap = self.dbs.listFileBlockLocation(list(blocks), phedexNodes=True)
    except Exception as ex: #TODO should we catch HttpException instead?
        self.logger.exception(ex)
        raise TaskWorkerException("The CRAB3 server backend could not get the location of the files from dbs or phedex.\n"+\
            "This is could be a temporary phedex/dbs glitch, please try to submit a new task (resubmit will not work)"+\
            " and contact the experts if the error persists.\nError reason: %s" % str(ex))
    self.keepOnlyDisks(locationsMap)
    if not locationsMap:
        msg = "Task could not be submitted because there is no DISK replica for dataset %s ." % (kwargs['task']['tm_input_dataset'])
        msg += " Please, check DAS, https://cmsweb.cern.ch/das, and make sure the dataset is accessible on DISK"
        msg += " You might want to contact your physics group if you need a disk replica."
        if self.otherLocations:
            msg += "\nN.B.: your dataset is stored at %s, but those are TAPE locations." % ','.join(sorted(self.otherLocations))
        raise TaskWorkerException(msg)
    if len(blocks) != len(locationsMap):
        self.logger.warning("The locations of some blocks have not been found: %s" % (set(blocks) - set(locationsMap)))
    try:
        filedetails = self.dbs.listDatasetFileDetails(kwargs['task']['tm_input_dataset'], True)
    except Exception as ex: #TODO should we catch HttpException instead?
        self.logger.exception(ex)
        # "deteails" typo fixed in the user-facing message.
        raise TaskWorkerException("The CRAB3 server backend could not contact DBS to get the files details (Lumis, events, etc).\n"+\
            "This is could be a temporary DBS glitch, please try to submit a new task (resubmit will not work)"+\
            " and contact the experts if the error persists.\nError reason: %s" % str(ex))
    #TODO addo the nodes phedex so the user can check themselves
    if not filedetails:
        raise TaskWorkerException("Cannot find any valid file inside the dataset. Please, check your dataset in DAS, https://cmsweb.cern.ch/das.\n"+\
            "Aborting submission. Resubmitting your task will not help.")
    if datasetStatus != 'VALID' and kwargs['task']['tm_nonvalid_input_dataset'] != 'T':
        msg = "CRAB refuses to run over the input dataset %s, because it is flagged as '%s' in DBS." % (kwargs['task']['tm_input_dataset'], datasetStatus)
        msg += " To allow CRAB to run over a dataset that is not flagged as 'VALID', set Data.allowNonValidInputDataset = True in the CRAB configuration."
        raise TaskWorkerException(msg)
    result = self.formatOutput(task=kwargs['task'],
                               requestname=kwargs['task']['tm_taskname'],
                               datasetfiles=filedetails,
                               locations=locationsMap)
    self.logger.debug("Got %s files" % len(result.result.getFiles()))
    return result
def execute(self, *args, **kwargs):
    """Perform the data discovery through DBS.

    Looks up the blocks of the task's input dataset, their DISK locations
    and the per-file details. When a secondary input dataset is given,
    secondary files whose lumi sections overlap a primary file become that
    file's parents. Returns the wmcore structures from formatOutput().

    Raises TaskWorkerException for every user/service error condition.
    """
    self.logger.info("Data discovery with DBS") ## to be changed into debug
    old_cert_val = os.getenv("X509_USER_CERT")
    old_key_val = os.getenv("X509_USER_KEY")
    try:
        # DBS3 requires X509_USER_CERT to be set - but we don't want to
        # leak that to other modules, hence the save/restore dance.
        os.environ['X509_USER_CERT'] = self.config.TaskWorker.cmscert
        os.environ['X509_USER_KEY'] = self.config.TaskWorker.cmskey
        dbsurl = self.config.Services.DBSUrl
        if kwargs['task']['tm_dbs_url']:
            dbsurl = kwargs['task']['tm_dbs_url']
        self.dbs = get_dbs(dbsurl)
        self.dbsInstance = self.dbs.dbs.serverinfo()["dbs_instance"]
    finally:
        # Restore the environment even if get_dbs() raised (the original
        # code skipped the restore on failure); pop() avoids a masking
        # KeyError when the assignment above never happened.
        if old_cert_val is not None:
            os.environ['X509_USER_CERT'] = old_cert_val
        else:
            os.environ.pop('X509_USER_CERT', None)
        if old_key_val is not None:
            os.environ['X509_USER_KEY'] = old_key_val
        else:
            os.environ.pop('X509_USER_KEY', None)
    self.logger.debug("Data discovery through %s for %s" % (self.dbs, kwargs['task']['tm_taskname']))
    self.checkDatasetStatus(kwargs['task']['tm_input_dataset'], kwargs)
    try:
        # Get the list of blocks for the locations and then call dls.
        # The WMCore DBS3 implementation makes one call to dls for each block
        # with locations = True so we are using locations=False and looking up location later
        blocks = [x['Name'] for x in self.dbs.getFileBlocksInfo(kwargs['task']['tm_input_dataset'], locations=False)]
    except DBSReaderError as dbsexc:
        # Dataset not found in DBS is a known use case. A substring test is
        # required: str.find() returns -1 (truthy) on a miss and 0 (falsy)
        # when the pattern starts the message, so the old truth-test was
        # inverted.
        if 'No matching data' in str(dbsexc):
            raise TaskWorkerException("The CRAB3 server backend could not find dataset %s in this DBS instance: %s" % (kwargs['task']['tm_input_dataset'], dbsurl))
        raise
    ## Create a map for block's locations: for each block get the list of locations.
    ## Note: listFileBlockLocation() gets first the locations from PhEDEx, and if no
    ## locations are found it gets the original locations from DBS. So it should
    ## never be the case at this point that some blocks have no locations.
    try:
        locationsMap = self.dbs.listFileBlockLocation(list(blocks), phedexNodes=True)
    except Exception as ex: #TODO should we catch HttpException instead?
        self.logger.exception(ex)
        raise TaskWorkerException("The CRAB3 server backend could not get the location of the files from dbs or phedex.\n"+\
            "This is could be a temporary phedex/dbs glitch, please try to submit a new task (resubmit will not work)"+\
            " and contact the experts if the error persists.\nError reason: %s" % str(ex))
    self.keepOnlyDisks(locationsMap)
    if not locationsMap:
        msg = "Task could not be submitted because there is no DISK replica for dataset %s ." % (kwargs['task']['tm_input_dataset'])
        msg += " Please, check DAS, https://cmsweb.cern.ch/das, and make sure the dataset is accessible on DISK"
        msg += " You might want to contact your physics group if you need a disk replica."
        if self.otherLocations:
            msg += "\nN.B.: your dataset is stored at %s, but those are TAPE locations." % ','.join(sorted(self.otherLocations))
        raise TaskWorkerException(msg)
    if len(blocks) != len(locationsMap):
        self.logger.warning("The locations of some blocks have not been found: %s" % (set(blocks) - set(locationsMap)))
    try:
        filedetails = self.dbs.listDatasetFileDetails(kwargs['task']['tm_input_dataset'], getParents=True, validFileOnly=0)
        secondary = kwargs['task'].get('tm_secondary_input_dataset', None)
        if secondary:
            # Match every primary file against the secondary-dataset files
            # whose lumi sections overlap; the matches become its parents.
            moredetails = self.dbs.listDatasetFileDetails(secondary, getParents=False, validFileOnly=0)
            for secinfos in moredetails.values():
                secinfos['lumiobj'] = LumiList(runsAndLumis=secinfos['Lumis'])
            self.logger.info("Beginning to match files from secondary dataset")
            for infos in filedetails.values():
                infos['Parents'] = []
                lumis = LumiList(runsAndLumis=infos['Lumis'])
                for secfilename, secinfos in moredetails.items():
                    # Fix: `lumis and secinfos['lumiobj']` was a boolean
                    # `and` — it never intersected the lumi lists and
                    # matched every non-empty secondary file. LumiList
                    # overlap is computed with the `&` operator.
                    if lumis & secinfos['lumiobj']:
                        infos['Parents'].append(secfilename)
            self.logger.info("Done matching files from secondary dataset")
            kwargs['task']['tm_use_parent'] = 1
    except Exception as ex: #TODO should we catch HttpException instead?
        self.logger.exception(ex)
        raise TaskWorkerException("The CRAB3 server backend could not contact DBS to get the files details (Lumis, events, etc).\n"+\
            "This is could be a temporary DBS glitch. Please try to submit a new task (resubmit will not work)"+\
            " and contact the experts if the error persists.\nError reason: %s" % str(ex))
    #TODO addo the nodes phedex so the user can check themselves
    if not filedetails:
        raise TaskWorkerException("Cannot find any file inside the dataset. Please, check your dataset in DAS, https://cmsweb.cern.ch/das/request?instance=%s&input=dataset=%s.\n"
                                  "Aborting submission. Resubmitting your task will not help." % (self.dbsInstance, kwargs['task']['tm_input_dataset']))
    ## Format the output creating the data structures required by wmcore. Filters out invalid files,
    ## files whose block has no location, and figures out the PSN
    result = self.formatOutput(task=kwargs['task'],
                               requestname=kwargs['task']['tm_taskname'],
                               datasetfiles=filedetails,
                               locations=locationsMap)
    if not result.result:
        raise TaskWorkerException("Cannot find any valid file inside the dataset. Please, check your dataset in DAS, https://cmsweb.cern.ch/das/request?instance=%s&input=dataset=%s.\n"
                                  "Aborting submission. Resubmitting your task will not help." % (self.dbsInstance, kwargs['task']['tm_input_dataset']))
    self.logger.debug("Got %s files" % len(result.result.getFiles()))
    return result