Example #1
    def formatOutput(self, task, requestname, datasetfiles, locations,
                     tempDir):
        """
        Receives as input the result of the data location
        discovery operations and fills in the WMCore objects.
        """
        self.logger.debug(" Formatting data discovery output ")
        # TEMPORARY
        pnn_psn_map = {}
        sbj = SiteDBJSON({
            "key": self.config.TaskWorker.cmskey,
            "cert": self.config.TaskWorker.cmscert
        })

        wmfiles = []
        event_counter = 0
        lumi_counter = 0
        uniquelumis = set()
        datasetLumis = {}
        ## Loop over the sorted list of files.
        for lfn, infos in datasetfiles.iteritems():
            ## Skip the file if the block has not been found or has no locations.
            if infos['BlockName'] not in locations or not locations[infos['BlockName']]:
                self.logger.warning(
                    "Skipping %s because its block (%s) has no locations" %
                    (lfn, infos['BlockName']))
                continue
            ## Skip the file if it is not in VALID state.
            if not infos.get('ValidFile', True):
                self.logger.warning("Skipping invalid file %s" % lfn)
                continue

            if task['tm_use_parent'] == 1 and len(infos['Parents']) == 0:
                raise TaskWorkerException(
                    "The CRAB3 server backend refuses to submit jobs to the Grid scheduler\n"
                    "because you specified useParents=True but some of your files have no "
                    "parents.\nExample: " + lfn)
            ## Create a WMCore File object.
            try:
                size = infos['FileSize']
                checksums = {
                    'Checksum': infos['Checksum'],
                    'Adler32': infos['Adler32'],
                    'Md5': infos['Md5']
                }
            except KeyError:
                # This is so that the task worker does not crash if an old version of WMCore
                # is used (the interface of an API suddenly changed). We may want to remove
                # the try/except and the following two lines eventually, but keep them for
                # the moment so other devels won't be affected.
                # See this WMCore commit: https://github.com/dmwm/WMCore/commit/2afc01ae571390f5fa009dd258be757adac89c28#diff-374b7a6640288184175057234e393e1cL204
                size = infos['Size']
                checksums = infos['Checksums']
            wmfile = File(lfn=lfn,
                          events=infos['NumberOfEvents'],
                          size=size,
                          checksums=checksums,
                          parents=infos['Parents'])
            wmfile['block'] = infos['BlockName']
            wmfile['locations'] = []
            for pnn in locations[infos['BlockName']]:
                if pnn and pnn not in pnn_psn_map:
                    self.logger.debug("Translating PNN %s" % pnn)
                    try:
                        pnn_psn_map[pnn] = sbj.PNNtoPSN(pnn)
                    except KeyError:
                        self.logger.error(
                            "Failed to translate %s to a CMS name through SiteDB"
                            % pnn)
                        pnn_psn_map[pnn] = ''
                    except httplib.HTTPException as ex:
                        self.logger.error("Couldn't map SE to site: %s" % pnn)
                        print("Couldn't map SE to site: %s" % pnn)
                        print("got problem: %s" % ex)
                        print("got another problem: %s" % ex.__dict__)
                if pnn and pnn in pnn_psn_map:
                    if isinstance(pnn_psn_map[pnn], list):
                        wmfile['locations'].extend(pnn_psn_map[pnn])
                    else:
                        wmfile['locations'].append(pnn_psn_map[pnn])
            wmfile['workflow'] = requestname
            event_counter += infos['NumberOfEvents']
            for run, lumis in infos['Lumis'].iteritems():
                datasetLumis.setdefault(run, []).extend(lumis)
                wmfile.addRun(Run(run, *lumis))
                for lumi in lumis:
                    uniquelumis.add((run, lumi))
                lumi_counter += len(lumis)
            wmfiles.append(wmfile)

        uniquelumis = len(uniquelumis)
        self.logger.debug('Tot events found: %d' % event_counter)
        self.logger.debug('Tot lumis found: %d' % uniquelumis)
        self.logger.debug('Duplicate lumis found: %d' %
                          (lumi_counter - uniquelumis))
        self.logger.debug('Tot files found: %d' % len(wmfiles))

        self.logger.debug(
            "Starting to create compact lumilists for input dataset")
        datasetLumiList = LumiList(runsAndLumis=datasetLumis)
        datasetLumis = datasetLumiList.getCompactList()
        datasetDuplicateLumis = datasetLumiList.getDuplicates().getCompactList()
        self.logger.debug(
            "Finished creating compact lumilists for input dataset")
        with open(os.path.join(tempDir, "input_dataset_lumis.json"),
                  "w") as fd:
            json.dump(datasetLumis, fd)
        with open(os.path.join(tempDir, "input_dataset_duplicate_lumis.json"),
                  "w") as fd:
            json.dump(datasetDuplicateLumis, fd)

        return Result(task=task,
                      result=Fileset(name='FilesToSplit', files=set(wmfiles)))
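
A note on the caching pattern in this example: each distinct PNN (PhEDEx Node Name) is translated to a PSN (Processing Site Name) at most once per call, and failed lookups are cached as an empty string so they are not retried for every file in the block. A minimal standalone sketch of the same idea, with a hypothetical translate() callable standing in for SiteDBJSON.PNNtoPSN:

    def cached_pnn_to_psn(pnns, translate):
        """Translate PNNs to PSNs, querying each distinct name only once."""
        cache = {}  # PNN -> PSN (str or list); '' marks a failed lookup
        resolved = []
        for pnn in pnns:
            if pnn not in cache:
                try:
                    cache[pnn] = translate(pnn)
                except KeyError:
                    cache[pnn] = ''  # remember the failure, don't retry it
            psn = cache[pnn]
            if isinstance(psn, list):
                resolved.extend(psn)
            elif psn:
                resolved.append(psn)
        return resolved

    # cached_pnn_to_psn(['T2_CH_CERN', 'T2_CH_CERN'], lookup) calls lookup() once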
Example #2
    def formatOutput(self, task, requestname, datasetfiles, locations,
                     tempDir):
        """
        Receives as input the result of the data location
        discovery operations and fills in the WMCore objects.
        """
        self.logger.debug(" Formatting data discovery output ")

        wmfiles = []
        event_counter = 0
        lumi_counter = 0
        uniquelumis = set()
        datasetLumis = {}
        blocksWithNoLocations = set()
        configDict = {
            "cacheduration": 1,
            "pycurl": True
        }  # cache duration is in hours
        with tempSetLogLevel(logger=self.logger, level=logging.ERROR):
            resourceCatalog = CRIC(logger=self.logger, configDict=configDict)
        # can't afford one message from CRIC per file, unless critical!
        ## Loop over the sorted list of files.
        with tempSetLogLevel(logger=self.logger, level=logging.ERROR):
            for lfn, infos in datasetfiles.iteritems():
                ## Skip the file if it is not in VALID state.
                if not infos.get('ValidFile', True):
                    self.logger.warning("Skipping invalid file %s", lfn)
                    continue
                ## Skip the file if the block has not been found or has no locations.
                if infos['BlockName'] not in locations or not locations[infos['BlockName']]:
                    self.logger.warning(
                        "Skipping %s because its block (%s) has no locations",
                        lfn, infos['BlockName'])
                    blocksWithNoLocations.add(infos['BlockName'])
                    continue
                if task['tm_use_parent'] == 1 and len(infos['Parents']) == 0:
                    self.logger.warning(
                        "Skipping %s because it has no parents", lfn)
                    continue
                ## Create a WMCore File object.
                size = infos['FileSize']
                checksums = {
                    'Checksum': infos['Checksum'],
                    'Adler32': infos['Adler32'],
                    'Md5': infos['Md5']
                }
                wmfile = File(lfn=lfn,
                              events=infos['NumberOfEvents'],
                              size=size,
                              checksums=checksums,
                              parents=infos['Parents'])
                wmfile['block'] = infos['BlockName']
                try:
                    wmfile['locations'] = resourceCatalog.PNNstoPSNs(
                        locations[wmfile['block']])
                except Exception as ex:
                    self.logger.error(
                        "Impossible translating %s to a CMS name through CMS Resource Catalog",
                        locations[wmfile['block']])
                    self.logger.error("got this exception:\n %s", ex)
                    raise
                wmfile['workflow'] = requestname
                event_counter += infos['NumberOfEvents']
                for run, lumis in infos['Lumis'].iteritems():
                    datasetLumis.setdefault(run, []).extend(lumis)
                    wmfile.addRun(Run(run, *lumis))
                    for lumi in lumis:
                        uniquelumis.add((run, lumi))
                    lumi_counter += len(lumis)
                wmfiles.append(wmfile)

        if blocksWithNoLocations:
            msg = "%d blocks will be skipped because are not completely replicated on DISK: %s" % (
                len(blocksWithNoLocations), list(blocksWithNoLocations))
            self.logger.warning(msg)
            self.uploadWarning(msg, task['user_proxy'], task['tm_taskname'])

        uniquelumis = len(uniquelumis)
        self.logger.debug('Tot events found: %d', event_counter)
        self.logger.debug('Tot lumis found: %d', uniquelumis)
        self.logger.debug('Duplicate lumis found: %d',
                          (lumi_counter - uniquelumis))
        self.logger.debug('Tot files found: %d', len(wmfiles))

        self.logger.debug(
            "Starting to create compact lumilists for input dataset")
        datasetLumiList = LumiList(runsAndLumis=datasetLumis)
        datasetLumis = datasetLumiList.getCompactList()
        datasetDuplicateLumis = datasetLumiList.getDuplicates().getCompactList()
        self.logger.debug(
            "Finished creating compact lumilists for input dataset")
        with open(os.path.join(tempDir, "input_dataset_lumis.json"),
                  "w") as fd:
            json.dump(datasetLumis, fd)
        with open(os.path.join(tempDir, "input_dataset_duplicate_lumis.json"),
                  "w") as fd:
            json.dump(datasetDuplicateLumis, fd)

        return Result(task=task,
                      result=Fileset(name='FilesToSplit', files=set(wmfiles)))
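
This variant wraps both the CRIC construction and the whole file loop in tempSetLogLevel so that per-file chatter below ERROR is suppressed. tempSetLogLevel is imported from the CRAB server utilities here; assuming its contract is "raise the logger's level on entry, restore it on exit, even on exceptions", a minimal sketch of such a context manager could look like:

    import logging
    from contextlib import contextmanager

    @contextmanager
    def temp_set_log_level(logger, level):
        """Temporarily change a logger's level; restore it on exit."""
        saved = logger.level
        logger.setLevel(level)
        try:
            yield
        finally:
            logger.setLevel(saved)

    # usage: silence anything below ERROR while looping over many files
    # with temp_set_log_level(logging.getLogger(__name__), logging.ERROR):
    #     process_files()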
Example #3
    def formatOutput(self, task, requestname, datasetfiles, locations, tempDir):
        """
        Receives as input the result of the data location
        discovery operations and fills in the WMCore objects.
        """
        self.logger.debug(" Formatting data discovery output ")

        wmfiles = []
        event_counter = 0
        lumi_counter = 0
        uniquelumis = set()
        datasetLumis = {}
        blocksWithNoLocations = set()
        configDict = {"cacheduration": 1, "pycurl": True} # cache duration is in hours
        resourceCatalog = CRIC(logger=self.logger, configDict=configDict)
        # can't afford one message from CRIC per file, unless critical!
        ## Loop over the sorted list of files.
        with tempSetLogLevel(logger=self.logger, level=logging.ERROR):
            for lfn, infos in datasetfiles.iteritems():
                ## Skip the file if it is not in VALID state.
                if not infos.get('ValidFile', True):
                    self.logger.warning("Skipping invalid file %s", lfn)
                    continue
                ## Skip the file if the block has not been found or has no locations.
                if infos['BlockName'] not in locations or not locations[infos['BlockName']]:
                    self.logger.warning("Skipping %s because its block (%s) has no locations", lfn, infos['BlockName'])
                    blocksWithNoLocations.add(infos['BlockName'])
                    continue

                if task['tm_use_parent'] == 1 and len(infos['Parents']) == 0:
                    raise TaskWorkerException(
                            "The CRAB3 server backend refuses to submit jobs to the Grid scheduler\n"
                            "because you specified useParents=True but some of your files have no "
                            "parents.\nExample: " + lfn)
                ## Create a WMCore File object.
                try:
                    size = infos['FileSize']
                    checksums = {'Checksum': infos['Checksum'], 'Adler32': infos['Adler32'], 'Md5': infos['Md5']}
                except KeyError:
                    # This is so that the task worker does not crash if an old version of WMCore
                    # is used (the interface of an API suddenly changed). We may want to remove
                    # the try/except and the following two lines eventually, but keep them for
                    # the moment so other devels won't be affected.
                    # See this WMCore commit: https://github.com/dmwm/WMCore/commit/2afc01ae571390f5fa009dd258be757adac89c28#diff-374b7a6640288184175057234e393e1cL204
                    size = infos['Size']
                    checksums = infos['Checksums']
                wmfile = File(lfn=lfn, events=infos['NumberOfEvents'], size=size, checksums=checksums, parents=infos['Parents'])
                wmfile['block'] = infos['BlockName']
                try:
                    wmfile['locations'] = resourceCatalog.PNNstoPSNs(locations[wmfile['block']])
                except Exception as ex:
                    self.logger.error("Impossible translating %s to a CMS name through CMS Resource Catalog", locations[wmfile['block']] )
                    self.logger.error("got this exception:\n %s", ex)
                    raise
                wmfile['workflow'] = requestname
                event_counter += infos['NumberOfEvents']
                for run, lumis in infos['Lumis'].iteritems():
                    datasetLumis.setdefault(run, []).extend(lumis)
                    wmfile.addRun(Run(run, *lumis))
                    for lumi in lumis:
                        uniquelumis.add((run, lumi))
                    lumi_counter += len(lumis)
                wmfiles.append(wmfile)

        if blocksWithNoLocations:
            msg = "The locations of some blocks (%d) have not been found: %s" % (len(blocksWithNoLocations), list(blocksWithNoLocations))
            self.logger.warning(msg)
            self.uploadWarning(msg, task['user_proxy'], task['tm_taskname'])

        uniquelumis = len(uniquelumis)
        self.logger.debug('Tot events found: %d', event_counter)
        self.logger.debug('Tot lumis found: %d', uniquelumis)
        self.logger.debug('Duplicate lumis found: %d', (lumi_counter - uniquelumis))
        self.logger.debug('Tot files found: %d', len(wmfiles))

        self.logger.debug("Starting to create compact lumilists for input dataset")
        datasetLumiList = LumiList(runsAndLumis=datasetLumis)
        datasetLumis = datasetLumiList.getCompactList()
        datasetDuplicateLumis = datasetLumiList.getDuplicates().getCompactList()
        self.logger.debug("Finished to create compact lumilists for input dataset")
        with open(os.path.join(tempDir, "input_dataset_lumis.json"), "w") as fd:
            json.dump(datasetLumis, fd)
        with open(os.path.join(tempDir, "input_dataset_duplicate_lumis.json"), "w") as fd:
            json.dump(datasetDuplicateLumis, fd)

        return Result(task=task, result=Fileset(name='FilesToSplit', files=set(wmfiles)))
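
Worth noting across all of these variants: duplicate lumis are detected with plain set arithmetic. lumi_counter counts every (run, lumi) pair encountered, uniquelumis keeps only the distinct ones, and their difference is the duplicate count. The same bookkeeping in isolation, on made-up data:

    lumis_per_file = {
        'file_a.root': {1: [10, 11, 12]},      # run -> lumi sections
        'file_b.root': {1: [12, 13], 2: [1]},  # lumi 12 of run 1 repeats
    }

    lumi_counter = 0
    uniquelumis = set()
    for run_lumis in lumis_per_file.values():
        for run, lumis in run_lumis.items():
            for lumi in lumis:
                uniquelumis.add((run, lumi))
            lumi_counter += len(lumis)

    print(lumi_counter - len(uniquelumis))  # -> 1 duplicate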
Example #4
    def formatOutput(self, task, requestname, datasetfiles, locations,
                     tempDir):
        """
        Receives as input the result of the data location
        discovery operations and fills in the WMCore objects.
        """
        self.logger.debug(" Formatting data discovery output ")

        wmfiles = []
        event_counter = 0
        lumi_counter = 0
        uniquelumis = set()
        datasetLumis = {}
        blocksWithNoLocations = set()
        configDict = {
            "cacheduration": 1,
            "pycurl": True
        }  # cache duration is in hours
        with tempSetLogLevel(logger=self.logger, level=logging.ERROR):
            resourceCatalog = CRIC(logger=self.logger, configDict=configDict)
        # can't afford one message from CRIC per file, unless critical!
        ## Loop over the sorted list of files.
        with tempSetLogLevel(logger=self.logger, level=logging.ERROR):
            for lfn, infos in datasetfiles.iteritems():
                ## Skip the file if it is not in VALID state.
                if not infos.get('ValidFile', True):
                    self.logger.warning("Skipping invalid file %s", lfn)
                    continue
                ## Skip the file if the block has not been found or has no locations.
                if infos['BlockName'] not in locations or not locations[infos['BlockName']]:
                    self.logger.warning(
                        "Skipping %s because its block (%s) has no locations",
                        lfn, infos['BlockName'])
                    blocksWithNoLocations.add(infos['BlockName'])
                    continue

                if task['tm_use_parent'] == 1 and len(infos['Parents']) == 0:
                    raise TaskWorkerException(
                        "The CRAB3 server backend refuses to submit jobs to the Grid scheduler\n"
                        "because you specified useParents=True but some of your files have no "
                        "parents.\nExample: " + lfn)
                ## Create a WMCore File object.
                try:
                    size = infos['FileSize']
                    checksums = {
                        'Checksum': infos['Checksum'],
                        'Adler32': infos['Adler32'],
                        'Md5': infos['Md5']
                    }
                except KeyError:
                    # This is so that the task worker does not crash if an old version of WMCore
                    # is used (the interface of an API suddenly changed). We may want to remove
                    # the try/except and the following two lines eventually, but keep them for
                    # the moment so other devels won't be affected.
                    # See this WMCore commit: https://github.com/dmwm/WMCore/commit/2afc01ae571390f5fa009dd258be757adac89c28#diff-374b7a6640288184175057234e393e1cL204
                    size = infos['Size']
                    checksums = infos['Checksums']
                wmfile = File(lfn=lfn,
                              events=infos['NumberOfEvents'],
                              size=size,
                              checksums=checksums,
                              parents=infos['Parents'])
                wmfile['block'] = infos['BlockName']
                try:
                    wmfile['locations'] = resourceCatalog.PNNstoPSNs(
                        locations[wmfile['block']])
                except Exception as ex:
                    self.logger.error(
                        "Impossible translating %s to a CMS name through CMS Resource Catalog",
                        locations[wmfile['block']])
                    self.logger.error("got this exception:\n %s", ex)
                    raise
                wmfile['workflow'] = requestname
                event_counter += infos['NumberOfEvents']
                for run, lumis in infos['Lumis'].iteritems():
                    datasetLumis.setdefault(run, []).extend(lumis)
                    wmfile.addRun(Run(run, *lumis))
                    for lumi in lumis:
                        uniquelumis.add((run, lumi))
                    lumi_counter += len(lumis)
                wmfiles.append(wmfile)

        if blocksWithNoLocations:
            msg = "%d blocks will be skipped because are not completely replicated on DISK: %s" % (
                len(blocksWithNoLocations), list(blocksWithNoLocations))
            self.logger.warning(msg)
            self.uploadWarning(msg, task['user_proxy'], task['tm_taskname'])

        uniquelumis = len(uniquelumis)
        self.logger.debug('Tot events found: %d', event_counter)
        self.logger.debug('Tot lumis found: %d', uniquelumis)
        self.logger.debug('Duplicate lumis found: %d',
                          (lumi_counter - uniquelumis))
        self.logger.debug('Tot files found: %d', len(wmfiles))

        self.logger.debug(
            "Starting to create compact lumilists for input dataset")
        datasetLumiList = LumiList(runsAndLumis=datasetLumis)
        datasetLumis = datasetLumiList.getCompactList()
        datasetDuplicateLumis = datasetLumiList.getDuplicates().getCompactList()
        self.logger.debug(
            "Finished creating compact lumilists for input dataset")
        with open(os.path.join(tempDir, "input_dataset_lumis.json"),
                  "w") as fd:
            json.dump(datasetLumis, fd)
        with open(os.path.join(tempDir, "input_dataset_duplicate_lumis.json"),
                  "w") as fd:
            json.dump(datasetDuplicateLumis, fd)

        return Result(task=task,
                      result=Fileset(name='FilesToSplit', files=set(wmfiles)))
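
The compact lumi lists written here come from WMCore's LumiList.getCompactList(), which represents each run's lumis as [first, last] ranges. A standalone sketch of roughly that transformation (illustrative only, not WMCore's implementation):

    def compact(run_lumis):
        """Collapse per-run lumi lists into sorted [first, last] ranges."""
        out = {}
        for run, lumis in run_lumis.items():
            ranges = []
            for lumi in sorted(set(lumis)):
                if ranges and lumi == ranges[-1][1] + 1:
                    ranges[-1][1] = lumi         # extend the current range
                else:
                    ranges.append([lumi, lumi])  # start a new range
            out[str(run)] = ranges
        return out

    # compact({1: [10, 11, 12, 14]}) -> {'1': [[10, 12], [14, 14]]}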
Example #5
    def formatOutput(self, task, requestname, datasetfiles, locations, tempDir):
        """
        Receives as input the result of the data location
        discovery operations and fills in the WMCore objects.
        """
        self.logger.debug(" Formatting data discovery output ")
        # TEMPORARY
        pnn_psn_map = {}
        sbj = SiteDBJSON({"key": self.config.TaskWorker.cmskey, "cert": self.config.TaskWorker.cmscert})

        wmfiles = []
        event_counter = 0
        lumi_counter = 0
        uniquelumis = set()
        datasetLumis = {}
        ## Loop over the sorted list of files.
        for lfn, infos in datasetfiles.iteritems():
            ## Skip the file if the block has not been found or has no locations.
            if infos['BlockName'] not in locations or not locations[infos['BlockName']]:
                self.logger.warning("Skipping %s because its block (%s) has no locations" % (lfn, infos['BlockName']))
                continue
            ## Skip the file if it is not in VALID state.
            if not infos.get('ValidFile', True):
                self.logger.warning("Skipping invalid file %s" % lfn)
                continue

            if task['tm_use_parent'] == 1 and len(infos['Parents']) == 0:
                raise TaskWorkerException(
                        "The CRAB3 server backend refuses to submit jobs to the Grid scheduler\n"
                        "because you specified useParents=True but some of your files have no "
                        "parents.\nExample: " + lfn)
            ## Create a WMCore File object.
            try:
                size = infos['FileSize']
                checksums = {'Checksum': infos['Checksum'], 'Adler32': infos['Adler32'], 'Md5': infos['Md5']}
            except KeyError:
                # This is so that the task worker does not crash if an old version of WMCore
                # is used (the interface of an API suddenly changed). We may want to remove
                # the try/except and the following two lines eventually, but keep them for
                # the moment so other devels won't be affected.
                # See this WMCore commit: https://github.com/dmwm/WMCore/commit/2afc01ae571390f5fa009dd258be757adac89c28#diff-374b7a6640288184175057234e393e1cL204
                size = infos['Size']
                checksums = infos['Checksums']
            wmfile = File(lfn=lfn, events=infos['NumberOfEvents'], size=size, checksums=checksums, parents=infos['Parents'])
            wmfile['block'] = infos['BlockName']
            wmfile['locations'] = []
            for pnn in locations[infos['BlockName']]:
                if pnn and pnn not in pnn_psn_map:
                    self.logger.debug("Translating PNN %s" %pnn)
                    try:
                        pnn_psn_map[pnn] = sbj.PNNtoPSN(pnn)
                    except KeyError:
                        self.logger.error("Impossible translating %s to a CMS name through SiteDB" %pnn)
                        pnn_psn_map[pnn] = ''
                    except httplib.HTTPException as ex:
                        self.logger.error("Couldn't map SE to site: %s" % pnn)
                        print("Couldn't map SE to site: %s" % pnn)
                        print("got problem: %s" % ex)
                        print("got another problem: %s" % ex.__dict__)
                if pnn and pnn in pnn_psn_map:
                    if isinstance(pnn_psn_map[pnn], list):
                        wmfile['locations'].extend(pnn_psn_map[pnn])
                    else:
                        wmfile['locations'].append(pnn_psn_map[pnn])
            wmfile['workflow'] = requestname
            event_counter += infos['NumberOfEvents']
            for run, lumis in infos['Lumis'].iteritems():
                datasetLumis.setdefault(run, []).extend(lumis)
                wmfile.addRun(Run(run, *lumis))
                for lumi in lumis:
                    uniquelumis.add((run, lumi))
                lumi_counter += len(lumis)
            wmfiles.append(wmfile)

        uniquelumis = len(uniquelumis)
        self.logger.debug('Tot events found: %d' % event_counter)
        self.logger.debug('Tot lumis found: %d' % uniquelumis)
        self.logger.debug('Duplicate lumis found: %d' % (lumi_counter - uniquelumis))
        self.logger.debug('Tot files found: %d' % len(wmfiles))

        self.logger.debug("Starting to create compact lumilists for input dataset")
        datasetLumiList = LumiList(runsAndLumis=datasetLumis)
        datasetLumis = datasetLumiList.getCompactList()
        datasetDuplicateLumis = datasetLumiList.getDuplicates().getCompactList()
        self.logger.debug("Finished to create compact lumilists for input dataset")
        with open(os.path.join(tempDir, "input_dataset_lumis.json"), "w") as fd:
            json.dump(datasetLumis, fd)
        with open(os.path.join(tempDir, "input_dataset_duplicate_lumis.json"), "w") as fd:
            json.dump(datasetDuplicateLumis, fd)

        return Result(task=task, result=Fileset(name='FilesToSplit', files=set(wmfiles)))
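
All five variants end the same way: two JSON files with the compact lumi lists are written into tempDir, and a Fileset wrapping the WMCore File objects is returned. A quick, hypothetical helper to inspect those files afterwards (file names taken from the code above):

    import json
    import os

    def show_lumi_files(tempDir):
        """Print the compact lumi lists that formatOutput wrote to tempDir."""
        for name in ("input_dataset_lumis.json",
                     "input_dataset_duplicate_lumis.json"):
            with open(os.path.join(tempDir, name)) as fd:
                print("%s -> %s" % (name, json.load(fd)))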