Example #1
0
def getLumiListInValidFiles(dataset, dbsurl = 'phys03'):
    """
    Get the runs/lumis in the valid files of a given dataset.

    dataset: the dataset name as published in DBS
    dbsurl: the DBS URL or DBS prod instance

    Returns a LumiList object.
    Raises ClientException if the DBS file query fails or returns no files.
    """
    # Accept either a known instance alias or a full URL.
    dbsurl = DBSURLS['reader'].get(dbsurl, dbsurl)
    dbs3api = DbsApi(url=dbsurl)
    try:
        # Ask for all the files (validFileOnly=0); the valid ones are selected below.
        files = dbs3api.listFileArray(dataset=dataset, validFileOnly=0, detail=True)
    except Exception as ex:
        msg  = "Got DBS client error requesting details of dataset '%s' on DBS URL '%s': %s" % (dataset, dbsurl, ex)
        msg += "\n%s" % (traceback.format_exc())
        raise ClientException(msg)
    if not files:
        msg = "Dataset '%s' not found in DBS URL '%s'." % (dataset, dbsurl)
        raise ClientException(msg)
    # Use a set: the membership test below runs once per file/lumi record, and a
    # list lookup there would make the whole scan quadratic in the number of files.
    validFiles = set(f['logical_file_name'] for f in files if f['is_file_valid'])
    blocks = set(f['block_name'] for f in files)
    runLumiPairs = []
    for blockName in blocks:
        for f in dbs3api.listFileLumis(block_name=blockName):
            if f['logical_file_name'] in validFiles:
                run = f['run_num']
                runLumiPairs.extend((run, lumi) for lumi in f['lumi_section_num'])

    return LumiList(lumis=runLumiPairs)
Example #2
0
def getCMSRunAnalysisOpts(ad, dag, job=1, events=10):
    """
    Parse the job ad to obtain the arguments that were passed to condor.

    ad: path of the job ad file, read as 'key = value' lines
    dag: path of the DAG description, searched for the 'VARS Job<job>' line
    job: the job whose VARS line provides the per-job values
    events: number of events to process; lastEvent is set to int(events) + 1

    Returns the list of arguments from the 'Arguments' setting, with every
    $(name) reference replaced by the corresponding parsed value.
    Raises ClientException if the DAG has no VARS line for the requested job.
    """

    set_re = re.compile(r'\+?(\w+)\s*=\s*(.*)$')

    info = {}
    with open(ad) as f:
        for line in f:
            m = set_re.match(line)
            if not m:
                continue
            key, value = m.groups()
            # Somehow, Condor likes doubled double quotes?
            info[key] = value.strip("'\"").replace('""', '"')

    jobName = 'Job{job}'.format(job=job)
    with open(dag) as f:
        for line in f:
            # Compare the whole job-name token: a plain prefix test on
            # 'VARS Job1' would also accept the lines of Job10, Job11, ...
            if line.split()[:2] == ['VARS', jobName]:
                break
        else:
            raise ClientException('Dry run failed to execute parse DAG description.')
        # Values from the VARS line override those read from the job ad.
        for setting in shlex.split(line):
            m = set_re.match(setting)
            if not m:
                continue
            key, value = m.groups()
            info[key] = value.replace('""', '"')

    info.update({'CRAB_Id': '0', 'firstEvent': '1', 'lastEvent': str(int(events) + 1)})

    args = shlex.split(info['Arguments'])
    def repl(match):
        return info[match.group(1)]
    return [re.sub(r'\$\((\w+)\)', repl, arg) for arg in args]
Example #3
0
    def processAndStoreJobIds(self):
        """
        Call the status command to check that the jobids passed by the user are in a valid
        state to retrieve files. Otherwise, if no jobids are passed by the user, populate the
        list with all possible jobids.

        Also store some information which is used later when deciding the correct pfn.

        Raises ClientException if the status command returns no job list, and
        ConfigurationException if a requested job is not in a retrievable state
        or if more than 500 jobs are selected.
        """
        statusDict = getMutedStatusInfo(self.logger)
        jobList = statusDict['jobList']
        if not jobList:
            msg = "Cannot retrieve job list from the status command."
            raise ClientException(msg)

        # Each jobList entry is read as (state, jobid).
        transferringIds = [x[1] for x in jobList if x[0] in ['transferring', 'cooloff', 'held']]
        finishedIds = [x[1] for x in jobList if x[0] in ['finished', 'failed', 'transferred']]
        possibleJobIds = transferringIds + finishedIds

        if self.options.jobids:
            for jobid in self.options.jobids:
                if str(jobid[1]) not in possibleJobIds:
                    raise ConfigurationException("The job with id %s is not in a valid state to retrieve output files" % jobid[1])
        else:
            ## If the user does not give us jobids, set them to all possible ids.
            self.options.jobids = [('jobids', jobid) for jobid in possibleJobIds]

        if len(self.options.jobids) > 500:
            msg = "You requested to process files for %d jobs." % len(self.options.jobids)
            msg += "\nThe limit is 500. Please use the '--jobids'"
            # Leading space added: the two pieces used to be glued as "'--jobids'option".
            msg += " option to select up to 500 jobs."
            raise ConfigurationException(msg)

        self.transferringIds = transferringIds
Example #4
0
    def executeTestRun(self, inputArgs, jobnr):
        """ Execute a test run calling CMSRunAnalysis.sh

        inputArgs: sequence of per-job argument dictionaries
        jobnr: number of the job to run, counted from 1

        Raises ClientException if the script exits with a non-zero return code.
        """
        # dict.update() returns None, so its result cannot be used as the child
        # environment. Update the process environment as before, then hand an
        # explicit copy to the subprocess.
        os.environ.update({'CRAB3_RUNTIME_DEBUG': 'True', '_CONDOR_JOB_AD': 'Job.submit'})
        env = os.environ.copy()

        #jobs goes from 1 to N, inputArgs from 0 to N-1
        jobArgs = inputArgs[jobnr-1]

        opts = [
            os.path.join(os.getcwd(), 'TweakPSet.py'),
            '-a %s' % jobArgs['CRAB_Archive'],
            '-o %s' % jobArgs['CRAB_AdditionalOutputFiles'],
            '--sourceURL=%s' % jobArgs['CRAB_ISB'],
            '--location=%s' % os.getcwd(),
            '--inputFile=%s' % jobArgs['inputFiles'],
            '--runAndLumis=%s' % jobArgs['runAndLumiMask'],
            '--firstEvent=%s' % jobArgs['firstEvent'],
            '--lastEvent=%s' % jobArgs['lastEvent'],
            '--firstLumi=%s' % jobArgs['firstLumi'],
            '--firstRun=%s' % jobArgs['firstRun'],
            '--seeding=%s' % jobArgs['seeding'],
            '--lheInputFiles=%s' % jobArgs['lheInputFiles'],
            '--oneEventMode=0',
            '--eventsPerLumi=%s' % jobArgs['eventsPerLumi'],
            '--maxRuntime=-1',
            '--jobNumber=%s' % (jobnr-1),
            '--cmsswVersion=%s' % jobArgs['CRAB_JobSW'],
            '--scramArch=%s' % jobArgs['CRAB_JobArch'],
            '--scriptExe=%s' % jobArgs['scriptExe'],
            '--scriptArgs=%s' % jobArgs['scriptArgs'],
        ]

        s = subprocess.Popen(['sh', 'CMSRunAnalysis.sh'] + opts, env=env, stderr=subprocess.PIPE, stdout=subprocess.PIPE)
        out, err = s.communicate()
        self.logger.debug(out)
        self.logger.debug(err)
        if s.returncode != 0:
            raise ClientException('Failed to execute local test run:\n StdOut: %s\n StdErr: %s' % (out, err))
Example #5
0
def getFileFromURL(url, filename = None, proxyfilename = None):
    """
    Read the content of a URL and copy it into a file.

    url: the link you would like to retrieve
    filename: the local filename where the url is saved to. Defaults to the filename in the url
    proxyfilename: the x509 proxy certificate to be used in case auth is required

    Returns the filename. If no data is retrieved, no file is written but the
    filename is returned all the same.
    Raises ClientException if the file cannot be written.
    """
    if filename is None:
        # The URL only needs to be parsed when the name has to be derived from it.
        filename = os.path.basename(urlparse(url).path)

    data = getDataFromURL(url, proxyfilename)

    if data:
        try:
            with open(filename, 'a') as f:
                # The file is opened in append mode and truncated explicitly,
                # so any previous content is discarded before writing.
                f.seek(0)
                f.truncate()
                f.write(data)
        except IOError as ex:
            logger = logging.getLogger('CRAB3')
            logger.exception(ex)
            msg = "Error while writing %s. Got:\n%s" \
                    % (filename, ex)
            raise ClientException(msg)

    return filename
Example #6
0
    def getInputFiles(self):
        """ Get the InputFiles.tar.gz and extract the necessary files

        Reads the task status from the task DB and, depending on it, either
        downloads and unpacks the dry run sandbox (status UPLOADED) or downloads
        InputFiles.tar.gz from the task web directory (status SUBMITTED). The
        input, CMSRunAnalysis and sandbox tarballs are then extracted in the
        current working directory. Also stores the task destination site in
        self.destination.

        Raises ClientException if the task is in any other status.
        """
        taskname = self.cachedinfo['RequestName']

        #Get task status from the task DB
        self.logger.debug("Getting status from the DB")
        uri = self.getUrl(self.instance, resource = 'task')
        serverFactory = CRABClient.Emulator.getEmulator('rest')
        server = serverFactory(self.serverurl, self.proxyfilename, self.proxyfilename, version = __version__)
        crabDBInfo, _, _ =  server.get(uri, data = {'subresource': 'search', 'workflow': taskname})
        status = getColumn(crabDBInfo, 'tm_task_status')
        self.destination = getColumn(crabDBInfo, 'tm_asyncdest')

        # NOTE(review): extractall() below trusts the member paths of archives that
        # were downloaded from a remote service - confirm these sources are trusted.
        inputsFilename = os.path.join(os.getcwd(), 'InputFiles.tar.gz')
        if status == 'UPLOADED':
            filecacheurl = getColumn(crabDBInfo, 'tm_cache_url')
            ufc = CRABClient.Emulator.getEmulator('ufc')({'endpoint' : filecacheurl, "pycurl": True})
            self.logger.debug("Downloading and extracting 'dry-run-sandbox.tar.gz' from %s" % filecacheurl)
            ufc.downloadLog('dry-run-sandbox.tar.gz', output=os.path.join(os.getcwd(), 'dry-run-sandbox.tar.gz'))
            with tarfile.open('dry-run-sandbox.tar.gz') as tf:
                tf.extractall()
        elif status == 'SUBMITTED':
            webdir = getProxiedWebDir(taskname, self.serverurl, uri, self.proxyfilename, self.logger.debug)
            if not webdir:
                # Fall back to the web directory recorded in the task DB.
                webdir = getColumn(crabDBInfo, 'tm_user_webdir')
            self.logger.debug("Downloading 'InputFiles.tar.gz' from %s" % webdir)
            getFileFromURL(webdir + '/InputFiles.tar.gz', inputsFilename, self.proxyfilename)
        else:
            raise ClientException('Can only execute jobs from tasks in status SUBMITTED or UPLOADED. Current status is %s' % status)

        for name in [inputsFilename, 'CMSRunAnalysis.tar.gz', 'sandbox.tar.gz']:
            with tarfile.open(name) as tf:
                tf.extractall()
Example #7
0
    def validateOptions(self):
        """
        Run the SubCommand validation, then check that the --jobid option,
        when given, can be converted to an integer.

        Raises ClientException if the conversion fails with a ValueError.
        """
        SubCommand.validateOptions(self)

        requestedJobId = self.options.jobid
        if requestedJobId is None:
            # Nothing to check when the option was not given.
            return
        try:
            int(requestedJobId)
        except ValueError:
            raise ClientException("The --jobid option has to be an integer")
Example #8
0
def getFileFromURL(url, filename = None, proxyfilename = None):
    """
    Read the content of a URL and copy it into a file.

    url: the link you would like to retrieve
    filename: the local filename where the url is saved to. Defaults to the filename in the url
    proxyfilename: the x509 proxy certificate to be used in case auth is required

    Return the filename used to save the file or raises ClientException in case of errors (a status attribute is added if the error is an http one).
    """
    parsedurl = urlparse(url)
    if filename is None:
        path = parsedurl.path
        filename = os.path.basename(path)
    socket = None
    try:
        opener = urllib.URLopener(key_file = proxyfilename, cert_file = proxyfilename)
        socket = opener.open(url)
        status = socket.getcode()
        # Keep the response headers: they are reported below, after the handle is closed.
        headers = socket.info()
        # Read the file by chunks instead of all at once, appending each chunk to the final result.
        # This lowers the memory overhead, which can be a problem with big files.
        with open (filename, 'a') as f:
            # Discard any previous content of the file before writing.
            f.seek(0)
            f.truncate()
            while True:
                piece = socket.read(1024)
                if not piece:
                    break
                f.write(piece)
    except IOError as ioex:
        msg = "Error while trying to retrieve file from %s: %s" % (url, ioex)
        msg += "\nMake sure the URL is correct."
        exc = ClientException(msg)
        # Go through .args so that an exception carrying fewer than two
        # arguments does not raise a new error from inside this handler.
        ioargs = ioex.args
        if len(ioargs) > 1 and ioargs[0] == 'http error':
            exc.status = ioargs[1]
        raise exc
    except Exception as ex:
        tblogger = logging.getLogger('CRAB3')
        tblogger.exception(ex)
        msg = "Unexpected error while trying to retrieve file from %s: %s" % (url, ex)
        raise ClientException(msg)
    finally:
        # Always release the connection, whether or not the download succeeded.
        if socket is not None:
            socket.close()
    if status != 200 and parsedurl.scheme in ['http', 'https']:
        exc = ClientException("Unable to retieve the file from %s. HTTP status code %s. HTTP content: %s" % (url, status, headers))
        exc.status = status
        raise exc
    return filename
Example #9
0
 def myPerform(self, curl, url):
     """
     Run curl.perform() and turn a pycurl error into a ClientException.

     curl: the object whose perform() method is called
     url: the URL being retrieved, used only in the error message
     """
     import pycurl
     try:
         curl.perform()
     except pycurl.error as curlError:
         template = ("Failed to contact Grid scheduler when getting URL %s. "
                     "This might be a temporary error, please retry later and "
                     "contact %s if the error persist. Error from curl: %s")
         raise ClientException(template % (url, FEEDBACKMAIL, str(curlError)))
Example #10
0
    def executeTestRun(self, filecacheurl):
        """
        Downloads the dry run tarball from the User File Cache and unpacks it in a temporary directory.
        Runs a trial to obtain the performance report. Repeats trial with successively larger input events
        until a job length of maxSeconds is reached (this improves accuracy for fast-running CMSSW parameter sets.)

        filecacheurl: endpoint of the User File Cache holding the dry run sandbox

        Returns a (splitting, report) tuple; report is None when the skipEstimates option is set.
        Raises ClientException if a trial exits with a non-zero return code.
        """
        ufc = CRABClient.Emulator.getEmulator('ufc')({'endpoint' : filecacheurl, "pycurl": True})
        cwd = os.getcwd()
        # Create the directory before entering the try block: if its creation fails,
        # the cleanup below must not run against a name that was never assigned.
        tmpDir = tempfile.mkdtemp()
        try:
            os.chdir(tmpDir)
            self.logger.info('Creating temporary directory for dry run sandbox in %s' % tmpDir)
            ufc.downloadLog('dry-run-sandbox.tar.gz', output=os.path.join(tmpDir, 'dry-run-sandbox.tar.gz'))
            for name in ['dry-run-sandbox.tar.gz', 'InputFiles.tar.gz', 'CMSRunAnalysis.tar.gz', 'sandbox.tar.gz']:
                tf = tarfile.open(os.path.join(tmpDir, name))
                try:
                    tf.extractall(tmpDir)
                finally:
                    # Close the archive even when the extraction fails.
                    tf.close()
            # dict.update() returns None, so its result cannot be used as the child
            # environment. Update the process environment as before, then hand an
            # explicit copy to the subprocess.
            os.environ.update({'CRAB3_RUNTIME_DEBUG': 'True', '_CONDOR_JOB_AD': 'Job.submit'})
            env = os.environ.copy()

            with open('splitting-summary.json') as f:
                splitting = json.load(f)

            if self.options.skipEstimates:
                return splitting, None

            self.logger.info('Executing test, please wait...')

            events = 10
            totalJobSeconds = 0
            maxSeconds = 25
            while totalJobSeconds < maxSeconds:
                opts = getCMSRunAnalysisOpts('Job.submit', 'RunJobs.dag', job=1, events=events)

                s = subprocess.Popen(['sh', 'CMSRunAnalysis.sh'] + opts, env=env, stderr=subprocess.PIPE, stdout=subprocess.PIPE)
                out, err = s.communicate()
                self.logger.debug(out)
                self.logger.debug(err)
                if s.returncode != 0:
                    raise ClientException('Dry run failed to execute local test run:\n StdOut: %s\n StdErr: %s' % (out, err))

                #Once this https://github.com/dmwm/CRABServer/pull/4938 will get merged the job will be executed inside the CMSSW dir
                #Therefore the 'jobReport.json' will not be in the cwd. We will delete these three lines of code in the future
                jobReport = 'jobReport.json'
                if not os.path.isfile(jobReport):
                    jobReport = os.path.join( self.configreq["jobsw"], jobReport)
                with open(jobReport) as f:
                    report = json.load(f)['steps']['cmsRun']['performance']
                # Enlarge the next trial by the number of events that fit in maxSeconds
                # at the measured average time per event.
                events += (maxSeconds / float(report['cpu']['AvgEventTime']))
                totalJobSeconds = float(report['cpu']['TotalJobTime'])

        finally:
            os.chdir(cwd)
            shutil.rmtree(tmpDir)

        return splitting, report
def getFileFromURL(url, filename = None, proxyfilename = None):
    """
    Read the content of a URL and copy it into a file.

    url: the link you would like to retrieve
    filename: the local filename where the url is saved to. Defaults to the filename in the url
    proxyfilename: the x509 proxy certificate to be used in case auth is required

    Return the filename used to save the file or raises ClientException in case of errors (a status attribute is added if the error is an http one).
    """
    parsedurl = urlparse(url)
    if filename is None:
        path = parsedurl.path
        filename = os.path.basename(path)
    socket = None
    try:
        opener = urllib.URLopener(key_file = proxyfilename, cert_file = proxyfilename)
        socket = opener.open(url)
        status = socket.getcode()
        # Keep the response headers: they are reported below, after the handle is closed.
        headers = socket.info()
        filestr = socket.read()
    except IOError as ioex:
        msg = "Error while trying to retrieve file from %s: %s" % (url, ioex)
        msg += "\nMake sure the URL is correct."
        exc = ClientException(msg)
        # Go through .args so that an exception carrying fewer than two
        # arguments does not raise a new error from inside this handler.
        ioargs = ioex.args
        if len(ioargs) > 1 and ioargs[0] == 'http error':
            exc.status = ioargs[1]
        raise exc
    except Exception as ex:
        tblogger = logging.getLogger('CRAB3')
        tblogger.exception(ex)
        msg = "Unexpected error while trying to retrieve file from %s: %s" % (url, ex)
        raise ClientException(msg)
    finally:
        # Always release the connection, whether or not the download succeeded.
        if socket is not None:
            socket.close()
    if status != 200 and parsedurl.scheme in ['http', 'https']:
        exc = ClientException("Unable to retieve the file from %s. HTTP status code %s. HTTP content: %s" % (url, status, headers))
        exc.status = status
        raise exc
    # The file is only written once the status check has passed.
    with open(filename, 'w') as f:
        f.write(filestr)
    return filename
Example #12
0
def getFileFromURL(url, filename=None):
    """
    Read the content of a URL and copy it into a file.

    url: the link to retrieve
    filename: the local filename; defaults to the filename in the url

    Raises ClientException if the URL cannot be read.
    """
    # NOTE(review): as visible here, the content is read into filestr but never
    # written to filename and nothing is returned - confirm whether this
    # definition is truncated.
    if filename is None:
        path = urlparse(url).path
        filename = os.path.basename(path)
    socket = None
    try:
        socket = urllib.urlopen(url)
        filestr = socket.read()
    except IOError as ioex:
        tblogger = logging.getLogger('CRAB3')
        tblogger.exception(ioex)
        msg = "Error while trying to retrieve file from %s: %s" % (url, ioex)
        msg += "\nMake sure the URL is correct."
        raise ClientException(msg)
    finally:
        # Always release the connection, whether or not the read succeeded.
        if socket is not None:
            socket.close()
Example #13
0
def getFileFromURL(url, filename=None, proxyfilename=None):
    """
    Read the content of a URL and copy it into a file.

    url: the link you would like to retrieve
    filename: the local filename where the url is saved to. Defaults to the filename in the url
    proxyfilename: the x509 proxy certificate to be used in case auth is required

    Return the filename used to save the file or raises ClientException in case of errors (a status attribute is added if the error is an http one).
    """
    parsedurl = urlparse(url)
    if filename is None:
        path = parsedurl.path
        filename = os.path.basename(path)
    socket = None
    try:
        opener = urllib.URLopener(key_file=proxyfilename,
                                  cert_file=proxyfilename)
        socket = opener.open(url)
        status = socket.getcode()
        # Keep the response headers: they are reported below, after the handle is closed.
        headers = socket.info()
        # Read the file by chunks instead of all at once, appending each chunk to the final result.
        # This lowers the memory overhead, which can be a problem with big files.
        with open(filename, 'a') as f:
            # Discard any previous content of the file before writing.
            f.seek(0)
            f.truncate()
            while True:
                piece = socket.read(1024)
                if not piece:
                    break
                f.write(piece)
    except IOError as ioex:
        msg = "Error while trying to retrieve file from %s: %s" % (url, ioex)
        msg += "\nMake sure the URL is correct."
        exc = ClientException(msg)
        # Go through .args so that an exception carrying fewer than two
        # arguments does not raise a new error from inside this handler.
        ioargs = ioex.args
        if len(ioargs) > 1 and ioargs[0] == 'http error':
            exc.status = ioargs[1]
        raise exc
    except Exception as ex:
        tblogger = logging.getLogger('CRAB3')
        tblogger.exception(ex)
        msg = "Unexpected error while trying to retrieve file from %s: %s" % (
            url, ex)
        raise ClientException(msg)
    finally:
        # Always release the connection, whether or not the download succeeded.
        if socket is not None:
            socket.close()
    if status != 200 and parsedurl.scheme in ['http', 'https']:
        exc = ClientException(
            "Unable to retieve the file from %s. HTTP status code %s. HTTP content: %s"
            % (url, status, headers))
        exc.status = status
        raise exc
    return filename
Example #14
0
    def executeTestRun(self, inputArgs, jobnr):
        """ Execute a test run calling CMSRunAnalysis.sh

        inputArgs: sequence of per-job argument dictionaries
        jobnr: number of the job to run, counted from 1

        Raises ClientException if the command returns a non-zero code.
        """
        debugEnv = {
            'CRAB3_RUNTIME_DEBUG': 'True',
            '_CONDOR_JOB_AD': 'Job.submit'
        }
        os.environ.update(debugEnv)

        optsList = [os.path.join(os.getcwd(), 'TweakPSet.py')]
        #jobs goes from 1 to N, inputArgs from 0 to N-1
        optsList.append('-a %s' % inputArgs[jobnr - 1]['CRAB_Archive'])
        optsList.append('-o %s' % inputArgs[jobnr - 1]['CRAB_AdditionalOutputFiles'])
        optsList.append('--sourceURL=%s' % inputArgs[jobnr - 1]['CRAB_ISB'])
        optsList.append('--location=%s' % os.getcwd())
        optsList.append('--inputFile=%s' % inputArgs[jobnr - 1]['inputFiles'])
        optsList.append('--runAndLumis=%s' % inputArgs[jobnr - 1]['runAndLumiMask'])
        # The remaining per-job values map one to one onto a long option.
        for optName, argKey in [('firstEvent', 'firstEvent'),
                                ('lastEvent', 'lastEvent'),
                                ('firstLumi', 'firstLumi'),
                                ('firstRun', 'firstRun'),
                                ('seeding', 'seeding'),
                                ('lheInputFiles', 'lheInputFiles')]:
            optsList.append('--%s=%%s' % optName % inputArgs[jobnr - 1][argKey])
        optsList.append('--oneEventMode=0')
        optsList.append('--eventsPerLumi=%s' % inputArgs[jobnr - 1]['eventsPerLumi'])
        optsList.append('--maxRuntime=-1')
        optsList.append('--jobNumber=%s' % (jobnr - 1))
        optsList.append('--cmsswVersion=%s' % inputArgs[jobnr - 1]['CRAB_JobSW'])
        optsList.append('--scramArch=%s' % inputArgs[jobnr - 1]['CRAB_JobArch'])
        optsList.append('--scriptExe=%s' % inputArgs[jobnr - 1]['scriptExe'])
        optsList.append('--scriptArgs=%s' % inputArgs[jobnr - 1]['scriptArgs'])

        # from a python list to a string which can be used as shell command argument
        # NOTE(review): the values are not quoted here - confirm how execute_command
        # interprets whitespace or shell metacharacters in them.
        opts = ''.join(' %s' % opt for opt in optsList)
        command = 'sh CMSRunAnalysis.sh ' + opts
        out, err, returncode = execute_command(command=command)
        self.logger.debug(out)
        self.logger.debug(err)
        if returncode == 0:
            return
        raise ClientException(
            'Failed to execute local test run:\n StdOut: %s\n StdErr: %s' %
            (out, err))
Example #15
0
def getFileFromURL(url, filename=None, proxyfilename=None):
    """
    Read the content of a URL and copy it into a file.

    url: the link you would like to retrieve
    filename: the local filename where the url is saved to. Defaults to the filename in the url
    proxyfilename: the x509 proxy certificate to be used in case auth is required

    Return the filename used to save the file or raises ClientException in case of errors (a status attribute is added if the error is an http one).
    """
    parsedurl = urlparse(url)
    if filename is None:
        path = parsedurl.path
        filename = os.path.basename(path)
    socket = None
    try:
        opener = urllib.URLopener(key_file=proxyfilename,
                                  cert_file=proxyfilename)
        socket = opener.open(url)
        status = socket.getcode()
        # Keep the response headers: they are reported below, after the handle is closed.
        headers = socket.info()
        filestr = socket.read()
    except IOError as ioex:
        msg = "Error while trying to retrieve file from %s: %s" % (url, ioex)
        msg += "\nMake sure the URL is correct."
        exc = ClientException(msg)
        # Go through .args so that an exception carrying fewer than two
        # arguments does not raise a new error from inside this handler.
        ioargs = ioex.args
        if len(ioargs) > 1 and ioargs[0] == 'http error':
            exc.status = ioargs[1]
        raise exc
    except Exception as ex:
        tblogger = logging.getLogger('CRAB3')
        tblogger.exception(ex)
        msg = "Unexpected error while trying to retrieve file from %s: %s" % (
            url, ex)
        raise ClientException(msg)
    finally:
        # Always release the connection, whether or not the download succeeded.
        if socket is not None:
            socket.close()
    if status != 200 and parsedurl.scheme in ['http', 'https']:
        exc = ClientException(
            "Unable to retieve the file from %s. HTTP status code %s. HTTP content: %s"
            % (url, status, headers))
        exc.status = status
        raise exc
    # The file is only written once the status check has passed.
    with open(filename, 'w') as f:
        f.write(filestr)
    return filename
Example #16
0
    def run(self, filecacheurl=None):
        """
        Override run() for JobType
        """
        configArguments = {
            'addoutputfiles': [],
            'adduserfiles': [],
            'tfileoutfiles': [],
            'edmoutfiles': [],
        }

        if getattr(self.config.Data, 'useParent', False) and getattr(
                self.config.Data, 'secondaryInputDataset', None):
            msg = "Invalid CRAB configuration: Parameters Data.useParent and Data.secondaryInputDataset cannot be used together."
            raise ConfigurationException(msg)

        # Get SCRAM environment
        scram = ScramEnvironment(logger=self.logger)

        configArguments.update({
            'jobarch': scram.getScramArch(),
            'jobsw': scram.getCmsswVersion()
        })

        # Build tarball
        if self.workdir:
            tarUUID = PandaInterface.wrappedUuidGen()
            self.logger.debug('UNIQUE NAME: tarUUID %s ' % tarUUID)
            if len(tarUUID):
                tarFilename = os.path.join(self.workdir,
                                           tarUUID + 'default.tgz')
                cfgOutputName = os.path.join(self.workdir, BOOTSTRAP_CFGFILE)
            else:
                raise EnvironmentException(
                    'Problem with uuidgen while preparing for Sandbox upload.')
        else:
            _dummy, tarFilename = tempfile.mkstemp(suffix='.tgz')
            _dummy, cfgOutputName = tempfile.mkstemp(suffix='_cfg.py')

        if getattr(self.config.Data, 'inputDataset', None):
            configArguments['inputdata'] = self.config.Data.inputDataset

        ## Create CMSSW config.
        self.logger.debug("self.config: %s" % (self.config))
        self.logger.debug("self.config.JobType.psetName: %s" %
                          (self.config.JobType.psetName))
        ## The loading of a CMSSW pset in the CMSSWConfig constructor is not idempotent
        ## in the sense that a second loading of the same pset may not produce the same
        ## result. Therefore there is a cache in CMSSWConfig to avoid loading any CMSSW
        ## pset twice. However, some "complicated" psets seem to evade the caching.
        ## Thus, to be safe, keep the CMSSWConfig instance in a class variable, so that
        ## it can be reused later if wanted (for example, in PrivateMC when checking if
        ## the pset has an LHE source) instead of having to load the pset again.
        ## As for what does "complicated" psets mean, Daniel Riley said that there are
        ## some psets where one module modifies the configuration from another module.
        self.cmsswCfg = CMSSWConfig(config=self.config,
                                    logger=self.logger,
                                    userConfig=self.config.JobType.psetName)

        ## If there is a CMSSW pset, do a basic validation of it.
        if not bootstrapDone() and self.config.JobType.psetName:
            valid, msg = self.cmsswCfg.validateConfig()
            if not valid:
                raise ConfigurationException(msg)

        ## We need to put the pickled CMSSW configuration in the right place.
        ## Here, we determine if the bootstrap script already run and prepared everything
        ## for us. In such case we move the file, otherwise we pickle.dump the pset
        if not bootstrapDone():
            # Write out CMSSW config
            self.cmsswCfg.writeFile(cfgOutputName)
        else:
            # Move the pickled and the configuration files created by the bootstrap script
            self.moveCfgFile(cfgOutputName)

        ## Interrogate the CMSSW pset for output files (only output files produced by
        ## PoolOutputModule or TFileService are identified automatically). Do this
        ## automatic detection even if JobType.disableAutomaticOutputCollection = True,
        ## so that we can still classify the output files in EDM, TFile and additional
        ## output files in the Task DB (and the job ad).
        ## TODO: Do we really need this classification at all? cmscp and PostJob read
        ## the FJR to know if an output file is EDM, TFile or other.
        edmfiles, tfiles = self.cmsswCfg.outputFiles()
        ## If JobType.disableAutomaticOutputCollection = True, ignore the EDM and TFile
        ## output files that are not listed in JobType.outputFiles.
        if getattr(
                self.config.JobType, 'disableAutomaticOutputCollection',
                getParamDefaultValue(
                    'JobType.disableAutomaticOutputCollection')):
            outputFiles = [
                re.sub(r'^file:', '', file)
                for file in getattr(self.config.JobType, 'outputFiles', [])
            ]
            edmfiles = [file for file in edmfiles if file in outputFiles]
            tfiles = [file for file in tfiles if file in outputFiles]
        ## Get the list of additional output files that have to be collected as given
        ## in JobType.outputFiles, but remove duplicates listed already as EDM files or
        ## TFiles.
        addoutputFiles = [
            re.sub(r'^file:', '', file)
            for file in getattr(self.config.JobType, 'outputFiles', [])
            if re.sub(r'^file:', '', file) not in edmfiles + tfiles
        ]
        self.logger.debug(
            "The following EDM output files will be collected: %s" % edmfiles)
        self.logger.debug(
            "The following TFile output files will be collected: %s" % tfiles)
        self.logger.debug(
            "The following user output files will be collected: %s" %
            addoutputFiles)
        configArguments['edmoutfiles'] = edmfiles
        configArguments['tfileoutfiles'] = tfiles
        configArguments['addoutputfiles'].extend(addoutputFiles)
        ## Give warning message in case no output file was detected in the CMSSW pset
        ## nor was any specified in the CRAB configuration.
        if not configArguments['edmoutfiles'] and not configArguments[
                'tfileoutfiles'] and not configArguments['addoutputfiles']:
            msg = "%sWarning%s:" % (colors.RED, colors.NORMAL)
            if getattr(
                    self.config.JobType, 'disableAutomaticOutputCollection',
                    getParamDefaultValue(
                        'JobType.disableAutomaticOutputCollection')):
                msg += " Automatic detection of output files in the CMSSW configuration is disabled from the CRAB configuration"
                msg += " and no output file was explicitly specified in the CRAB configuration."
            else:
                msg += " CRAB could not detect any output file in the CMSSW configuration"
                msg += " nor was any explicitly specified in the CRAB configuration."
            msg += " Hence CRAB will not collect any output file from this task."
            self.logger.warning(msg)

        ## UserTarball calls ScramEnvironment which can raise EnvironmentException.
        ## Since ScramEnvironment is already called above and the exception is not
        ## handled, we are sure that if we reached this point it will not raise EnvironmentException.
        ## But otherwise we should take this into account.
        with UserTarball(name=tarFilename,
                         logger=self.logger,
                         config=self.config) as tb:
            inputFiles = [
                re.sub(r'^file:', '', file)
                for file in getattr(self.config.JobType, 'inputFiles', [])
            ]
            tb.addFiles(userFiles=inputFiles, cfgOutputName=cfgOutputName)
            configArguments['adduserfiles'] = [
                os.path.basename(f) for f in inputFiles
            ]
            try:
                uploadResult = tb.upload(filecacheurl=filecacheurl)
            except HTTPException as hte:
                if 'X-Error-Info' in hte.headers:
                    reason = hte.headers['X-Error-Info']
                    reason_re = re.compile(
                        r'\AFile size is ([0-9]*)B\. This is bigger than the maximum allowed size of ([0-9]*)B\.$'
                    )
                    re_match = reason_re.match(reason)
                    if re_match:
                        ISBSize = int(re_match.group(1))
                        ISBSizeLimit = int(re_match.group(2))
                        reason = "%sError%s:" % (colors.RED, colors.NORMAL)
                        reason += " Input sanbox size is ~%sMB. This is bigger than the maximum allowed size of %sMB." % (
                            ISBSize / 1024 / 1024, ISBSizeLimit / 1024 / 1024)
                        ISBContent = sorted(tb.content, reverse=True)
                        biggestFileSize = ISBContent[0][0]
                        ndigits = int(
                            math.ceil(math.log(biggestFileSize + 1, 10)))
                        reason += "\nInput sanbox content sorted by size[Bytes]:"
                        for (size, name) in ISBContent:
                            reason += ("\n%" + str(ndigits) + "s\t%s") % (size,
                                                                          name)
                        raise ClientException(reason)
                raise hte
            except Exception as e:
                msg = (
                    "Impossible to calculate the checksum of the sandbox tarball.\nError message: %s.\n"
                    "More details can be found in %s" %
                    (e, self.logger.logfile))
                LOGGERS['CRAB3'].exception(
                    msg)  #the traceback is only printed into the logfile
                raise ClientException(msg)

        configArguments['cacheurl'] = filecacheurl
        configArguments['cachefilename'] = "%s.tar.gz" % uploadResult
        self.logger.debug("Result uploading input files: %(cachefilename)s " %
                          configArguments)

        # Upload list of user-defined input files to process as the primary input
        userFilesList = getattr(self.config.Data, 'userInputFiles', None)
        if userFilesList:
            self.logger.debug(
                "Attaching list of user-specified primary input files.")
            userFilesList = map(string.strip, userFilesList)
            userFilesList = [file for file in userFilesList if file]
            if len(userFilesList) != len(set(userFilesList)):
                msg = "%sWarning%s:" % (colors.RED, colors.NORMAL)
                msg += " CRAB configuration parameter Data.userInputFiles contains duplicated entries."
                msg += " Duplicated entries will be removed."
                self.logger.warning(msg)
            configArguments['userfiles'] = set(userFilesList)
            configArguments['primarydataset'] = getattr(
                self.config.Data, 'outputPrimaryDataset', 'CRAB_UserFiles')

        lumi_mask_name = getattr(self.config.Data, 'lumiMask', None)
        lumi_list = None
        if lumi_mask_name:
            self.logger.debug("Attaching lumi mask %s to the request" %
                              (lumi_mask_name))
            try:
                lumi_list = getLumiList(lumi_mask_name, logger=self.logger)
            except ValueError as ex:
                msg = "%sError%s:" % (colors.RED, colors.NORMAL)
                msg += " Failed to load lumi mask %s : %s" % (lumi_mask_name,
                                                              ex)
                raise ConfigurationException(msg)
        run_ranges = getattr(self.config.Data, 'runRange', None)
        if run_ranges:
            run_ranges_is_valid = re.match('^\d+((?!(-\d+-))(\,|\-)\d+)*$',
                                           run_ranges)
            if run_ranges_is_valid:
                run_list = getRunList(run_ranges)
                if lumi_list:
                    lumi_list.selectRuns(run_list)
                    if not lumi_list:
                        msg = "Invalid CRAB configuration: The intersection between the lumi mask and the run range is null."
                        raise ConfigurationException(msg)
                else:
                    if len(run_list) > 50000:
                        msg = "CRAB configuration parameter Data.runRange includes %s runs." % str(
                            len(run_list))
                        msg += " When Data.lumiMask is not specified, Data.runRange can not include more than 50000 runs."
                        raise ConfigurationException(msg)
                    lumi_list = LumiList(runs=run_list)
            else:
                msg = "Invalid CRAB configuration: Parameter Data.runRange should be a comma separated list of integers or (inclusive) ranges. Example: '12345,99900-99910'"
                raise ConfigurationException(msg)
        if lumi_list:
            configArguments['runs'] = lumi_list.getRuns()
            ## For each run we encode the lumis as a string representing a list of integers: [[1,2],[5,5]] ==> '1,2,5,5'
            lumi_mask = lumi_list.getCompactList()
            configArguments['lumis'] = [
                str(reduce(lambda x, y: x + y,
                           lumi_mask[run]))[1:-1].replace(' ', '')
                for run in configArguments['runs']
            ]

        configArguments['jobtype'] = 'Analysis'

        return tarFilename, configArguments
Example #17
0
def getLoggers(*args, **kwargs):
    """
    Deprecated stub that always fails with an explicit error message.

    Any positional or keyword arguments are accepted and ignored, so that
    legacy calls of the form getLoggers(loglevel) reach the deprecation
    message below instead of failing with a TypeError on the signature.

    Raises ClientException unconditionally; the message points the user to
    setConsoleLogLevel(loglevel) from CRABClient.UserUtilities.
    """
    msg = "%sError%s: The function getLoggers(loglevel) from CRABClient.ClientUtilities has been deprecated." % (
        colors.RED, colors.NORMAL)
    msg += " Please use the new function setConsoleLogLevel(loglevel) from CRABClient.UserUtilities instead."
    raise ClientException(msg)
Example #18
0
        msg += "\n  Stdout:\n    %s" % (str(stdout).replace('\n', '\n    '))
        raise UsernameException(msg)
    return username


def getFileFromURL(url, filename=None):
    """
    Read the content of a URL and copy it into a file.

    url: the URL to read from.
    filename: the path of the file to write. When None, the base name of
              the path component of the URL is used.

    Returns the name of the file that was written.
    Raises ClientException if opening or reading the URL fails.
    """
    if filename is None:
        filename = os.path.basename(urlparse(url).path)
    tblogger = logging.getLogger('CRAB3')
    ## NOTE(review): urllib.urlopen presumably does not raise on HTTP error
    ## statuses (e.g. 404), in which case the error page would be saved as
    ## the file content -- confirm whether the status code should be checked.
    try:
        response = urllib.urlopen(url)
        try:
            filestr = response.read()
        finally:
            ## Always release the connection, also when read() fails.
            response.close()
    except IOError as ioex:
        tblogger.exception(ioex)
        msg = "Error while trying to retrieve file from %s: %s" % (url, ioex)
        msg += "\nMake sure the URL is correct."
        raise ClientException(msg)
    except Exception as ex:
        tblogger.exception(ex)
        msg = 'Unexpected error while trying to retrieve file from %s: %s' % (
            url, ex)
        raise ClientException(msg)
    ## Binary mode: the payload is stored exactly as it was received.
    with open(filename, 'wb') as f:
        f.write(filestr)
    return filename