Example #1
    def getInputFiles(self):
        """ Get the InputFiles.tar.gz and extract the necessary files
        """
        taskname = self.cachedinfo['RequestName']

        # Get task status from the task DB
        self.logger.debug("Getting status from the DB")
        uri = self.getUrl(self.instance, resource='task')
        serverFactory = CRABClient.Emulator.getEmulator('rest')
        server = serverFactory(self.serverurl, self.proxyfilename, self.proxyfilename, version=__version__)
        crabDBInfo, _, _ = server.get(uri, data={'subresource': 'search', 'workflow': taskname})
        status = getColumn(crabDBInfo, 'tm_task_status')
        self.destination = getColumn(crabDBInfo, 'tm_asyncdest')

        inputsFilename = os.path.join(os.getcwd(), 'InputFiles.tar.gz')
        if status == 'UPLOADED':
            filecacheurl = getColumn(crabDBInfo, 'tm_cache_url')
            ufc = CRABClient.Emulator.getEmulator('ufc')({'endpoint' : filecacheurl, "pycurl": True})
            self.logger.debug("Downloading and extracting 'dry-run-sandbox.tar.gz' from %s" % filecacheurl)
            ufc.downloadLog('dry-run-sandbox.tar.gz', output=os.path.join(os.getcwd(), 'dry-run-sandbox.tar.gz'))
            with tarfile.open('dry-run-sandbox.tar.gz') as tf:
                tf.extractall()
        elif status == 'SUBMITTED':
            webdir = getProxiedWebDir(taskname, self.serverurl, uri, self.proxyfilename, self.logger.debug)
            if not webdir:
                webdir = getColumn(crabDBInfo, 'tm_user_webdir')
            self.logger.debug("Downloading 'InputFiles.tar.gz' from %s" % webdir)
            getFileFromURL(webdir + '/InputFiles.tar.gz', inputsFilename, self.proxyfilename)
        else:
            raise ClientException('Can only execute jobs from tasks in status SUBMITTED or UPLOADED. Current status is %s' % status)

        for name in [inputsFilename, 'CMSRunAnalysis.tar.gz', 'sandbox.tar.gz']:
            with tarfile.open(name) as tf:
                tf.extractall()
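Example #1 ends with the same download-and-extract step applied to three tarballs; a minimal standalone sketch of that step, assuming the archives already sit in the working directory:

import os
import tarfile

def extractTarball(path, where=None):
    # Extract a .tar.gz archive into the working directory (or 'where'),
    # mirroring the tarfile.extractall() calls in the method above.
    with tarfile.open(path) as tf:
        tf.extractall(path=where or os.getcwd())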
Example #2
    def __call__(self):
        if self.options.short:
            taskname = self.cachedinfo['RequestName']
            inputlist = {'subresource': 'webdir', 'workflow': taskname}
            serverFactory = CRABClient.Emulator.getEmulator('rest')
            server = serverFactory(self.serverurl, self.proxyfilename, self.proxyfilename, version=__version__)
            uri = self.getUrl(self.instance, resource='task')
            webdir = getProxiedWebDir(taskname, self.serverurl, uri, self.proxyfilename, self.logger.debug)
            if not webdir:
                dictresult, status, reason = server.get(uri, data=inputlist)
                if status != 200:
                    msg = "Problem retrieving information from the server:\ninput:%s\noutput:%s\nreason:%s" % (str(inputlist), str(dictresult), str(reason))
                    raise RESTCommunicationException(msg)
                webdir = dictresult['result'][0]
                self.logger.info('Server result: %s' % webdir)
            self.setDestination()
            self.logger.info("Setting the destination to %s " % self.dest)
            failed, success = self.retrieveShortLogs(webdir, self.proxyfilename)
            if failed:
                msg = "%sError%s: Failed to retrieve the following files: %s" % (colors.RED,colors.NORMAL,failed)
                self.logger.info(msg)
            else:
                self.logger.info("%sSuccess%s: All files successfully retrieved." % (colors.GREEN,colors.NORMAL))
            returndict = {'success': success, 'failed': failed}
        else:
            returndict = getcommand.__call__(self, subresource = 'logs')
            if ('success' in returndict and not returndict['success']) or \
               ('failed'  in returndict and returndict['failed']):
                msg = "You can use the --short option to retrieve a short version of the log files from the Grid scheduler."
                self.logger.info(msg)

        return returndict
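Both branches of __call__() above return a dictionary with 'success' and 'failed' keys; a hedged sketch of caller-side handling (the 'command' variable is illustrative, not part of the original code):

# Hypothetical consumer of the dictionary returned by __call__() above.
returndict = command()
if returndict.get('failed'):
    print("Failed to retrieve: %s" % returndict['failed'])
else:
    print("Retrieved %d files" % len(returndict.get('success', [])))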
Example #3
def saveProxiedWebdir(ad):
    """ The function queries the REST interface to get the proxied webdir and sets
        a classad so that we report this to the dashboard isntead of the regular URL

        The proxied_url (if exist) is written to a file named proxied_webdir so that
        prejobs can read it and report to dashboard. If the url does not exist
        (i.e.: schedd not at CERN), the file is not written and we report the usual
        webdir

        See https://github.com/dmwm/CRABServer/issues/4883
    """
    # Get the proxied webdir from the REST interface
    task = ad['CRAB_ReqName']
    host = ad['CRAB_RestHost']
    uri = ad['CRAB_RestURInoAPI'] + '/task'
    cert = ad['X509UserProxy']
    res = getProxiedWebDir(task, host, uri, cert, logFunction = printLog)

    # We need to use a file to communicate this to the prejob. I tried things like:
    #    htcondor.Schedd().edit([dagJobId], 'CRAB_UserWebDirPrx', ad.lookup('CRAB_UserWebDir'))
    # but the prejob reads the classads from a file rather than querying the schedd, and we
    # can't update the classad file since it's owned by user condor.
    if res:
        with open("proxied_webdir", "w") as fd:
            fd.write(res)
    else:
        printLog("Cannot get proxied webdir from the server. Maybe the schedd does not have one in the REST configuration?")
        return 1

    return 0
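Per the docstring, prejobs read the 'proxied_webdir' file and fall back to the usual webdir when it was never written; a minimal sketch of that consumer side (the helper name and its argument are assumptions):

import os

def readProxiedWebdir(defaultWebdir):
    # Prefer the proxied webdir written by saveProxiedWebdir(); otherwise
    # report the regular webdir, as the docstring above describes.
    if os.path.isfile("proxied_webdir"):
        with open("proxied_webdir") as fd:
            return fd.read().strip()
    return defaultWebdir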
Example #4
    def getTaskDict(self):
        # Get information about the task
        inputlist = {'subresource': 'search', 'workflow': self.config.JobType.copyCatTaskname}
        serverFactory = CRABClient.Emulator.getEmulator('rest')
        serverhost = SERVICE_INSTANCES.get(self.config.JobType.copyCatInstance)
        server = serverFactory(serverhost, self.proxyfilename, self.proxyfilename, version=__version__)
        uri = getUrl(self.config.JobType.copyCatInstance, resource='task')
        dictresult, dummyStatus, dummyReason = server.get(uri, data=inputlist)
        webdir = getProxiedWebDir(self.config.JobType.copyCatTaskname, serverhost, uri, self.proxyfilename, self.logger.debug)
        if not webdir:
            webdir = getColumn(dictresult, 'tm_user_webdir')

        return dictresult, webdir
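The pattern shared by these examples is "prefer the proxied webdir, fall back to the tm_user_webdir column"; distilled into a single expression (a sketch with generic names standing in for the attributes used above):

webdir = (getProxiedWebDir(taskname, serverhost, uri, proxyfilename, logger.debug)
          or getColumn(dictresult, 'tm_user_webdir'))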
Example #5
    def __call__(self):
        if self.options.short:
            taskname = self.cachedinfo['RequestName']
            inputlist = {'subresource': 'webdir', 'workflow': taskname}
            serverFactory = CRABClient.Emulator.getEmulator('rest')
            server = serverFactory(self.serverurl,
                                   self.proxyfilename,
                                   self.proxyfilename,
                                   version=__version__)
            uri = self.getUrl(self.instance, resource='task')
            webdir = getProxiedWebDir(taskname, self.serverurl, uri,
                                      self.proxyfilename, self.logger.debug)
            if not webdir:
                dictresult, status, reason = server.get(uri, data=inputlist)
                if status != 200:
                    msg = "Problem retrieving information from the server:\ninput:%s\noutput:%s\nreason:%s" % (
                        str(inputlist), str(dictresult), str(reason))
                    raise RESTCommunicationException(msg)
                webdir = dictresult['result'][0]
                self.logger.info('Server result: %s' % webdir)
            self.setDestination()
            self.logger.info("Setting the destination to %s " % self.dest)
            failed, success = self.retrieveShortLogs(webdir,
                                                     self.proxyfilename)
            if failed:
                msg = "%sError%s: Failed to retrieve the following files: %s" % (
                    colors.RED, colors.NORMAL, failed)
                self.logger.info(msg)
            else:
                self.logger.info(
                    "%sSuccess%s: All files successfully retrieved." %
                    (colors.GREEN, colors.NORMAL))
            returndict = {'success': success, 'failed': failed}
        else:
            # Different from the old getlog code: set 'logs2' as subresource so that 'getcommand' uses the new logic.
            returndict = getcommand.__call__(self, subresource='logs2')
            if ('success' in returndict and not returndict['success']) or \
               ('failed'  in returndict and returndict['failed']):
                msg = "You can use the --short option to retrieve a short version of the log files from the Grid scheduler."
                self.logger.info(msg)

        return returndict
Example #6
def saveProxiedWebdir(crabserver, ad):
    """ The function queries the REST interface to get the proxied webdir and sets
        a classad so that we report this to the dashboard instead of the regular URL.

        The webdir (if exists) is written to a file named 'webdir' so that
        prejobs can read it and report to dashboard. If the proxied URL does not exist
        (i.e.: schedd not at CERN), we report the usual webdir.

        See https://github.com/dmwm/CRABServer/issues/4883
    """
    # Get the proxied webdir from the REST interface
    task = ad['CRAB_ReqName']
    webDir_adName = 'CRAB_WebDirURL'
    ad[webDir_adName] = ad['CRAB_localWebDirURL']
    proxied_webDir = getProxiedWebDir(crabserver=crabserver,
                                      task=task,
                                      logFunction=printLog)
    if proxied_webDir:  # Prefer the proxied webDir to the non-proxied one
        ad[webDir_adName] = str(proxied_webDir)

    if ad[webDir_adName]:
        # This condor_edit is required because in the REST interface we look for the webdir if the DB upload failed (or in general if we use the "old logic")
        # See https://github.com/dmwm/CRABServer/blob/3.3.1507.rc8/src/python/CRABInterface/HTCondorDataWorkflow.py#L398
        dagJobId = '%d.%d' % (ad['ClusterId'], ad['ProcId'])
        try:
            htcondor.Schedd().edit([dagJobId], webDir_adName,
                                   '{0}'.format(ad.lookup(webDir_adName)))
        except RuntimeError as reerror:
            printLog(str(reerror))

        # We need to use a file to communicate this to the prejob. I tried to read the corresponding ClassAd from the preJob like:
        # htcondor.Schedd().xquery(requirements="ClusterId == %d && ProcId == %d" % (self.task_ad['ClusterId'], self.task_ad['ProcId']), projection=[webDir_adName]).next().get(webDir_adName)
        # but it is too heavy of an operation with HTCondor v8.8.3
        with open("webdir", "w") as fd:
            fd.write(ad[webDir_adName])
    else:
        printLog(
            "Cannot get proxied webdir from the server. Maybe the schedd does not have one in the REST configuration?"
        )
        return 1

    return 0
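As in the older version, the prejob is expected to pick the value up from the 'webdir' file; a small sketch of that read, falling back to the classad set above when the file is absent (the helper itself is hypothetical; the CRAB_WebDirURL name comes from the function above):

import os

def readWebdirFile(taskAd):
    # Prefer the file written by saveProxiedWebdir(); fall back to the
    # CRAB_WebDirURL classad set before the condor_edit above.
    if os.path.isfile("webdir"):
        with open("webdir") as fd:
            return fd.read().strip()
    return taskAd.get('CRAB_WebDirURL')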
Example #7
    def getTaskDict(self):
        # Get information about the task
        inputlist = {
            'subresource': 'search',
            'workflow': self.config.JobType.copyCatTaskname
        }
        serverFactory = CRABClient.Emulator.getEmulator('rest')
        serverhost = SERVICE_INSTANCES.get(self.config.JobType.copyCatInstance)
        server = serverFactory(serverhost,
                               self.proxyfilename,
                               self.proxyfilename,
                               version=__version__)
        dictresult, dummyStatus, dummyReason = server.get(api='task',
                                                          data=inputlist)
        uri = getUrl(self.config.JobType.copyCatInstance, resource='task')
        webdir = getProxiedWebDir(self.config.JobType.copyCatTaskname,
                                  serverhost, uri, self.proxyfilename,
                                  self.logger.debug)
        if not webdir:
            webdir = getColumn(dictresult, 'tm_user_webdir')

        return dictresult, webdir
Example #8
    def __call__(self):
        # Get all of the columns from the database for a certain task
        taskname = self.cachedinfo['RequestName']
        uri = self.getUrl(self.instance, resource='task')
        serverFactory = CRABClient.Emulator.getEmulator('rest')
        server = serverFactory(self.serverurl,
                               self.proxyfilename,
                               self.proxyfilename,
                               version=__version__)
        crabDBInfo, _, _ = server.get(uri,
                                      data={
                                          'subresource': 'search',
                                          'workflow': taskname
                                      })
        self.logger.debug("Got information from server oracle database: %s",
                          crabDBInfo)

        # Until the task lands on a schedd we'll show the status from the DB
        combinedStatus = getColumn(crabDBInfo, 'tm_task_status')

        user = getColumn(crabDBInfo, 'tm_username')
        webdir = getColumn(crabDBInfo, 'tm_user_webdir')
        rootDagId = getColumn(crabDBInfo,
                              'clusterid')  # the condor cluster id assigned by the TaskWorker
        asourl = getColumn(crabDBInfo, 'tm_asourl')
        asodb = getColumn(crabDBInfo, 'tm_asodb')
        publicationEnabled = getColumn(crabDBInfo, 'tm_publication') == 'T'

        # Print information from the database
        self.printTaskInfo(crabDBInfo, user)
        if not rootDagId:
            failureMsg = "The task has not been submitted to the Grid scheduler yet. Not printing job information."
            self.logger.debug(failureMsg)
            return self.makeStatusReturnDict(crabDBInfo,
                                             combinedStatus,
                                             statusFailureMsg=failureMsg)

        self.logger.debug(
            "The CRAB server submitted your task to the Grid scheduler (cluster ID: %s)"
            % rootDagId)

        if not webdir:
            # Query condor through the server for information about this task
            uri = self.getUrl(self.instance, resource='workflow')
            params = {'subresource': 'taskads', 'workflow': taskname}

            res = server.get(uri, data=params)[0]['result'][0]
            # JobStatus 5 = Held
            if res['JobStatus'] == '5' and 'DagmanHoldReason' in res:
                # If we didn't find a webdir in the DB and the DAG is held,
                # the task bootstrapping failed before or during the webdir
                # upload and the reason should be printed.
                failureMsg = "The task failed to bootstrap on the Grid scheduler."
                failureMsg += " Please send an e-mail to %s." % (FEEDBACKMAIL)
                failureMsg += "\nHold reason: %s" % (res['DagmanHoldReason'])
                self.logger.info(failureMsg)
                combinedStatus = "FAILED"
            else:
                # If the DAG is submitted and the webdir is not there, we have to wait
                # for AdjustSites to run and upload the webdir location to the server.
                self.logger.info(
                    "Waiting for the Grid scheduler to bootstrap your task")
                failureMsg = "Schedd has not reported back the webdir (yet)"
                self.logger.debug(failureMsg)
                combinedStatus = "UNKNOWN"
            return self.makeStatusReturnDict(crabDBInfo,
                                             combinedStatus,
                                             statusFailureMsg=failureMsg)

        self.logger.debug("Webdir is located at %s", webdir)

        proxiedWebDir = getProxiedWebDir(taskname, self.serverurl, uri,
                                         self.proxyfilename, self.logger.debug)
        if not proxiedWebDir:
            msg = "Failed to get the proxied webdir from CRABServer. "
            msg += "\nWill fall back to the regular webdir url for file downloads "
            msg += "but will likely fail if the client is located outside CERN."
            self.logger.debug(msg)
            proxiedWebDir = webdir
        self.logger.debug("Proxied webdir is located at %s", proxiedWebDir)

        # Download status_cache file
        url = proxiedWebDir + "/status_cache"
        self.logger.debug("Retrieving 'status_cache' file from %s", url)

        statusCacheInfo = None
        try:
            statusCacheData = getDataFromURL(url, self.proxyfilename)
        except HTTPException as ce:
            self.logger.info(
                "Waiting for the Grid scheduler to report back the status of your task"
            )
            failureMsg = "Cannot retrieve the status_cache file. Maybe the task process has not run yet?"
            failureMsg += " Got:\n%s" % ce
            self.logger.error(failureMsg)
            logging.getLogger("CRAB3").exception(ce)
            combinedStatus = "UNKNOWN"
            return self.makeStatusReturnDict(crabDBInfo,
                                             combinedStatus,
                                             statusFailureMsg=failureMsg)
        else:
            # We skip first two lines of the file because they contain the checkpoint locations
            # for the job_log / fjr_parse_results files and are used by the status caching script.
            # Load the job_report summary
            statusCacheInfo = literal_eval(statusCacheData.split('\n')[2])
            self.logger.debug("Got information from status cache file: %s",
                              statusCacheInfo)

        # If the task is already on the grid, show the dagman status
        combinedStatus = dagStatus = self.printDAGStatus(
            crabDBInfo, statusCacheInfo)

        shortResult = self.printShort(statusCacheInfo)
        pubStatus = self.printPublication(publicationEnabled,
                                          shortResult['jobsPerStatus'], asourl,
                                          asodb, taskname, user, crabDBInfo)
        self.printErrors(statusCacheInfo)

        if self.options.summary:
            self.printSummary(statusCacheInfo)
        if self.options.long or self.options.sort:
            # If user correctly passed some jobid CSVs to use in the status --long, self.jobids
            # will be a list of strings already parsed from the input by the validateOptions()
            if self.jobids:
                self.checkUserJobids(statusCacheInfo, self.jobids)
            sortdict = self.printLong(statusCacheInfo,
                                      self.jobids,
                                      quiet=(not self.options.long))
            if self.options.sort:
                self.printSort(sortdict, self.options.sort)
        if self.options.json:
            self.logger.info(json.dumps(statusCacheInfo))

        statusDict = self.makeStatusReturnDict(crabDBInfo, combinedStatus,
                                               dagStatus, '', shortResult,
                                               statusCacheInfo, pubStatus,
                                               proxiedWebDir)

        return statusDict
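The status_cache parsing in Example #8 assumes a fixed layout: two checkpoint lines, then the job-report summary as a Python literal. A self-contained sketch with fabricated minimal content (for illustration only):

from ast import literal_eval

# Hypothetical minimal status_cache content: two checkpoint lines for the
# job_log / fjr_parse_results parsers, then the job-report summary literal.
statusCacheData = "0\n0\n{'1': {'State': 'finished'}}"
statusCacheInfo = literal_eval(statusCacheData.split('\n')[2])
assert statusCacheInfo['1']['State'] == 'finished'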