Example #1
    def getLumisToProcess(self, userWebDirURL, jobs, workflow):
        """
        What each job was requested to process

        Get the lumis to process by each job in the workflow.
        """
        res = {}
        if userWebDirURL:
            url = userWebDirURL + "/run_and_lumis.tar.gz"
            tarFilename = os.path.join(self.requestarea, 'results/run_and_lumis.tar.gz')
            try:
                getFileFromURL(url, tarFilename, self.proxyfilename)

                # Not using 'with tarfile.open(..) as t:' syntax because
                # the tarfile module only received context manager protocol support
                # in python 2.7, whereas CMSSW_5_* uses python 2.6 and breaks here.
                tarball = tarfile.open(tarFilename)
                for jobid in jobs:
                    filename = "job_lumis_%s.json" % (jobid)
                    try:
                        member = tarball.getmember(filename)
                    except KeyError:
                        self.logger.warning("File %s not found in run_and_lumis.tar.gz for task %s" % (filename, workflow))
                    else:
                        fd = tarball.extractfile(member)
                        try:
                            res[str(jobid)] = json.load(fd)
                        finally:
                            fd.close()
                tarball.close()
            except HTTPException as hte:
                self.logger.error("Failed to retrieve input dataset duplicate lumis.")
                logging.getLogger('CRAB3').exception(hte)

        return res
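
Every example on this page leans on getFileFromURL from the CRAB client utilities, whose implementation is not shown here. Judging from the calls above, it takes a URL, an optional destination path and an optional X509 proxy file, returns the local filename, and raises a ClientException carrying the HTTP status code on failure. The stand-in below is only a sketch of that assumed contract, written with the requests library and a local ClientException class; neither matches the real client code.

    import os
    import tempfile
    import requests

    class ClientException(Exception):
        """Local stand-in for the CRAB client exception used in these examples."""
        def __init__(self, msg, status=None):
            super(ClientException, self).__init__(msg)
            self.status = status

    def getFileFromURL(url, filename=None, proxyfilename=None):
        """Hypothetical re-implementation of the assumed interface: download
        `url` to `filename` (or a temporary file) and return the local path."""
        if filename is None:
            handle, filename = tempfile.mkstemp(suffix=os.path.basename(url))
            os.close(handle)
        cert = (proxyfilename, proxyfilename) if proxyfilename else None
        # Grid CA verification is omitted in this sketch; the real helper handles it.
        resp = requests.get(url, cert=cert, verify=False)
        if resp.status_code != 200:
            raise ClientException("Failed to download %s" % url, status=resp.status_code)
        with open(filename, 'wb') as out:
            out.write(resp.content)
        return filename
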
Example #2
    def getInputDatasetLumis(self, inputDataset, userWebDirURL):
        """
        What the input dataset had in DBS when the task was submitted

        Get the lumis (and the lumis split across files) in the input dataset. Files
        containing this information were created at data discovery time and then
        copied to the schedd.
        """
        res = {}
        res['inputDataset'] = {'lumis': {}, 'duplicateLumis': {}}
        if inputDataset and userWebDirURL:
            url = userWebDirURL + "/input_dataset_lumis.json"
            filename = os.path.join(self.requestarea, 'results/input_dataset_lumis.json')
            try:
                ## Retrieve the lumis in the input dataset.
                getFileFromURL(url, filename, self.proxyfilename)
                with open(filename) as fd:
                    res['inputDataset']['lumis'] = json.load(fd)
            except HTTPException as hte:
                self.logger.error("Failed to retrieve input dataset lumis.")
                logging.getLogger('CRAB3').exception(hte)

            url = userWebDirURL + "/input_dataset_duplicate_lumis.json"
            filename = os.path.join(self.requestarea, 'results/input_dataset_duplicate_lumis.json')
            try:
                ## Retrieve the lumis split across files in the input dataset.
                getFileFromURL(url, filename, self.proxyfilename)
                with open(filename) as fd:
                    res['inputDataset']['duplicateLumis'] = json.load(fd)
            except HTTPException as hte:
                self.logger.error("Failed to retrieve input dataset duplicate lumis.")
                logging.getLogger('CRAB3').exception(hte)

        return res
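
The two blocks above repeat the same download-then-json.load pattern with identical error handling. A helper that folds the pattern together might look like the sketch below; fetchFile stands for whatever downloader is in use (getFileFromURL in these examples) and is passed in so the sketch stays self-contained, and catching Exception here is deliberately broader than the HTTPException the real code narrows to.

    import json
    import logging

    def loadJsonFromWebdir(fetchFile, url, localPath, proxyfilename, logger=None):
        """Download a JSON file and return its parsed content, or {} on failure.

        `fetchFile` is any callable with the getFileFromURL-style signature
        fetchFile(url, filename, proxyfilename).
        """
        logger = logger or logging.getLogger('CRAB3')
        try:
            fetchFile(url, localPath, proxyfilename)
            with open(localPath) as fd:
                return json.load(fd)
        except Exception:
            # The example above catches HTTPException specifically.
            logger.exception("Failed to retrieve %s", url)
            return {}

With such a helper, getInputDatasetLumis would reduce to two calls, one per JSON file in the webdir.
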
Example #3
    def retrieveShortLogs(self, webdir, proxyfilename):
        self.logger.info("Retrieving...")
        success = []
        failed = []
        for _, jobid in self.options.jobids:
            ## We don't know a priori how many retries the job had. So we start with retry 0
            ## and increase it by 1 until we are unable to retrieve a log file (interpreting
            ## this as the fact that we reached the highest retry already).
            retry = 0
            succeded = True
            while succeded:
                filename = 'job_out.%s.%s.txt' % (jobid, retry)
                url = webdir + '/' + filename
                try:
                    getFileFromURL(url, self.dest + '/' + filename, proxyfilename)
                    self.logger.info('Retrieved %s' % (filename))
                    success.append(filename)
                    retry += 1 #To retrieve retried job log, if there is any.
                except ClientException as ex:
                    succeded = False
                    ## Ignore the exception if the HTTP status code is 404. Status 404 means file
                    ## not found (see http://www.w3.org/Protocols/rfc2616/rfc2616-sec10.html). File
                    ## not found error is expected, since we try all the job retries.
                    if not hasattr(ex, "status") or ex.status!=404:
                        self.logger.debug(str(ex))
                        failed.append(filename)

        return failed, success
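
The while loop above increments the retry counter until the first failed fetch and treats only a 404 as the expected end of the sequence; anything else counts as a real failure. Pulled out of the class, the same logic can be sketched as a plain function, where `fetch(url, destpath)` is any callable that raises an exception carrying a `status` attribute on HTTP errors, mirroring how ClientException is used here.

    def retrieveJobLogs(fetch, webdir, dest, jobid):
        """Fetch job_out.<jobid>.<retry>.txt for retry = 0, 1, ... until a 404.

        Returns (retrieved, failed) lists of file names. `fetch(url, destpath)`
        must raise an exception carrying a `status` attribute on HTTP errors,
        like the ClientException used in the example above.
        """
        retrieved, failed = [], []
        retry = 0
        while True:
            filename = 'job_out.%s.%s.txt' % (jobid, retry)
            try:
                fetch(webdir + '/' + filename, dest + '/' + filename)
            except Exception as ex:
                if getattr(ex, 'status', None) != 404:
                    failed.append(filename)  # a real error, not just "no more retries"
                break
            retrieved.append(filename)
            retry += 1
        return retrieved, failed
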
Example #4
    def retrieveShortLogs(self, webdir, proxyfilename):
        self.logger.info("Retrieving...")
        success = []
        failed = []
        for _, jobid in self.options.jobids:
            ## We don't know a priori how many retries the job had. So we start with retry 0
            ## and increase it by 1 until we are unable to retrieve a log file (interpreting
            ## this as the fact that we reached the highest retry already).
            retry = 0
            succeded = True
            while succeded:
                filename = 'job_out.%s.%s.txt' % (jobid, retry)
                url = webdir + '/' + filename
                try:
                    getFileFromURL(url, self.dest + '/' + filename,
                                   proxyfilename)
                    self.logger.info('Retrieved %s' % (filename))
                    success.append(filename)
                    retry += 1  #To retrieve retried job log, if there is any.
                except ClientException as ex:
                    succeded = False
                    ## Ignore the exception if the HTTP status code is 404. Status 404 means file
                    ## not found (see http://www.w3.org/Protocols/rfc2616/rfc2616-sec10.html). File
                    ## not found error is expected, since we try all the job retries.
                    if not hasattr(ex, "status") or ex.status != 404:
                        self.logger.debug(str(ex))
                        failed.append(filename)

        return failed, success
Example #5
    def getLumisToProcess(self, userWebDirURL, numJobs, workflow):
        """
        What each job was requested to process

        Get the lumis to process by each job in the workflow.
        """
        res = {}
        if userWebDirURL:
            url = userWebDirURL + "/run_and_lumis.tar.gz"
            tarFilename = os.path.join(self.requestarea,
                                       'results/run_and_lumis.tar.gz')
            try:
                getFileFromURL(url, tarFilename, self.proxyfilename)
                with tarfile.open(tarFilename) as tarball:
                    for jobid in xrange(1, numJobs + 1):
                        filename = "job_lumis_%d.json" % (jobid)
                        try:
                            member = tarball.getmember(filename)
                        except KeyError:
                            self.logger.warning(
                                "File %s not found in run_and_lumis.tar.gz for task %s"
                                % (filename, workflow))
                        else:
                            fd = tarball.extractfile(member)
                            try:
                                res[str(jobid)] = json.load(fd)
                            finally:
                                fd.close()
            except HTTPException as hte:
                self.logger.error(
                    "Failed to retrieve input dataset duplicate lumis.")
                logging.getLogger('CRAB3').exception(hte)

        return res
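
The dictionary returned by getLumisToProcess maps the job id (as a string) to that job's lumi list. Assuming the per-job JSON follows the usual CMS compact layout of {'run': [[first, last], ...]} ranges, which the surrounding code suggests but does not show, expanding it into a set of (run, lumi) pairs, for instance to look for overlaps between two jobs, could look like this sketch.

    def expandLumiRanges(jobLumis):
        """Turn {'run': [[first, last], ...]} into a set of (run, lumi) pairs.

        The compact-range layout is an assumption based on the CMS lumi-mask
        convention; the job_lumis_N.json files are expected to follow it.
        """
        pairs = set()
        for run, ranges in jobLumis.items():
            for first, last in ranges:
                for lumi in range(first, last + 1):
                    pairs.add((int(run), lumi))
        return pairs

    # Example: lumis requested by two jobs that are expected not to overlap.
    # overlap = expandLumiRanges(res['1']) & expandLumiRanges(res['2'])
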
Example #6
    def getInputFiles(self):
        """ Get the InputFiles.tar.gz and extract the necessary files
        """
        taskname = self.cachedinfo['RequestName']

        #Get task status from the task DB
        self.logger.debug("Getting status from he DB")
        uri = self.getUrl(self.instance, resource = 'task')
        serverFactory = CRABClient.Emulator.getEmulator('rest')
        server = serverFactory(self.serverurl, self.proxyfilename, self.proxyfilename, version = __version__)
        crabDBInfo, _, _ =  server.get(uri, data = {'subresource': 'search', 'workflow': taskname})
        status = getColumn(crabDBInfo, 'tm_task_status')
        self.destination = getColumn(crabDBInfo, 'tm_asyncdest')

        inputsFilename = os.path.join(os.getcwd(), 'InputFiles.tar.gz')
        if status == 'UPLOADED':
            filecacheurl = getColumn(crabDBInfo, 'tm_cache_url')
            ufc = CRABClient.Emulator.getEmulator('ufc')({'endpoint' : filecacheurl, "pycurl": True})
            self.logger.debug("Downloading and extracting 'dry-run-sandbox.tar.gz' from %s" % filecacheurl)
            ufc.downloadLog('dry-run-sandbox.tar.gz', output=os.path.join(os.getcwd(), 'dry-run-sandbox.tar.gz'))
            with tarfile.open('dry-run-sandbox.tar.gz') as tf:
                tf.extractall()
        elif status == 'SUBMITTED':
            webdir = getProxiedWebDir(taskname, self.serverurl, uri, self.proxyfilename, self.logger.debug)
            if not webdir:
                webdir = getColumn(crabDBInfo, 'tm_user_webdir')
            self.logger.debug("Downloading 'InputFiles.tar.gz' from %s" % webdir)
            getFileFromURL(webdir + '/InputFiles.tar.gz', inputsFilename, self.proxyfilename)
        else:
            raise ClientException('Can only execute jobs from tasks in status SUBMITTED or UPLOADED. Current status is %s' % status)

        for name in [inputsFilename, 'CMSRunAnalysis.tar.gz', 'sandbox.tar.gz']:
            with tarfile.open(name) as tf:
                tf.extractall()
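
tf.extractall() above unpacks archives fetched over the network straight into the working directory. The stdlib tarfile documentation warns that extractall trusts member paths, so a defensive wrapper that refuses members escaping the target directory is a common addition. The sketch below is generic, not CRAB-specific; on recent Python (3.12+) extractall(filter='data') provides a built-in alternative.

    import os
    import tarfile

    def safeExtractAll(tarPath, destDir='.'):
        """Extract a tarball, refusing members whose paths escape destDir."""
        destDir = os.path.abspath(destDir)
        with tarfile.open(tarPath) as tf:
            for member in tf.getmembers():
                target = os.path.abspath(os.path.join(destDir, member.name))
                if not (target == destDir or target.startswith(destDir + os.sep)):
                    raise RuntimeError("Refusing to extract %s outside %s" % (member.name, destDir))
            tf.extractall(destDir)
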
Example #7
    def __call__(self):
        if self.options.short:
            inputlist = {'subresource': 'webdir', 'workflow': self.cachedinfo['RequestName']}
            serverFactory = CRABClient.Emulator.getEmulator('rest')
            server = serverFactory(self.serverurl, self.proxyfilename, self.proxyfilename, version=__version__) 
            uri = self.getUrl(self.instance, resource = 'task')
            dictresult, status, reason =  server.get(uri, data = inputlist)
            self.logger.info('Server result: %s' % dictresult['result'][0])
            dictresult = self.processServerResult(dictresult)
            if status != 200:
                msg = "Problem retrieving information from the server:\ninput:%s\noutput:%s\nreason:%s" % (str(inputlist), str(dictresult), str(reason))
                raise RESTCommunicationException(msg)
            self.setDestination()
            self.logger.info("Setting the destination to %s " % self.dest)
            self.logger.info("Retrieving...")
            success = []
            failed = []        
            for item in self.options.jobids:
                jobid = str(item[1])
                filename = 'job_out.'+jobid+'.0.txt'
                url = dictresult['result'][0]+'/'+filename
                try:
                    getFileFromURL(url, self.dest+'/'+filename)
                    self.logger.info ('Retrieved %s' % (filename))
                    success.append(filename)
                    retry = 1
                    #To retrieve retried joblog, if there is any.
                    while urllib.urlopen(dictresult['result'][0]+'/'+'job_out.'+jobid+'.'+str(retry)+'.txt').getcode() == 200:
                        filename = 'job_out.'+jobid+'.'+str(retry)+'.txt'
                        url = dictresult['result'][0]+'/'+filename
                        getFileFromURL(url, self.dest+'/'+filename)
                        self.logger.info ('Retrieved %s' % (filename))
                        success.append(filename)
                        retry = retry + 1
                except ClientException as ex:
                    self.logger.debug(str(ex))
                    failed.append(filename)
            if failed:
                msg = "%sError%s: Failed to retrieve the following files: %s" % (colors.RED,colors.NORMAL,failed)
                self.logger.info(msg)
            else:
                self.logger.info("%sSuccess%s: All files successfully retrieved." % (colors.GREEN,colors.NORMAL))
            returndict = {'success': success, 'failed': failed}
        else:
            returndict = getcommand.__call__(self, subresource = 'logs')
            if ('success' in returndict and not returndict['success']) or \
               ('failed'  in returndict and returndict['failed']):
                msg = "You can use the --short option to retrieve a short version of the log files from the Grid scheduler."
                self.logger.info(msg)
 
        return returndict
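
This older variant probes for retried logs with Python 2's urllib.urlopen(...).getcode() before downloading each one. A Python 3 equivalent of just that probe, separated from the download, might look like the sketch below; note it performs no proxy authentication, so it only applies to webdirs readable without a grid certificate.

    from urllib.error import HTTPError
    from urllib.request import urlopen

    def countLogRetries(webdir, jobid):
        """Return how many job_out.<jobid>.<retry>.txt files the webdir serves."""
        retry = 0
        while True:
            url = '%s/job_out.%s.%d.txt' % (webdir, jobid, retry)
            try:
                urlopen(url).close()
            except HTTPError as err:
                if err.code == 404:  # no such retry, so we have found them all
                    return retry
                raise                # any other HTTP error is unexpected
            retry += 1
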
Example #8
    def getLumisToProcess(self, userWebDirURL, jobs, workflow):
        """
        What each job was requested to process

        Get the lumis to process by each job in the workflow.
        """
        res = {}
        if userWebDirURL:
            url = userWebDirURL + "/run_and_lumis.tar.gz"
            tarFilename = os.path.join(self.requestarea,
                                       'results/run_and_lumis.tar.gz')
            try:
                getFileFromURL(url, tarFilename, self.proxyfilename)

                # Not using 'with tarfile.open(..) as t:' syntax because
                # the tarfile module only received context manager protocol support
                # in python 2.7, whereas CMSSW_5_* uses python 2.6 and breaks here.
                tarball = tarfile.open(tarFilename)
                for jobid in jobs:
                    filename = "job_lumis_%s.json" % (jobid)
                    try:
                        member = tarball.getmember(filename)
                    except KeyError:
                        self.logger.warning(
                            "File %s not found in run_and_lumis.tar.gz for task %s"
                            % (filename, workflow))
                    else:
                        fd = tarball.extractfile(member)
                        try:
                            res[str(jobid)] = json.load(fd)
                        finally:
                            fd.close()
                tarball.close()
            except HTTPException as hte:
                self.logger.error(
                    "Failed to retrieve input dataset duplicate lumis.")
                logging.getLogger('CRAB3').exception(hte)

        return res
Example #9
    def getInputDatasetLumis(self, inputDataset, userWebDirURL):
        """
        What the input dataset had in DBS when the task was submitted

        Get the lumis (and the lumis split across files) in the input dataset. Files
        containing this information were created at data discovery time and then
        copied to the schedd.
        """
        res = {}
        res['inputDataset'] = {'lumis': {}, 'duplicateLumis': {}}
        if inputDataset and userWebDirURL:
            url = userWebDirURL + "/input_dataset_lumis.json"
            filename = os.path.join(self.requestarea,
                                    'results/input_dataset_lumis.json')
            try:
                ## Retrieve the lumis in the input dataset.
                getFileFromURL(url, filename, self.proxyfilename)
                with open(filename) as fd:
                    res['inputDataset']['lumis'] = json.load(fd)
            except HTTPException as hte:
                self.logger.error("Failed to retrieve input dataset lumis.")
                logging.getLogger('CRAB3').exception(hte)

            url = userWebDirURL + "/input_dataset_duplicate_lumis.json"
            filename = os.path.join(
                self.requestarea, 'results/input_dataset_duplicate_lumis.json')
            try:
                ## Retrieve the lumis split across files in the input dataset.
                getFileFromURL(url, filename, self.proxyfilename)
                with open(filename) as fd:
                    res['inputDataset']['duplicateLumis'] = json.load(fd)
            except HTTPException as hte:
                self.logger.error(
                    "Failed to retrieve input dataset duplicate lumis.")
                logging.getLogger('CRAB3').exception(hte)

        return res
Example #10
    def __call__(self):
        # Get all of the columns from the database for a certain task
        taskname = self.cachedinfo['RequestName']
        uri = self.getUrl(self.instance, resource = 'task')
        serverFactory = CRABClient.Emulator.getEmulator('rest')
        server = serverFactory(self.serverurl, self.proxyfilename, self.proxyfilename, version=__version__)
        crabDBInfo, _, _ =  server.get(uri, data = {'subresource': 'search', 'workflow': taskname})
        self.logger.debug("Got information from server oracle database: %s", crabDBInfo)

        user = self.getColumn(crabDBInfo, 'tm_username')
        webdir = self.getColumn(crabDBInfo, 'tm_user_webdir')
        rootDagId = self.getColumn(crabDBInfo, 'clusterid') #that's the condor id from the TW

        #Print information from the database
        self.printTaskInfo(crabDBInfo, user)
        if rootDagId and not webdir:
            # if the dag has been submitted but the webdir is not there yet, we have to wait
            # for AdjustSites to run and upload the webdir location to the server
            self.logger.info("The CRAB server submitted your task to the Grid scheduler (ID: %s)")
            self.logger.info("Waiting for the scheduler to report back the status of your task")
            return crabDBInfo, None

        self.logger.debug("Webdir is located at %s", webdir)
        # Download status_cache file
        self.logger.debug("Retrieving 'status_cache' file from webdir")
        url = webdir + '/' + "status_cache"

        statusCacheInfo = None
        statusCacheFilename = getFileFromURL(url, proxyfilename=self.proxyfilename)
        with open(statusCacheFilename) as fd:
            # Skip first line of the file (it contains info for the caching script) and load job_report summary
            fd.readline()
            statusCacheInfo = literal_eval(fd.readline())
        self.logger.debug("Got information from status cache file: %s", statusCacheInfo)

        self.printDAGStatus(statusCacheInfo)

        shortResult = self.printShort(statusCacheInfo)
        self.printErrors(statusCacheInfo)
        if self.options.summary:
            self.printSummary(statusCacheInfo)
        if self.options.long or self.options.sort:
            sortdict = self.printLong(statusCacheInfo, quiet = (not self.options.long))
            if self.options.sort:
                self.printSort(sortdict, self.options.sort)
        if self.options.json:
            self.logger.info(json.dumps(statusCacheInfo))

        return crabDBInfo, shortResult
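
The status_cache file is parsed by skipping its first line, which the comment describes as information for the caching script, and running literal_eval on the second, which holds the per-job status dictionary. Isolated from the command class, that step is simply the following sketch.

    from ast import literal_eval

    def parseStatusCache(path):
        """Parse a downloaded status_cache file.

        Mirrors the example above: the first line carries information for the
        caching script and is skipped; the second line is a Python-literal
        dictionary with the per-job status information.
        """
        with open(path) as fd:
            fd.readline()  # caching-script header, ignored
            return literal_eval(fd.readline())
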
Example #11
    def __call__(self):
        # Get all of the columns from the database for a certain task
        taskname = self.cachedinfo["RequestName"]
        uri = self.getUrl(self.instance, resource="task")
        serverFactory = CRABClient.Emulator.getEmulator("rest")
        server = serverFactory(self.serverurl, self.proxyfilename, self.proxyfilename, version=__version__)
        crabDBInfo, _, _ = server.get(uri, data={"subresource": "search", "workflow": taskname})
        self.logger.debug("Got information from server oracle database: %s", crabDBInfo)

        user = self.getColumn(crabDBInfo, "tm_username")
        webdir = self.getColumn(crabDBInfo, "tm_user_webdir")
        rootDagId = self.getColumn(crabDBInfo, "clusterid")  # that's the condor id from the TW

        # Print information from the database
        self.printTaskInfo(crabDBInfo, user)
        if not rootDagId:
            self.logger.debug(
                "The task has not been submitted to the Grid scheduler yet. Not printing job information."
            )
            return crabDBInfo, None

        self.logger.debug("The CRAB server submitted your task to the Grid scheduler (cluster ID: %s)" % rootDagId)

        if not webdir:
            # if the dag has been submitted but the webdir is not there yet, we have to wait
            # for AdjustSites to run and upload the webdir location to the server
            self.logger.info("Waiting for the Grid scheduler to bootstrap your task")
            self.logger.debug("Schedd has not reported back the webdir (yet)")
            return crabDBInfo, None

        self.logger.debug("Webdir is located at %s", webdir)
        # Download status_cache file
        self.logger.debug("Retrieving 'status_cache' file from webdir")
        url = webdir + "/" + "status_cache"

        statusCacheInfo = None
        try:
            statusCacheFilename = getFileFromURL(url, proxyfilename=self.proxyfilename)
        except ClientException as ce:
            self.logger.info("Waiting for the Grid scheduler to report back the status of your task")
            self.logger.debug("Cannot retrieve the status_cache file. Maybe the task process has not run yet?")
            self.logger.debug("Got: %s" % ce)
            return crabDBInfo, None
        else:
            with open(statusCacheFilename) as fd:
                # Skip first line of the file (it contains info for the caching script) and load job_report summary
                fd.readline()
                statusCacheInfo = literal_eval(fd.readline())
            self.logger.debug("Got information from status cache file: %s", statusCacheInfo)

        self.printDAGStatus(crabDBInfo, statusCacheInfo)

        shortResult = self.printShort(statusCacheInfo)
        self.printErrors(statusCacheInfo)
        if self.options.summary:
            self.printSummary(statusCacheInfo)
        if self.options.long or self.options.sort:
            sortdict = self.printLong(statusCacheInfo, quiet=(not self.options.long))
            if self.options.sort:
                self.printSort(sortdict, self.options.sort)
        if self.options.json:
            self.logger.info(json.dumps(statusCacheInfo))

        return crabDBInfo, shortResult
Example #12
    def run(self, filecacheurl = None):
        """
        Override run() for JobType
        """

        taskDict, webdir = self.getTaskDict()
        addoutputfiles = literal_eval(getColumn(taskDict, 'tm_outfiles'))
        tfileoutfiles = literal_eval(getColumn(taskDict, 'tm_tfile_outfiles'))
        edmoutfiles = literal_eval(getColumn(taskDict, 'tm_edm_outfiles'))
        jobarch = getColumn(taskDict, 'tm_job_arch')
        jobsw = getColumn(taskDict, 'tm_job_sw')

        sandboxFilename = os.path.join(self.workdir, 'sandbox.tar.gz')
        getFileFromURL(webdir + '/sandbox.tar.gz', sandboxFilename, self.proxyfilename)

        configArguments = {'addoutputfiles' : addoutputfiles,
                           'tfileoutfiles' : tfileoutfiles,
                           'edmoutfiles' : edmoutfiles,
                           'jobarch' : jobarch,
                           'jobsw' : jobsw,
                          }

        # Maybe the user wants to change the dataset
        if getattr(self.config.Data, 'inputDataset', None):
            configArguments['inputdata'] = self.config.Data.inputDataset

        ufc = CRABClient.Emulator.getEmulator('ufc')({'endpoint' : filecacheurl, "pycurl": True})
        result = ufc.upload(sandboxFilename, excludeList = NEW_USER_SANDBOX_EXCLUSIONS)
        if 'hashkey' not in result:
            self.logger.error("Failed to upload source files: %s" % str(result))
            raise CachefileNotFoundException

        configArguments['cacheurl'] = filecacheurl
        configArguments['cachefilename'] = "%s.tar.gz" % str(result['hashkey'])

        # Upload list of user-defined input files to process as the primary input
        userFilesList = getattr(self.config.Data, 'userInputFiles', None)
        if userFilesList:
            self.logger.debug("Attaching list of user-specified primary input files.")
            userFilesList = map(string.strip, userFilesList)
            userFilesList = [file for file in userFilesList if file]
            if len(userFilesList) != len(set(userFilesList)):
                msg  = "%sWarning%s:" % (colors.RED, colors.NORMAL)
                msg += " CRAB configuration parameter Data.userInputFiles contains duplicated entries."
                msg += " Duplicated entries will be removed."
                self.logger.warning(msg)
            configArguments['userfiles'] = set(userFilesList)
            configArguments['primarydataset'] = getattr(self.config.Data, 'outputPrimaryDataset', 'CRAB_UserFiles')

        lumi_mask_name = getattr(self.config.Data, 'lumiMask', None)
        lumi_list = None
        if lumi_mask_name:
            self.logger.debug("Attaching lumi mask %s to the request" % (lumi_mask_name))
            try:
                lumi_list = getLumiList(lumi_mask_name, logger = self.logger)
            except ValueError as ex:
                msg  = "%sError%s:" % (colors.RED, colors.NORMAL)
                msg += " Failed to load lumi mask %s : %s" % (lumi_mask_name, ex)
                raise ConfigurationException(msg)
        run_ranges = getattr(self.config.Data, 'runRange', None)
        if run_ranges:
            run_ranges_is_valid = re.match('^\d+((?!(-\d+-))(\,|\-)\d+)*$', run_ranges)
            if run_ranges_is_valid:
                run_list = getRunList(run_ranges)
                if lumi_list:
                    lumi_list.selectRuns(run_list)
                    if not lumi_list:
                        msg = "Invalid CRAB configuration: The intersection between the lumi mask and the run range is null."
                        raise ConfigurationException(msg)
                else:
                    if len(run_list) > 50000:
                        msg  = "CRAB configuration parameter Data.runRange includes %s runs." % str(len(run_list))
                        msg += " When Data.lumiMask is not specified, Data.runRange can not include more than 50000 runs."
                        raise ConfigurationException(msg)
                    lumi_list = LumiList(runs = run_list)
            else:
                msg = "Invalid CRAB configuration: Parameter Data.runRange should be a comma separated list of integers or (inclusive) ranges. Example: '12345,99900-99910'"
                raise ConfigurationException(msg)
        if lumi_list:
            configArguments['runs'] = lumi_list.getRuns()
            ## For each run we encode the lumis as a string representing a list of integers: [[1,2],[5,5]] ==> '1,2,5,5'
            lumi_mask = lumi_list.getCompactList()
            configArguments['lumis'] = [str(reduce(lambda x,y: x+y, lumi_mask[run]))[1:-1].replace(' ','') for run in configArguments['runs']]

        configArguments['jobtype'] = 'Analysis'

        return sandboxFilename, configArguments
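
The densest line above turns each run's lumi ranges into a flat comma-separated string: reduce concatenates the [first, last] pairs into one list, str() renders it, and the slicing/replace strip the brackets and spaces, so [[1, 2], [5, 5]] becomes '1,2,5,5'. The same transformation spelled out step by step, handy for checking the behaviour, is sketched here.

    def encodeLumiRanges(ranges):
        """Flatten [[1, 2], [5, 5]] into the string '1,2,5,5'.

        Equivalent to the reduce/str/replace one-liner in the example, just
        spelled out step by step.
        """
        flat = []
        for first, last in ranges:
            flat.extend([first, last])
        return ','.join(str(x) for x in flat)

    # encodeLumiRanges([[1, 2], [5, 5]]) -> '1,2,5,5'
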
Example #13
    def __call__(self):
        # Get all of the columns from the database for a certain task
        taskname = self.cachedinfo['RequestName']
        uri = self.getUrl(self.instance, resource='task')
        serverFactory = CRABClient.Emulator.getEmulator('rest')
        server = serverFactory(self.serverurl,
                               self.proxyfilename,
                               self.proxyfilename,
                               version=__version__)
        crabDBInfo, _, _ = server.get(uri,
                                      data={
                                          'subresource': 'search',
                                          'workflow': taskname
                                      })
        self.logger.debug("Got information from server oracle database: %s",
                          crabDBInfo)

        user = self.getColumn(crabDBInfo, 'tm_username')
        webdir = self.getColumn(crabDBInfo, 'tm_user_webdir')
        rootDagId = self.getColumn(
            crabDBInfo, 'clusterid')  #that's the condor id from the TW

        #Print information from the database
        self.printTaskInfo(crabDBInfo, user)
        if not rootDagId:
            self.logger.debug(
                "The task has not been submitted to the Grid scheduler yet. Not printing job information."
            )
            return crabDBInfo, None

        self.logger.debug(
            "The CRAB server submitted your task to the Grid scheduler (cluster ID: %s)"
            % rootDagId)

        if not webdir:
            # if the dag has been submitted but the webdir is not there yet, we have to wait
            # for AdjustSites to run and upload the webdir location to the server
            self.logger.info(
                "Waiting for the Grid scheduler to bootstrap your task")
            self.logger.debug("Schedd has not reported back the webdir (yet)")
            return crabDBInfo, None

        self.logger.debug("Webdir is located at %s", webdir)
        # Download status_cache file
        self.logger.debug("Retrieving 'status_cache' file from webdir")
        url = webdir + '/' + "status_cache"

        statusCacheInfo = None
        try:
            statusCacheFilename = getFileFromURL(
                url, proxyfilename=self.proxyfilename)
        except ClientException as ce:
            self.logger.info(
                "Waiting for the Grid scheduler to report back the status of your task"
            )
            self.logger.debug(
                "Cannot retrieve the status_cache file. Maybe the task process has not run yet?"
            )
            self.logger.debug("Got: %s" % ce)
            return crabDBInfo, None
        else:
            with open(statusCacheFilename) as fd:
                # Skip first line of the file (it contains info for the caching script) and load job_report summary
                fd.readline()
                statusCacheInfo = literal_eval(fd.readline())
            self.logger.debug("Got information from status cache file: %s",
                              statusCacheInfo)

        self.printDAGStatus(crabDBInfo, statusCacheInfo)

        shortResult = self.printShort(statusCacheInfo)
        self.printErrors(statusCacheInfo)
        if self.options.summary:
            self.printSummary(statusCacheInfo)
        if self.options.long or self.options.sort:
            sortdict = self.printLong(statusCacheInfo,
                                      quiet=(not self.options.long))
            if self.options.sort:
                self.printSort(sortdict, self.options.sort)
        if self.options.json:
            self.logger.info(json.dumps(statusCacheInfo))

        return crabDBInfo, shortResult