Example No. 1
def fixupTask(task):
    """ Fixup some values obtained by the query. """

    result = task._asdict()

    #fixup timestamps
    for field in ['tm_start_time', 'tm_start_injection', 'tm_end_injection']:
        current = result[field]
        result[field] = str(getEpochFromDBTime(current)) if current else ''

    #fixup CLOB values by calling read() (only needed for Oracle)
    for field in [
            'tm_task_failure', 'tm_split_args', 'tm_outfiles',
            'tm_tfile_outfiles', 'tm_edm_outfiles', 'panda_resubmitted_jobs',
            'tm_arguments', 'tm_scriptargs', 'tm_user_files'
    ]:
        current = result[field]
        fixedCurr = current if (
            current is None or isinstance(current, str)) else current.read()
        result[field] = fixedCurr

    #literal_eval values
    for field in [
            'tm_site_whitelist', 'tm_site_blacklist', 'tm_split_args',
            'tm_outfiles', 'tm_tfile_outfiles', 'tm_edm_outfiles',
            'panda_resubmitted_jobs', 'tm_user_infiles', 'tm_arguments',
            'tm_scriptargs', 'tm_user_files'
    ]:
        current = result[field]
        result[field] = literal_eval(current)

    #convert tm_arguments to the desired values
    extraargs = result['tm_arguments']
    result['resubmit_publication'] = extraargs.get('resubmit_publication')
    result['resubmit_jobids'] = extraargs.get('resubmit_jobids')
    if result['resubmit_jobids'] is None and 'resubmitList' in extraargs:  ## For backward compatibility only.
        result['resubmit_jobids'] = extraargs['resubmitList']
    result['resubmit_site_whitelist'] = extraargs.get('site_whitelist')
    if result['resubmit_site_whitelist'] is None and 'siteWhiteList' in extraargs:  ## For backward compatibility only.
        result['resubmit_site_whitelist'] = extraargs['siteWhiteList']
    result['resubmit_site_blacklist'] = extraargs.get('site_blacklist')
    if result['resubmit_site_blacklist'] is None and 'siteBlackList' in extraargs:  ## For backward compatibility only.
        result['resubmit_site_blacklist'] = extraargs['siteBlackList']
    result['resubmit_maxjobruntime'] = extraargs.get('maxjobruntime')
    result['resubmit_maxmemory'] = extraargs.get('maxmemory')
    result['resubmit_numcores'] = extraargs.get('numcores')
    result['resubmit_priority'] = extraargs.get('priority')

    return result
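
A minimal usage sketch (not from the original source; the row class and stored
strings below are invented for illustration): the query result behaves like a
namedtuple whose text columns hold the repr of Python literals, which
literal_eval turns back into lists and dicts exactly as fixupTask does.

# Hypothetical illustration only: a namedtuple stands in for the real DB row.
from ast import literal_eval
from collections import namedtuple

TaskRow = namedtuple('TaskRow', ['tm_arguments'])
row = TaskRow(tm_arguments="{'resubmit_jobids': ['1', '3'], 'maxmemory': 2500}")

extraargs = literal_eval(row._asdict()['tm_arguments'])
print(extraargs.get('resubmit_jobids'))  # ['1', '3']
print(extraargs.get('priority'))         # None, as fixupTask would record it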
Example No. 2
    def makeStatusReturnDict(self,
                             crabDBInfo,
                             combinedStatus,
                             dagStatus='',
                             statusFailureMsg='',
                             shortResult=None,
                             statusCacheInfo=None,
                             pubStatus=None,
                             proxiedWebDir=''):
        """ Create a dictionary which is mostly identical to the dictionary
            that was being returned by the old status (plus a few other keys
            needed by the other client commands). This is to ensure backward
            compatibility after the status2 transition for users relying on
            this dictionary in their scripts.
        """

        statusDict = {}
        statusDict['status'] = combinedStatus
        statusDict['dbStatus'] = getColumn(crabDBInfo, 'tm_task_status')
        statusDict['dagStatus'] = dagStatus
        statusDict['username'] = getColumn(crabDBInfo, 'tm_username')
        statusDict['taskFailureMsg'] = getColumn(crabDBInfo, 'tm_task_failure')
        statusDict['taskWarningMsg'] = getColumn(crabDBInfo,
                                                 'tm_task_warnings')
        statusDict['outdatasets'] = getColumn(crabDBInfo, 'tm_output_dataset')
        statusDict['schedd'] = getColumn(crabDBInfo, 'tm_schedd')
        statusDict['collector'] = getColumn(crabDBInfo, 'tm_collector')
        statusDict['ASOURL'] = getColumn(crabDBInfo, 'tm_asourl')
        statusDict['command'] = getColumn(crabDBInfo, 'tm_task_command')
        statusDict['publicationEnabled'] = getColumn(crabDBInfo,
                                                     'tm_publication') == 'T'
        statusDict['userWebDirURL'] = getColumn(crabDBInfo, 'tm_user_webdir')
        statusDict['inputDataset'] = getColumn(crabDBInfo, 'tm_input_dataset')

        dbStartTime = getColumn(crabDBInfo, 'tm_start_time')
        statusDict['submissionTime'] = getEpochFromDBTime(
            datetime.strptime(dbStartTime, '%Y-%m-%d %H:%M:%S.%f'))

        statusDict['statusFailureMsg'] = statusFailureMsg
        statusDict['proxiedWebDir'] = proxiedWebDir
        statusDict['jobsPerStatus'] = shortResult.get('jobsPerStatus', {})
        statusDict['jobList'] = shortResult.get('jobList', {})
        statusDict['publication'] = pubStatus.get('status', {})
        statusDict['publicationFailures'] = pubStatus.get(
            'failure_reasons', {})
        statusDict['jobs'] = statusCacheInfo
        return statusDict
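
For reference, a hypothetical stand-in for the getColumn helper used above (the
real helper lives elsewhere in the client utilities; this sketch only assumes
crabDBInfo is an iterable of (column, value) pairs):

# Hypothetical sketch, not the real helper: look a column up by name.
def getColumn(dictresult, columnName):
    for name, value in dictresult:
        if name == columnName:
            return value
    return None

crabDBInfo = [('tm_task_status', 'SUBMITTED'), ('tm_publication', 'T')]
assert getColumn(crabDBInfo, 'tm_publication') == 'T'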
Example No. 3
def fixupTask(task):
    """ Fixup some values obtained by the query. """

    result = task._asdict()

    #fixup timestamps
    for field in ['tm_start_time', 'tm_start_injection', 'tm_end_injection']:
        current = result[field]
        result[field] = str(getEpochFromDBTime(current)) if current else ''

    #fixup CLOB values by calling read() (only needed for Oracle)
    for field in ['tm_task_failure', 'tm_split_args', 'tm_outfiles', 'tm_tfile_outfiles', 'tm_edm_outfiles',
                  'panda_resubmitted_jobs', 'tm_arguments', 'tm_scriptargs', 'tm_user_files']:
        current = result[field]
        fixedCurr = current if (current is None or isinstance(current, str)) else current.read()
        result[field] = fixedCurr

    #literal_eval values
    for field in ['tm_site_whitelist', 'tm_site_blacklist', 'tm_split_args', 'tm_outfiles', 'tm_tfile_outfiles',
                  'tm_edm_outfiles', 'panda_resubmitted_jobs', 'tm_user_infiles', 'tm_arguments', 'tm_scriptargs',
                  'tm_user_files']:
        current = result[field]
        result[field] = literal_eval(current)

    #convert tm_arguments to the desired values
    extraargs = result['tm_arguments']
    result['resubmit_publication'] = extraargs.get('resubmit_publication')
    result['resubmit_jobids'] = extraargs.get('resubmit_jobids')
    if result['resubmit_jobids'] is None and 'resubmitList' in extraargs: ## For backward compatibility only.
        result['resubmit_jobids'] = extraargs['resubmitList']
    result['resubmit_site_whitelist'] = extraargs.get('site_whitelist')
    if result['resubmit_site_whitelist'] is None and 'siteWhiteList' in extraargs: ## For backward compatibility only.
        result['resubmit_site_whitelist'] = extraargs['siteWhiteList']
    result['resubmit_site_blacklist'] = extraargs.get('site_blacklist')
    if result['resubmit_site_blacklist'] is None and 'siteBlackList' in extraargs: ## For backward compatibility only.
        result['resubmit_site_blacklist'] = extraargs['siteBlackList']
    result['resubmit_maxjobruntime'] = extraargs.get('maxjobruntime')
    result['resubmit_maxmemory'] = extraargs.get('maxmemory')
    result['resubmit_numcores'] = extraargs.get('numcores')
    result['resubmit_priority'] = extraargs.get('priority')
    result['kill_ids'] = extraargs.get('killList', [])
    result['kill_all'] = extraargs.get('killAll', False)

    return result
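
The repeated new-key/legacy-key lookups above all follow one pattern; here is a
small sketch of it (compat_get is an invented helper, not part of the original
code):

# Invented helper for illustration: prefer the current key, fall back to the
# legacy spelling, else return the default.
def compat_get(extraargs, new_key, legacy_key, default=None):
    if new_key in extraargs:
        return extraargs[new_key]
    return extraargs.get(legacy_key, default)

extraargs = {'siteWhiteList': ['T2_CH_CERN']}  # legacy spelling only
print(compat_get(extraargs, 'site_whitelist', 'siteWhiteList'))  # ['T2_CH_CERN']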
Example No. 4
    def resubmit2(self, workflow, publication, jobids, siteblacklist, sitewhitelist, maxjobruntime, maxmemory,
                  numcores, priority, userproxy):
        """Request to reprocess what the workflow hasn't finished to reprocess.
           This needs to create a new workflow in the same campaign
        """
        retmsg = "ok"
        self.logger.info("Getting task ID tuple from DB for task %s", workflow)
        row = self.api.query(None, None, self.Task.ID_sql, taskname = workflow)
        try:
            #just one row is picked up by the previous query
            row = self.Task.ID_tuple(*next(row))
        except StopIteration:
            raise ExecutionError("Impossible to find task %s in the database." % workflow)

        submissionTime = getEpochFromDBTime(row.start_time)

        self.logger.info("Checking if resubmission is possible: we don't allow resubmission %s days before task expiration date", NUM_DAYS_FOR_RESUBMITDRAIN)
        retmsg = checkTaskLifetime(submissionTime)
        if retmsg != "ok":
            return [{'result': retmsg}]

        task_status = row.task_status
        task_splitting = row.split_algo

        resubmitWhat = "publications" if publication else "jobs"
        self.logger.info("About to resubmit %s for workflow: %s.", resubmitWhat, workflow)

        ## Ignore the following options if this is a publication resubmission or if the
        ## task was never submitted.
        if publication or task_status == 'SUBMITFAILED':
            jobids = None
            siteblacklist, sitewhitelist, maxjobruntime, maxmemory, numcores, priority = None, None, None, None, None, None

        # We only allow resubmission of tasks that are in a final state, listed here:
        allowedTaskStates = ['SUBMITTED', 'KILLED', 'KILLFAILED', 'RESUBMITFAILED', 'FAILED']

        # Do not resubmit publication for tasks that were not submitted since they don't have any output.
        if not publication:
            allowedTaskStates += ['SUBMITFAILED'] #NB submitfailed goes to NEW, not RESUBMIT
        ## If the task status is not an allowed one, fail the resubmission.
        if task_status not in allowedTaskStates:
            msg = "You cannot resubmit %s if the task is in status %s." % (resubmitWhat, task_status)
            raise ExecutionError(msg)

        if task_status == 'KILLED' and task_splitting == 'Automatic':
            msg = "You cannot resubmit {0} if the task is in status {1} and uses automatic splitting.".format(resubmitWhat, task_status)
            raise ExecutionError(msg)

        if task_status != 'SUBMITFAILED':
            if publication:
                ## Retrieve publication information.
                publicationEnabled = row.publication
                asourl = row.asourl
                asodb = row.asodb
                username = row.username
                publicationInfo = self.publicationStatusWrapper(workflow, asourl, asodb, username, publicationEnabled)

                if 'status' not in publicationInfo:
                    msg  = "Cannot resubmit publication."
                    msg += " Unable to retrieve the publication status."
                    raise ExecutionError(msg)
                if 'disabled' in publicationInfo:
                    msg  = "Cannot resubmit publication."
                    msg += " Publication was disabled in the CRAB configuration."
                    raise ExecutionError(msg)
                if 'error' in publicationInfo:
                    msg  = "Cannot resubmit publication."
                    msg += " Error in publication status: %s" % (publicationInfo['error'])
                    raise ExecutionError(msg)
                if isCouchDBURL(asourl) and publicationInfo['status'].get('publication_failed', 0) == 0:
                    msg = "There are no failed publications to resubmit."
                    raise ExecutionError(msg)
                ## Here we can add a check on the publication status of the documents
                ## corresponding to the job ids in resubmitjobids and jobids. So far the
                ## publication resubmission will resubmit all the failed publications.
                self.resubmitPublication(asourl, asodb, userproxy, workflow)
                return [{'result': retmsg}]
            else:
                self.logger.info("Jobs to resubmit: %s", jobids)

            ## If these parameters are not set, give them the same values they had in the
            ## original task submission.
            if any(p is None for p in (siteblacklist, sitewhitelist, maxjobruntime, maxmemory, numcores, priority)):
                ## origValues = [orig_siteblacklist, orig_sitewhitelist, orig_maxjobruntime, orig_maxmemory, orig_numcores, orig_priority]
                origValues = next(self.api.query(None, None, self.Task.GetResubmitParams_sql, taskname = workflow))
                if siteblacklist is None:
                    siteblacklist = literal_eval(origValues[0])
                if sitewhitelist is None:
                    sitewhitelist = literal_eval(origValues[1])
                if maxjobruntime is None:
                    maxjobruntime = origValues[2]
                if maxmemory is None:
                    maxmemory = origValues[3]
                if numcores is None:
                    numcores = origValues[4]
                if priority is None:
                    priority = origValues[5]
            ## These are the parameters that we want to write down in the 'tm_arguments'
            ## column of the Tasks DB each time a resubmission is done.
            ## DagmanResubmitter will read these parameters and write them into the task ad.
            arguments = {'resubmit_jobids' : jobids,
                         'site_blacklist'  : siteblacklist,
                         'site_whitelist'  : sitewhitelist,
                         'maxjobruntime'   : maxjobruntime,
                         'maxmemory'       : maxmemory,
                         'numcores'        : numcores,
                         'priority'        : priority,
                         'resubmit_publication' : publication
                        }
            ## Change the 'tm_arguments' column of the Tasks DB for this task to contain the
            ## above parameters.
            self.api.modify(self.Task.SetArgumentsTask_sql, taskname = [workflow], arguments = [str(arguments)])

        ## Change the status of the task in the Tasks DB to RESUBMIT (or NEW).
        if task_status == 'SUBMITFAILED':
            newstate = ["NEW"]
            newcommand = ["SUBMIT"]
        else:
            newstate = ["NEW"]
            newcommand = ["RESUBMIT"]
        self.api.modify(self.Task.SetStatusTask_sql, status = newstate, command = newcommand, taskname = [workflow])
        return [{'result': retmsg}]
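
The tm_arguments round trip implied here can be checked in isolation; a minimal
sketch (the values are invented): resubmit2 serialises the arguments dict with
str() before writing it to the Tasks DB, and fixupTask recovers it with
ast.literal_eval.

from ast import literal_eval

arguments = {'resubmit_jobids': ['2', '5'], 'maxmemory': 2000,
             'resubmit_publication': False}
stored = str(arguments)                    # what SetArgumentsTask_sql writes
assert literal_eval(stored) == arguments   # what fixupTask reads back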
Example No. 5
    def resubmit2(self, workflow, publication, jobids, siteblacklist, sitewhitelist, maxjobruntime, maxmemory,
                  numcores, priority, userproxy):
        """Request to reprocess what the workflow hasn't finished to reprocess.
           This needs to create a new workflow in the same campaign
        """
        retmsg = "ok"
        self.logger.info("Getting task ID tuple from DB for task %s" % workflow)
        row = self.api.query(None, None, self.Task.ID_sql, taskname = workflow)
        try:
            #just one row is picked up by the previous query
            row = self.Task.ID_tuple(*next(row))
        except StopIteration:
            raise ExecutionError("Impossible to find task %s in the database." % workflow)

        submissionTime = getEpochFromDBTime(row.start_time)

        self.logger.info("Checking if resubmission is possible: we don't allow resubmission %s days before task expiration date", NUM_DAYS_FOR_RESUBMITDRAIN)
        retmsg = self.checkTaskLifetime(submissionTime)
        if retmsg != "ok":
            return [{'result': retmsg}]

        task_status = row.task_status

        resubmitWhat = "publications" if publication else "jobs"
        self.logger.info("About to resubmit %s for workflow: %s." % (resubmitWhat, workflow))

        ## Ignore the following options if this is a publication resubmission or if the
        ## task was never submitted.
        if publication or task_status == 'SUBMITFAILED':
            jobids = None
            siteblacklist, sitewhitelist, maxjobruntime, maxmemory, numcores, priority = None, None, None, None, None, None

        # We only allow resubmission of tasks that are in a final state, listed here:
        allowedTaskStates = ['SUBMITTED', 'KILLED', 'KILLFAILED', 'RESUBMITFAILED', 'FAILED']

        # Do not resubmit publication for tasks that were not submitted since they don't have any output.
        if not publication:
            allowedTaskStates += ['SUBMITFAILED'] #NB submitfailed goes to NEW, not RESUBMIT
        ## If the task status is not an allowed one, fail the resubmission.
        if task_status not in allowedTaskStates:
            msg = "You cannot resubmit %s if the task is in status %s." % (resubmitWhat, task_status)
            raise ExecutionError(msg)

        if task_status != 'SUBMITFAILED':
            if publication:
                ## Retrieve publication information.
                publicationEnabled = row.publication
                asourl = row.asourl
                asodb = row.asodb
                username = row.username
                publicationInfo = self.publicationStatusWrapper(workflow, asourl, asodb, username, publicationEnabled)

                if 'status' not in publicationInfo:
                    msg  = "Cannot resubmit publication."
                    msg += " Unable to retrieve the publication status."
                    raise ExecutionError(msg)
                if 'disabled' in publicationInfo:
                    msg  = "Cannot resubmit publication."
                    msg += " Publication was disabled in the CRAB configuration."
                    raise ExecutionError(msg)
                if 'error' in publicationInfo:
                    msg  = "Cannot resubmit publication."
                    msg += " Error in publication status: %s" % (publicationInfo['error'])
                    raise ExecutionError(msg)
                if publicationInfo['status'].get('publication_failed', 0) == 0:
                    msg = "There are no failed publications to resubmit."
                    raise ExecutionError(msg)
                ## Here we can add a check on the publication status of the documents
                ## corresponding to the job ids in resubmitjobids and jobids. So far the
                ## publication resubmission will resubmit all the failed publications.
                self.resubmitPublication(asourl, asodb, userproxy, workflow)
                return [{'result': retmsg}]
            else:
                self.logger.info("Jobs to resubmit: %s" % (jobids))

            ## If these parameters are not set, give them the same values they had in the
            ## original task submission.
            if any(p is None for p in (siteblacklist, sitewhitelist, maxjobruntime, maxmemory, numcores, priority)):
                ## origValues = [orig_siteblacklist, orig_sitewhitelist, orig_maxjobruntime, orig_maxmemory, orig_numcores, orig_priority]
                origValues = next(self.api.query(None, None, self.Task.GetResubmitParams_sql, taskname = workflow))
                if siteblacklist is None:
                    siteblacklist = literal_eval(origValues[0])
                if sitewhitelist is None:
                    sitewhitelist = literal_eval(origValues[1])
                if maxjobruntime is None:
                    maxjobruntime = origValues[2]
                if maxmemory is None:
                    maxmemory = origValues[3]
                if numcores is None:
                    numcores = origValues[4]
                if priority is None:
                    priority = origValues[5]
            ## These are the parameters that we want to write down in the 'tm_arguments'
            ## column of the Tasks DB each time a resubmission is done.
            ## DagmanResubmitter will read these parameters and write them into the task ad.
            arguments = {'resubmit_jobids' : jobids,
                         'site_blacklist'  : siteblacklist,
                         'site_whitelist'  : sitewhitelist,
                         'maxjobruntime'   : maxjobruntime,
                         'maxmemory'       : maxmemory,
                         'numcores'        : numcores,
                         'priority'        : priority,
                         'resubmit_publication' : publication
                        }
            ## Change the 'tm_arguments' column of the Tasks DB for this task to contain the
            ## above parameters.
            self.api.modify(self.Task.SetArgumentsTask_sql, taskname = [workflow], arguments = [str(arguments)])

        ## Change the status of the task in the Tasks DB to RESUBMIT (or NEW).
        if task_status == 'SUBMITFAILED':
            newstate = ["NEW"]
            newcommand = ["SUBMIT"]
        else:
            newstate = ["NEW"]
            newcommand = ["RESUBMIT"]
        self.api.modify(self.Task.SetStatusTask_sql, status = newstate, command = newcommand, taskname = [workflow])
        return [{'result': retmsg}]
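
A note on the logging idiom used above: passing the arguments to logger.info
instead of pre-formatting with % defers string construction until the record is
actually emitted. A minimal sketch (the logger name and task name are
invented):

import logging

logger = logging.getLogger('resubmit2-demo')
workflow = '230101_120000:user_task'
# Lazy formatting: the message is only built if this record is emitted.
logger.info("About to resubmit jobs for workflow: %s.", workflow)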
Example No. 6
def fixupTask(task):
    """ Fixup some values obtained by the query. """

    result = task._asdict()

    # fixup timestamps
    for field in ["tm_start_time", "tm_start_injection", "tm_end_injection"]:
        current = result[field]
        result[field] = str(getEpochFromDBTime(current)) if current else ""

    # fixup CLOB values by calling read() (only needed for Oracle)
    for field in [
        "tm_task_failure",
        "tm_split_args",
        "tm_outfiles",
        "tm_tfile_outfiles",
        "tm_edm_outfiles",
        "panda_resubmitted_jobs",
        "tm_arguments",
        "tm_scriptargs",
        "tm_user_files",
    ]:
        current = result[field]
        fixedCurr = current if (current is None or isinstance(current, str)) else current.read()
        result[field] = fixedCurr

    # literal_eval values
    for field in [
        "tm_site_whitelist",
        "tm_site_blacklist",
        "tm_split_args",
        "tm_outfiles",
        "tm_tfile_outfiles",
        "tm_edm_outfiles",
        "panda_resubmitted_jobs",
        "tm_user_infiles",
        "tm_arguments",
        "tm_scriptargs",
        "tm_user_files",
    ]:
        current = result[field]
        result[field] = literal_eval(current)

    # convert tm_arguments to the desired values
    extraargs = result["tm_arguments"]
    result["resubmit_publication"] = extraargs.get("resubmit_publication")
    result["resubmit_jobids"] = extraargs.get("resubmit_jobids")
    if result["resubmit_jobids"] is None and "resubmitList" in extraargs:  ## For backward compatibility only.
        result["resubmit_jobids"] = extraargs["resubmitList"]
    result["resubmit_site_whitelist"] = extraargs.get("site_whitelist")
    if result["resubmit_site_whitelist"] is None and "siteWhiteList" in extraargs:  ## For backward compatibility only.
        result["resubmit_site_whitelist"] = extraargs["siteWhiteList"]
    result["resubmit_site_blacklist"] = extraargs.get("site_blacklist")
    if result["resubmit_site_blacklist"] is None and "siteBlackList" in extraargs:  ## For backward compatibility only.
        result["resubmit_site_blacklist"] = extraargs["siteBlackList"]
    result["resubmit_maxjobruntime"] = extraargs.get("maxjobruntime")
    result["resubmit_maxmemory"] = extraargs.get("maxmemory")
    result["resubmit_numcores"] = extraargs.get("numcores")
    result["resubmit_priority"] = extraargs.get("priority")
    result["kill_ids"] = extraargs.get("killList", [])

    return result
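
A minimal sketch of the CLOB handling in this function, using an invented
FakeClob class in place of Oracle's LOB objects (which expose a read() method):

class FakeClob:
    def __init__(self, text):
        self._text = text

    def read(self):
        return self._text

current = FakeClob("['file1.root', 'file2.root']")
fixed = current if (current is None or isinstance(current, str)) else current.read()
print(fixed)  # "['file1.root', 'file2.root']"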
Example No. 7
    def status(self, workflow, userdn, userproxy=None):
        """Retrieve the status of the workflow.

           :arg str workflow: a valid workflow name
           :return: a workflow status summary document"""

        #Empty results
        result = {
            "status": '',            #from the db
            "command": '',           #from the db
            "taskFailureMsg": '',    #from the db
            "taskWarningMsg": [],    #from the db
            "submissionTime": 0,     #from the db
            "statusFailureMsg": '',  #errors of the status itself
            "jobList": [],
            "schedd": '',            #from the db
            "splitting": '',         #from the db
            "taskWorker": '',        #from the db
            "webdirPath": '',        #from the db
            "username": ''           #from the db
        }

        # First, verify the task has been submitted by the backend.
        self.logger.info("Got status request for workflow %s" % workflow)
        row = self.api.query(None, None, self.Task.ID_sql, taskname=workflow)
        try:
            #just one row is picked up by the previous query
            row = self.Task.ID_tuple(*next(row))
        except StopIteration:
            raise ExecutionError(
                "Impossible to find task %s in the database." % workflow)

        result['submissionTime'] = getEpochFromDBTime(row.start_time)
        if row.task_command:
            result['command'] = row.task_command

        ## Add scheduler and collector to the result dictionary.
        if row.username:
            result['username'] = row.username
        if row.user_webdir:
            result['webdirPath'] = '/'.join(['/home/grid'] +
                                            row.user_webdir.split('/')[-2:])
        if row.schedd:
            result['schedd'] = row.schedd
        if row.twname:
            result['taskWorker'] = row.twname
        if row.split_algo:
            result['splitting'] = row.split_algo

        self.asoDBURL = row.asourl

        # 0 - simple crab status
        # 1 - crab status -long
        # 2 - crab status -idle
        self.logger.info("Status result for workflow %s: %s " %
                         (workflow, row.task_status))

        ## Apply taskWarning flag to output.
        taskWarnings = literal_eval(row.task_warnings if isinstance(
            row.task_warnings, str) else row.task_warnings.read())
        result["taskWarningMsg"] = taskWarnings

        ## Helper function to add the task status and the failure message (both as taken
        ## from the Task DB) to the result dictionary.
        def addStatusAndFailureFromDB(result, row):
            result['status'] = row.task_status
            if row.task_failure is not None:
                if isinstance(row.task_failure, str):
                    result['taskFailureMsg'] = row.task_failure
                else:
                    result['taskFailureMsg'] = row.task_failure.read()

        ## Helper function to add a failure message in retrieving the task/jobs status
        ## (and eventually a task status if there was none) to the result dictionary.
        def addStatusAndFailure(result, status, failure=None):
            if not result['status']:
                result['status'] = status
            if failure:
                #if not result['statusFailureMsg']:
                result['statusFailureMsg'] = failure
                #else:
                #    result['statusFailureMsg'] += "\n%s" % (failure)

        #get rid of this? If there is a clusterid we go ahead and get jobs info, otherwise we return result
        self.logger.debug("Cluster id: %s" % row.clusterid)
        if row.task_status in [
                'NEW', 'HOLDING', 'UPLOADED', 'SUBMITFAILED', 'KILLFAILED',
                'RESUBMITFAILED', 'FAILED'
        ]:
            addStatusAndFailureFromDB(result, row)
            if row.task_status in [
                    'NEW', 'UPLOADED', 'SUBMITFAILED'
            ] and row.task_command not in ['KILL', 'RESUBMIT']:
                self.logger.debug("Detailed result for workflow %s: %s\n" %
                                  (workflow, result))
                return [result]
        #even if we get rid of this, the two below should still be filled
        #    "taskFailureMsg": '', #from the db
        #    "taskWarningMsg": [], #from the db

        #here we know we have a clusterid. But what if webdir is not there? return setting a proper statusFailureMsg
        #Now what to do:
        #    get node_state/job_log from the schedd. Needs Justas patch (is it ok?)
        #    get error_report
        #    get aso_status (it is going to change once we are done with the oracle implementation)
        #    combine everything

        ## Here we start to retrieve the jobs statuses.
        jobsPerStatus = {}
        taskJobCount = 0
        taskStatusCode = 1  # default; refined below once the node state is read
        taskStatus = {}
        jobList = []
        results = []
        # task_codes are used if condor_q command is done to retrieve task status
        task_codes = {
            1: 'SUBMITTED',
            2: 'SUBMITTED',
            4: 'COMPLETED',
            5: 'KILLED'
        }
        # dagman_codes are used if task status retrieved using node_state file
        # 1 = STATUS_READY (Means that task was not yet started)
        # 2 = STATUS_PRERUN (Means that task is doing PRE run)
        # 3 = STATUS_SUBMITTED (Means that task is submitted)
        # 4 = STATUS_POSTRUN (Means that task in PostRun)
        # 5 = STATUS_DONE (Means that task is Done)
        # 6 = STATUS_ERROR (Means that task is Failed/Killed)
        dagman_codes = {
            1: 'SUBMITTED',
            2: 'SUBMITTED',
            3: 'SUBMITTED',
            4: 'SUBMITTED',
            5: 'COMPLETED',
            6: 'FAILED'
        }
        # User web directory is needed for getting files from scheduler.
        if not row.user_webdir:
            self.logger.error(
                "webdir not found in DB. Impossible to retrieve task status")
            addStatusAndFailure(result,
                                status='UNKNOWN',
                                failure='missing webdir info')
            return [result]
        else:
            self.logger.info(
                "Getting status for workflow %s using node state file.",
                workflow)
            try:
                taskStatus = self.taskWebStatus(
                    {'CRAB_UserWebDir': row.user_webdir}, result)
                #Check timestamp; if older than 2 minutes, warn about stale info
                nodeStateUpd = int(
                    taskStatus.get('DagStatus', {}).get("Timestamp", 0))
                DAGStatus = int(
                    taskStatus.get('DagStatus', {}).get('DagStatus', -1))
                epochTime = int(time.time())
                # If DAGStatus is 5 or 6, it means it is final state and node_state file will not be updated anymore
                # and there is no need to query schedd to get information about task.
                # If not, we check when the last time file was updated. It should update every 30s, which is set in
                # job classad:
                # https://github.com/dmwm/CRABServer/blob/5caac0d379f5e4522f026eeaf3621f7eb5ced98e/src/python/TaskWorker/Actions/DagmanCreator.py#L39
                if (nodeStateUpd > 0 and (epochTime - nodeStateUpd) < 120) or DAGStatus in [5, 6]:
                    self.logger.info("Node state is up to date, using it")
                    taskJobCount = int(
                        taskStatus.get('DagStatus', {}).get('NodesTotal', 0))
                    self.logger.info(taskStatus)
                    if row.task_status in [
                            'QUEUED', 'KILLED', 'KILLFAILED', 'RESUBMITFAILED',
                            'FAILED'
                    ]:
                        result['status'] = row.task_status
                    else:
                        result['status'] = dagman_codes.get(
                            DAGStatus, row.task_status)
                    # make sure taskStatusCode is defined
                    if result['status'] in ['KILLED', 'KILLFAILED']:
                        taskStatusCode = 5
                    else:
                        taskStatusCode = 1
                else:
                    self.logger.info(
                        "Node state file is too old or does not have an update time. Stale info is shown"
                    )
            except Exception as ee:
                addStatusAndFailure(result, status='UNKNOWN', failure=str(ee))
                return [result]

        if 'DagStatus' in taskStatus:
            del taskStatus['DagStatus']

        for i in range(1, taskJobCount + 1):
            i = str(i)
            if i not in taskStatus:
                if taskStatusCode == 5:
                    taskStatus[i] = {'State': 'killed'}
                else:
                    taskStatus[i] = {'State': 'unsubmitted'}

        for job, info in taskStatus.items():
            status = info['State']
            jobsPerStatus.setdefault(status, 0)
            jobsPerStatus[status] += 1
            jobList.append((status, job))
        result['jobList'] = jobList
        #result['jobs'] = taskStatus

        # NB: 'results' is initialised to an empty list and never filled above,
        # so this fallback can never trigger as written.
        if len(taskStatus) == 0 and results and results['JobStatus'] == 2:
            result['status'] = 'Running (jobs not submitted)'

        #Always returning ASOURL also, it is required for kill, resubmit
        self.logger.info("ASO: %s" % row.asourl)
        result['ASOURL'] = row.asourl

        ## Retrieve publication information.
        publicationInfo = {}
        if (row.publication == 'T' and 'finished' in jobsPerStatus):
            #let's default asodb to asynctransfer, for old task this is empty!
            asodb = row.asodb or 'asynctransfer'
            publicationInfo = self.publicationStatus(workflow, row.asourl,
                                                     asodb, row.username)
            self.logger.info("Publication status for workflow %s done",
                             workflow)
        elif (row.publication == 'F'):
            publicationInfo['status'] = {'disabled': []}
        else:
            self.logger.info(
                "No files to publish: Publish flag %s, files transferred: %s",
                row.publication, jobsPerStatus.get('finished', 0))
        result['publication'] = publicationInfo.get('status', {})
        result['publicationFailures'] = publicationInfo.get(
            'failure_reasons', {})

        ## The output datasets are written into the Task DB by the post-job
        ## when uploading the output files metadata.
        outdatasets = literal_eval(
            row.output_dataset.read() if row.output_dataset else 'None')
        result['outdatasets'] = outdatasets

        return [result]
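
The per-job aggregation near the end of status() can be exercised on its own; a
minimal sketch with an invented taskStatus:

# taskStatus maps job id -> {'State': ...}; fold it into per-status counts and
# a (status, jobid) list, as the method above does.
taskStatus = {'1': {'State': 'finished'}, '2': {'State': 'running'},
              '3': {'State': 'finished'}}
jobsPerStatus, jobList = {}, []
for job, info in taskStatus.items():
    status = info['State']
    jobsPerStatus[status] = jobsPerStatus.get(status, 0) + 1
    jobList.append((status, job))
print(jobsPerStatus)  # {'finished': 2, 'running': 1}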