def _parse_submit_output(self, retval, stdout, stderr):
    """
    Parse the output of the submit command, as returned by executing the
    command returned by the _get_submit_command method.

    To be implemented by the plugin.

    Return a string with the job ID.
    """
    if retval != 0:
        self.logger.error("Error in _parse_submit_output: retval={}; "
                          "stdout={}; stderr={}".format(
                              retval, stdout, stderr))
        raise SchedulerError("Error during submission, retval={}\n"
                             "stdout={}\nstderr={}".format(
                                 retval, stdout, stderr))

    try:
        transport_string = " for {}".format(self.transport)
    except SchedulerError:
        # self.transport raises if no transport has been set
        transport_string = ""

    if stderr.strip():
        self.logger.warning("in _parse_submit_output{}: "
                            "there was some text in stderr: {}".format(
                                transport_string, stderr))

    try:
        return stdout.strip().split('Job <')[1].split('>')[0]
    except IndexError:
        raise SchedulerParsingError(
            "Cannot parse submission output: {}".format(stdout))
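
# For reference, a minimal sketch of the submission output this parser
# expects (hypothetical values; the exact wording depends on the scheduler
# version):
#
#   stdout = "Job <12345> is submitted to queue <normal>.\n"
#   stdout.strip().split('Job <')[1].split('>')[0]   # -> '12345'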

def _parse_joblist_output(self, retval, stdout, stderr):
    """
    Parse the queue output string, as returned by executing the command
    returned by the _get_joblist_command method (qstat -f).

    Return a list of JobInfo objects, one for each job, with the relevant
    parameters filled in.

    Note: depending on the scheduler configuration, finished jobs may or
    may not appear here. This function will only return one element for
    each job found in the qstat output; jobs that are missing (for
    whatever reason) simply will not appear here.
    """
    # retval should normally be zero; however, I do not raise on a
    # non-zero retval, because if I pass a list of jobs and one of them
    # is not in the queue anymore, qstat returns a non-zero exit status.

    # Issue a warning if there is any stderr output, but strip lines
    # containing "Unknown Job Id", which also appear when asking for a
    # calculation that has finished.
    #
    # I also strip "Job has finished", because this happens for those
    # schedulers configured to leave the job in the output of qstat for
    # some time after job completion.
    filtered_stderr = '\n'.join(
        l for l in stderr.split('\n')
        if "Unknown Job Id" not in l and "Job has finished" not in l)
    if filtered_stderr.strip():
        self.logger.warning("Warning in _parse_joblist_output, non-empty "
                            "(filtered) stderr='{}'".format(filtered_stderr))
        if retval != 0:
            raise SchedulerError(
                "Error during qstat parsing (_parse_joblist_output function)")

    jobdata_raw = []  # will contain the raw data parsed from qstat output

    # Get the raw data and split it in lines
    for line_num, l in enumerate(stdout.split('\n'), start=1):
        # Each new job stanza starts with the string 'Job Id:': create
        # a new item in the jobdata_raw list
        if l.startswith('Job Id:'):
            jobdata_raw.append({'id': l.split(':', 1)[1].strip(),
                                'lines': [],
                                # warning_lines_idx: indices of lines that
                                # start with neither a tab nor a space
                                'warning_lines_idx': []})
        else:
            if l.strip():
                # This is a non-empty line, therefore it is an attribute
                # of the last job found
                if not jobdata_raw:
                    # The list is still empty! This means that I found a
                    # non-empty line before finding the first 'Job Id:'
                    # string: it is an error. (This may happen only
                    # before the first job.)
                    raise SchedulerParsingError(
                        "I did not find the header for the first job")
                else:
                    if l.startswith(' '):
                        # If it starts with a space, it is a new field
                        jobdata_raw[-1]['lines'].append(l)
                    elif l.startswith('\t'):
                        # If a line starts with a TAB, it continues the
                        # previous field: append it to the previous
                        # string, stripping the TAB
                        if not jobdata_raw[-1]['lines']:
                            raise SchedulerParsingError(
                                "Line {} is the first line of the job, "
                                "but it starts with a TAB! ({})".format(
                                    line_num, l))
                        jobdata_raw[-1]['lines'][-1] += l[1:]
                    else:
                        # For some reason, the content of fields such as
                        # 'comment' and 'Variable_List' can contain
                        # newlines. As a workaround, append the line to
                        # the previous one and remember its index, so
                        # that the field can be flagged later.
                        jobdata_raw[-1]['lines'][-1] += "\n{}".format(l)
                        jobdata_raw[-1]['warning_lines_idx'].append(
                            len(jobdata_raw[-1]['lines']) - 1)

    # Create a dictionary for each job and parse the specific fields
    job_list = []
    for job in jobdata_raw:
        this_job = JobInfo()
        this_job.job_id = job['id']

        lines_without_equals_sign = [i for i in job['lines']
                                     if '=' not in i]

        # There are lines without an equals sign: this is bad
        if lines_without_equals_sign:
            # Should I only warn?
self.logger.error("There are lines without equals sign! {}" "".format(lines_without_equals_sign)) raise (SchedulerParsingError("There are lines without equals " "sign.")) raw_data = {i.split('=', 1)[0].strip().lower(): i.split('=', 1)[1].lstrip() for i in job['lines'] if '=' in i} ## I ignore the errors for the time being - this seems to be ## a problem if there are \n in the content of some variables? ## I consider this a workaround... #for line_with_warning in set(job['warning_lines_idx']): # if job['lines'][line_with_warning].split( # '=',1)[0].strip().lower() != "comment": # raise SchedulerParsingError( # "Wrong starting character in one of the lines " # "of job {}, and it's not a comment! ({})" # "".format(this_job.job_id, # job['lines'][line_with_warning])) problematic_fields = [] for line_with_warning in set(job['warning_lines_idx']): problematic_fields.append(job['lines'][line_with_warning].split( '=', 1)[0].strip().lower()) if problematic_fields: # These are the fields that contain unexpected newlines raw_data['warning_fields_with_newlines'] = problematic_fields # I believe that exit_status and terminating_signal cannot be # retrieved from the qstat -f output. # I wrap calls in try-except clauses to avoid errors if a field # is missing try: this_job.title = raw_data['job_name'] except KeyError: self.logger.debug("No 'job_name' field for job id " "{}".format(this_job.job_id)) try: this_job.annotation = raw_data['comment'] except KeyError: # Many jobs do not have a comment; I do not complain about it. pass #self.logger.debug("No 'comment' field for job id {}".format( # this_job.job_id)) try: job_state_string = raw_data['job_state'] try: this_job.job_state = self._map_status[job_state_string] except KeyError: self.logger.warning("Unrecognized job_state '{}' for job " "id {}".format(job_state_string, this_job.job_id)) this_job.job_state = job_states.UNDETERMINED except KeyError: self.logger.debug("No 'job_state' field for job id {}".format( this_job.job_id)) this_job.job_state = job_states.UNDETERMINED try: this_job.job_substate = raw_data['substate'] except KeyError: self.logger.debug("No 'substate' field for job id {}".format( this_job.job_id)) try: exec_hosts = raw_data['exec_host'].split('+') except KeyError: # No exec_host information found (it may be ok, if the job # is not running) pass else: # parse each host; syntax, from the man page: # hosta/J1+hostb/J2*P+... # where J1 and J2 are an index of the job # on the named host and P is the number of # processors allocated from that host to this job. # P does not appear if it is 1. try: exec_host_list = [] for exec_host in exec_hosts: node = MachineInfo() node.name, data = exec_host.split('/') data = data.split('*') if len(data) == 1: node.jobIndex = int(data[0]) node.num_cpus = 1 elif len(data) == 2: node.jobIndex = int(data[0]) node.num_cpus = int(data[1]) else: raise ValueError("Wrong number of pieces: {} " "instead of 1 or 2 in exec_hosts: " "{}".format(len(data), exec_hosts)) exec_host_list.append(node) this_job.allocated_machines = exec_host_list except Exception as e: self.logger.debug("Problem parsing the node names, I " "got Exception {} with message {}; " "exec_hosts was {}".format( str(type(e)), e.message, exec_hosts)) try: # I strip the part after the @: is this always ok? 
            this_job.job_owner = raw_data['job_owner'].split('@')[0]
        except KeyError:
            self.logger.debug("No 'job_owner' field for job id "
                              "{}".format(this_job.job_id))

        try:
            this_job.num_cpus = int(raw_data['resource_list.ncpus'])
            # TODO: understand if this is the correct field also for
            # multithreaded (OpenMP) jobs.
        except KeyError:
            self.logger.debug("No 'resource_list.ncpus' field for job id "
                              "{}".format(this_job.job_id))
        except ValueError:
            self.logger.warning("'resource_list.ncpus' is not an integer "
                                "({}) for job id {}!".format(
                                    raw_data['resource_list.ncpus'],
                                    this_job.job_id))

        try:
            this_job.num_mpiprocs = int(raw_data['resource_list.mpiprocs'])
            # TODO: understand if this is the correct field also for
            # multithreaded (OpenMP) jobs.
        except KeyError:
            self.logger.debug("No 'resource_list.mpiprocs' field for job "
                              "id {}".format(this_job.job_id))
        except ValueError:
            self.logger.warning("'resource_list.mpiprocs' is not an integer "
                                "({}) for job id {}!".format(
                                    raw_data['resource_list.mpiprocs'],
                                    this_job.job_id))

        try:
            this_job.num_machines = int(raw_data['resource_list.nodect'])
        except KeyError:
            self.logger.debug("No 'resource_list.nodect' field for job id "
                              "{}".format(this_job.job_id))
        except ValueError:
            self.logger.warning("'resource_list.nodect' is not an integer "
                                "({}) for job id {}!".format(
                                    raw_data['resource_list.nodect'],
                                    this_job.job_id))

        # Double check of redundant info
        if (this_job.allocated_machines is not None and
                this_job.num_machines is not None):
            if len(this_job.allocated_machines) != this_job.num_machines:
                self.logger.error("The length of the list of allocated "
                                  "nodes ({}) is different from the "
                                  "expected number of nodes ({})!".format(
                                      len(this_job.allocated_machines),
                                      this_job.num_machines))

        try:
            this_job.queue_name = raw_data['queue']
        except KeyError:
            self.logger.debug("No 'queue' field for job id "
                              "{}".format(this_job.job_id))

        try:
            this_job.requested_wallclock_time_seconds = self._convert_time(
                raw_data['resource_list.walltime'])
        except KeyError:
            self.logger.debug("No 'resource_list.walltime' field for "
                              "job id {}".format(this_job.job_id))
        except ValueError:
            self.logger.warning("Error parsing 'resource_list.walltime' "
                                "for job id {}".format(this_job.job_id))

        try:
            this_job.wallclock_time_seconds = self._convert_time(
                raw_data['resources_used.walltime'])
        except KeyError:
            # The job may not have started yet
            pass
        except ValueError:
            self.logger.warning("Error parsing 'resources_used.walltime' "
                                "for job id {}".format(this_job.job_id))

        try:
            this_job.cpu_time = self._convert_time(
                raw_data['resources_used.cput'])
        except KeyError:
            # The job may not have started yet
            pass
        except ValueError:
            self.logger.warning("Error parsing 'resources_used.cput' "
                                "for job id {}".format(this_job.job_id))

        # ctime: the time the job was created
        # mtime: the time the job was last modified, changed state,
        #        or changed locations
        # qtime: the time the job entered the current queue
        # stime: the time the job started execution
        # etime: the time the job became eligible to run, i.e. entered a
        #        queued state while residing in an execution queue
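        # For illustration only (hypothetical values): on a typical
        # Torque/PBSPro setup these fields look like
        #   ctime = Tue Apr  9 15:22:11 2013
        #   stime = Tue Apr  9 15:26:02 2013
        # and are converted to datetime objects by _parse_time_string.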
        try:
            this_job.submission_time = self._parse_time_string(
                raw_data['ctime'])
        except KeyError:
            self.logger.debug("No 'ctime' field for job id "
                              "{}".format(this_job.job_id))
        except ValueError:
            self.logger.warning("Error parsing 'ctime' for job id "
                                "{}".format(this_job.job_id))

        try:
            this_job.dispatch_time = self._parse_time_string(
                raw_data['stime'])
        except KeyError:
            # The job may not have been started yet
            pass
        except ValueError:
            self.logger.warning("Error parsing 'stime' for job id "
                                "{}".format(this_job.job_id))

        # TODO: see if we want to set also finish_time for finished jobs,
        # if there are any

        # Everything goes here anyway for debugging purposes
        this_job.raw_data = raw_data

        # I append to the list of jobs to return
        job_list.append(this_job)

    return job_list
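
# For reference, a shortened, hypothetical `qstat -f` stanza of the form
# the parser above expects; field lines start with spaces, continuation
# lines with a TAB:
#
#   Job Id: 12345.head.cluster
#       Job_Name = test_job
#       Job_Owner = user@head.cluster
#       job_state = R
#       queue = batch
#       Resource_List.nodect = 2
#       Resource_List.ncpus = 16
#       Resource_List.walltime = 24:00:00
#       exec_host = node01/0*8+node02/0*8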

def _parse_joblist_output(self, retval, stdout, stderr):
    """
    Parse the queue output string, as returned by executing the command
    returned by the _get_joblist_command method (qstat with XML output).

    Return a list of JobInfo objects, one for each job.
    """
    import xml.dom.minidom
    import xml.parsers.expat

    if retval != 0:
        self.logger.error("Error in _parse_joblist_output: retval={}; "
                          "stdout={}; stderr={}".format(
                              retval, stdout, stderr))
        raise SchedulerError("Error during joblist retrieval, "
                             "retval={}".format(retval))

    if stderr.strip():
        self.logger.warning("in _parse_joblist_output for {}: "
                            "there was some text in stderr: {}".format(
                                str(self.transport), stderr))

    if stdout:
        try:
            xmldata = xml.dom.minidom.parseString(stdout)
        except xml.parsers.expat.ExpatError:
            self.logger.error("in sge._parse_joblist_output: "
                              "xml parsing of stdout failed: "
                              "{}".format(stdout))
            raise SchedulerParsingError("Error during joblist retrieval: "
                                        "xml parsing of stdout failed")
    else:
        self.logger.error("Error in sge._parse_joblist_output: retval={}; "
                          "stdout={}; stderr={}".format(
                              retval, stdout, stderr))
        raise SchedulerError("Error during joblist retrieval: "
                             "no stdout produced")

    try:
        first_child = xmldata.firstChild
        second_childs = first_child.childNodes
        tag_names_sec = [elem.tagName for elem in second_childs
                         if elem.nodeType == 1]
        if 'queue_info' not in tag_names_sec:
            self.logger.error("Error in sge._parse_joblist_output: "
                              "no queue_info: {}".format(stdout))
            raise SchedulerError
        if 'job_info' not in tag_names_sec:
            self.logger.error("Error in sge._parse_joblist_output: "
                              "no job_info: {}".format(stdout))
            raise SchedulerError
    except SchedulerError:
        self.logger.error("Error in sge._parse_joblist_output: "
                          "stdout={}".format(stdout))
        raise SchedulerError("Error during xml processing of stdout: "
                             "there is no 'job_info' or no 'queue_info' "
                             "element, or there are no jobs!")
    except Exception:
        # If something else goes wrong while accessing firstChild,
        # childNodes, etc.
        self.logger.error("Error in sge._parse_joblist_output: "
                          "stdout={}".format(stdout))
        raise SchedulerError("Error during xml processing of stdout")

    jobs = list(first_child.getElementsByTagName('job_list'))

    joblist = []
    for job in jobs:
        this_job = JobInfo()
        # In case the user needs more information, the xml data for each
        # job is stored:
        this_job.raw_data = job.toxml()

        try:
            job_element = job.getElementsByTagName('JB_job_number').pop(0)
            element_child = job_element.childNodes.pop(0)
            this_job.job_id = str(element_child.data).strip()
            if not this_job.job_id:
                raise SchedulerError
        except SchedulerError:
            self.logger.error("Error in sge._parse_joblist_output: "
                              "no job id is given, stdout={}".format(stdout))
            raise SchedulerError("Error in sge._parse_joblist_output: "
                                 "no job id is given")
        except IndexError:
            self.logger.error("No 'job_number' given for job index {} in "
                              "the job list, stdout={}".format(
                                  jobs.index(job), stdout))
            raise IndexError("Error in sge._parse_joblist_output: "
                             "no job id is given")

        try:
            job_element = job.getElementsByTagName('state').pop(0)
            element_child = job_element.childNodes.pop(0)
            job_state_string = str(element_child.data).strip()
            try:
                this_job.job_state = _map_status_sge[job_state_string]
            except KeyError:
                self.logger.warning("Unrecognized job_state '{}' for job "
                                    "id {}".format(job_state_string,
                                                   this_job.job_id))
                this_job.job_state = job_states.UNDETERMINED
        except IndexError:
            self.logger.warning("No 'job_state' field for job id {} in "
                                "stdout={}".format(this_job.job_id, stdout))
            this_job.job_state = job_states.UNDETERMINED

        try:
            job_element = job.getElementsByTagName('JB_owner').pop(0)
            element_child = job_element.childNodes.pop(0)
            this_job.job_owner = str(element_child.data).strip()
        except IndexError:
            self.logger.warning("No 'job_owner' field for job "
                                "id {}".format(this_job.job_id))

        try:
            job_element = job.getElementsByTagName('JB_name').pop(0)
            element_child = job_element.childNodes.pop(0)
            this_job.title = str(element_child.data).strip()
        except IndexError:
            self.logger.warning("No 'title' field for job "
                                "id {}".format(this_job.job_id))

        try:
            job_element = job.getElementsByTagName('queue_name').pop(0)
            element_child = job_element.childNodes.pop(0)
            this_job.queue_name = str(element_child.data).strip()
        except IndexError:
            # Only warn if the job should already be in a queue
            if this_job.job_state == job_states.RUNNING:
                self.logger.warning("No 'queue_name' field for job "
                                    "id {}".format(this_job.job_id))

        try:
            job_element = job.getElementsByTagName(
                'JB_submission_time').pop(0)
            element_child = job_element.childNodes.pop(0)
            time_string = str(element_child.data).strip()
            try:
                this_job.submission_time = self._parse_time_string(
                    time_string)
            except ValueError:
                self.logger.warning("Error parsing 'JB_submission_time' "
                                    "for job id {} ('{}')".format(
                                        this_job.job_id, time_string))
        except IndexError:
            # A job may report 'JAT_start_time' instead of
            # 'JB_submission_time'
            try:
                job_element = job.getElementsByTagName(
                    'JAT_start_time').pop(0)
                element_child = job_element.childNodes.pop(0)
                time_string = str(element_child.data).strip()
                try:
                    this_job.dispatch_time = self._parse_time_string(
                        time_string)
                except ValueError:
                    self.logger.warning("Error parsing 'JAT_start_time' "
                                        "for job id {} ('{}')".format(
                                            this_job.job_id, time_string))
            except IndexError:
                self.logger.warning("No 'JB_submission_time' and no "
                                    "'JAT_start_time' field for job "
                                    "id {}".format(this_job.job_id))

        # There is also cpu_usage, mem_usage and io_usage information
        # available for running jobs:
        if this_job.job_state == job_states.RUNNING:
            try:
                job_element = job.getElementsByTagName('slots').pop(0)
                element_child = job_element.childNodes.pop(0)
                this_job.num_mpiprocs = int(str(element_child.data).strip())
            except IndexError:
                self.logger.warning("No 'slots' field for job "
                                    "id {}".format(this_job.job_id))
            except ValueError:
                self.logger.warning("'slots' is not an integer for job "
                                    "id {}".format(this_job.job_id))

        joblist.append(this_job)

    return joblist
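
# For reference, a shortened, hypothetical `qstat` XML job element of the
# form the parser above expects:
#
#   <job_list state="running">
#     <JB_job_number>1212299</JB_job_number>
#     <JAT_start_time>2013-06-18T12:08:23</JAT_start_time>
#     <JB_owner>user</JB_owner>
#     <JB_name>myjob</JB_name>
#     <state>r</state>
#     <queue_name>all.q@node01</queue_name>
#     <slots>16</slots>
#   </job_list>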