def __init__(self, config, event_defs, pbs_directive='#PBS'):
     '''Set up a parser for a single PBS job script.

     config: configuration object, handed on to PbsJob and
             PbsOptionParser
     event_defs: event definitions, handed on to the base class
                 constructor and to PbsOptionParser
     pbs_directive: literal prefix that marks a directive line,
                    '#PBS' by default
     '''
     super(PbsScriptParser, self).__init__(event_defs)
     self._config = config
     self._job = PbsJob(self._config)
     self._pbs_directive = pbs_directive
     # The directive is a literal prefix, not a pattern: escape it so
     # that a custom directive containing regex metacharacters (for
     # instance '#$') is still recognized.
     directive = re.escape(pbs_directive)
     # directive with optional leading whitespace; group 1 is the text
     # that follows the directive
     any_indent = r'\s*{0}\s+(.+)$'.format(directive)
     # same, but at least one leading whitespace character is required
     indented = r'\s+{0}\s+(.+)$'.format(directive)
     self._pbs_re = re.compile(any_indent)
     self._pbs_indented_re = re.compile(indented)
     self._pbs_extract_re = re.compile(any_indent)
     self._pbs_option_parser = PbsOptionParser(self._config, event_defs,
                                               self._job)
     self._state = None
     self._line_nr = 0
     self._pbs = []
Beispiel #2
0
 def parse_file(self, file_name):
     '''Parse the specified log file and store its events per job.

     Empty lines are skipped.  Every other line must consist of
     exactly four ';'-separated fields: time stamp, event type, job ID
     and info string.  A PbsJob is created in self._jobs the first time
     a job ID is seen, and a PbsJobEvent is added to it for each line.

     file_name: path of the log file to read

     Raises PbsLogParserError, mentioning the 1-based line number and
     the file name, when a line can not be processed.
     '''
     with open(file_name, 'r') as pbs_file:
         for line_nr, line in enumerate(pbs_file, start=1):
             line = line.rstrip()
             if not line:
                 continue
             try:
                 time_stamp, event_type, job_id, info_str = line.split(';')
                 if job_id not in self._jobs:
                     self._jobs[job_id] = PbsJob(self._config, job_id)
                 event = PbsJobEvent(time_stamp, event_type, info_str)
                 self._jobs[job_id].add_event(event)
             except Exception:
                 # Exception rather than a bare except, so that
                 # KeyboardInterrupt and SystemExit are not converted
                 # into parse errors.
                 msg = 'problem on line {0} in {1}'.format(line_nr,
                                                           file_name)
                 raise PbsLogParserError(msg)
Beispiel #3
0
 def parse_record(self, record):
     '''Parse an individual job record into a PbsJob.

     record: text of one job record, one attribute per line; the line
             starting with 'Job Id:' must precede the attribute lines,
             since it creates the job the attributes are stored on

     Returns the PbsJob, with the recognized attributes set and the
     collected resource specifications and resources used added to it.
     Lines that match none of the known prefixes are ignored.
     '''
     job = None
     resource_specs = {}
     resources_used = {}
     for line in record.split('\n'):
         line = line.strip()
         if line.startswith('Job Id:'):
             _, job_id = line.split(':', 1)
             job = PbsJob(self._config, job_id.strip())
         elif line.startswith('Job_Name ='):
             job.name = self._get_value(line)
         # NOTE(review): the 'euser' and 'job_state' branches were a
         # single corrupted line; confirm that job.user is the
         # intended attribute for the 'euser' value.
         elif line.startswith('euser ='):
             job.user = self._get_value(line)
         elif line.startswith('job_state = '):
             job.state = self._get_value(line)
         elif line.startswith('queue ='):
             job.queue = self._get_value(line)
         elif line.startswith('Account_Name ='):
             job.project = self._get_value(line)
         elif line.startswith('resources_used.walltime ='):
             walltime = self._get_value(line)
             resources_used['walltime'] = walltime2seconds(walltime)
         elif line.startswith('Resource_List.walltime ='):
             walltime = self._get_value(line)
             resource_specs['walltime'] = walltime2seconds(walltime)
         elif line.startswith('Resource_List.nodect = '):
             resource_specs['nodect'] = int(self._get_value(line))
         elif line.startswith('exec_host ='):
             # '+'-separated entries, each either 'host/cores' or a
             # bare host name; a bare host maps to None
             exec_host = {}
             for host_spec in self._get_value(line).split('+'):
                 if '/' in host_spec:
                     host, cores = host_spec.split('/')
                     exec_host[host] = cores
                 else:
                     exec_host[host_spec] = None
             job.exec_host = exec_host
         elif line.startswith('Resource_List.partition ='):
             job.partition = self._get_value(line)
     job.add_resource_specs(resource_specs)
     job.add_resources_used(resources_used)
     return job
Beispiel #4
0
class PbsScriptParser(EventLogger):
    '''Parser for PBS torque job files

    A job file is processed line by line in three successive states:
    'start' (a shebang is expected), 'pbs' (PBS directives) and
    'script' (the shell script itself).  Problems that are detected
    along the way are registered as events through reg_event.
    '''

    def __init__(self, config, event_defs, pbs_directive='#PBS'):
        '''Constructor

        config: configuration object, handed on to PbsJob and
                PbsOptionParser
        event_defs: event definitions, handed on to the base class
                    constructor and to PbsOptionParser
        pbs_directive: literal prefix that marks a directive line,
                       '#PBS' by default
        '''
        super(PbsScriptParser, self).__init__(event_defs)
        self._config = config
        self._job = PbsJob(self._config)
        self._pbs_directive = pbs_directive
        # The directive is a literal prefix, not a pattern: escape it
        # so that a custom directive containing regex metacharacters
        # (for instance '#$') is still recognized.
        directive = re.escape(pbs_directive)
        # directive with optional leading whitespace; group 1 is the
        # text that follows the directive
        any_indent = r'\s*{0}\s+(.+)$'.format(directive)
        # same, but at least one leading whitespace character required
        indented = r'\s+{0}\s+(.+)$'.format(directive)
        self._pbs_re = re.compile(any_indent)
        self._pbs_indented_re = re.compile(indented)
        self._pbs_extract_re = re.compile(any_indent)
        # an option is a dash followed by a letter
        self._pbs_option_re = re.compile(r'\s*-[A-Za-z]')
        self._pbs_option_parser = PbsOptionParser(self._config, event_defs,
                                                  self._job)
        self._state = None
        self._line_nr = 0
        self._pbs = []
        self._script_first_line_nr = None

    @property
    def script_first_line_nr(self):
        '''return the first line number of the Bash script part, or
        None when no script line has been seen'''
        return self._script_first_line_nr

    def parse_file(self, pbs_file):
        '''parse a PBS file

        pbs_file: open file object, iterated line by line; its name
                  attribute provides the job name

        Comment lines and blank lines are skipped, but are still
        counted in the line numbers and still checked for encoding.
        A 'no_script' event is registered when the file ends before
        any script line was encountered.
        '''
        self._job.name = os.path.basename(pbs_file.name)
        self._state = 'start'
        self._line_nr = 0
        for line in pbs_file:
            self._line_nr += 1
            self.check_encoding(line)
            if self.is_comment(line):
                continue
            if not line.strip():
                continue
            if self._state == 'start':
                self.parse_shebang(line)
            elif self._state == 'pbs':
                self.parse_pbs(line)
            else:
                self.parse_script(line)
        # still in 'start' or 'pbs' means the script part never began
        if self._state in ('start', 'pbs'):
            self.reg_event('no_script')

    @property
    def job(self):
        '''returns a PbsJob object representing the job script'''
        return self._job

    def check_encoding(self, line):
        '''checks ASCII encoding and line endings

        Registers 'non_ascii' when the line contains non-ASCII
        characters, 'dos_format' when it ends in CR LF and
        'mac_format' when it ends in a bare CR.
        '''
        try:
            # Byte strings (which includes Python 2 str) can only be
            # decoded and text strings can only be encoded, so pick
            # the conversion that exists for the type at hand.
            if isinstance(line, bytes):
                line.decode('ascii')
            else:
                line.encode('ascii')
        except UnicodeError:
            # base class of both UnicodeDecodeError and
            # UnicodeEncodeError
            self.reg_event('non_ascii')
        if line.endswith('\r\n'):
            self.reg_event('dos_format')
        if line.endswith('\r'):
            self.reg_event('mac_format')

    def is_comment(self, line):
        '''returns a true value if the line is a comment, i.e., it
        starts with a hash, but is neither a shebang nor any form of
        PBS directive'''
        return (re.match(r'\s*#', line)
                and not (self.is_shebang(line) or self.is_spaced_pbs(line)
                         or self.is_indented_pbs(line) or self.is_pbs(line)))

    def is_shebang(self, line):
        '''returns True if the line is a shebang'''
        return line.startswith('#!')

    def is_spaced_pbs(self, line):
        '''checks whether an extra space is added between hash and text
           in PBS directive, only for default directive string'''
        return self._pbs_directive == '#PBS' and re.match(
            r'\s*#\s+PBS\s+', line)

    def is_pbs(self, line):
        '''returns a match object if the line is a PBS directive,
        indented or not, None otherwise'''
        return self._pbs_re.match(line)

    def is_indented_pbs(self, line):
        '''returns a match object if the line contains an indented PBS
        directive, None otherwise'''
        return self._pbs_indented_re.match(line)

    def parse_shebang(self, line):
        '''parse shebang part of PBS file

        Called for the first line that is neither blank nor a comment.
        Either way the parser moves on to the 'pbs' state; a line that
        is not a shebang is handed to parse_pbs right away.
        '''
        if self.is_shebang(line):
            self._job.shebang = line.strip()
            if self._line_nr > 1:
                self.reg_event('misplaced_shebang')
            self._state = 'pbs'
        else:
            self.reg_event('missing_shebang')
            self._state = 'pbs'
            self.parse_pbs(line)

    def parse_pbs(self, line):
        '''parse PBS directives part of a PBS file

        The first line that is neither a shebang nor a PBS directive
        switches the parser to the 'script' state and is handled as a
        script line.
        '''
        if self.is_shebang(line):
            self.reg_event('misplaced_shebang')
        elif self.is_spaced_pbs(line):
            # reported only, the directive itself is not parsed
            self.reg_event('space_in_pbs_dir')
        elif self.is_pbs(line):
            if self.is_indented_pbs(line):
                self.reg_event('indented_pbs_dir')
            match = self._pbs_extract_re.match(line)
            if match:
                option = match.group(1)
                if self._pbs_option_re.match(option):
                    self._pbs_option_parser.parse_args(option)
                    self.merge_events(self._pbs_option_parser.events)
                else:
                    self.reg_event('malformed_pbs_dir')
            else:
                self.reg_event('malformed_pbs_dir')
        else:
            self._state = 'script'
            if self._script_first_line_nr is None:
                self._script_first_line_nr = self._line_nr
            self.parse_script(line)

    def parse_script(self, line):
        '''parse shell script part of a PBS file

        Shebangs and PBS directives that show up here are reported as
        misplaced; the line is added to the job's script regardless.
        '''
        if self.is_shebang(line):
            self.reg_event('misplaced_shebang')
        if self.is_pbs(line):
            # NOTE(review): 'misplace_pbs_dir' looks like a typo for
            # 'misplaced_pbs_dir'; kept as is, verify against the
            # event definitions before renaming.
            self.reg_event('misplace_pbs_dir')
        self._job.add_script_line(self._line_nr, line)
Beispiel #5
0
 def parse_record(self, record):
     '''Parse an individual job record into a PbsJob.

     record: text of one job record, one attribute per line; the line
             starting with 'Job Id:' must precede the attribute lines,
             since it creates the job the attributes are stored on

     Returns the PbsJob, with the recognized attributes set and the
     collected resource specifications and resources used added to it.
     Lines that match none of the known prefixes are ignored.
     '''
     job = None
     resource_specs = {}
     resources_used = {}
     for line in record.split('\n'):
         line = line.strip()
         if line.startswith('Job Id:'):
             _, job_id = line.split(':', 1)
             job = PbsJob(self._config, job_id.strip())
         elif line.startswith('Job_Name ='):
             job.name = self._get_value(line)
         # NOTE(review): the 'euser' and 'job_state' branches were a
         # single corrupted line; confirm that job.user is the
         # intended attribute for the 'euser' value.
         elif line.startswith('euser ='):
             job.user = self._get_value(line)
         elif line.startswith('job_state = '):
             job.state = self._get_value(line)
         elif line.startswith('queue ='):
             job.queue = self._get_value(line)
         elif line.startswith('Account_Name ='):
             job.project = self._get_value(line)
         elif line.startswith('resources_used.walltime ='):
             walltime = self._get_value(line)
             resources_used['walltime'] = walltime2seconds(walltime)
         elif line.startswith('Resource_List.walltime ='):
             walltime = self._get_value(line)
             resource_specs['walltime'] = walltime2seconds(walltime)
         elif line.startswith('Resource_List.nodect = '):
             resource_specs['nodect'] = int(self._get_value(line))
         elif line.startswith('exec_host ='):
             # '+'-separated entries, each either 'host/cores' or a
             # bare host name; a bare host maps to None
             exec_host = {}
             for host_spec in self._get_value(line).split('+'):
                 if '/' in host_spec:
                     host, cores = host_spec.split('/')
                     exec_host[host] = cores
                 else:
                     exec_host[host_spec] = None
             job.exec_host = exec_host
         elif line.startswith('Resource_List.partition ='):
             job.partition = self._get_value(line)
     job.add_resource_specs(resource_specs)
     job.add_resources_used(resources_used)
     return job
class PbsScriptParser(EventLogger):
    '''Parser for PBS torque job files

    A job file is processed line by line in three successive states:
    'start' (a shebang is expected), 'pbs' (PBS directives) and
    'script' (the shell script itself).  Problems that are detected
    along the way are registered as events through reg_event.
    '''

    def __init__(self, config, event_defs, pbs_directive='#PBS'):
        '''Constructor

        config: configuration object, handed on to PbsJob and
                PbsOptionParser
        event_defs: event definitions, handed on to the base class
                    constructor and to PbsOptionParser
        pbs_directive: literal prefix that marks a directive line,
                       '#PBS' by default
        '''
        super(PbsScriptParser, self).__init__(event_defs)
        self._config = config
        self._job = PbsJob(self._config)
        self._pbs_directive = pbs_directive
        # The directive is a literal prefix, not a pattern: escape it
        # so that a custom directive containing regex metacharacters
        # (for instance '#$') is still recognized.
        directive = re.escape(pbs_directive)
        # directive with optional leading whitespace; group 1 is the
        # text that follows the directive
        any_indent = r'\s*{0}\s+(.+)$'.format(directive)
        # same, but at least one leading whitespace character required
        indented = r'\s+{0}\s+(.+)$'.format(directive)
        self._pbs_re = re.compile(any_indent)
        self._pbs_indented_re = re.compile(indented)
        self._pbs_extract_re = re.compile(any_indent)
        # an option is a dash followed by a letter
        self._pbs_option_re = re.compile(r'\s*-[A-Za-z]')
        self._pbs_option_parser = PbsOptionParser(self._config, event_defs,
                                                  self._job)
        self._state = None
        self._line_nr = 0
        self._pbs = []
        self._script_first_line_nr = None

    @property
    def script_first_line_nr(self):
        '''return the first line number of the Bash script part, or
        None when no script line has been seen'''
        return self._script_first_line_nr

    def parse_file(self, pbs_file):
        '''parse a PBS file

        pbs_file: open file object, iterated line by line; its name
                  attribute provides the job name

        Comment lines and blank lines are skipped, but are still
        counted in the line numbers and still checked for encoding.
        A 'no_script' event is registered when the file ends before
        any script line was encountered.
        '''
        self._job.name = os.path.basename(pbs_file.name)
        self._state = 'start'
        self._line_nr = 0
        for line in pbs_file:
            self._line_nr += 1
            self.check_encoding(line)
            if self.is_comment(line):
                continue
            if not line.strip():
                continue
            if self._state == 'start':
                self.parse_shebang(line)
            elif self._state == 'pbs':
                self.parse_pbs(line)
            else:
                self.parse_script(line)
        # still in 'start' or 'pbs' means the script part never began
        if self._state in ('start', 'pbs'):
            self.reg_event('no_script')

    @property
    def job(self):
        '''returns a PbsJob object representing the job script'''
        return self._job

    def check_encoding(self, line):
        '''checks ASCII encoding and line endings

        Registers 'non_ascii' when the line contains non-ASCII
        characters, 'dos_format' when it ends in CR LF and
        'mac_format' when it ends in a bare CR.
        '''
        try:
            # Byte strings (which includes Python 2 str) can only be
            # decoded and text strings can only be encoded, so pick
            # the conversion that exists for the type at hand.
            if isinstance(line, bytes):
                line.decode('ascii')
            else:
                line.encode('ascii')
        except UnicodeError:
            # base class of both UnicodeDecodeError and
            # UnicodeEncodeError
            self.reg_event('non_ascii')
        if line.endswith('\r\n'):
            self.reg_event('dos_format')
        if line.endswith('\r'):
            self.reg_event('mac_format')

    def is_comment(self, line):
        '''returns a true value if the line is a comment, i.e., it
        starts with a hash, but is neither a shebang nor any form of
        PBS directive'''
        return (re.match(r'\s*#', line) and
                not (self.is_shebang(line) or
                     self.is_spaced_pbs(line) or
                     self.is_indented_pbs(line) or
                     self.is_pbs(line)))

    def is_shebang(self, line):
        '''returns True if the line is a shebang'''
        return line.startswith('#!')

    def is_spaced_pbs(self, line):
        '''checks whether an extra space is added between hash and text
           in PBS directive, only for default directive string'''
        return self._pbs_directive == '#PBS' and re.match(r'\s*#\s+PBS\s+',
                                                          line)

    def is_pbs(self, line):
        '''returns a match object if the line is a PBS directive,
        indented or not, None otherwise'''
        return self._pbs_re.match(line)

    def is_indented_pbs(self, line):
        '''returns a match object if the line contains an indented PBS
        directive, None otherwise'''
        return self._pbs_indented_re.match(line)

    def parse_shebang(self, line):
        '''parse shebang part of PBS file

        Called for the first line that is neither blank nor a comment.
        Either way the parser moves on to the 'pbs' state; a line that
        is not a shebang is handed to parse_pbs right away.
        '''
        if self.is_shebang(line):
            self._job.shebang = line.strip()
            if self._line_nr > 1:
                self.reg_event('misplaced_shebang')
            self._state = 'pbs'
        else:
            self.reg_event('missing_shebang')
            self._state = 'pbs'
            self.parse_pbs(line)

    def parse_pbs(self, line):
        '''parse PBS directives part of a PBS file

        The first line that is neither a shebang nor a PBS directive
        switches the parser to the 'script' state and is handled as a
        script line.
        '''
        if self.is_shebang(line):
            self.reg_event('misplaced_shebang')
        elif self.is_spaced_pbs(line):
            # reported only, the directive itself is not parsed
            self.reg_event('space_in_pbs_dir')
        elif self.is_pbs(line):
            if self.is_indented_pbs(line):
                self.reg_event('indented_pbs_dir')
            match = self._pbs_extract_re.match(line)
            if match:
                option = match.group(1)
                if self._pbs_option_re.match(option):
                    self._pbs_option_parser.parse_args(option)
                    self.merge_events(self._pbs_option_parser.events)
                else:
                    self.reg_event('malformed_pbs_dir')
            else:
                self.reg_event('malformed_pbs_dir')
        else:
            self._state = 'script'
            if self._script_first_line_nr is None:
                self._script_first_line_nr = self._line_nr
            self.parse_script(line)

    def parse_script(self, line):
        '''parse shell script part of a PBS file

        Shebangs and PBS directives that show up here are reported as
        misplaced; the line is added to the job's script regardless.
        '''
        if self.is_shebang(line):
            self.reg_event('misplaced_shebang')
        if self.is_pbs(line):
            # NOTE(review): 'misplace_pbs_dir' looks like a typo for
            # 'misplaced_pbs_dir'; kept as is, verify against the
            # event definitions before renaming.
            self.reg_event('misplace_pbs_dir')
        self._job.add_script_line(self._line_nr, line)