Exemple #1
0
    def parse(self, line):
        '''
        Parse a single line from a SLURM accounting log file.

        The line is expected to be the pipe-separated output of:

            sacct -P -n --format=JobID,JobName,User,Group,Start,End,Elapsed,
                CPUTimeRAW,Partition,NCPUS,NNodes,NodeList,MaxRSS,MaxVMSize

        for example:

            667|sleep|root|root|2013-03-11T12:47:37|2013-03-11T12:47:40|
            00:00:03|12|debug|4|2|cloud-vm-[03-04]|560K|100904K

        (the example is wrapped here; a real record is a single line).

        Field indexes used below therefore are:
            0 JobID, 2 User, 3 Group, 4 Start, 5 End, 6 Elapsed,
            7 CPUTimeRAW, 8 Partition, 9 NCPUS, 10 NNodes,
            12 MaxRSS, 13 MaxVMSize

        Returns an EventRecord filled in from the line.  Raises IndexError
        if the line has fewer fields than expected and AssertionError if
        the parsed CPU or wall duration is negative.
        '''
        values = line.strip().split('|')

        # MaxRSS and MaxVMSize may be empty.  When present they carry a
        # trailing unit character (the 'K' in '560K') which is dropped
        # before the numeric conversion.
        rmem = float(values[12][:-1]) if values[12] else 0
        vmem = float(values[13][:-1]) if values[13] else 0

        mapping = {
            'Site'            : lambda x: self.site_name,
            'MachineName'     : lambda x: self.machine_name,
            'Infrastructure'  : lambda x: "APEL-CREAM-SLURM",
            # The record's JobName column is filled from the JobID field.
            'JobName'         : lambda x: x[0],
            'LocalUserID'     : lambda x: x[2],
            'LocalUserGroup'  : lambda x: x[3],
            'WallDuration'    : lambda x: parse_time(x[6]),
            'CpuDuration'     : lambda x: int(float(x[7])),
            # need to check timezones
            'StartTime'       : lambda x: parse_timestamp(x[4]),
            'StopTime'        : lambda x: parse_timestamp(x[5]),
            # Partition is field 8; field 9 is NCPUS (used for Processors).
            'Queue'           : lambda x: x[8],
            'MemoryReal'      : lambda x: int(rmem),  # KB
            'MemoryVirtual'   : lambda x: int(vmem),  # KB
            'Processors'      : lambda x: int(x[9]),
            'NodeCount'       : lambda x: int(x[10]),
        }

        rc = {}
        for key in mapping:
            rc[key] = mapping[key](values)

        assert rc['CpuDuration'] >= 0, 'Negative CpuDuration value'
        assert rc['WallDuration'] >= 0, 'Negative WallDuration value'

        record = EventRecord()
        record.set_all(rc)
        return record
Exemple #2
0
    def parse(self, line):
        '''
        Parse a single line from a blahd accounting log file.

        Example line of accounting log file:
        "timestamp=2012-05-20 23:59:47" "userDN=/O=GermanGrid/OU=UniWuppertal/CN=Torsten Harenberg"
        "userFQAN=/atlas/Role=production/Capability=NULL" "ceID=cream-2-fzk.gridka.de:8443/cream-pbs-atlasXL"
        "jobID=CREAM410741480" "lrmsID=9575064.lrms1" "localUser=11999"

        The example was split over several lines; rejoin with ' ' to get
        the real single-line form.

        Returns a BlahdRecord filled in from the key=value pairs.  A key
        required by the mapping but absent from the line raises KeyError.
        '''
        record = BlahdRecord()

        # Split on the line expression and discard fragments of at most one
        # character (such as the ' ' between quoted fields), then break each
        # remaining fragment into a [key, value] pair.
        fragments = [piece for piece in self.LINE_EXPR.split(line)
                     if len(piece) > 1]
        pairs = [fragment.split('=', 1) for fragment in fragments]

        # Simple mapping between keys in a log file and a table's columns
        mapping = {
            'TimeStamp'      : lambda x: 'T'.join(x['timestamp'].split()) + 'Z',
            'GlobalUserName' : lambda x: x['userDN'],
            'FQAN'           : lambda x: x['userFQAN'],
            'VO'             : lambda x: parse_fqan(x['userFQAN'])[2],
            'VOGroup'        : lambda x: parse_fqan(x['userFQAN'])[1],
            'VORole'         : lambda x: parse_fqan(x['userFQAN'])[0],
            'CE'             : lambda x: x['ceID'],
            'GlobalJobId'    : lambda x: x['jobID'],
            'LrmsId'         : lambda x: x['lrmsID'],
            'Site'           : lambda x: self.site_name,
            'ValidFrom'      : lambda x: valid_from(parse_timestamp(x['timestamp'])),
            'ValidUntil'     : lambda x: valid_until(parse_timestamp(x['timestamp'])),
            'Processed'      : lambda x: Parser.UNPROCESSED}

        # Keep only the first value seen for each key.  This matters mainly
        # for userFQAN, whose first occurrence is the primary FQAN.
        data = {}
        for name, content in pairs:
            data.setdefault(name, content)

        rc = dict((column, extract(data))
                  for column, extract in mapping.items())

        record.set_all(rc)
        return record
Exemple #3
0
 def test_parse_timestamp(self):
     '''
     Check that each timestamp format we may have to parse is handled
     correctly.  The results are expected to be datetime objects without
     timezone information, for internal use.
     '''
     samples = ['2010-01-01 10:01:02',
                '2010-01-01T10:01:02Z',
                '2010-01-01T11:01:02+01:00']
     parsed = [parse_timestamp(sample) for sample in samples]

     # Every sample must yield the same fields; note that the sample with
     # a +01:00 offset is expected to come back with hour 10.
     expected = (('year', 2010), ('month', 1), ('day', 1),
                 ('hour', 10), ('minute', 1), ('second', 2),
                 ('tzinfo', None))
     for stamp in parsed:
         for attribute, wanted in expected:
             self.assertEqual(getattr(stamp, attribute), wanted)
Exemple #4
0
    def test_parse_timestamp(self):
        '''
        Check that each timestamp format we may have to parse is handled
        correctly.  The results are expected to be datetime objects without
        timezone information, for internal use.
        '''
        plain = '2010-01-01 10:01:02'
        zulu = '2010-01-01T10:01:02Z'
        offset = '2010-01-01T11:01:02+01:00'
        results = [parse_timestamp(text) for text in (plain, zulu, offset)]

        # All three inputs must produce identical date and time fields;
        # the +01:00 input is expected to come back with hour 10.
        for result in results:
            self.assertEqual(
                (result.year, result.month, result.day), (2010, 1, 1))
            self.assertEqual(
                (result.hour, result.minute, result.second), (10, 1, 2))
            self.assertEqual(result.tzinfo, None)
    def parse_car(self, xml_record):
        '''
        Main function for parsing a CAR record.

        Interesting data can be fetched from two places:
         * the content of a node (here called a text node)
         * an attribute value (extracted by getAttr)

        Every wanted element is looked up in self.NAMESPACE and one value
        is extracted per field.  A field whose extraction raises
        IndexError, KeyError or AttributeError is logged at debug level
        and left out of the collected data.
        '''
        def text_of(tag):
            # Build an extractor returning the text content of the first
            # element found for the given tag.
            return lambda nodes: self.getText(nodes[tag][0].childNodes)

        def group_attribute(kind):
            # Build an extractor returning the text content of the first
            # GroupAttribute element that getTagByAttr selects for
            # type == kind.
            return lambda nodes: self.getText(
                self.getTagByAttr(nodes['GroupAttribute'],
                                  'type', kind)[0].childNodes)

        functions = {
            'Site': text_of('Site'),
            'SubmitHost': text_of('SubmitHost'),
            'MachineName': text_of('MachineName'),
            'Queue': text_of('Queue'),
            'LocalJobId': text_of('LocalJobId'),
            'LocalUserId': text_of('LocalUserId'),
            'GlobalUserName': text_of('GlobalUserName'),
            'FQAN': group_attribute('FQAN'),
            'VO': text_of('Group'),
            'VOGroup': group_attribute('group'),
            'VORole': group_attribute('role'),
            'WallDuration':
            lambda nodes: iso2seconds(
                self.getText(nodes['WallDuration'][0].childNodes)),
            'CpuDuration':
            lambda nodes: iso2seconds(self.retrieve_cpu(nodes)),
            'Processors': text_of('Processors'),
            'NodeCount': text_of('NodeCount'),
            # Memory values are not extracted from the record.
            'MemoryReal': lambda nodes: None,
            'MemoryVirtual': lambda nodes: None,
            'StartTime':
            lambda nodes: parse_timestamp(
                self.getText(nodes['StartTime'][0].childNodes)),
            'EndTime':
            lambda nodes: parse_timestamp(
                self.getText(nodes['EndTime'][0].childNodes)),
            'InfrastructureDescription':
            lambda nodes: self.getAttr(nodes['Infrastructure'][0],
                                       'description'),
            'InfrastructureType':
            lambda nodes: self.getAttr(nodes['Infrastructure'][0], 'type'),
            'ServiceLevelType':
            lambda nodes: self.getAttr(nodes['ServiceLevel'][0], 'type'),
            'ServiceLevel': text_of('ServiceLevel'),
        }

        tags = [
            'Site', 'SubmitHost', 'MachineName', 'Queue', 'LocalJobId',
            'LocalUserId', 'GlobalUserName', 'GroupAttribute', 'Group',
            'WallDuration', 'CpuDuration', 'Memory', 'Processors', 'NodeCount',
            'StartTime', 'EndTime', 'Infrastructure', 'ServiceLevel'
        ]

        # For every tag we want, collect the list of matching elements.
        # Note that this only matches the one namespace we have defined.
        nodes = dict(
            (tag, xml_record.getElementsByTagNameNS(self.NAMESPACE, tag))
            for tag in tags)
        data = {}

        for field in functions:
            try:
                data[field] = functions[field](nodes)
            except (IndexError, KeyError, AttributeError) as e:
                # A missing element or attribute is not fatal: the field
                # is simply left unset.
                log.debug('Failed to parse field %s: %s', field, e)
Exemple #6
0
    def parse_car(self, xml_record):
        '''
        Main function for parsing a CAR record.

        Interesting data can be fetched from two places:
         * the content of a node (here called a text node)
         * an attribute value (extracted by getAttr)

        Every wanted element is looked up in self.NAMESPACE and one value
        is extracted per field.  A field whose extraction raises
        IndexError, KeyError or AttributeError is logged at debug level
        and left out of the collected data.
        '''
        functions = {
            'Site': lambda nodes: self.getText(nodes['Site'][0].childNodes),
            'SubmitHost': lambda nodes: self.getText(
                nodes['SubmitHost'][0].childNodes),
            'MachineName': lambda nodes: self.getText(
                nodes['MachineName'][0].childNodes),
            'Queue': lambda nodes: self.getText(nodes['Queue'][0].childNodes),
            'LocalJobId': lambda nodes: self.getText(
                nodes['LocalJobId'][0].childNodes),
            'LocalUserId': lambda nodes: self.getText(
                nodes['LocalUserId'][0].childNodes),
            'GlobalUserName': lambda nodes: self.getText(
                nodes['GlobalUserName'][0].childNodes),
            'FQAN': lambda nodes: self.getText(
                self.getTagByAttr(nodes['GroupAttribute'],
                                  'type', 'FQAN')[0].childNodes),
            'VO': lambda nodes: self.getText(nodes['Group'][0].childNodes),
            'VOGroup': lambda nodes: self.getText(
                self.getTagByAttr(nodes['GroupAttribute'],
                                  'type', 'group')[0].childNodes),
            'VORole': lambda nodes: self.getText(
                self.getTagByAttr(nodes['GroupAttribute'],
                                  'type', 'role')[0].childNodes),
            'WallDuration': lambda nodes: iso2seconds(self.getText(
                nodes['WallDuration'][0].childNodes)),
            'CpuDuration': lambda nodes: iso2seconds(
                self.retrieve_cpu(nodes)),
            'Processors': lambda nodes: self.getText(
                nodes['Processors'][0].childNodes),
            'NodeCount': lambda nodes: self.getText(
                nodes['NodeCount'][0].childNodes),
            # Memory values are not extracted from the record.
            'MemoryReal': lambda nodes: None,
            'MemoryVirtual': lambda nodes: None,
            'StartTime': lambda nodes: parse_timestamp(self.getText(
                nodes['StartTime'][0].childNodes)),
            'EndTime': lambda nodes: parse_timestamp(self.getText(
                nodes['EndTime'][0].childNodes)),
            'InfrastructureDescription': lambda nodes: self.getAttr(
                nodes['Infrastructure'][0], 'description'),
            'InfrastructureType': lambda nodes: self.getAttr(
                nodes['Infrastructure'][0], 'type'),
            'ServiceLevelType': lambda nodes: self.getAttr(
                nodes['ServiceLevel'][0], 'type'),
            'ServiceLevel': lambda nodes: self.getText(
                nodes['ServiceLevel'][0].childNodes),
        }

        tags = ['Site', 'SubmitHost', 'MachineName', 'Queue', 'LocalJobId',
                'LocalUserId', 'GlobalUserName', 'GroupAttribute',
                'Group', 'WallDuration', 'CpuDuration', 'Memory',
                'Processors', 'NodeCount', 'StartTime', 'EndTime',
                'Infrastructure', 'ServiceLevel']

        # Create a dictionary of all the tags we want to retrieve from the XML
        nodes = {}.fromkeys(tags)
        data = {}

        for node in nodes:
            # Create a list of nodes which match the tags we want.
            # Note that this only matches the one namespace we have defined.
            nodes[node] = xml_record.getElementsByTagNameNS(
                self.NAMESPACE, node)

        for field in functions:
            try:
                data[field] = functions[field](nodes)
            except (IndexError, KeyError, AttributeError) as e:
                # A missing element or attribute is not fatal: the field
                # is simply left unset.  Arguments are passed to the logger
                # so the message is only formatted if debug is enabled.
                log.debug('Failed to parse field %s: %s', field, e)
    def parseAurRecord(self, xml_record):
        '''
        Main function for parsing an AUR record.

        Interesting data can be fetched from two places:
         * the content of a node (here called a text node)
         * an attribute value (extracted by getAttr)

        Elements are looked up in both self.NAMESPACE and
        CarParser.NAMESPACE, except GroupAttribute which is looked up in
        CarParser.NAMESPACE only.  A field whose extraction raises
        IndexError or KeyError is logged at debug level and left out of
        the collected data.
        '''
        functions = {
            'Site': lambda nodes: self.getText(nodes['Site'][0].childNodes),
            'Month': lambda nodes: self.getText(nodes['Month'][0].childNodes),
            'Year': lambda nodes: self.getText(nodes['Year'][0].childNodes),
            'GlobalUserName': lambda nodes: self.getText(
                nodes['GlobalUserName'][0].childNodes),
            'VO': lambda nodes: self.getText(nodes['Group'][0].childNodes),
            'VOGroup': lambda nodes: self.getText(
                self.getTagByAttr(nodes['GroupAttribute'],
                                  'type', 'vo-group',
                                  CarParser.NAMESPACE)[0].childNodes),
            'VORole': lambda nodes: self.getText(
                self.getTagByAttr(nodes['GroupAttribute'],
                                  'type', 'role',
                                  CarParser.NAMESPACE)[0].childNodes),
            'MachineName': lambda nodes: self.getText(
                nodes['MachineName'][0].childNodes),
            'SubmitHost': lambda nodes: self.getText(
                nodes['SubmitHost'][0].childNodes),
            'Infrastructure': lambda nodes: self.getAttr(
                nodes['Infrastructure'][0], 'type', CarParser.NAMESPACE),
            'EarliestEndTime': lambda nodes: parse_timestamp(self.getText(
                nodes['EarliestEndTime'][0].childNodes)),
            'LatestEndTime': lambda nodes: parse_timestamp(self.getText(
                nodes['LatestEndTime'][0].childNodes)),
            'WallDuration': lambda nodes: iso2seconds(self.getText(
                nodes['WallDuration'][0].childNodes)),
            'CpuDuration': lambda nodes: iso2seconds(self.getText(
                nodes['CpuDuration'][0].childNodes)),
            'NormalisedWallDuration': lambda nodes: iso2seconds(self.getText(
                nodes['NormalisedWallDuration'][0].childNodes)),
            'NormalisedCpuDuration': lambda nodes: iso2seconds(self.getText(
                nodes['NormalisedCpuDuration'][0].childNodes)),
            'NumberOfJobs': lambda nodes: self.getText(
                nodes['NumberOfJobs'][0].childNodes),
            'NodeCount': lambda nodes: self.getText(
                nodes['NodeCount'][0].childNodes),
            'Processors': lambda nodes: self.getText(
                nodes['Processors'][0].childNodes),
        }

        # Every tag read by an extractor above must be listed here,
        # otherwise the lookup in nodes raises KeyError and the field is
        # silently dropped.  'MachineName' was previously missing.
        tags = ['Site', 'Month', 'Year', 'GlobalUserName', 'Group',
                'GroupAttribute', 'MachineName', 'SubmitHost',
                'Infrastructure', 'EarliestEndTime', 'LatestEndTime',
                'WallDuration', 'CpuDuration',
                'NormalisedWallDuration', 'NormalisedCpuDuration',
                'NumberOfJobs', 'NodeCount', 'Processors']

        nodes = {}.fromkeys(tags)
        data = {}

        for node in nodes:
            if node == 'GroupAttribute':
                # For these attributes we need to dig into the GroupAttribute
                # elements to get the values so we save the whole elements.
                nodes[node] = xml_record.getElementsByTagNameNS(
                    CarParser.NAMESPACE, 'GroupAttribute')
            else:
                nodes[node] = xml_record.getElementsByTagNameNS(
                    self.NAMESPACE, node)
                # Some of the nodes are in the CAR namespace.
                nodes[node].extend(xml_record.getElementsByTagNameNS(
                    CarParser.NAMESPACE, node))

        for field in functions:
            try:
                data[field] = functions[field](nodes)
            except (IndexError, KeyError) as e:
                # A missing element is not fatal: the field is left unset.
                log.debug('Failed to parse field %s: %s', field, e)
Exemple #8
0
    def parse(self, line):
        '''
        Parse a single line from an HTCondor-CE accounting log file.

        Example line of accounting log file:
        "timestamp=2017-02-01 00:03:49; clusterid=381620; CE_JobId=396933.0; owner=lhpilot007; VO=lhcb; 
        userDN=/DC=ch/DC=cern/OU=Organic Units/OU=Users/CN=romanov/CN=427293/CN=Vladimir Romanovskiy; 
        userFQAN=/lhcb/Role=pilot/Capability=NULL; [email protected]; request_cpus=1; 
        cputime=3466.000000; syscputime=259.000000; jobduration=4821.575215; walltime+suspensiontime=4823.000000; 
        suspensiontime=0.000000; cputmult=1.1864; pmem=1684532; vmem=944; disk=38543; ExitCode=0; 
        ExitSignal=undefined; LastStatus=4; JobStatus=3; startdate=1485899007; enddate=1485903829"

        The example was split over several lines; rejoin with ' ' to get
        the real single-line form.

        Returns an HTCondorCERecord.  Raises ValueError if a '; '-separated
        item contains no '=', and KeyError if a key needed by the mapping
        is absent from the line.
        '''
        # Secondary split on double quotes; this is basically for the FQAN.
        # Fragments of at most one character are discarded.
        quote_expr = re.compile(r'\"|\"_\"')
        chunks = [chunk for chunk in quote_expr.split(line) if len(chunk) > 1]
        parts = [chunk.split('=', 1) for chunk in chunks]

        # Primary split: "key=value" items separated by "; ".  A later
        # duplicate key overwrites an earlier one.
        data = {}
        for item in line.split("; "):
            name, content = item.split('=', 1)
            data[name] = content

        mapping = {
            'TimeStamp': lambda x: 'T'.join(x['timestamp'].split()) + 'Z',
            'GlobalUserName': lambda x: x['userDN'],
            'FQAN': lambda x: x['userFQAN'],
            'VO': lambda x: x['VO'],
            'VOGroup': lambda x: x['userFQAN'].split("/")[1],
            'VORole': lambda x: x['userFQAN'].split("/")[2],
            'CE': lambda x: (self.machine_name + ":9619/"
                             + self.machine_name + "-condor"),
            'GlobalJobId': lambda x: x['CE_JobId'] + "_" + self.machine_name,
            'LrmsId': lambda x: x['clusterid'] + "_" + self.machine_name,
            'Site': lambda x: self.site_name,
            'ValidFrom': lambda x: valid_from(parse_timestamp(x['timestamp'])),
            'ValidUntil': lambda x: valid_until(
                parse_timestamp(x['timestamp'])),
            'Processed': lambda x: Parser.UNPROCESSED,
        }

        # Values from the quote split only fill in keys that the "; " split
        # did not already provide, keeping the first value encountered.
        for name, content in parts:
            data.setdefault(name, content)

        rc = dict((column, extract(data))
                  for column, extract in mapping.items())

        record = HTCondorCERecord()
        record.set_all(rc)
        return record