def parse(self, line):
    '''
    Parses a single line from a SLURM accounting log file.

    The line is the '|'-separated output of:
    /usr/local/bin/sacct -P -n --format=JobID,JobName,User,Group,Start,End,Elapsed,CPUTimeRAW,Partition,NCPUS,NNodes,NodeList,MaxRSS,MaxVMSize -j $JOBID >> /var/log/apel/slurm_acc.20130311

    Example line:
    667|sleep|root|root|2013-03-11T12:47:37|2013-03-11T12:47:40|00:00:03|12|debug|4|2|cloud-vm-[03-04]|560K|100904K

    Returns a populated EventRecord.
    Raises AssertionError if CpuDuration or WallDuration is negative,
    and IndexError/ValueError for malformed lines.
    '''
    # log.info('line: %s' % (line));
    values = line.strip().split('|')

    # MaxRSS (index 12) and MaxVMSize (index 13) may be empty; when present
    # they carry a 'K' suffix (e.g. '560K'), so drop the last character
    # before converting. Default to 0 when the field is empty.
    rmem = 0
    if values[12]:
        # remove 'K' string from the end
        rmem = float(values[12][:-1])

    vmem = 0
    if values[13]:
        # remove 'K' string from the end
        vmem = float(values[13][:-1])

    mapping = {
        'Site': lambda x: self.site_name,
        'MachineName': lambda x: self.machine_name,
        'Infrastructure': lambda x: "APEL-CREAM-SLURM",
        # Field 0 is the sacct JobID; it is stored as the record's JobName.
        'JobName': lambda x: x[0],
        'LocalUserID': lambda x: x[2],
        'LocalUserGroup': lambda x: x[3],
        'WallDuration': lambda x: parse_time(x[6]),
        'CpuDuration': lambda x: int(float(x[7])),
        # need to check timezones
        'StartTime': lambda x: parse_timestamp(x[4]),
        'StopTime': lambda x: parse_timestamp(x[5]),
        # Partition is field index 8 (index 9 is NCPUS); using index 9 here
        # would store the processor count as the queue name.
        'Queue': lambda x: x[8],
        'MemoryReal': lambda x: int(rmem),  # KB
        'MemoryVirtual': lambda x: int(vmem),  # KB
        'Processors': lambda x: int(x[9]),
        'NodeCount': lambda x: int(x[10])
    }

    rc = {}
    for key in mapping:
        rc[key] = mapping[key](values)

    # Negative durations indicate a corrupt or misparsed record.
    assert rc['CpuDuration'] >= 0, 'Negative CpuDuration value'
    assert rc['WallDuration'] >= 0, 'Negative WallDuration value'

    record = EventRecord()
    record.set_all(rc)
    return record
def parse(self, line):
    '''
    Parses single line from accounting log file.

    Example line of accounting log file:
    "timestamp=2012-05-20 23:59:47" "userDN=/O=GermanGrid/OU=UniWuppertal/CN=Torsten Harenberg" "userFQAN=/atlas/Role=production/Capability=NULL" "ceID=cream-2-fzk.gridka.de:8443/cream-pbs-atlasXL" "jobID=CREAM410741480" "lrmsID=9575064.lrms1" "localUser=11999"

    Line was split, if you want to rejoin use ' ' as a joiner.
    '''
    record = BlahdRecord()

    # Break the line on the quoting pattern, discard fragments that are
    # only whitespace (e.g. ' '), and split the rest into key/value pairs.
    fragments = [frag for frag in self.LINE_EXPR.split(line) if len(frag) > 1]
    pairs = [frag.split('=', 1) for frag in fragments]

    # Keep only the first value seen per key: the first userFQAN
    # occurrence is the primary FQAN.
    data = {}
    for key, value in pairs:
        data.setdefault(key, value)

    # Simple mapping between keys in a log file and a table's columns.
    mapping = {
        'TimeStamp': lambda d: 'T'.join(d['timestamp'].split()) + 'Z',
        'GlobalUserName': lambda d: d['userDN'],
        'FQAN': lambda d: d['userFQAN'],
        'VO': lambda d: parse_fqan(d['userFQAN'])[2],
        'VOGroup': lambda d: parse_fqan(d['userFQAN'])[1],
        'VORole': lambda d: parse_fqan(d['userFQAN'])[0],
        'CE': lambda d: d['ceID'],
        'GlobalJobId': lambda d: d['jobID'],
        'LrmsId': lambda d: d['lrmsID'],
        'Site': lambda d: self.site_name,
        'ValidFrom': lambda d: valid_from(parse_timestamp(d['timestamp'])),
        'ValidUntil': lambda d: valid_until(parse_timestamp(d['timestamp'])),
        'Processed': lambda d: Parser.UNPROCESSED}

    # Evaluate every column extractor against the parsed data.
    rc = dict((column, extract(data)) for column, extract in mapping.items())

    record.set_all(rc)
    return record
def test_parse_timestamp(self):
    '''
    Checks that the different time formats that we might have to parse
    are handled correctly. Note that we convert into datetime objects
    with no timezone information for internal use.
    '''
    # Three equivalent representations of 2010-01-01 10:01:02 UTC.
    valid_dates = ['2010-01-01 10:01:02',
                   '2010-01-01T10:01:02Z',
                   '2010-01-01T11:01:02+01:00']

    expectations = (('year', 2010), ('month', 1), ('day', 1),
                    ('hour', 10), ('minute', 1), ('second', 2),
                    ('tzinfo', None))

    for stamp in valid_dates:
        parsed = parse_timestamp(stamp)
        for attribute, expected in expectations:
            self.assertEqual(getattr(parsed, attribute), expected)
def parse_car(self, xml_record):
    '''
    Main function for parsing CAR record.

    Interesting data can be fetched from 2 places:
    * as a content of node (here called text node)
    * as a attribute value (extracted by getAttr)
    '''
    # One extraction function per record field; each takes the dict of
    # tag name -> DOM node list built below. Extraction failures are
    # caught per-field in the loop at the end.
    functions = {
        'Site': lambda nodes: self.getText(nodes['Site'][0].childNodes),
        'SubmitHost': lambda nodes: self.getText(nodes['SubmitHost'][0].childNodes),
        'MachineName': lambda nodes: self.getText(nodes['MachineName'][0].childNodes),
        'Queue': lambda nodes: self.getText(nodes['Queue'][0].childNodes),
        'LocalJobId': lambda nodes: self.getText(nodes['LocalJobId'][0].childNodes),
        'LocalUserId': lambda nodes: self.getText(nodes['LocalUserId'][0].childNodes),
        'GlobalUserName': lambda nodes: self.getText(nodes['GlobalUserName'][0].childNodes),
        # FQAN/VOGroup/VORole all live inside GroupAttribute elements,
        # distinguished by their 'type' attribute.
        'FQAN': lambda nodes: self.getText(
            self.getTagByAttr(nodes['GroupAttribute'], 'type', 'FQAN')[0].
            childNodes),
        'VO': lambda nodes: self.getText(nodes['Group'][0].childNodes),
        'VOGroup': lambda nodes: self.getText(
            self.getTagByAttr(nodes['GroupAttribute'], 'type', 'group')[0].
            childNodes),
        'VORole': lambda nodes: self.getText(
            self.getTagByAttr(nodes['GroupAttribute'], 'type', 'role')[0].
            childNodes),
        # Durations arrive as ISO 8601 periods and are converted to seconds.
        'WallDuration': lambda nodes: iso2seconds(
            self.getText(nodes['WallDuration'][0].childNodes)),
        'CpuDuration': lambda nodes: iso2seconds(self.retrieve_cpu(nodes)),
        'Processors': lambda nodes: self.getText(nodes['Processors'][0].childNodes),
        'NodeCount': lambda nodes: self.getText(nodes['NodeCount'][0].childNodes),
        # Memory fields are deliberately not extracted here.
        'MemoryReal': lambda nodes: None,
        'MemoryVirtual': lambda nodes: None,
        'StartTime': lambda nodes: parse_timestamp(
            self.getText(nodes['StartTime'][0].childNodes)),
        'EndTime': lambda nodes: parse_timestamp(
            self.getText(nodes['EndTime'][0].childNodes)),
        # Infrastructure and ServiceLevel carry data in XML attributes
        # rather than text content.
        'InfrastructureDescription': lambda nodes:
            self.getAttr(nodes['Infrastructure'][0], 'description'),
        'InfrastructureType': lambda nodes:
            self.getAttr(nodes['Infrastructure'][0], 'type'),
        'ServiceLevelType': lambda nodes:
            self.getAttr(nodes['ServiceLevel'][0], 'type'),
        'ServiceLevel': lambda nodes:
            self.getText(nodes['ServiceLevel'][0].childNodes),
    }

    tags = [
        'Site', 'SubmitHost', 'MachineName', 'Queue', 'LocalJobId',
        'LocalUserId', 'GlobalUserName', 'GroupAttribute', 'Group',
        'WallDuration', 'CpuDuration', 'Memory', 'Processors',
        'NodeCount', 'StartTime', 'EndTime', 'Infrastructure',
        'ServiceLevel'
    ]

    # Create a dictionary of all the tags we want to retrieve from the XML
    nodes = {}.fromkeys(tags)
    data = {}

    for node in nodes:
        # Create a list of nodes which match the tags we want.
        # Note that this only matches the one namespace we have defined.
        nodes[node] = xml_record.getElementsByTagNameNS(
            self.NAMESPACE, node)

    for field in functions:
        try:
            data[field] = functions[field](nodes)
        # Missing optional elements surface as IndexError ([0] on an empty
        # list), KeyError or AttributeError; log and continue.
        except (IndexError, KeyError, AttributeError), e:
            log.debug('Failed to parse field %s: %s', field, e)
    # NOTE(review): no record construction or return statement is visible in
    # this chunk; presumably 'data' is turned into a record further on —
    # confirm against the full file.
def parse_car(self, xml_record): ''' Main function for parsing CAR record. Interesting data can be fetched from 2 places: * as a content of node (here called text node) * as a attribute value (extracted by getAttr) ''' functions = { 'Site' : lambda nodes: self.getText(nodes['Site'][0].childNodes), 'SubmitHost' : lambda nodes: self.getText(nodes['SubmitHost'][0].childNodes), 'MachineName' : lambda nodes: self.getText(nodes['MachineName'][0].childNodes), 'Queue' : lambda nodes: self.getText(nodes['Queue'][0].childNodes), 'LocalJobId' : lambda nodes: self.getText(nodes['LocalJobId'][0].childNodes), 'LocalUserId' : lambda nodes: self.getText(nodes['LocalUserId'][0].childNodes), 'GlobalUserName' : lambda nodes: self.getText(nodes['GlobalUserName'][0].childNodes), 'FQAN' : lambda nodes: self.getText( self.getTagByAttr(nodes['GroupAttribute'], 'type', 'FQAN')[0].childNodes), 'VO' : lambda nodes: self.getText(nodes['Group'][0].childNodes), 'VOGroup' : lambda nodes: self.getText( self.getTagByAttr(nodes['GroupAttribute'], 'type', 'group')[0].childNodes), 'VORole' : lambda nodes: self.getText( self.getTagByAttr(nodes['GroupAttribute'], 'type', 'role')[0].childNodes), 'WallDuration' : lambda nodes: iso2seconds(self.getText( nodes['WallDuration'][0].childNodes)), 'CpuDuration' : lambda nodes: iso2seconds(self.retrieve_cpu(nodes)), 'Processors' : lambda nodes: self.getText(nodes['Processors'][0].childNodes), 'NodeCount' : lambda nodes: self.getText(nodes['NodeCount'][0].childNodes), 'MemoryReal' : lambda nodes: None, 'MemoryVirtual' : lambda nodes: None, 'StartTime' : lambda nodes: parse_timestamp(self.getText( nodes['StartTime'][0].childNodes)), 'EndTime' : lambda nodes: parse_timestamp(self.getText( nodes['EndTime'][0].childNodes)), 'InfrastructureDescription' : lambda nodes: self.getAttr(nodes['Infrastructure'][0], 'description'), 'InfrastructureType' : lambda nodes: self.getAttr(nodes['Infrastructure'][0], 'type'), 'ServiceLevelType' : lambda nodes: self.getAttr( 
nodes['ServiceLevel'][0], 'type'), 'ServiceLevel' : lambda nodes: self.getText( nodes['ServiceLevel'][0].childNodes), } tags = ['Site', 'SubmitHost', 'MachineName', 'Queue', 'LocalJobId', 'LocalUserId', 'GlobalUserName', 'GroupAttribute', 'Group', 'WallDuration', 'CpuDuration', 'Memory', 'Processors', 'NodeCount', 'StartTime', 'EndTime', 'Infrastructure', 'ServiceLevel'] # Create a dictionary of all the tags we want to retrieve from the XML nodes = {}.fromkeys(tags) data = {} for node in nodes: # Create a list of nodes which match the tags we want. # Note that this only matches the one namespace we have defined. nodes[node] = xml_record.getElementsByTagNameNS(self.NAMESPACE, node) for field in functions: try: data[field] = functions[field](nodes) except (IndexError, KeyError, AttributeError), e: log.debug('Failed to parse field %s: %s' % (field, e))
def parseAurRecord(self, xml_record): ''' Main function for parsing AUR record. Interesting data can be fetched from 2 places: * as a content of node (here called text node) * as a attribute value (extracted by getAttr) ''' functions = { 'Site' : lambda nodes: self.getText(nodes['Site'][0].childNodes), 'Month' : lambda nodes: self.getText(nodes['Month'][0].childNodes), 'Year' : lambda nodes: self.getText(nodes['Year'][0].childNodes), 'GlobalUserName' : lambda nodes: self.getText(nodes['GlobalUserName'][0].childNodes), 'VO' : lambda nodes: self.getText(nodes['Group'][0].childNodes), 'VOGroup' : lambda nodes: self.getText( self.getTagByAttr(nodes['GroupAttribute'], 'type', 'vo-group', CarParser.NAMESPACE)[0].childNodes), 'VORole' : lambda nodes: self.getText( self.getTagByAttr(nodes['GroupAttribute'], 'type', 'role', CarParser.NAMESPACE)[0].childNodes), 'MachineName' : lambda nodes: self.getText(nodes['MachineName'][0].childNodes), 'SubmitHost' : lambda nodes: self.getText(nodes['SubmitHost'][0].childNodes), 'Infrastructure' : lambda nodes: self.getAttr(nodes['Infrastructure'][0], 'type', CarParser.NAMESPACE), 'EarliestEndTime' : lambda nodes: parse_timestamp(self.getText( nodes['EarliestEndTime'][0].childNodes)), 'LatestEndTime' : lambda nodes: parse_timestamp(self.getText( nodes['LatestEndTime'][0].childNodes)), 'WallDuration' : lambda nodes: iso2seconds(self.getText( nodes['WallDuration'][0].childNodes)), 'CpuDuration' : lambda nodes: iso2seconds(self.getText( nodes['CpuDuration'][0].childNodes)), 'NormalisedWallDuration': lambda nodes: iso2seconds(self.getText( nodes['NormalisedWallDuration'][0].childNodes)), 'NormalisedCpuDuration': lambda nodes: iso2seconds(self.getText( nodes['NormalisedCpuDuration'][0].childNodes)), 'NumberOfJobs' : lambda nodes: self.getText(nodes['NumberOfJobs'][0].childNodes), 'NodeCount' : lambda nodes: self.getText(nodes['NodeCount'][0].childNodes), 'Processors' : lambda nodes: self.getText(nodes['Processors'][0].childNodes), } tags = 
['Site', 'Month', 'Year', 'GlobalUserName', 'Group', 'GroupAttribute', 'SubmitHost', 'Infrastructure', 'EarliestEndTime', 'LatestEndTime', 'WallDuration', 'CpuDuration', 'NormalisedWallDuration', 'NormalisedCpuDuration', 'NumberOfJobs', 'NodeCount', 'Processors'] nodes = {}.fromkeys(tags) data = {} for node in nodes: if node in ('GroupAttribute',): # For these attributes we need to dig into the GroupAttribute # elements to get the values so we save the whole elements. nodes[node] = xml_record.getElementsByTagNameNS( CarParser.NAMESPACE, 'GroupAttribute') else: nodes[node] = xml_record.getElementsByTagNameNS(self.NAMESPACE, node) # Some of the nodes are in the CAR namespace. nodes[node].extend(xml_record.getElementsByTagNameNS(CarParser.NAMESPACE, node)) for field in functions: try: data[field] = functions[field](nodes) except IndexError, e: log.debug('Failed to parse field %s: %s', field, e) except KeyError, e: log.debug('Failed to parse field %s: %s', field, e)
def parse(self, line):
    '''
    Parses single line from accounting log file.

    Example line of accounting log file:
    "timestamp=2017-02-01 00:03:49; clusterid=381620; CE_JobId=396933.0; owner=lhpilot007; VO=lhcb; userDN=/DC=ch/DC=cern/OU=Organic Units/OU=Users/CN=romanov/CN=427293/CN=Vladimir Romanovskiy; userFQAN=/lhcb/Role=pilot/Capability=NULL; [email protected]; request_cpus=1; cputime=3466.000000; syscputime=259.000000; jobduration=4821.575215; walltime+suspensiontime=4823.000000; suspensiontime=0.000000; cputmult=1.1864; pmem=1684532; vmem=944; disk=38543; ExitCode=0; ExitSignal=undefined; LastStatus=4; JobStatus=3; startdate=1485899007; enddate=1485903829"

    The whole record is wrapped in double quotes and the key=value pairs
    are separated by '; '.

    Returns a populated HTCondorCERecord.
    '''
    data = {}

    # Strip surrounding whitespace and the enclosing double quotes before
    # splitting into 'key=value' items. Previously the leading quote stayed
    # attached to the first key ('"timestamp') and a redundant regex pass
    # then stored a garbage 'timestamp' value (everything after the first
    # '='), corrupting TimeStamp/ValidFrom/ValidUntil.
    for item in line.strip().strip('"').split('; '):
        key, value = item.split('=', 1)
        # Store only the first value encountered. This is mainly for the
        # userFQAN field as the first occurence of this is the primary FQAN.
        if key not in data:
            data[key] = value

    mapping = {
        'TimeStamp': lambda x: 'T'.join(x['timestamp'].split()) + 'Z',
        'GlobalUserName': lambda x: x['userDN'],
        'FQAN': lambda x: x['userFQAN'],
        'VO': lambda x: x['VO'],
        # userFQAN looks like '/lhcb/Role=pilot/Capability=NULL': element 1
        # of the '/'-split is the group, element 2 the 'Role=...' part.
        'VOGroup': lambda x: x['userFQAN'].split("/")[1],
        'VORole': lambda x: x['userFQAN'].split("/")[2],
        # CE id is synthesised from the machine name, the HTCondor-CE port
        # and the batch system type.
        'CE': lambda x: (self.machine_name + ":" + "9619" + "/" +
                         self.machine_name + "-" + "condor"),
        'GlobalJobId': lambda x: x['CE_JobId'] + "_" + self.machine_name,
        'LrmsId': lambda x: x['clusterid'] + "_" + self.machine_name,
        'Site': lambda x: self.site_name,
        'ValidFrom': lambda x: valid_from(parse_timestamp(x['timestamp'])),
        'ValidUntil': lambda x: valid_until(parse_timestamp(x['timestamp'])),
        'Processed': lambda x: Parser.UNPROCESSED
    }

    rc = {}
    for key in mapping:
        rc[key] = mapping[key](data)

    record = HTCondorCERecord()
    record.set_all(rc)
    return record