def parse(self, line):
    '''
    Parse a single line of a blahd accounting log into a BlahdRecord.

    Example line of accounting log file:
    "timestamp=2012-05-20 23:59:47" "userDN=/O=GermanGrid/OU=UniWuppertal/CN=Torsten Harenberg"
    "userFQAN=/atlas/Role=production/Capability=NULL"
    "ceID=cream-2-fzk.gridka.de:8443/cream-pbs-atlasXL" "jobID=CREAM410741480"
    "lrmsID=9575064.lrms1" "localUser=11999"

    Line was split, if you want to rejoin use ' ' as a joiner.
    '''
    record = BlahdRecord()

    # Break the line into fragments, dropping whitespace-only pieces
    # (e.g. a lone ' '), then split each fragment at the FIRST '=' only,
    # since values such as the userDN contain further '=' characters.
    # Keep only the first occurrence of each key: the first userFQAN
    # encountered is the primary FQAN.
    fields = {}
    for fragment in self.LINE_EXPR.split(line):
        if len(fragment) > 1:
            key, value = fragment.split('=', 1)
            fields.setdefault(key, value)

    # Map each record column to a function that derives its value from
    # the parsed log fields.
    extractors = {
        'TimeStamp':      lambda f: 'T'.join(f['timestamp'].split()) + 'Z',
        'GlobalUserName': lambda f: f['userDN'],
        'FQAN':           lambda f: f['userFQAN'],
        'VO':             lambda f: parse_fqan(f['userFQAN'])[2],
        'VOGroup':        lambda f: parse_fqan(f['userFQAN'])[1],
        'VORole':         lambda f: parse_fqan(f['userFQAN'])[0],
        'CE':             lambda f: f['ceID'],
        'GlobalJobId':    lambda f: f['jobID'],
        'LrmsId':         lambda f: f['lrmsID'],
        'Site':           lambda f: self.site_name,
        'ValidFrom':      lambda f: valid_from(parse_timestamp(f['timestamp'])),
        'ValidUntil':     lambda f: valid_until(parse_timestamp(f['timestamp'])),
        'Processed':      lambda f: Parser.UNPROCESSED,
    }

    record.set_all(dict((column, extract(fields))
                        for column, extract in extractors.items()))
    return record
def copy_records(db1, db2, cutoff):
    '''
    Copy all records from the LcgRecords table in db1 to the JobRecords
    table in db2 whose EndTime is greater than the cutoff datetime.
    '''
    # Server-side cursor so the source result set is streamed row by row
    # instead of being loaded into memory in one go.
    c1 = db1.cursor(cursorclass=MySQLdb.cursors.SSCursor)
    c2 = db2.cursor()

    remove_proc(c2)
    c2.execute(CREATE_PROC)

    # extract records from source database into a cursor object
    c1.execute(SELECT_STMT % cutoff)

    counter = 0
    inserted = 0
    errors = {}  # error message -> occurrence count
    start = time.time()
    batch_start = time.time()

    sys.stdout.write('% 21s % 12s\n' % ('Records processed', 'Time taken'))

    for r in c1:
        (site, jobid, userid, fqan, global_user_name, wall_duration,
         cpu_duration, start_time, end_time, submit_host, memory_real,
         memory_virtual, specint) = r

        role, group, vo = parse_fqan(fqan)

        # The target stored procedure expects the literal string 'None'
        # rather than SQL NULL for missing identity fields.
        if global_user_name is None:
            global_user_name = 'None'
        if role is None:
            role = 'None'
        if group is None:
            group = 'None'
        if vo is None:
            vo = 'None'

        start_time = parse_timestamp(start_time)
        end_time = parse_timestamp(end_time)

        try:
            c2.execute(CALLPROC_STMT, (site, submit_host, 'MachineName',
                                       jobid, userid, global_user_name,
                                       fqan, vo, group, role,
                                       wall_duration, cpu_duration,
                                       start_time, end_time,
                                       'migration_script', 'grid',
                                       memory_real, memory_virtual,
                                       SPECINT, specint, 'Import'))
            inserted += 1
        except Exception as err:
            # MySQL error code for a duplicate record is 1062; collapse
            # all duplicates into a single error bucket.  Guard with
            # err.args (portable, unlike Py2-only err[0] indexing).
            if getattr(err, 'args', None) and err.args[0] == 1062:
                msg = 'Duplicate record not inserted'
            else:
                msg = str(err)
            errors[msg] = errors.get(msg, 0) + 1

        counter += 1
        if counter % COMMIT_THRESHOLD == 0:
            db2.commit()

        if counter % SUMMARISE_THRESHOLD == 0 and counter != 0:
            batch_stop = time.time()
            sys.stdout.write('% 13d % 15.3f\n' %
                             (counter, batch_stop - batch_start))
            sys.stdout.flush()
            batch_start = time.time()

    # Commit any remainder that did not fall exactly on a
    # COMMIT_THRESHOLD boundary.
    db2.commit()