def process_record(self, record):
    """Build a Gratia usage record (JUR) from a parsed *record* dict.

    The optional 'gratia_logfile' entry names the transient file the
    record was read from; it is registered on the usage record so that
    Gratia removes the file once the record is acquired successfully.

    :param record: dictionary-like object holding the accounting values
    :return: a Gratia.UsageRecord ready for submission
    :raises IgnoreRecordException: for records filtered out as uninteresting
    """
    # TODO: yield the value for processing to gratia ()
    DebugPrint(5, "Creating JUR for %s" % record)

    # Filter out uninteresting records (and remove their files).
    # NOTE: `False` is a template placeholder — replace it with the real
    # filtering condition for this probe.
    if False:
        if 'gratia_logfile' in record:
            DebugPrint(1, 'Deleting transient record file: ' +
                       record["gratia_logfile"])
            file_utils.RemoveFile(record['gratia_logfile'])
        raise IgnoreRecordException("Ignoring record.")

    # Define the record.
    # UsageRecord is defined in
    # https://twiki.opensciencegrid.org/bin/view/Accounting/ProbeDevelopement
    # setters have the name of the attribute.
    # Set resource type ( Batch, BatchPilot, GridMonitor, Storage, ActiveTape )
    resource_type = "Batch"
    r = Gratia.UsageRecord(resource_type)

    # fill r using the values in record

    # Remember to register the transient file (it will be removed once the
    # record is acquired successfully).
    # BUG FIX: the original tested for key 'logfile' but then read
    # 'gratia_logfile' — the test and the lookup must use the same key.
    if 'gratia_logfile' in record:
        r.AddTransientInputFile(record['gratia_logfile'])
    return r
def GetRecord(jobid=0):
    """Create a sample Gratia 'Batch' usage record.

    :param jobid: integer suffix used to build the sample local job id
    :return: a populated Gratia.UsageRecord
    """
    rec = Gratia.UsageRecord('Batch')
    # Each entry is (setter name, positional args). Applying them in order
    # reproduces the original call sequence, including the repeated
    # LocalJobId/Status calls where the later value overwrites the earlier.
    setter_calls = [
        ('LocalUserId', ('cmsuser000',)),
        ('GlobalUsername', ('john ainsworth',)),
        ('DN', ('CN=john ainsworth, L=MC, OU=Manchester, O=eScience, C=UK',)),
        ('LocalJobId', ('PBS.1234.0bad',)),
        ('LocalJobId', ('PBS.1234.' + str(jobid),)),  # overwrite the previous entry
        ('JobName', ('cmsreco ', 'this is not a real job name')),
        ('Charge', ('1240',)),
        ('Status', ('4',)),
        ('Status', (4,)),
        ('Njobs', (3, 'Aggregation over 10 days')),
        ('Network', (3.5, 'Gb', 30, 'total')),
        # ('Disk', (3.5, 'Gb', 13891, 'max')),
        # ('Memory', (650000, 'KB', 'min')),
        # ('Swap', (1.5, 'GB', 'max')),
        ('ServiceLevel', ('BottomFeeder', 'QOS')),
        ('TimeDuration', (24, 'submit')),
        ('TimeInstant', ('2005-11-02T15:48:39Z', 'submit')),
        ('WallDuration', (6000 * 3600 * 25 + 63 * 60 + 21.2,
                          'Was entered in seconds')),
        ('CpuDuration', ('PT23H12M1.75S', 'user', 'Was entered as text')),
        ('CpuDuration', ('PT12M1.75S', 'sys', 'Was entered as text')),
        ('NodeCount', (3,)),  # default to total
        ('Processors', (3, .75, 'total')),
        ('StartTime', (1130946550, 'Was entered in seconds')),
        ('EndTime', ('2005-11-03T17:52:55Z', 'Was entered as text')),
        ('MachineName', ('flxi02.fnal.gov',)),
        ('SubmitHost', ('patlx7.fnal.gov',)),
        ('Host', ('flxi02.fnal.gov', True)),
        ('Queue', ('CepaQueue',)),
        ('ProjectName', ('cms reco',)),
        ('AdditionalInfo', ('RemoteWallTime', 94365)),
        ('Resource', ('RemoteCpuTime', 'PT23H')),
    ]
    for method_name, args in setter_calls:
        getattr(rec, method_name)(*args)
    return rec
def GetRecord(jobid=0):
    """Build and return a demonstration Gratia 'Batch' usage record.

    :param jobid: integer suffix appended to the sample local job id
    :return: a populated Gratia.UsageRecord
    """
    usage = Gratia.UsageRecord("Batch")

    # Who ran the job.
    usage.LocalUserId("cmsuser000")
    usage.GlobalUsername("john ainsworth")
    usage.DN("CN=john ainsworth, L=MC, OU=Manchester, O=eScience, C=UK")

    # Job identity — the second LocalJobId call overwrites the first entry.
    usage.LocalJobId("PBS.1234.0bad")
    usage.LocalJobId("PBS.1234.5." + str(jobid))
    usage.JobName("cmsreco", "this is not a real job name")

    # Accounting quantities.
    usage.Charge("1240")
    usage.Status("4")
    usage.Status(4)
    usage.Njobs(3, "Aggregation over 10 days")
    usage.Network(3.5, "Gb", 30, "total")
    # usage.Disk(3.5, "Gb", 13891, "max")
    # usage.Memory(650000, "KB", "min")
    # usage.Swap(1.5, "GB", "max")
    usage.ServiceLevel("BottomFeeder", "QOS")

    # Timing information, deliberately entered in a mix of formats.
    usage.TimeDuration(24, "submit")
    usage.TimeInstant("2005-11-02T15:48:39Z", "submit")
    usage.WallDuration(6000 * 3600 * 25 + 63 * 60 + 21.2,
                       "Was entered in seconds")
    usage.CpuDuration("PT23H12M1.75S", "user", "Was entered as text")
    usage.CpuDuration("PT12M1.75S", "sys", "Was entered as text")
    usage.NodeCount(3)  # default to total
    usage.Processors(3, .75, "total")
    usage.StartTime(1130946550, "Was entered in seconds")
    usage.EndTime("2005-11-03T17:52:55Z", "Was entered as text")

    # Where the job ran.
    usage.MachineName("flxi02.fnal.gov")
    usage.SubmitHost("patlx7.fnal.gov")
    usage.Host("flxi02.fnal.gov", True)
    usage.Queue("CepaQueue")

    # Extra metadata.
    usage.ProjectName("cms reco")
    usage.AdditionalInfo("RemoteWallTime", 94365)
    usage.Resource("RemoteCpuTime", "PT23H")
    return usage
def process_record(self, record):
    """Build a Gratia usage record (JUR) from a parsed *record* dict.

    The optional 'gratia_logfile' entry names the transient file the
    record was read from; it is registered on the usage record so that
    Gratia removes the file once the record is acquired successfully.

    :param record: dictionary-like object holding the accounting values
    :return: a Gratia.UsageRecord ready for submission
    :raises IgnoreRecordException: for records filtered out as uninteresting
    """
    # TODO: yield the value for processing to gratia ()
    DebugPrint(5, "Creating JUR for %s" % record)

    # Filter out uninteresting records (and remove their files).
    # NOTE: `False` is a template placeholder — replace it with the real
    # filtering condition for this probe.
    if False:
        if 'gratia_logfile' in record:
            DebugPrint(1, 'Deleting transient record file: ' +
                       record["gratia_logfile"])
            file_utils.RemoveFile(record['gratia_logfile'])
        raise IgnoreRecordException("Ignoring record.")

    # Define the record.
    # UsageRecord is defined in
    # https://twiki.opensciencegrid.org/bin/view/Accounting/ProbeDevelopement
    # setters have the name of the attribute.
    # Set resource type ( Batch, BatchPilot, GridMonitor, Storage, ActiveTape )
    resource_type = "Batch"
    r = Gratia.UsageRecord(resource_type)

    # fill r using the values in record

    # Remember to register the transient file (it will be removed once the
    # record is acquired successfully).
    # BUG FIX: the original tested for key 'logfile' but then read
    # 'gratia_logfile' — the test and the lookup must use the same key.
    if 'gratia_logfile' in record:
        r.AddTransientInputFile(record['gratia_logfile'])
    return r

# TODO: end of part to remove

#############################################################
# Some references
# http://seann.herdejurgen.com/resume/samag.com/html/v11/i04/a6.htm
# http://stackoverflow.com/questions/14863224/efficient-reading-of-800-gb-xml-file-in-python-2-7
# http://radimrehurek.com/2014/03/data-streaming-in-python-generators-iterators-iterables/
def _convertBillingInfoToGratiaUsageRecord(self, row):
    """
    Take a record returned from the database and convert it to a
    Gratia UsageRecord

    @param row: A dictionary-like object describing the Billing DB entry.
    @return: UsageRecord equivalent to the input row
    """
    # Convert date to utc. This can't be done perfectly, alas, since we
    # don't have the original timezone. We assume localtime.
    # row['datestamp'] should be a datetime.datetime object.

    # make the time into a float
    fltTime = time.mktime(row['datestamp'].timetuple())
    startTime = time.strftime('%Y-%m-%dT%H:%M:%S', time.gmtime(fltTime))
    # NOTE WELL: we need the time accurate to milliseconds. So we
    # add it back to the UTC time. Plain %-formatting is equivalent to the
    # deprecated locale.format() for a single "%06d" directive (grouping is
    # off by default) and keeps this working on Python 3.12+, where
    # locale.format() was removed.
    startTime = startTime + "." + ("%06d" % row['datestamp'].microsecond) + "Z"

    # convert the connection time in milliseconds to a decimal in seconds
    connectTime = float(row['connectiontime']) / 1000.0
    connectionTimeStr = 'PT' + str(connectTime) + 'S'

    # Check for the link to the doorinfo table being bad and log a
    # warning in the hope that somebody notices a bug has crept in.
    if row['doorlink'] == '<undefined>' and \
            not row['protocol'].startswith('DCap'):
        self._log.warn(
            'billinginfo record with datestamp ' +
            startTime + ' contained undefined initiator field'
        )

    # Work out the end points of the data transfer.
    thisHost = str(row['cellname']) + '@' + self._dCacheSvrHost
    if row['isnew']:
        srcHost = row['client']
        dstHost = thisHost
        isNew = 1
    else:
        srcHost = thisHost
        dstHost = row['client']
        isNew = 0

    rec = Gratia.UsageRecord('Storage')
    rec.Njobs(row['njobs'])
    rec.AdditionalInfo('Source', srcHost)
    rec.AdditionalInfo('Destination', dstHost)
    rec.AdditionalInfo('Protocol', row['protocol'])
    rec.AdditionalInfo('IsNew', isNew)
    rec.LocalJobId(row['transaction'])
    if row['protocol'].startswith("DCap"):
        rec.Grid("Local")
    else:
        # Set the grid name to the default in the ProbeConfig
        rec.Grid(self._grid)
    rec.StartTime(startTime)
    rec.Network(row['transfersize'], 'b', connectionTimeStr, 'total',
                row['action'])
    rec.WallDuration(connectionTimeStr)

    # only send the initiator if it is known.
    if row['initiator'] != 'unknown':
        rec.DN(row['initiator'])
    # if the initiator host is "unknown", make it "Unknown".
    initiatorHost = row['initiatorhost']
    if initiatorHost == 'unknown':
        initiatorHost = 'Unknown'
    rec.SubmitHost(initiatorHost)
    rec.Status(row['errorcode'])

    # If we included the mapped uid as the local user id, then
    # Gratia will make a best effort to map this to the VO name.
    mappedUID = row['mappeduid']
    mappedGID = row['mappedgid']
    if row['protocol'] == 'NFS4-4.1':
        # NFS 4.1 rows carry the username directly in the initiator field.
        username = row['initiator']
        rec.LocalUserId(username)
        return rec
    try:
        username = '******'
        if row['initiator'] != 'unknown':
            username = row['initiator']
        if mappedUID is not None and int(mappedUID) >= 0:
            try:
                # Prefer the local passwd entry for the mapped UID.
                # pwd.getpwuid raises KeyError for an unknown UID.
                info = pwd.getpwuid(int(mappedUID))
                username = info[0]
            except KeyError:
                # UID unknown locally: fall back to the GID->group-name map,
                # reloading it if the backing file changed on disk.
                try:
                    mtime = os.stat(self._unix_gid_list_file_name).st_mtime
                    if self.__gid_file_mod_time != mtime:
                        self.__gid_file_mod_time = mtime
                        self.__refresh_group_map()
                    username = self.__group_map.get(str(mappedGID))
                    if not username:
                        # BUG FIX: arguments were passed as (file, uid, gid)
                        # against a "UID %s %s ... or %s" message, garbling
                        # the log; order now matches the format string.
                        self._log.warn("UID %s GID %s not found locally; make sure " \
                            "/etc/passwd or %s on this host and your dCache are using " \
                            "the same UIDs,GIDs!"
                            % (str(int(mappedUID)), str(int(mappedGID)),
                               self._unix_gid_list_file_name))
                except Exception:
                    # BUG FIX: "/etc/passwed" typo corrected. `except Exception`
                    # (not bare except) so Ctrl-C / SystemExit still propagate.
                    self._log.warn("UID %s not found locally in /etc/passwd and %s does not exist or "\
                        "inaccessible " % (str(int(mappedUID)),
                                           self._unix_gid_list_file_name))
        rec.LocalUserId(username)
    except (KeyboardInterrupt, SystemExit):
        raise
    except Exception:
        self._log.info("Failed to map UID %s to VO." % mappedUID)
    return rec
for var2 in lines: if var2.count('QDate') > 0: starttime = var2.split()[2] elif var2.count('RemoteWallClockTime') > 0: walltime = var2.split()[2] elif var2.count('CompletionDate') > 0: endtime = var2.split()[2] elif var2.count('Owner') > 0: if var2.split()[2] == '"boinc"': boincjob = True if boincjob == True: Gratia.setProbeBatchManager('Condor') Gratia.Initialize() r = Gratia.UsageRecord('Condor') r.ResourceType('Backfill') # parsing the filenames for the hostname/localjobid. # the files are in the format: history.<hostname>#<localjobid>#1#<localjobid> host = var.partition('.')[2].partition('#')[0] localjobid = var.partition('.')[2].partition('#')[2].partition( '#')[0] # print 'endtime: ' + endtime # print 'starttime: ' + starttime # print 'walltime: ' + walltime # Gratia likes ints, not strings, for times.