def Initialize(customConfig='ProbeConfig'):
    '''Start up the Gratia metering engine.

    customConfig is handed to probe_config.ProbeConfiguration and is also
    concatenated into the start-up log line.  The whole start-up sequence
    is skipped when sandbox_mgmt.backupDirList already has entries.
    '''

    # Nothing to do once the backup directory list is populated
    # (presumably by the InitDirList() call below -- confirm in sandbox_mgmt).
    if len(sandbox_mgmt.backupDirList) != 0:
        return

    # The configuration has to exist before anything else runs:
    # DebugPrint relies on the information it holds.
    config.Config = probe_config.ProbeConfiguration(customConfig)
    DebugPrint(0, 'Initializing Gratia with ' + customConfig)

    # Register the exit-time cleanup hook.
    atexit.register(__disconnect_at_exit__)

    # Pull the tunables out of the freshly loaded configuration.
    global_state.bundle_size = Config.get_BundleSize()
    connect_utils.timeout = Config.get_ConnectionTimeout()
    global_state.CurrentBundle = bundle.Bundle()

    send.Handshake()

    # Set up the list of candidate directories, then look for files left
    # over in them.
    sandbox_mgmt.InitDirList()
    sandbox_mgmt.SearchOutstandingRecord()

    # Process the Condor-CE history directory.
    condor_ce.processHistoryDir()

    # Finally, retry whatever records are still outstanding.
    reprocess.Reprocess()
def Reprocess():
    '''Retry outstanding records until none are left or progress stops.

    ReprocessList() is called once up front.  Further passes run only
    while there is no connection error, that first call reported a true
    result and sandbox_mgmt.hasMoreOutstandingRecord is set.  A pass that
    leaves all three outstanding counters unchanged ends the loop.
    '''
    # NOTE(review): this flag comes from the first ReprocessList() call only;
    # the calls made inside the loop do not refresh it.
    _, first_pass_ok = ReprocessList()

    while True:
        if connect_utils.connectionError or not first_pass_ok:
            break
        if not sandbox_mgmt.hasMoreOutstandingRecord:
            break

        # Snapshot the staged counters before the search, which decreases them.
        staged_tars = sandbox_mgmt.outstandingStagedTarCount
        staged_records = sandbox_mgmt.outstandingStagedRecordCount

        # Need to look for left over files
        sandbox_mgmt.SearchOutstandingRecord()
        if len(sandbox_mgmt.outstandingRecord) == 0:
            DebugPrint(4, 'DEBUG: quit reprocessing loop due empty list')
            break

        # This one is potentially decreased by ReprocessList, so it is
        # sampled only now.
        pending_records = sandbox_mgmt.outstandingRecordCount

        # Attempt to reprocess any outstanding records
        ReprocessList()

        no_progress = (
            pending_records == sandbox_mgmt.outstandingRecordCount
            and staged_records == sandbox_mgmt.outstandingStagedRecordCount
            and staged_tars == sandbox_mgmt.outstandingStagedTarCount
        )
        if no_progress:
            DebugPrint(3, 'Reprocessing seems stalled, stopping it until next successful send')
            break
def Maintenance():
    '''Run one round of routine upkeep.

    In order: perform a handshake, look for left-over record files, retry
    the outstanding records, then call ProcessCurrentBundle().
    '''
    send.Handshake()

    # Look for left-over files first, so that the retry pass below sees them.
    sandbox_mgmt.SearchOutstandingRecord()
    reprocess.Reprocess()

    ProcessCurrentBundle()
def Maintenance():
    '''Run one round of routine upkeep and flush a partly filled bundle.

    In order: perform a handshake, look for left-over record files and
    retry the outstanding records.  Afterwards, if bundling is active
    (bundle_size > 1) and the current bundle holds at least one item, the
    bundle is processed and its response message is logged.
    '''
    send.Handshake()

    # Look for left-over files first, so that the retry pass below sees them.
    sandbox_mgmt.SearchOutstandingRecord()
    reprocess.Reprocess()

    if global_state.bundle_size > 1:
        pending = global_state.CurrentBundle
        if pending.nItems > 0:
            message, _ = bundle.ProcessBundle(pending)
            DebugPrint(0, message)
            DebugPrint(0, '***********************************************************')
def SendHandshake(record):
    '''Serialize a handshake record and send it, or queue it in the bundle.

    record must provide XmlCreate() and an XmlData list of XML text lines;
    XmlData is replaced by the normalized, re-encoded document.

    When global_state.bundle_size > 1 the handshake is added to the current
    bundle; otherwise it is sent to the collector straight away and the
    module-level successfulHandshakes / failedHandshakes counters are
    updated from the response code.

    Returns the response message string, or an internal-error message when
    the generated XML cannot be parsed.
    '''
    global successfulHandshakes
    global failedHandshakes

    banner = '***********************************************************'
    DebugPrint(0, banner)

    # Assemble the record into xml
    record.XmlCreate()

    # Parse it into nodes, etc (transitional: this will eventually be native format)
    xmlDoc = safeParseXML(''.join(record.XmlData))
    if not xmlDoc:
        failedHandshakes += 1
        responseString = 'Internal Error: cannot parse internally generated XML record'
        DebugPrint(0, responseString)
        DebugPrint(0, banner)
        return responseString

    xmlDoc.normalize()

    # Generate the XML
    record.XmlData = safeEncodeXML(xmlDoc).splitlines(True)

    # Close and clean up the document
    xmlDoc.unlink()

    # XmlData is a list of lines, but the collector web service wants one
    # string.  str.join builds it in a single pass.
    usageXmlString = ''.join(record.XmlData)
    DebugPrint(3, 'UsageXml: ' + usageXmlString)

    # Sampled before sending, so it reflects trouble seen on earlier sends.
    connectionProblem = connect_utils.connectionRetries > 0 or connect_utils.connectionError

    if global_state.bundle_size > 1:
        # Delay the sending until we have 'bundle_size' records.
        responseString, response_obj = global_state.CurrentBundle.addHandshake(usageXmlString)
    else:
        # Attempt to send the record to the collector. Note that this must
        # be sent currently as an update, not as a handshake.
        response_obj = connect_utils.sendUsageXML(Config.get_ProbeName(), usageXmlString)
        responseString = response_obj.getMessage()

        DebugPrint(1, 'Response code: ' + str(response_obj.getCode()))
        DebugPrint(1, 'Response message: ' + response_obj.getMessage())

        # Determine if the call was successful based on the response
        # code.  Currently, 0 = success
        if response_obj.getCode() == 0:
            DebugPrint(1, 'Response indicates success, ')
            successfulHandshakes += 1
            if connectionProblem or sandbox_mgmt.hasMoreOutstandingRecord:
                # Reprocess failed records before attempting more new ones
                sandbox_mgmt.SearchOutstandingRecord()
                reprocess.Reprocess()
        else:
            DebugPrint(1, 'Response indicates failure, ')
            failedHandshakes += 1

    DebugPrint(0, responseString)
    DebugPrint(0, banner)
    return responseString
def Send(record):
    '''Back up a usage record on disk, then send it or queue it in the bundle.

    record must provide Print(), XmlCreate(), an XmlData list of XML text
    lines, RemoveTransientInputFiles() and QuarantineTransientInputFiles().
    XmlData is replaced by the normalized, re-encoded document.

    The record is first written to a backup file obtained from
    sandbox_mgmt.OpenNewRecordFile.  If the only "file" available is
    '<stdout>', no backup exists and the record is always sent directly.
    With a backup and bundle_size > 1 the record is added to the current
    bundle instead.

    Returns the response message string.  Any unexpected exception is logged
    and converted into the return value
    'ERROR: record lost due to internal error!'; KeyboardInterrupt and
    SystemExit are re-raised.
    '''
    banner = '***********************************************************'

    try:
        DebugPrint(0, banner)
        DebugPrint(4, 'DEBUG: In Send(record)')
        DebugPrint(4, 'DEBUG: Printing record to send')
        record.Print()
        DebugPrint(4, 'DEBUG: Printing record to send: OK')

        DebugPrint(4, 'DEBUG: File Count: ' + str(sandbox_mgmt.outstandingRecordCount))
        toomanyfiles = sandbox_mgmt.outstandingRecordCount >= Config.get_MaxPendingFiles()

        if global_state.estimatedServiceBacklog > 0:
            global_state.estimatedServiceBacklog -= 1

        # Assemble the record into xml
        DebugPrint(4, 'DEBUG: Creating XML')
        record.XmlCreate()
        DebugPrint(4, 'DEBUG: Creating XML: OK')

        # Parse it into nodes, etc
        DebugPrint(4, 'DEBUG: parsing XML')
        xmlDoc = safeParseXML(''.join(record.XmlData))
        DebugPrint(4, 'DEBUG: parsing XML: OK')

        if not xmlDoc:
            responseString = 'Internal Error: cannot parse internally generated XML record'
            # We intentionally do not delete the input files.
            DebugPrint(0, responseString)
            DebugPrint(0, banner)
            return responseString

        DebugPrint(4, 'DEBUG: Checking XML content')
        if not XmlChecker.CheckXmlDoc(xmlDoc, False):
            DebugPrint(4, 'DEBUG: Checking XML content: BAD')
            xmlDoc.unlink()
            responseString = 'OK: No unsuppressed usage records in this packet: not sending'
            record.QuarantineTransientInputFiles()
            bundle.suppressedCount += 1
            DebugPrint(0, responseString)
            DebugPrint(0, banner)
            return responseString
        DebugPrint(4, 'DEBUG: Checking XML content: OK')

        DebugPrint(4, 'DEBUG: Normalizing XML document')
        xmlDoc.normalize()
        DebugPrint(4, 'DEBUG: Normalizing XML document: OK')

        # Generate the XML
        DebugPrint(4, 'DEBUG: Generating data to send')
        record.XmlData = safeEncodeXML(xmlDoc).splitlines(True)
        DebugPrint(4, 'DEBUG: Generating data to send: OK')

        # Close and clean up the document
        xmlDoc.unlink()

        dirIndex = 0
        success = False
        f = 0

        # Keep asking for a new backup file until one has been filled, or
        # until only '<stdout>' is on offer (no backup possible).
        DebugPrint(4, 'DEBUG: Attempt to back up record to send')
        while not success:
            (f, dirIndex) = sandbox_mgmt.OpenNewRecordFile(dirIndex)
            DebugPrint(3, 'Will save the record in:', f.name)
            DebugPrint(3, 'dirIndex=', dirIndex)
            if f.name == '<stdout>':
                break
            try:
                for line in record.XmlData:
                    f.write(line)
                f.flush()
                if f.tell() > 0:
                    success = True
                    DebugPrint(1, 'Saved record to ' + f.name)
                else:
                    DebugPrint(0, 'failed to fill: ', f.name)
                    sandbox_mgmt.RemoveRecordFile(f.name)
                f.close()
                record.RemoveTransientInputFiles()
            except (KeyboardInterrupt, SystemExit):
                # Same policy as the outer handler: never swallow these.
                raise
            except Exception:
                DebugPrint(
                    0,
                    'failed to fill with exception: ',
                    f.name,
                    '--',
                    sys.exc_info(),
                    '--',
                    sys.exc_info()[0],
                    '++',
                    sys.exc_info()[1],
                )
            DebugPrint(4, 'DEBUG: Backing up record to send: OK')

        # XmlData is a list of lines, but the collector web service wants one
        # string.  str.join builds it in a single pass.
        usageXmlString = ''.join(record.XmlData)
        DebugPrint(3, 'UsageXml: ' + usageXmlString)

        # Sampled before sending, so it reflects trouble seen on earlier sends.
        connectionProblem = connect_utils.connectionRetries > 0 or connect_utils.connectionError

        if global_state.bundle_size > 1 and f.name != '<stdout>':
            # Delay the sending until we have 'bundle_size' records.
            (responseString, response_obj) = global_state.CurrentBundle.addRecord(f.name, usageXmlString)
        else:
            # Attempt to send the record to the collector
            response_obj = connect_utils.sendUsageXML(Config.get_ProbeName(), usageXmlString)
            responseString = response_obj.getMessage()

            DebugPrint(1, 'Response code: ' + str(response_obj.getCode()))
            DebugPrint(1, 'Response message: ' + response_obj.getMessage())

            # Determine if the call was successful based on the response
            # code.  Currently, 0 = success
            if response_obj.getCode() == 0:
                if f.name != '<stdout>':
                    DebugPrint(1, 'Response indicates success, ' + f.name + ' will be deleted')
                    sandbox_mgmt.RemoveRecordFile(f.name)
                else:
                    record.RemoveTransientInputFiles()
                    DebugPrint(1, 'Response indicates success')
                bundle.successfulSendCount += 1
            else:
                bundle.failedSendCount += 1
                if toomanyfiles:
                    DebugPrint(
                        1,
                        'Due to too many pending files and a connection error, the following record was not sent and has not been backed up.'
                    )
                    DebugPrint(1, 'Lost record: ' + usageXmlString)
                    responseString = 'Fatal Error: too many pending files'
                elif f.name == '<stdout>':
                    DebugPrint(0, 'Record send failed and no backup made: record lost!')
                    responseString += '\nFatal: failed record lost!'
                    # Put the identifying elements of the lost record ahead of
                    # the full XML.  These are appended as strings: adding a
                    # tuple to a str raises TypeError.
                    for pattern in (r'^<(?:[^:]*:)?RecordIdentity.*/>$',
                                    r'^<(?:[^:]*:)?GlobalJobId.*/>$'):
                        match = re.search(pattern, usageXmlString, re.MULTILINE)
                        if match:
                            DebugPrint(0, match.group(0))
                            responseString += '\n' + match.group(0)
                    responseString += '\n' + usageXmlString
                else:
                    DebugPrint(1, 'Response indicates failure, ' + f.name + ' will not be deleted')

        DebugPrint(0, responseString)
        DebugPrint(0, banner)

        if (connectionProblem or sandbox_mgmt.hasMoreOutstandingRecord) \
                and global_state.CurrentBundle.nItems == 0 \
                and response_obj.getCode() == 0:
            # Reprocess failed records before attempting more new ones
            sandbox_mgmt.SearchOutstandingRecord()
            reprocess.Reprocess()

        return responseString
    except (KeyboardInterrupt, SystemExit):
        raise
    except Exception:
        # sys.exc_info() instead of a bound name keeps this handler valid on
        # both Python 2 and Python 3.
        DebugPrint(0, 'ERROR: ' + str(sys.exc_info()[1]) + ' exception caught while processing record ')
        DebugPrint(0, ' This record has been LOST')
        DebugPrintTraceback()
        return 'ERROR: record lost due to internal error!'
def ProcessBundle(bundle):
    '''Send every record held in a bundle to the collector in one post.

    bundle.content is iterated as (filename, xmlData) pairs; an empty
    xmlData means the XML has to be read from filename.  Entries that
    cannot be read, or that turn out empty, are left out of the post and
    counted in failedBundleCount.

    Depending on the response code:
      * BundleNotSupported: bundling is switched off (bundle_size = 0), the
        bundle is cleared, and either a handshake or a reprocessing pass is
        started; returns ('Bundling has been canceled.', response_obj).
      * PostTooLarge: the maximum post size is reduced when several items
        were posted, or the single oversized record file is quarantined.
      * 0: counted as success and the record files are removed.
      * anything else: counted as failure and the files are kept.

    Except in the BundleNotSupported case the bundle is cleared before
    returning (responseString, response_obj).  The module-level success and
    failure counters are updated along the way.
    '''
    global successfulHandshakes
    global successfulSendCount
    global failedHandshakes
    global failedSendCount
    global successfulReprocessCount
    global successfulBundleCount
    global failedReprocessCount
    global quarantinedFiles
    global failedBundleCount

    responseString = r''

    # Collect the envelope pieces and join them once at the end, rather than
    # growing one string a record at a time.
    envelope = ['''<?xml version="1.0" encoding="UTF-8"?> <RecordEnvelope> ''']
    for item in bundle.content:
        filename = item[0]
        xmlData = item[1]

        DebugPrint(1, 'Processing bundle file: ' + filename)

        if xmlData == r'':
            # Read the contents of the file into a string of xml
            try:
                in_file = open(filename, 'r')
                try:
                    xmlData = in_file.read()
                finally:
                    # Close the handle even when read() fails.
                    in_file.close()
            except (KeyboardInterrupt, SystemExit):
                raise
            except Exception:
                DebugPrint(1, 'Processing bundle failure: unable to read file: ' + filename)
                responseString = responseString + '\nUnable to read from ' + filename
                failedBundleCount += 1
                continue

        if not xmlData:
            DebugPrint(1, 'Processing bundle failure: ' + filename + ' was empty: skip send')
            responseString = responseString + '\nEmpty file ' + filename + ': XML not sent'
            failedBundleCount += 1
            continue

        envelope.append(__xmlintroRemove.sub(r'', xmlData))
        envelope.append('\n')

    envelope.append('</RecordEnvelope>')
    bundleData = ''.join(envelope)

    # Send the xml to the collector for processing
    response_obj = connect_utils.sendUsageXML(Config.get_ProbeName(), bundleData, 'multiupdate')

    DebugPrint(2, 'Processing bundle Response code: ' + str(response_obj.getCode()))
    DebugPrint(2, 'Processing bundle Response message: ' + response_obj.getMessage())

    if response_obj.getCode() == response.Response.BundleNotSupported:
        DebugPrint(0, "Collector is too old to handle 'bundles', reverting to sending individual records.")
        global_state.bundle_size = 0
        bundle.nLastProcessed = 0
        # Remember this before clear() is called on the bundle.
        hasHandshake = bundle.nHandshakes > 0
        bundle.clear()
        if hasHandshake:
            # Done to break circular dependency between send and bundle
            __import__("gratia.common.send").common.send.Handshake()
        else:
            sandbox_mgmt.SearchOutstandingRecord()
            # Done to break circular dependency between bundle and reprocess
            __import__("gratia.common.reprocess").common.reprocess.Reprocess()
        return 'Bundling has been canceled.', response_obj
    elif response_obj.getCode() == response.Response.PostTooLarge:
        if bundle.nItems > 1:
            # We let a large record to be added to already too many data.
            # Let's try to restrict more the size of the record
            Bundle.decreaseMaxPostSize(0.9)
        elif bundle.nItems == 1:
            # Size in Mb with one decimal; the floor divisions are written
            # out so the value is the same on Python 2 and Python 3.
            DebugPrint(
                0,
                'Error: a record is larger than the Collector can receive. (' +
                str(len(bundleData) * 10 // 1000 // 1000 / 10.0) +
                'Mb vs 2Mb). Record will be Quarantined.')
            quarantinedFiles += 1
            sandbox_mgmt.QuarantineFile(bundle.content[0][0], False)
        else:
            DebugPrint(
                0,
                "Internal error, got a 'too large of a post' response eventhough we have no record at all!"
            )

    # NOTE(review): this discards the per-file 'Unable to read' / 'Empty file'
    # text accumulated above; kept as in the original.
    responseString = 'Processed bundle with ' + str(bundle.nItems) + ' records: ' + response_obj.getMessage()

    # Determine if the call succeeded, and remove the file if it did
    if response_obj.getCode() == 0:
        successfulSendCount += bundle.nRecords
        successfulHandshakes += bundle.nHandshakes
        successfulReprocessCount += bundle.nReprocessed
        successfulBundleCount += 1
        for item in bundle.content:
            filename = item[0]
            if filename != r'':
                DebugPrint(1, 'Bundle response indicates success, ' + filename + ' will be deleted')
                sandbox_mgmt.RemoveRecordFile(filename)
        responseString = 'OK - ' + responseString
    else:
        DebugPrint(1, 'Response indicates failure, the following files will not be deleted:')
        for item in bundle.content:
            filename = item[0]
            if filename != r'':
                DebugPrint(1, ' ' + filename)
        failedSendCount += bundle.nRecords
        failedHandshakes += bundle.nHandshakes
        failedReprocessCount += bundle.nReprocessed
        failedBundleCount += 1

    bundle.nLastProcessed = bundle.nItems
    bundle.clear()

    return responseString, response_obj