def CompressOutbox(probe_dir, outbox, outfiles):
    """Compress the files in probe_dir/outbox into a tar.bz2 archive
    stored under probe_dir/staged/store.

    probe_dir -- probe working directory (contains 'staged')
    outbox    -- directory holding the files listed in outfiles
    outfiles  -- iterable of file names (relative to outbox) to archive

    Returns True on success, False on any failure (failures are logged).
    Increments the module-global outstandingStagedTarCount on success.
    """
    global outstandingStagedTarCount
    staged_store = os.path.join(probe_dir, 'staged', 'store')
    Mkdir(staged_store)
    staging_name = GenerateFilename('tz.', staged_store)
    DebugPrint(1, 'Compressing outbox in tar.bz2 file: ' + staging_name)

    try:
        tar = tarfile.open(staging_name, 'w:bz2')
    except KeyboardInterrupt:
        raise
    except SystemExit:
        raise
    except Exception as e:
        DebugPrint(0, 'Warning: Exception caught while opening tar.bz2 file: '
                   + staging_name + ':')
        DebugPrint(0, 'Caught exception: ', e)
        DebugPrintTraceback()
        return False

    try:
        for f in outfiles:
            # Reduce the size of the file name in the archive
            arcfile = f.replace(Config.getFilenameFragment(), r'')
            arcfile = arcfile.replace('..', '.')
            tar.add(os.path.join(outbox, f), arcfile)
    except KeyboardInterrupt:
        raise
    except SystemExit:
        raise
    except Exception as e:
        DebugPrint(0, 'Warning: Exception caught while adding ' + f + ' from '
                   + outbox + ' to tar.bz2 file: ' + staging_name + ':')
        DebugPrint(0, 'Caught exception: ', e)
        DebugPrintTraceback()
        # BUGFIX: previously returned without closing the archive, leaking
        # the open file handle and leaving it held for the process lifetime.
        # Close best-effort; the staging file is partial either way.
        try:
            tar.close()
        except Exception:
            pass
        return False

    try:
        tar.close()
    except KeyboardInterrupt:
        raise
    except SystemExit:
        raise
    except Exception as e:
        DebugPrint(0, 'Warning: Exception caught while closing tar.bz2 file: '
                   + staging_name + ':')
        DebugPrint(0, 'Caught exception: ', e)
        DebugPrintTraceback()
        return False

    outstandingStagedTarCount += 1
    return True
def UncompressOutbox(staging_name, target_dir):
    """Extract every member of the tar archive staging_name into target_dir.

    staging_name -- path of the tar (any compression tarfile auto-detects via 'r')
    target_dir   -- directory to extract into

    Returns True on success, False on any failure (failures are logged).
    """
    DebugPrint(1, 'Uncompressing: ' + staging_name)

    try:
        tar = tarfile.open(staging_name, 'r')
    except KeyboardInterrupt:
        raise
    except SystemExit:
        raise
    except Exception as e:
        DebugPrint(0, 'Warning: Exception caught while opening tar file: '
                   + staging_name + ':')
        DebugPrint(0, 'Caught exception: ', e)
        DebugPrintTraceback()
        return False

    try:
        for tarinfo in tar:
            DebugPrint(1, 'Extracting: ' + tarinfo.name)
            tar.extract(tarinfo, target_dir)
    except KeyboardInterrupt:
        raise
    except SystemExit:
        raise
    except Exception as e:
        DebugPrint(0, 'Warning: Exception caught while extracting from tar file: '
                   + staging_name + ':')
        DebugPrint(0, 'Caught exception: ', e)
        DebugPrintTraceback()
        # BUGFIX: previously returned without closing the archive handle.
        try:
            tar.close()
        except Exception:
            pass
        return False

    try:
        tar.close()
    except KeyboardInterrupt:
        raise
    except SystemExit:
        raise
    except Exception as e:
        DebugPrint(0, 'Warning: Exception caught while closing tar file: '
                   + staging_name + ':')
        DebugPrint(0, 'Caught exception: ', e)
        DebugPrintTraceback()
        return False

    return True
def processHistoryDir():
    """
    Condor schedd will write one file per finished job into this directory.
    We must convert it from a Condor ClassAd to a certinfo file and rename
    it based on the routed job's name.
    """
    history_dir = Config.get_CondorCEHistoryFolder()
    output_dir = Config.get_DataFolder()
    if not history_dir:
        DebugPrint(3, "No Condor-CE history specified; will not process for"
                   " certinfo.")
        # BUGFIX: previously fell through; with an empty history_dir the
        # glob below would scan 'history.*' in the current directory.
        return
    if not os.path.exists(history_dir):
        DebugPrint(3, "Condor-CE history directory %s does not exist."
                   % history_dir)
        # BUGFIX: nothing to scan; previously fell through to the glob.
        return
    for full_filename in glob.glob(os.path.join(history_dir, "history.*")):
        _, filename = os.path.split(full_filename)
        # Only accept file names matching the module-level historyRe pattern.
        if not historyRe.match(filename):
            DebugPrint(3, "Ignoring history file %s as it does not match "
                       "the regular expression" % filename)
            continue
        try:
            classadToCertinfo(full_filename, output_dir)
        except KeyboardInterrupt:
            raise
        except SystemExit:
            raise
        except Exception as e:
            # Log and keep going: one bad history file must not stop the scan.
            DebugPrint(0, "Failure when trying to process Condor-CE history %s"
                       " into a certinfo file: %s" % (filename, str(e)))
            DebugPrintTraceback(e)
def __InitializeDictionary__():
    """
    For internal use only.

    Parse the user-vo-map file and initialize the module-internal data
    structures with the contents. From now on, VO information lookup in this
    module will be done via an in-memory lookup. Will only be attempted once.
    """
    # Check if there was previously an error
    # If so, do not retry initialization
    global __dictionaryErrorStatus, __voiToVOcDictionary, __UserVODictionary
    if __dictionaryErrorStatus:
        return
    __voiToVOcDictionary = {}
    __UserVODictionary = {}
    # Pessimistically mark as failed; cleared only after a successful parse.
    __dictionaryErrorStatus = True
    mapfile = config.Config.get_UserVOMapFile()
    if mapfile is None:
        DebugPrint(2, "WARNING: No mapfile specified; not using VO mapping.")
        return
    try:
        __InitializeDictionary_internal(mapfile)
        __dictionaryErrorStatus = False
    except IOError as e:
        DebugPrint(0, 'WARNING: IO error exception initializing user-vo-map mapfile %s: %s'
                   % (mapfile, str(e)))
        DebugPrintTraceback()
# NOTE(review): this is a TRUNCATED DUPLICATE of the CompressOutbox defined
# earlier in this file (it ends after the archive-open failure handler; the
# member-adding and close logic is missing). It also uses Python-2 'except
# Exception, e' syntax while the other copy uses 'as e'. One of the two
# copies should probably be removed — TODO confirm which is authoritative.
def CompressOutbox(probe_dir, outbox, outfiles):
    # Compress the probe_dir/outbox and stored the resulting tar.gz file
    # in probe_dir/staged
    global outstandingStagedTarCount
    staged_store = os.path.join(probe_dir, 'staged', 'store')
    Mkdir(staged_store)
    staging_name = GenerateFilename('tz.', staged_store)
    DebugPrint(1, 'Compressing outbox in tar.bz2 file: ' + staging_name)
    try:
        tar = tarfile.open(staging_name, 'w:bz2')
    except KeyboardInterrupt:
        raise
    except SystemExit:
        raise
    except Exception, e:
        DebugPrint(
            0, 'Warning: Exception caught while opening tar.bz2 file: '
            + staging_name + ':')
        DebugPrint(0, 'Caught exception: ', e)
        DebugPrintTraceback()
        return False
def __disconnect_at_exit__():
    """
    Insure that we properly shutdown the connection at the end of the process.

    This includes sending any outstanding records and printing the statistics.
    """
    # Flush a partially-filled bundle, if bundling is enabled.
    if global_state.bundle_size > 1 and global_state.CurrentBundle.nItems > 0:
        responseString, _ = bundle.ProcessBundle(global_state.CurrentBundle)
        DebugPrint(0, responseString)
        DebugPrint(0, '***********************************************************')
    connect_utils.disconnect()
    if config.Config:
        try:
            # Housekeeping: prune aged logs, data files and quarantine.
            sandbox_mgmt.RemoveOldLogs(Config.get_LogRotate())
            sandbox_mgmt.RemoveOldJobData(Config.get_DataFileExpiration())
            sandbox_mgmt.RemoveOldQuarantine(Config.get_DataFileExpiration(),
                                             Config.get_QuarantineSize())
        except KeyboardInterrupt:
            raise
        except SystemExit:
            raise
        except Exception as exception:
            # Best-effort cleanup: log the failure but do not abort exit.
            DebugPrint(0, 'Exception caught at top level: ' + str(exception))
            DebugPrintTraceback()
# NOTE(review): TRUNCATED DUPLICATE of the connect() defined later in this
# file; this copy uses Python-2 'except E, ex' syntax and ends in the middle
# of the exception chain guarding connection.connect(). The original
# indentation was lost; the nesting below is reconstructed — TODO confirm
# against the authoritative copy.
def connect():
    ##
    ## __connect
    ##
    ## Author - Tim Byrne
    ##
    ## Connect to the web service on the given server, sets the module-level object __connection__
    ## equal to the new connection.  Will not reconnect if __connection__ is already connected.
    ##
    global connection
    global connected
    global connectionError
    global connectionRetries
    global __retryDelay
    global __last_retry_time

    # __connectionError__ = True
    # return connected

    if connectionError:
        disconnect()
        connectionError = False
        # Exponential backoff: once the retry budget is exhausted, only
        # attempt again after __retryDelay seconds have elapsed.
        if connectionRetries > __maxConnectionRetries__:
            current_time = time.time()
            if not __last_retry_time:
                # Set time but do not reset failures
                __last_retry_time = current_time
                return connected
            if current_time - __last_retry_time > __retryDelay:
                __last_retry_time = current_time
                DebugPrint(1, 'Retry connection after ', __retryDelay, 's')
                __retryDelay = __retryDelay * __backoff_factor
                if __retryDelay > __maximumDelay:
                    __retryDelay = __maximumDelay
                connectionRetries = 0
        connectionRetries += 1

    if not connected and connectionRetries <= __maxConnectionRetries__:
        if Config.get_UseSSL() == 0 and Config.get_UseSoapProtocol() == 1:
            DebugPrint(0, 'Error: SOAP connection is no longer supported.')
            # NOTE(review): assigns __connectionError__, not the
            # 'connectionError' global declared above — looks like a stale
            # name; verify intent.
            __connectionError__ = True
            return connected
        elif Config.get_UseSSL() == 0 and Config.get_UseSoapProtocol() == 0:
            # Plain HTTP path.
            try:
                if ProxyUtil.findHTTPProxy():
                    DebugPrint(0, 'WARNING: http_proxy is set but not supported')
                # __connection__ = ProxyUtil.HTTPConnection(Config.get_SOAPHost(),
                #                                http_proxy = ProxyUtil.findHTTPProxy())
                connection = httplib.HTTPConnection(Config.get_SOAPHost())
            except KeyboardInterrupt:
                raise
            except SystemExit:
                raise
            except Exception, ex:
                DebugPrint(0, 'ERROR: could not initialize HTTP connection')
                DebugPrintTraceback()
                connectionError = True
                return connected
            try:
                # Guard connect() with SIGALRM so a hung TCP connect times out.
                prev_handler = signal.signal(signal.SIGALRM, __handle_timeout__)
                signal.alarm(timeout)
                DebugPrint(4, 'DEBUG: Connect')
                connection.connect()
                DebugPrint(4, 'DEBUG: Connect: OK')
                signal.alarm(0)
                signal.signal(signal.SIGALRM, prev_handler)
            except socket.error, ex:
                DebugPrint(3, 'Socket connection error: ' + str(ex))
                connectionError = True
                raise
            except GratiaTimeout:
                DebugPrint(3, 'Connection timeout (GratiaTimeout exception).')
                connectionError = True
                raise
                # NOTE(review): fragment truncated here.
# NOTE(review): this definition is TRUNCATED — it ends inside the exception
# chain (after the socket.error handler); the remaining handlers and the
# return statement are not present in this span. Python-2 'except E, ex'
# syntax. Indentation reconstructed from the flattened source.
def sendUsageXML(meterId, recordXml, messageType='URLEncodedUpdate'):
    """
    sendUsageXML

    Author - Tim Byrne

    Contacts the 'GratiaCollector' web service, sending it an xml
    representation of Usage data.

    param - meterId: A unique Id for this meter, something the web service
            can use to identify communication from this meter
    param - xmlData: A string representation of usage xml
    """
    global connectionError
    global certificateRejected
    global __resending

    # Backward compatibility with old collectors
    if global_state.collector__wantsUrlencodeRecords == 0:
        messageType = 'update'

    try:
        # Connect to the web service, in case we aren't already
        # connected.  If we are already connected, this call will do
        # nothing
        if not connect():  # Failed to connect
            raise IOError  # Kick out to except: clause

        # Generate a unique Id for this transaction
        transactionId = meterId + utils.TimeToString().replace(':', r'')
        DebugPrint(3, 'TransactionId: ' + transactionId)

        if Config.get_UseSSL() == 0 and Config.get_UseSoapProtocol() == 1:
            DebugPrint(0, 'Error: SOAP connection is no longer supported.')
            connectionError = True
            response_obj = response.Response(
                response.Response.Failed,
                'Error: SOAP connection is no longer supported.')
        elif Config.get_UseSSL() == 0 and Config.get_UseSoapProtocol() == 0:
            # Plain-HTTP POST of the urlencoded record.
            queryString = encodeData(messageType, recordXml)
            # Attempt to make sure Collector can actually read the post.
            headers = {'Content-type': 'application/x-www-form-urlencoded'}
            responseString = postRequest(connection, Config.get_CollectorService(),
                                         queryString, headers)
            response_obj = response.Response(response.Response.AutoSet,
                                             responseString)
            if response_obj.getCode() == response.Response.UnknownCommand:
                # We're talking to an old collector
                DebugPrint(
                    0,
                    'Unable to send new record to old collector -- engaging backwards-compatible mode for remainder of connection'
                    )
                global_state.collector__wantsUrlencodeRecords = 0
                # Try again with the same record before returning to the
                # caller.  There will be no infinite recursion because
                # __url_records has been reset
                response_obj = sendUsageXML(meterId, recordXml, messageType)
        else:
            # SSL
            DebugPrint(4, 'DEBUG: Encoding data for SSL transmission')
            queryString = encodeData(messageType, recordXml)
            DebugPrint(4, 'DEBUG: Encoding data for SSL transmission: OK')
            # Attempt to make sure Collector can actually read the post.
            headers = {'Content-type': 'application/x-www-form-urlencoded'}
            responseString = postRequest(connection, Config.get_SSLCollectorService(),
                                         queryString, headers)
            response_obj = response.Response(response.Response.AutoSet,
                                             responseString)
            if response_obj.getCode() == response.Response.UnknownCommand:
                # We're talking to an old collector
                DebugPrint(
                    0,
                    'Unable to send new record to old collector -- engaging backwards-compatible mode for remainder of connection'
                    )
                global_state.collector__wantsUrlencodeRecords = 0
                # Try again with the same record before returning to the
                # caller.  There will be no infinite recursion because
                # __url_records has been reset
                response_obj = sendUsageXML(meterId, recordXml, messageType)
            elif response_obj.getCode() == response.Response.BadCertificate:
                connectionError = True
                certificateRejected = True
                response_obj = response.Response(response.Response.AutoSet,
                                                 responseString)

        # NOTE(review): getCode is NOT called here (missing parentheses), so
        # both comparisons compare a bound method against a constant and are
        # always False — this branch can never trigger. Likely a bug; left
        # as-is because this copy of the function is truncated.
        if response_obj.getCode == response.Response.ConnectionError or response_obj.getCode == response.Response.CollectorError:
            # Server threw an error - 503, maybe?
            connectionError = True
            response_obj = response.Response(
                response.Response.Failed,
                r'Server unable to receive data: save for reprocessing')
    except SystemExit:
        raise
    except socket.error, ex:
        # errno 111 == ECONNREFUSED on Linux.
        if ex.args[0] == 111:
            DebugPrint(
                0,
                'Connection refused while attempting to send xml to web service'
                )
        else:
            DebugPrint(
                0,
                'Failed to send xml to web service due to an error of type "',
                sys.exc_info()[0], '": ', sys.exc_info()[1])
            DebugPrintTraceback(1)
        response_obj = response.Response(
            response.Response.Failed,
            r'Server unable to receive data: save for reprocessing')
        # NOTE(review): fragment truncated here.
# NOTE(review): ORPHANED FRAGMENT. This span is the tail of a connect()
# body (Python-2 syntax) — the exception chain for the HTTP connect attempt
# plus the start of the SSL branch — spliced in after the sendUsageXML
# fragment above. It is not attached to any 'def'/'try' in this span and
# ends on a dangling 'if pr_cert_file == None:' with no body. Indentation
# below is reconstructed for readability only.
    except GratiaTimeout:
        DebugPrint(3, 'Connection timeout (GratiaTimeout exception).')
        connectionError = True
        raise
    except KeyboardInterrupt:
        raise
    except SystemExit:
        raise
    except Exception, ex:
        connectionError = True
        DebugPrint(4, 'DEBUG: Connect: FAILED')
        DebugPrint(0, 'Error: While trying to connect to HTTP, caught exception '
                   + str(ex))
        DebugPrintTraceback()
        return connected
    DebugPrint(1, 'Connection via HTTP to: ' + Config.get_SOAPHost())
else:
    # print "Using POST protocol"
    # assert(Config.get_UseSSL() == 1)
    if Config.get_UseGratiaCertificates() == 0:
        pr_cert_file = Config.get_CertificateFile()
        pr_key_file = Config.get_KeyFile()
    else:
        pr_cert_file = Config.get_GratiaCertificateFile()
        pr_key_file = Config.get_GratiaKeyFile()
    # NOTE(review): fragment truncated here (condition has no body).
    if pr_cert_file == None:
def Send(record):
    """Validate, back up and transmit a single usage record.

    record -- a Gratia record object (must support XmlCreate/Print/XmlData
              and the transient-input-file management methods).

    The record is serialized to XML, sanity-checked, saved to a local
    backup file, then either queued in the current bundle or sent directly
    to the collector. Returns the collector's (or an internally generated)
    response string; the record is never silently dropped without a log line.
    """
    try:
        DebugPrint(0, '***********************************************************')
        DebugPrint(4, 'DEBUG: In Send(record)')
        DebugPrint(4, 'DEBUG: Printing record to send')
        record.Print()
        DebugPrint(4, 'DEBUG: Printing record to send: OK')
        DebugPrint(4, 'DEBUG: File Count: ' + str(sandbox_mgmt.outstandingRecordCount))
        toomanyfiles = sandbox_mgmt.outstandingRecordCount >= Config.get_MaxPendingFiles()
        if global_state.estimatedServiceBacklog > 0:
            global_state.estimatedServiceBacklog -= 1

        # Assemble the record into xml
        DebugPrint(4, 'DEBUG: Creating XML')
        record.XmlCreate()
        DebugPrint(4, 'DEBUG: Creating XML: OK')

        # Parse it into nodes, etc
        DebugPrint(4, 'DEBUG: parsing XML')
        xmlDoc = safeParseXML(string.join(record.XmlData, r''))
        DebugPrint(4, 'DEBUG: parsing XML: OK')

        if not xmlDoc:
            responseString = 'Internal Error: cannot parse internally generated XML record'
            # We intentionally do not delete the input files.
            DebugPrint(0, responseString)
            DebugPrint(0, '***********************************************************')
            return responseString

        DebugPrint(4, 'DEBUG: Checking XML content')
        if not XmlChecker.CheckXmlDoc(xmlDoc, False):
            DebugPrint(4, 'DEBUG: Checking XML content: BAD')
            xmlDoc.unlink()
            responseString = 'OK: No unsuppressed usage records in this packet: not sending'
            record.QuarantineTransientInputFiles()
            bundle.suppressedCount += 1
            DebugPrint(0, responseString)
            DebugPrint(0, '***********************************************************')
            return responseString
        DebugPrint(4, 'DEBUG: Checking XML content: OK')

        DebugPrint(4, 'DEBUG: Normalizing XML document')
        xmlDoc.normalize()
        DebugPrint(4, 'DEBUG: Normalizing XML document: OK')

        # Generate the XML
        DebugPrint(4, 'DEBUG: Generating data to send')
        record.XmlData = safeEncodeXML(xmlDoc).splitlines(True)
        DebugPrint(4, 'DEBUG: Generating data to send: OK')

        # Close and clean up the document2
        xmlDoc.unlink()

        dirIndex = 0
        success = False
        f = 0

        DebugPrint(4, 'DEBUG: Attempt to back up record to send')
        while not success:
            (f, dirIndex) = sandbox_mgmt.OpenNewRecordFile(dirIndex)
            DebugPrint(3, 'Will save the record in:', f.name)
            DebugPrint(3, 'dirIndex=', dirIndex)
            if f.name != '<stdout>':
                try:
                    for line in record.XmlData:
                        f.write(line)
                    f.flush()
                    if f.tell() > 0:
                        success = True
                        DebugPrint(1, 'Saved record to ' + f.name)
                    else:
                        DebugPrint(0, 'failed to fill: ', f.name)
                        if f.name != '<stdout>':
                            sandbox_mgmt.RemoveRecordFile(f.name)
                    f.close()
                    record.RemoveTransientInputFiles()
                except:
                    # NOTE: deliberately bare — any write failure retries
                    # with the next sandbox directory instead of aborting.
                    DebugPrint(
                        0,
                        'failed to fill with exception: ',
                        f.name,
                        '--',
                        sys.exc_info(),
                        '--',
                        sys.exc_info()[0],
                        '++',
                        sys.exc_info()[1],
                        )
                DebugPrint(4, 'DEBUG: Backing up record to send: OK')
            else:
                break

        # Currently, the recordXml is in a list format, with each item being a line of xml.
        # the collector web service requires the xml to be sent as a string.
        # This logic here turns the xml list into a single xml string.
        usageXmlString = r''
        for line in record.XmlData:
            usageXmlString = usageXmlString + line
        DebugPrint(3, 'UsageXml: ' + usageXmlString)

        connectionProblem = connect_utils.connectionRetries > 0 or connect_utils.connectionError

        if global_state.bundle_size > 1 and f.name != '<stdout>':
            # Delay the sending until we have 'bundle_size' records.
            (responseString, response_obj) = global_state.CurrentBundle.addRecord(f.name, usageXmlString)
        else:
            # Attempt to send the record to the collector
            response_obj = connect_utils.sendUsageXML(Config.get_ProbeName(), usageXmlString)
            responseString = response_obj.getMessage()
            DebugPrint(1, 'Response code: ' + str(response_obj.getCode()))
            DebugPrint(1, 'Response message: ' + response_obj.getMessage())

            # Determine if the call was successful based on the response
            # code.  Currently, 0 = success
            if response_obj.getCode() == 0:
                if f.name != '<stdout>':
                    DebugPrint(1, 'Response indicates success, ' + f.name + ' will be deleted')
                    sandbox_mgmt.RemoveRecordFile(f.name)
                else:
                    record.RemoveTransientInputFiles()
                    DebugPrint(1, 'Response indicates success')
                bundle.successfulSendCount += 1
            else:
                bundle.failedSendCount += 1
                if toomanyfiles:
                    DebugPrint(
                        1,
                        'Due to too many pending files and a connection error, the following record was not sent and has not been backed up.'
                        )
                    DebugPrint(1, 'Lost record: ' + usageXmlString)
                    responseString = 'Fatal Error: too many pending files'
                elif f.name == '<stdout>':
                    DebugPrint(0, 'Record send failed and no backup made: record lost!')
                    responseString += '\nFatal: failed record lost!'
                    match = re.search(r'^<(?:[^:]*:)?RecordIdentity.*/>$',
                                      usageXmlString, re.MULTILINE)
                    if match:
                        DebugPrint(0, match.group(0))
                        # BUGFIX: was "responseString += ('\n', match.group(0))"
                        # which raises TypeError (str += tuple).
                        responseString += '\n' + match.group(0)
                    match = re.search(r'^<(?:[^:]*:)?GlobalJobId.*/>$',
                                      usageXmlString, re.MULTILINE)
                    if match:
                        DebugPrint(0, match.group(0))
                        # BUGFIX: same str-plus-tuple error as above.
                        responseString += '\n' + match.group(0)
                    responseString += '\n' + usageXmlString
                else:
                    DebugPrint(1, 'Response indicates failure, ' + f.name + ' will not be deleted')

            DebugPrint(0, responseString)
            DebugPrint(0, '***********************************************************')

            if (connectionProblem or sandbox_mgmt.hasMoreOutstandingRecord) and global_state.CurrentBundle.nItems == 0 \
                    and response_obj.getCode() == 0:
                # Reprocess failed records before attempting more new ones
                sandbox_mgmt.SearchOutstandingRecord()
                reprocess.Reprocess()

        return responseString
    except KeyboardInterrupt:
        raise
    except SystemExit:
        raise
    except Exception as e:
        DebugPrint(0, 'ERROR: ' + str(e) + ' exception caught while processing record ')
        DebugPrint(0, ' This record has been LOST')
        DebugPrintTraceback()
        return 'ERROR: record lost due to internal error!'
def __disconnect_at_exit__():
    """
    Shut down cleanly at process exit.

    Flushes any partially-filled record bundle, drops the collector
    connection, prunes aged logs/data/quarantine files, and logs the
    end-of-run statistics counters.
    """
    pending = global_state.CurrentBundle
    if global_state.bundle_size > 1 and pending.nItems > 0:
        flush_msg, _ = bundle.ProcessBundle(pending)
        DebugPrint(0, flush_msg)
        DebugPrint(0, '***********************************************************')

    connect_utils.disconnect()

    if config.Config:
        try:
            expiration = Config.get_DataFileExpiration()
            sandbox_mgmt.RemoveOldLogs(Config.get_LogRotate())
            sandbox_mgmt.RemoveOldJobData(expiration)
            sandbox_mgmt.RemoveOldQuarantine(expiration, Config.get_QuarantineSize())
        except KeyboardInterrupt:
            raise
        except SystemExit:
            raise
        except Exception as exception:
            DebugPrint(0, 'Exception caught at top level: ' + str(exception))
            DebugPrintTraceback()

    # End-of-run accounting: one log line per counter.
    summary_counters = (
        ('End of execution summary: new records sent successfully: ', bundle.successfulSendCount),
        (' new records suppressed: ', bundle.suppressedCount),
        (' new records failed: ', bundle.failedSendCount),
        (' records reprocessed successfully: ', bundle.successfulReprocessCount),
        (' reprocessed records failed: ', bundle.failedReprocessCount),
        (' handshake records sent successfully: ', bundle.successfulHandshakes),
        (' handshake records failed: ', bundle.failedHandshakes),
        (' bundle of records sent successfully: ', bundle.successfulBundleCount),
        (' bundle of records failed: ', bundle.failedBundleCount),
        (' outstanding records: ', sandbox_mgmt.outstandingRecordCount),
        (' outstanding staged records: ', sandbox_mgmt.outstandingStagedRecordCount),
        (' outstanding records tar files: ', sandbox_mgmt.outstandingStagedTarCount),
    )
    for label, count in summary_counters:
        DebugPrint(0, label + str(count))

    DebugPrint(1, 'End-of-execution disconnect ...')
# NOTE(review): this definition appears TRUNCATED — the visible span ends at
# the 'continue' inside the per-record loop; any code after the loop (e.g. a
# return value) is not in this span. Python-2 idioms ('except E, e',
# has_key, string.lower). Indentation reconstructed from flattened source.
def UsageCheckXmldoc(xmlDoc, external, resourceType=None):
    '''Fill in missing field in the xml document if needed'''
    DebugPrint(4, 'DEBUG: In UsageCheckXmldoc')
    DebugPrint(4, 'DEBUG: Checking xmlDoc integrity')
    if not xmlDoc.documentElement:
        # Major problem
        return 0
    DebugPrint(4, 'DEBUG: Checking xmlDoc integrity: OK')
    DebugPrint(4, 'DEBUG: XML record to send: \n' + xmlDoc.toxml())

    # Local namespace
    namespace = xmlDoc.documentElement.namespaceURI

    # Loop over (posibly multiple) jobUsageRecords
    DebugPrint(4, 'DEBUG: About to examine individual UsageRecords')
    for usageRecord in getUsageRecords(xmlDoc):
        DebugPrint(4, 'DEBUG: Examining UsageRecord')
        DebugPrint(4, 'DEBUG: Looking for prefix')
        # Local namespace and prefix, if any
        prefix = r''
        for child in usageRecord.childNodes:
            if child.nodeType == xml.dom.minidom.Node.ELEMENT_NODE and child.prefix:
                prefix = child.prefix + ':'
                break
        DebugPrint(4, 'DEBUG: Looking for prefix: ' + prefix)
        StandardCheckXmldoc(xmlDoc, usageRecord, external, prefix)

        # Add ResourceType if appropriate
        if external and resourceType != None:
            DebugPrint(4, 'DEBUG: Adding missing resourceType ' + str(resourceType))
            AddResourceIfMissingKey(
                xmlDoc,
                usageRecord,
                namespace,
                prefix,
                'ResourceType',
                resourceType,
                )

        # Identity info check
        VOName = None
        DebugPrint(4, 'DEBUG: Finding userIdentityNodes')
        userIdentityNodes = usageRecord.getElementsByTagNameNS(namespace, 'UserIdentity')
        DebugPrint(4, 'DEBUG: Finding userIdentityNodes (processing)')
        if not userIdentityNodes:
            DebugPrint(4, 'DEBUG: Finding userIdentityNodes: 0')
            [jobIdType, jobId] = FindBestJobId(usageRecord, namespace)
            DebugPrint(0, 'Warning: no UserIdentity block in ' + jobIdType + ' ' + jobId)
        else:
            try:
                id_info = {}
                DebugPrint(4, 'DEBUG: Finding userIdentityNodes (processing 2)')
                DebugPrint(4, 'DEBUG: Finding userIdentityNodes: '
                           + str(userIdentityNodes.length))
                if userIdentityNodes.length > 1:
                    [jobIdType, jobId] = FindBestJobId(usageRecord, namespace)
                    DebugPrint(0, 'Warning: too many UserIdentity blocks in '
                               + jobIdType + ' ' + jobId)

                DebugPrint(4, 'DEBUG: Call CheckAndExtendUserIdentity')
                # Only the first UserIdentity block is examined/extended.
                id_info = CheckAndExtendUserIdentity(xmlDoc, userIdentityNodes[0],
                                                     namespace, prefix)
                DebugPrint(4, 'DEBUG: Call CheckAndExtendUserIdentity: OK')
                ResourceType = FirstResourceMatching(xmlDoc, usageRecord,
                                                     namespace, prefix, 'ResourceType')
                DebugPrint(4, 'DEBUG: Read ResourceType as ' + str(ResourceType))
                # Batch records without certinfo are presumed local jobs.
                if Config.get_NoCertinfoBatchRecordsAreLocal() and ResourceType and ResourceType == 'Batch' \
                    and not (id_info.has_key('has_certinfo') and id_info['has_certinfo']):
                    # Set grid local
                    DebugPrint(4, 'DEBUG: no certinfo: setting grid to Local')
                    UpdateOrInsertElement(
                        xmlDoc,
                        usageRecord,
                        namespace,
                        prefix,
                        'Grid',
                        'Local',
                        )
                if id_info.has_key('VOName'):
                    VOName = id_info['VOName']
            except KeyboardInterrupt:
                raise
            except SystemExit:
                raise
            except Exception, e:
                DebugPrint(0, 'DEBUG: Caught exception: ', e)
                DebugPrintTraceback()
                raise

        # If we are trying to handle only GRID jobs, optionally suppress records.
        #
        # Order of preference from the point of view of data integrity:
        #
        # 1. With grid set to Local (modern condor probe (only) detects
        # attribute inserted in ClassAd by Gratia JobManager patch found
        # in OSG 1.0+).
        #
        # 2, Missing DN (preferred, but requires JobManager patch and
        # could miss non-delegated WS jobs).
        #
        # 3. A null or unknown VOName (prone to suppressing jobs we care
        # about if osg-user-vo-map.txt is not well-cared-for).
        reason = None
        isQuarantined = False
        grid = GetElement(xmlDoc, usageRecord, namespace, prefix, 'Grid')
        if Config.get_SuppressgridLocalRecords() and grid and string.lower(grid) == 'local':
            # 1
            reason = 'Grid == Local'
        elif Config.get_SuppressNoDNRecords() and not usageRecord.getElementsByTagNameNS(namespace, 'DN'):
            # 2
            reason = 'missing DN'
        elif Config.get_SuppressUnknownVORecords() and (not VOName or VOName == 'Unknown'):
            # 3
            reason = 'unknown or null VOName'
        elif Config.get_QuarantineUnknownVORecords() and (not VOName or VOName == 'Unknown'):
            reason = 'unknown or null VOName, will be quarantined in %s' % (os.path.join(
                os.path.join(Config.get_DataFolder(), "quarantine")))
            isQuarantined = True

        if reason:
            [jobIdType, jobId] = FindBestJobId(usageRecord, namespace)
            DebugPrint(0, 'Info: suppressing record with ' + jobIdType + ' '
                       + jobId + ' due to ' + reason)
            usageRecord.parentNode.removeChild(usageRecord)
            if isQuarantined:
                # Save the suppressed record under a per-probe quarantine subdir.
                subdir = os.path.join(Config.get_DataFolder(), "quarantine",
                                      'subdir.' + Config.getFilenameFragment())
                if not os.path.exists(subdir):
                    os.mkdir(subdir)
                fn = sandbox_mgmt.GenerateFilename("r.", subdir)
                writer = open(fn, 'w')
                usageRecord.writexml(writer)
                writer.close()
            usageRecord.unlink()
            continue
            # NOTE(review): span truncated here.
def __createCertificateFile(self, keyfile, certfile):
    """Request a fresh probe certificate from the registration service
    and write the key/cert PEM files to keyfile/certfile.

    Returns True on success; False if a previous request this run was
    rejected or the service response is not 'ok:<cert>:<key>'.
    """
    # Get a fresh certificate.
    # if (False):
    #    cakey = createKeyPair(crypto.TYPE_RSA, 1024)
    #    careq = createCertRequest(cakey, CN='Certificate Authority')
    #    cacert = createCertificate(careq, (careq, cakey), 0, (0, 60*60*24*365*1))  # one year
    #    open(keyfile, 'w').write(crypto.dump_privatekey(crypto.FILETYPE_PEM, cakey))
    #    open(certfile, 'w').write(crypto.dump_certificate(crypto.FILETYPE_PEM, cacert))
    #    return True
    # else:
    # Download it from the server.

    # Try this only once per run
    if isCertrequestRejected():
        return False

    # qconnection = ProxyUtil.HTTPConnection(self.get_SSLRegistrationHost(),
    #                          http_proxy = ProxyUtil.findHTTPProxy())
    qconnection = httplib.HTTPConnection(self.get_SSLRegistrationHost())
    qconnection.connect()

    queryString = urllib.urlencode([('command', 'request'),
                                    ('from', self.get_ProbeName()),
                                    ('arg1', 'not really')])
    headers = {'Content-type': 'application/x-www-form-urlencoded'}
    qconnection.request('POST', self.get_RegistrationService(), queryString, headers)
    responseString = utils.bytes2str(qconnection.getresponse().read())
    resplist = responseString.split(':')
    if len(resplist) == 3 and resplist[0] == 'ok':
        # We received the info, let's store it
        # cert = crypto.load_certificate(crypto.FILETYPE_PEM,resplist[1])
        # key = crypto.load_privatekey(crypto.FILETYPE_PEM,resplist[1])

        # First create any sub-directory if needed.
        keydir = os.path.dirname(keyfile)
        if keydir != r'' and os.path.exists(keydir) == 0:
            Mkdir(keydir)
        certdir = os.path.dirname(certfile)
        if certdir != r'' and os.path.exists(certdir) == 0:
            Mkdir(certdir)

        # and then save the pem files.
        # BUGFIX: use context managers — the previous open(...).write(...)
        # never closed the handles, so the data was not guaranteed flushed.
        with open(keyfile, 'w') as key_fh:
            key_fh.write(resplist[2])
        with open(certfile, 'w') as cert_fh:
            cert_fh.write(resplist[1])
        # NOTE(review): the key file is created with default permissions;
        # we could do os.chmod(keyfile, 0600) to restrict it — confirm
        # whether a private key world-readable here is acceptable.
    else:
        DebugPrint(4, 'DEBUG: Connect: FAILED')
        DebugPrint(0, 'Error: while getting new certificate: ' + responseString)
        DebugPrintTraceback()
        setCertrequestRejected()
        return False
    return True
# NOTE(review): full (Python-3-syntax) version of connect(); a truncated
# Python-2 duplicate appears earlier in this file. The original indentation
# was lost in this source; the nesting below is reconstructed — confirm
# against the authoritative copy before relying on the retry/backoff scoping.
def connect():
    ##
    ## __connect
    ##
    ## Author - Tim Byrne
    ##
    ## Connect to the web service on the given server, sets the module-level object __connection__
    ## equal to the new connection.  Will not reconnect if __connection__ is already connected.
    ##
    global connection
    global connected
    global connectionError
    global connectionRetries
    global __retryDelay
    global __last_retry_time

    # __connectionError__ = True
    # return connected

    if connectionError:
        disconnect()
        connectionError = False
        # Exponential backoff: once the retry budget is exhausted, only
        # attempt again after __retryDelay seconds; the delay grows by
        # __backoff_factor up to __maximumDelay.
        if connectionRetries > __maxConnectionRetries__:
            current_time = time.time()
            if not __last_retry_time:
                # Set time but do not reset failures
                __last_retry_time = current_time
                return connected
            if current_time - __last_retry_time > __retryDelay:
                __last_retry_time = current_time
                DebugPrint(1, 'Retry connection after ', __retryDelay, 's')
                __retryDelay = __retryDelay * __backoff_factor
                if __retryDelay > __maximumDelay:
                    __retryDelay = __maximumDelay
                connectionRetries = 0
        connectionRetries += 1

    if not connected and connectionRetries <= __maxConnectionRetries__:
        if Config.get_UseSSL() == 0 and Config.get_UseSoapProtocol() == 1:
            DebugPrint(0, 'Error: SOAP connection is no longer supported.')
            # NOTE(review): assigns __connectionError__, not the
            # 'connectionError' global declared above — looks like a stale
            # name; verify intent.
            __connectionError__ = True
            return connected
        elif Config.get_UseSSL() == 0 and Config.get_UseSoapProtocol() == 0:
            # Plain HTTP path.
            try:
                if ProxyUtil.findHTTPProxy():
                    DebugPrint(0, 'WARNING: http_proxy is set but not supported')
                # __connection__ = ProxyUtil.HTTPConnection(Config.get_SOAPHost(),
                #                                http_proxy = ProxyUtil.findHTTPProxy())
                connection = httplib.HTTPConnection(Config.get_SOAPHost())
            except KeyboardInterrupt:
                raise
            except SystemExit:
                raise
            except Exception as ex:
                DebugPrint(0, 'ERROR: could not initialize HTTP connection')
                DebugPrintTraceback()
                connectionError = True
                return connected
            try:
                # Guard connect() with SIGALRM so a hung TCP connect times
                # out; the previous handler is restored afterwards.
                prev_handler = signal.signal(signal.SIGALRM, __handle_timeout__)
                signal.alarm(timeout)
                DebugPrint(4, 'DEBUG: Connect')
                connection.connect()
                DebugPrint(4, 'DEBUG: Connect: OK')
                signal.alarm(0)
                signal.signal(signal.SIGALRM, prev_handler)
            except socket.error as ex:
                DebugPrint(3, 'Socket connection error: ' + str(ex))
                connectionError = True
                raise
            except GratiaTimeout:
                DebugPrint(3, 'Connection timeout (GratiaTimeout exception).')
                connectionError = True
                raise
            except KeyboardInterrupt:
                raise
            except SystemExit:
                raise
            except Exception as ex:
                connectionError = True
                DebugPrint(4, 'DEBUG: Connect: FAILED')
                DebugPrint(0, 'Error: While trying to connect to HTTP, caught exception '
                           + str(ex))
                DebugPrintTraceback()
                return connected
            DebugPrint(1, 'Connection via HTTP to: ' + Config.get_SOAPHost())
        else:
            # print "Using POST protocol"
            # assert(Config.get_UseSSL() == 1)
            if Config.get_UseGratiaCertificates() == 0:
                pr_cert_file = Config.get_CertificateFile()
                pr_key_file = Config.get_KeyFile()
            else:
                pr_cert_file = Config.get_GratiaCertificateFile()
                pr_key_file = Config.get_GratiaKeyFile()
            # NOTE(review): only the cert file is checked for None here;
            # pr_key_file gets no equivalent check — confirm intentional.
            if pr_cert_file == None:
                DebugPrint(
                    0,
                    'Error: While trying to connect to HTTPS, no valid local certificate.'
                    )
                connectionError = True
                return connected

            DebugPrint(4, 'DEBUG: Attempting to connect to HTTPS')
            try:
                if ProxyUtil.findHTTPSProxy():
                    DebugPrint(0, 'WARNING: http_proxy is set but not supported')
                # __connection__ = ProxyUtil.HTTPSConnection(Config.get_SSLHost(),
                #                        cert_file = pr_cert_file,
                #                        key_file = pr_key_file,
                #                        http_proxy = ProxyUtil.findHTTPSProxy())
                connection = httplib.HTTPSConnection(Config.get_SSLHost(),
                                                     cert_file=pr_cert_file,
                                                     key_file=pr_key_file)
            except KeyboardInterrupt:
                raise
            except SystemExit:
                raise
            except Exception as ex:
                DebugPrint(0, 'ERROR: could not initialize HTTPS connection')
                DebugPrintTraceback()
                connectionError = True
                return connected
            try:
                # Same SIGALRM timeout guard as the HTTP branch.
                prev_handler = signal.signal(signal.SIGALRM, __handle_timeout__)
                signal.alarm(timeout)
                DebugPrint(4, 'DEBUG: Connect')
                connection.connect()
                DebugPrint(4, 'DEBUG: Connect: OK')
                signal.alarm(0)
                signal.signal(signal.SIGALRM, prev_handler)
            except socket.error as ex:
                # NOTE(review): unlike the HTTP branch, no diagnostic is
                # logged here before re-raising.
                connectionError = True
                raise
            except GratiaTimeout:
                DebugPrint(3, 'Connection (GratiaTimeout exception).')
                connectionError = True
                raise
            except KeyboardInterrupt:
                raise
            except SystemExit:
                raise
            except Exception as ex:
                DebugPrint(4, 'DEBUG: Connect: FAILED')
                DebugPrint(0, 'Error: While trying to connect to HTTPS, caught exception '
                           + str(ex))
                DebugPrintTraceback()
                connectionError = True
                return connected
            DebugPrint(1, 'Connected via HTTPS to: ' + Config.get_SSLHost())
            # print "Using SSL protocol"

    # Successful
    DebugPrint(4, 'DEBUG: Connection SUCCESS')
    connected = True
    # Reset connection retry count to 0 and the retry delay to its initial value
    connectionRetries = 0
    __retryDelay = __initialDelay
    return connected