def _stream_compress_deflate(reply, compress_level, max_chunk):
    """Compress the `reply` stream with raw 'deflate', one batch at a time.

    Whole chunks of `reply` are buffered until at least `max_chunk` units
    (as measured by ``len(chunk)``) are pending; the batch is then
    compressed and the zlib engine fully flushed, so every yielded part
    can be expanded on its own at an original chunk boundary.

    :arg reply: iterable of text chunks.
    :arg int compress_level: zlib compression level.
    :arg int max_chunk: buffered size that triggers emitting a part.
    :returns: generator of compressed byte strings."""
    # Negative window size selects a raw deflate stream (no zlib header).
    compressor = zlib.compressobj(compress_level, zlib.DEFLATED,
                                  -zlib.MAX_WBITS, zlib.DEF_MEM_LEVEL, 0)

    def _squeeze(chunks, flush_mode):
        # Compress everything buffered so far, then flush as requested.
        payload = encodeUnicodeToBytes("".join(chunks))
        return compressor.compress(payload) + compressor.flush(flush_mode)

    buffered = []
    buffered_size = 0
    for chunk in reply:
        buffered.append(chunk)
        buffered_size += len(chunk)
        if buffered_size < max_chunk:
            continue
        # Enough data gathered: emit it with a full flush so the
        # original chunk boundaries are respected.
        part = _squeeze(buffered, zlib.Z_FULL_FLUSH)
        buffered = []
        buffered_size = 0
        yield part

    # Whatever is left over goes out with the stream-terminating flush.
    if buffered_size:
        yield _squeeze(buffered, zlib.Z_FINISH)
def fake_authz_headers(hmac_key, method='HNLogin', login='******', name='Test User', dn="/test/dn", roles=None, format="list"):
    """Create fake authentication and authorisation headers compatible
    with the CMSWEB front-ends. Assumes you have the HMAC signing key
    the back-end will use to validate the headers.

    :arg str hmac_key: binary key data for signing headers.
    :arg str method: authentication method, one of X509Cert, X509Proxy,
      HNLogin, HostIP, AUCookie or None.
      NOTE(review): a None method would fail in the ``len()`` call used to
      build the signed message below -- confirm whether None is really
      supported.
    :arg str login: account login name.
    :arg str name: account user name.
    :arg str dn: account X509 subject.
    :arg dict roles: role dictionary, each role with 'site' and 'group'
      lists; None (the default) means no roles.
    :arg str format: "list" to get a list of (name, value) tuples, any
      other value to get the headers dictionary itself.
    :returns: list of header name, value tuples to add to a HTTP request,
      or the header dictionary when `format` is not "list"."""
    headers = {'cms-auth-status': 'OK', 'cms-authn-method': method}
    if login:
        headers['cms-authn-login'] = login
    if name:
        headers['cms-authn-name'] = name
    if dn:
        headers['cms-authn-dn'] = dn

    # One 'cms-authz-<role>' header per role, listing its sites and groups.
    for role_name, role in viewitems(roles or {}):
        header_name = 'cms-authz-' + authz_canonical(role_name)
        values = []
        for kind in ('site', 'group'):
            if kind in role:
                values.extend("%s:%s" % (kind, authz_canonical(v))
                              for v in role[kind])
        headers[header_name] = " ".join(values)

    # The signed message is "<lengths>#<names and values>" over all headers
    # except the status one, taken in sorted header-name order.
    prefix_parts = []
    suffix_parts = []
    for hk in sorted(headers):
        if hk != 'cms-auth-status':
            prefix_parts.append("h%xv%x" % (len(hk), len(headers[hk])))
            suffix_parts.append("%s%s" % (hk, headers[hk]))
    msg = "".join(prefix_parts) + "#" + "".join(suffix_parts)

    if PY3:
        # hmac requires bytes for both key and message in Python 3.
        hmac_key = encodeUnicodeToBytes(hmac_key)
        msg = encodeUnicodeToBytes(msg)
    cksum = hmac.new(hmac_key, msg, hashlib.sha1).hexdigest()
    headers['cms-authn-hmac'] = cksum

    if format == "list":
        return listitems(headers)
    return headers
def _sanitise_input(self, input_args=[], input_kwargs={}, method=None):
    """
    Collect the arguments declared for `method` under the current HTTP
    verb: each one is taken from `input_kwargs` by name or, failing that,
    from the next positional value in `input_args`.

    Consumed keyword arguments are removed from `input_kwargs`; any
    keyword left over, or too many arguments overall, results in a
    400 HTTPError.

    _sanitise_input is called automatically if you use the
    _addMethod/_addDAO convenience functions. If you add your method to
    the methods dictionary by hand you should call it explicitly. In all
    but the most basic cases you'll likely want to over-ride this, or at
    least treat its outcome with deep suspicion.

    TODO: Would be nice to lose the method argument and derive it here.

    Returns whatever self._validate_input returns for the collected data.
    """
    verb = request.method.upper()
    if len(input_args):
        input_args = list(input_args)

    supplied = len(input_args) + len(input_kwargs)
    declared = self.methods[verb][method]['args']
    if supplied > len(declared):
        self.debug('%s to %s expects %s argument(s), got %s' %
                   (verb, method, len(declared), supplied))
        raise HTTPError(400,
                        'Invalid input: Input arguments failed sanitation.')

    def _native_string(raw):
        # VK, values must be read as the native string type rather than
        # a mix of string/unicode: the cx_Oracle driver places them into
        # binded queries and a mismatch breaks it.
        if PY3:
            return decodeBytesToUnicode(raw)
        return encodeUnicodeToBytes(raw)

    input_data = {}
    for arg_name in declared:
        if arg_name in input_kwargs:
            input_data[arg_name] = _native_string(input_kwargs[arg_name])
            input_kwargs.pop(arg_name)
        elif len(input_args):
            input_data[arg_name] = _native_string(input_args.pop(0))

    # Anything still in kwargs was not declared for this method.
    if input_kwargs:
        raise HTTPError(400,
                        'Invalid input: Input arguments failed sanitation.')

    leftovers = {'args': input_args, 'kwargs': input_kwargs}
    self.debug('%s raw data: %s' % (method, leftovers))
    self.debug('%s sanitised input_data: %s' % (method, input_data))
    return self._validate_input(input_data, verb, method)
def addBasicAuth(self, username, password):
    """Add an HTTP Basic ``Authorization`` header to the request headers.

    The credentials are encoded as base64 of ``username:password`` and
    stored under ``self.additionalHeaders["Authorization"]``.

    :arg username: account name, text or bytes.
    :arg password: account password, text or bytes.
    """
    username = encodeUnicodeToBytes(username)
    password = encodeUnicodeToBytes(password)
    # base64.b64encode is available on both Python 2 and 3 (the previously
    # used base64.encodestring was removed in Python 3.9) and, unlike
    # encodestring/encodebytes, never inserts line breaks, so long
    # credentials cannot leak newlines into the header value.
    encodedauth = base64.b64encode(b'%s:%s' % (username, password))
    if PY3:
        # header values are handled as text in Python 3
        encodedauth = decodeBytesToUnicode(encodedauth)
    auth_string = "Basic %s" % encodedauth
    self.additionalHeaders["Authorization"] = auth_string
def _etag_tail(head, tail, etag):
    """Generator yielding every chunk of `head`, then every chunk of
    `tail`, each passed through encodeUnicodeToBytes.

    Once both are exhausted, the response ETag header is set to
    ``etag.value()`` provided `etag` is defined and that value is truthy."""
    for source in (head, tail):
        for chunk in source:
            yield encodeUnicodeToBytes(chunk)

    if not etag:
        return
    etagval = etag.value()
    if etagval:
        cherrypy.response.headers["ETag"] = etagval
def __setattr__(self, name, value):
    """Store attribute `name`, keeping the section bookkeeping in sync.

    - names starting with "_internal_" are stored directly, unchecked;
    - ConfigSection values are registered as children (and settings) and
      get this section as their parent reference;
    - any other value is registered as a setting, after unicode values
      are encoded to bytes and, unless ``_internal_skipChecks`` is set,
      the value passed ``_complexTypeCheck``.
    """
    if name.startswith("_internal_"):
        # internal bookkeeping attribute: no checks at all
        object.__setattr__(self, name, value)
    elif isinstance(value, ConfigSection):
        # child ConfigSection
        self._internal_children.add(name)
        self._internal_settings.add(name)
        value._internal_parent_ref = self
        object.__setattr__(self, name, value)
    else:
        if isinstance(value, unicode):
            # We should not use "ignore" in this case:
            # if this failed before, it is better to have it fail also now.
            value = encodeUnicodeToBytes(value)
        # getattr keeps this backward compatible with objects on which
        # the _internal_skipChecks flag was never set
        checks_enabled = not getattr(self, '_internal_skipChecks', False)
        if checks_enabled:
            self._complexTypeCheck(name, value)
        object.__setattr__(self, name, value)
        self._internal_settings.add(name)
def testZipEncodeStr(self):
    """
    Test the zipEncodeStr function: a known message must produce a known
    encoded value, and the encoded output must never exceed `maxLen`.
    """
    # NOTE(review): the expected value below depends on the exact content
    # (including whitespace) of this literal -- confirm it is intact.
    message = """ %MSG-s CMSException: AfterFile 02-Jun-2010 14:31:43 CEST PostEndRun cms::Exception caught in cmsRun ---- EventProcessorFailure BEGIN EventProcessingStopped ---- ScheduleExecutionFailure BEGIN ProcessingStopped ---- InvalidReference BEGIN BadRefCore Attempt to dereference a RefCore containing an invalid ProductID has been detected. Please modify the calling code to test validity before dereferencing. cms::Exception going through module PatMCMatching/analyzePatMCMatching run: 1 lumi: 666672 event: 305 ---- InvalidReference END Exception going through path p ---- ScheduleExecutionFailure END an exception occurred during current event processing cms::Exception caught in CMS.EventProcessor and rethrown ---- EventProcessorFailure END """
    # Expected output for the message above with maxLen=300.
    encodedMessage = \
        'eNp1j8FqwzAMhu95Cl0G2yEhaXvyrU3dkkFHqfcCnq02hkQOtlz6+HM2MrbDdBLS9/1CxdNJHcsI7UnJh8GJnScBsL0yhoMbEOpV+ZqoXNVNDc1GrBuxWUMr1TucfWRJ9pKoMGMU4scHo9OtZ3C5G+O8L3OBvCPxOXiDMfpw0G5IAWEnj91b8Xvn6KbYTxPab0+ZHm0aUD7QpDn/r/qP1dFdD85e8IoBySz0Ts+j1md9y4zjxMAebGYWTsMCGE+sHeVk0JS/+Qqc79lkuNtDryN8IBLAc1VVL5+o0W8i'
    # The comparison is done on bytes.
    encodedMessage = encodeUnicodeToBytes(encodedMessage)
    self.assertEqual(zipEncodeStr(message, maxLen=300, compressLevel=9, steps=10, truncateIndicator=" (...)"), encodedMessage)
    # Test different maximum lengths
    # Encoded message should never be longer than the maximum limit.
    for maxLen in (800, 500, 20):
        self.assertLessEqual(
            len(zipEncodeStr(message, maxLen=maxLen, compressLevel=9, steps=10, truncateIndicator=" (...)")), maxLen)
def addAttachment(self, id, rev, value, name=None, contentType=None, checksum=None, add_checksum=False):
    """
    Upload `value` as an attachment of the document `id` at revision `rev`.

    The attachment is stored under `name`, or "attachment" when no name
    is given. `contentType` is passed through to the PUT request.

    A Content-MD5 header is sent when either:
      - `add_checksum` is True: the checksum is computed here from value;
      - `checksum` is given: it is passed on as provided.
    The checksum should be the base64 encoded binary md5 (as returned by
    hashlib.md5().digest()).

    Returns the result of the PUT request.
    """
    if name is None:
        name = "attachment"

    req_headers = {}
    if add_checksum:
        # calculate base64 encoded MD5 of the (stringified) value
        if isinstance(value, (newstr, newbytes)):
            value_str = value
        else:
            value_str = str(value)
        digest = hashlib.md5(encodeUnicodeToBytes(value_str)).digest()
        content_md5 = base64.b64encode(digest)
        if PY3:
            content_md5 = decodeBytesToUnicode(content_md5)
        req_headers['Content-MD5'] = content_md5
    elif checksum:
        if PY3:
            checksum = decodeBytesToUnicode(checksum)
        req_headers['Content-MD5'] = checksum

    uri = '/%s/%s/%s?rev=%s' % (self.name, id, name, rev)
    return self.put(uri, value, encode=False, contentType=contentType,
                    incoming_headers=req_headers)
def remapDBS3Keys(data, stringify=False, **others):
    """Add DBS2-style keys to `data` for every DBS3 field it contains.

    Fields have been renamed between DBS2 and 3; for each known DBS3 field
    present in `data` the value is stored again under its DBS2 name.
    Extra (or overriding) renames can be passed as keyword arguments.
    With `stringify` the copied values go through encodeUnicodeToBytes.

    `data` is modified in place and also returned.
    """
    mapping = {
        'num_file': 'NumberOfFiles',
        'num_files': 'NumberOfFiles',
        'num_event': 'NumberOfEvents',
        'num_block': 'NumberOfBlocks',
        'num_lumi': 'NumberOfLumis',
        'event_count': 'NumberOfEvents',
        'run_num': 'RunNumber',
        'file_size': 'FileSize',
        'block_size': 'BlockSize',
        'file_count': 'NumberOfFiles',
        'open_for_writing': 'OpenForWriting',
        'logical_file_name': 'LogicalFileName',
        'adler32': 'Adler32',
        'check_sum': 'Checksum',
        'md5': 'Md5',
        'block_name': 'BlockName',
        'lumi_section_num': 'LumiSectionNumber'
    }
    mapping.update(others)

    for dbs3Name, dbs2Name in viewitems(mapping):
        if dbs3Name not in data:
            continue
        value = data[dbs3Name]
        data[dbs2Name] = encodeUnicodeToBytes(value) if stringify else value
    return data
def enqueue(self, key, *parameters):
    """
    _enqueue_

    Add a new work item to the queue. This may result in
    threads being spawned if there are threads available.

    The work item (event `key` plus pickled, base64 encoded `parameters`)
    is inserted in the input buffer table; a slave thread is then started,
    reusing a queued slave when there is one or creating a new slave while
    fewer than nrOfSlaves are active.

    The pool lock is held for the whole operation and is always released,
    also when one of the database or thread operations raises.
    """
    self.lock.acquire()
    try:
        base64_encoder = base64.encodebytes if PY3 else base64.encodestring
        args = {'event': str(key),
                'component': self.component.config.Agent.componentName,
                'payload': base64_encoder(encodeUnicodeToBytes(pickle.dumps(parameters))),
                'thread_pool_id': self.threadPoolId}
        myThread = threading.currentThread()
        myThread.transaction.begin()
        self.query.insertWork(args, self.poolTableBufferIn)
        # we need to commit here otherwise the thread transaction might not
        # see it. check if this buffer needs to be flushed.
        myThread.transaction.commit()
        myThread.transaction.begin()
        bufferSize = self.query.getQueueLength(
            {'componentName': self.component.config.Agent.componentName,
             'thread_pool_id': self.threadPoolId}, self.poolTableBufferIn)
        if bufferSize > self.bufferSize:
            self.query.moveWorkFromBufferIn(self.poolTableBufferIn,
                                            self.poolTable)
        # FIXME: we should call the msgService finish method here before
        # this commit so we know the event/payload is transferred to a thread.
        myThread.transaction.commit()

        # enqueue the work item
        self.callQueue += 1
        # check if there is a slave in the queue (then reuse it)
        thread = None
        if len(self.slaveQueue):
            # There is an available server: spawn a thread
            slave = self.slaveQueue[0]
            del self.slaveQueue[0]
            # Increment the count of active threads
            self.activeCount += 1
            thread = threading.Thread(target=self.slaveThread,
                                      args=(slave,))
            thread.start()
        # check if we can instantiate more slaves.
        else:
            if self.activeCount < self.nrOfSlaves:
                # we can still create slaves.
                slave = self.slaveFactory.loadObject(self.slaveName)
                slave = self.prepareSlave(slave)
                self.activeCount += 1
                thread = threading.Thread(target=self.slaveThread,
                                          args=(slave,))
                thread.start()
    finally:
        # never leave the pool locked, whatever happened above
        self.lock.release()
def gen_hash(key):
    """Return the hexadecimal md5 digest of `key`.

    `key` goes through encodeUnicodeToBytes first; if the outcome is not
    a bytes object, NotImplementedError is raised."""
    key = encodeUnicodeToBytes(key)  # if key is not unicode, then it is not changed
    if isinstance(key, bytes):
        return hashlib.md5(key).hexdigest()
    raise NotImplementedError
def retrieveWork(self):
    """
    _retrieveWork_

    If activated this thread slave retrieves work. It retrieves it from
    the persistent thread pool and changes the state from queued to
    process.

    Work is looked up in the output buffer table first; when nothing is
    found there, work is moved into it from the main pool table and then
    from the input buffer table, retrying the lookup after each move.

    Returns a tuple (result[1], payload) where payload is the unpickled,
    base64 decoded result[2] of the retrieved work record.
    Raises Exception when no work can be found in any table.
    """
    myThread = threading.currentThread()
    # we only want to initiate thread related issues once per thread.
    # this checks if our thread has the dbi attributes.
    if not hasattr(myThread, "dbi"):
        # init creates a transaction that will call begin.
        self.initInThread()
    else:
        myThread.transaction.begin()

    args = {'thread_pool_id': self.args['thread_pool_id'],
            'component': self.args['componentName']}
    bufferOut = self.args['thread_pool_table_buffer_out']
    result = self.query.selectWork(args, bufferOut)
    # we might need to look into multiple buffers and move work to find it.
    # from keeping track of the number of messages for us we know it
    # is there.
    for sourceKey in ('thread_pool_table', 'thread_pool_table_buffer_in'):
        if result[0] is not None:
            break
        self.query.moveWorkToBufferOut(args,
                                       self.args[sourceKey],
                                       bufferOut,
                                       self.args['thread_pool_buffer_size'])
        result = self.query.selectWork(args, bufferOut)

    if result[0] is None:
        # FIXME: make proper exception
        raise Exception("ERROR: How can that be!!")

    logging.debug("THREAD: Retrieved Work with id: " + str(result[0]))
    myThread.workId = str(result[0])
    # get the actual work now:
    result = self.query.retrieveWork({'id': myThread.workId}, bufferOut)
    self.query.tagWork({'id': myThread.workId}, bufferOut)
    # we commit here because if the component crashes this is where
    # it will look for lost threads (the ones that are in the process state)
    myThread.transaction.commit()

    base64_decoder = base64.decodebytes if PY3 else base64.decodestring
    # NOTE(review): pickle.loads executes whatever the payload describes;
    # this is only safe as long as the thread pool tables are trusted.
    return (result[1], pickle.loads(base64_decoder(encodeUnicodeToBytes(result[2]))))
def genid(kwds):
    """Return the hexadecimal md5 digest identifying `kwds`.

    Dictionaries are hashed through their JSON encoding with sorted keys;
    anything else through its str() representation."""
    if isinstance(kwds, dict):
        data = json.dumps(dict(kwds), sort_keys=True)
    else:
        data = str(kwds)  # it is fine both in py2 and in py3
    # if data is not unicode, then encodeUnicodeToBytes does not change it
    return hashlib.md5(encodeUnicodeToBytes(data)).hexdigest()
def _getCacheFilePath(self, stepHelper):
    """Return the pileup configuration cache file path for a step.

    The file name embeds the sha1 of the dataset names of every pileup
    section of the step (names joined by "_" within a section, sections
    concatenated directly); the file lives in self.cacheDirectory()."""
    pileup = stepHelper.data.pileup
    nameParts = []
    for pileupType in pileup.listSections_():
        section = getattr(pileup, pileupType)
        nameParts.append("_".join(getattr(section, "dataset")))
    fileName = "".join(nameParts)
    # TODO cache is not very effective if the dataset combination is different between workflow
    cacheHash = hashlib.sha1(encodeUnicodeToBytes(fileName)).hexdigest()
    return "%s/pileupconf-%s.json" % (self.cacheDirectory(), cacheHash)
def persist(self, filename):
    """
    _persist_

    Pickle self.data and save it to `filename`.

    On Python 3 the file is opened in binary mode and the data goes
    through encodeUnicodeToBytes before pickling; otherwise the file is
    opened in text mode and the data is pickled as is.
    """
    mode = 'wb' if PY3 else 'w'
    with open(filename, mode) as handle:
        payload = encodeUnicodeToBytes(self.data) if PY3 else self.data
        pickle.dump(payload, handle)
    return
def encode(self, args, files):
    """
    Build a multipart/form-data body out of form fields and files.

    `args` maps field names to values, `files` maps field names to paths
    of the files to upload. We don't actually need to know what we are
    uploading here; the content type of each file is whatever
    self.filetype reports for it.

    Returns a tuple (content type header value, body), both bytes.
    """
    boundary = b'----------=_DQM_FILE_BOUNDARY_=-----------'
    crlf = b'\r\n'
    delimiter = b'--' + boundary + crlf
    body = b''

    # plain form fields
    for (key, value) in viewitems(args):
        logging.debug("encode value - %s, %s", type(value), value)
        if PY2:
            payload = str(value)
        elif PY3:
            if isinstance(value, bytes):
                payload = value
            else:
                payload = encodeUnicodeToBytes(str(value))
            key = encodeUnicodeToBytes(key)
        logging.debug("encode payload - %s, %s", type(payload), payload)
        disposition = b'Content-Disposition: form-data; name="%s"' % key
        body += delimiter
        body += disposition + crlf
        body += crlf + payload + crlf

    # file uploads
    for (key, filename) in viewitems(files):
        body += delimiter
        key = encodeUnicodeToBytesConditional(key, condition=PY3)
        filepath = encodeUnicodeToBytesConditional(
            os.path.basename(filename), condition=PY3)
        partHeaders = [
            b'Content-Disposition: form-data; name="%s"; filename="%s"' % (key, filepath),
            b'Content-Type: %s' % encodeUnicodeToBytes(self.filetype(filename)),
            b'Content-Length: %d' % os.path.getsize(filename),
        ]
        for headerLine in partHeaders:
            body += headerLine + crlf
        logging.debug("encode body (without binary file) -%s, %s", type(body), body)
        with open(filename, "rb") as fd:
            body += crlf + fd.read() + crlf

    # closing delimiter
    body += b'--' + boundary + b'--' + crlf + crlf
    return (b'multipart/form-data; boundary=' + boundary, body)
def getGlobalTag(self):
    """
    _getGlobalTag_

    Retrieve the global tag: from the `arguments` section of the
    application configuration when it is set there, otherwise from the
    pickled arguments.
    """
    configuration = self.data.application.configuration
    hasPlainTag = (hasattr(configuration, "arguments")
                   and hasattr(configuration.arguments, "globalTag"))
    if hasPlainTag:
        return configuration.arguments.globalTag

    pickledArgs = encodeUnicodeToBytes(configuration.pickledarguments)
    return pickle.loads(pickledArgs)['globalTag']
def __hash__(self):
    """
    Hash this dict from the sha1 of a sorted (key, value) representation.

    NOTE: the value stored under "run_lumi" is replaced by its own hash
    in that representation.
    """
    immutableSelf = [
        (keyName, hash(self[keyName]) if keyName == "run_lumi" else self[keyName])
        for keyName in sorted(self)
    ]
    digest = hashlib.sha1(encodeUnicodeToBytes(str(immutableSelf))).hexdigest()
    # only the first 15 hex digits are used
    return int(digest[:15], 16)
def genkey(query):
    """
    Generate a new key-hash for a given query. We use md5 hash for the
    query and key is just hex representation of this hash.

    A dict query is hashed through its JSON encoding with sorted keys;
    text is encoded to bytes before hashing.

    :raises TypeError: if the query is neither a dict, text nor bytes-like
        (raised by hashlib itself).
    """
    if isinstance(query, dict):
        record = dict(query)
        query = json.JSONEncoder(sort_keys=True).encode(record)
    keyhash = hashlib.md5()
    # Text is already converted to bytes here, so no fallback encoding is
    # needed: the former ``except TypeError`` branch could only replace the
    # informative TypeError with an AttributeError for unsupported input.
    keyhash.update(encodeUnicodeToBytes(query))
    return keyhash.hexdigest()
def setDatasetName(self, datasetName):
    """
    _setDatasetName_

    Set the dataset name both in the `arguments` section of the
    application configuration and in the pickled arguments.
    """
    configuration = self.data.application.configuration
    configuration.section_('arguments')
    configuration.arguments.datasetName = datasetName

    if hasattr(configuration, "pickledarguments"):
        args = pickle.loads(
            encodeUnicodeToBytes(configuration.pickledarguments))
    else:
        args = {}
    args['datasetName'] = datasetName
    # FIXME: once both central services and WMAgent are in Py3, we can remove protocol=0
    configuration.pickledarguments = pickle.dumps(args, protocol=0)
    return
def setGlobalTag(self, globalTag):
    """
    _setGlobalTag_

    Set the global tag both in the `arguments` section of the
    application configuration and in the pickled arguments.
    """
    configuration = self.data.application.configuration
    configuration.section_('arguments')
    configuration.arguments.globalTag = globalTag

    if hasattr(configuration, "pickledarguments"):
        args = pickle.loads(
            encodeUnicodeToBytes(configuration.pickledarguments))
    else:
        args = {}
    args['globalTag'] = globalTag
    # FIXME: once both central services and WMAgent are in Py3, we can remove protocol=0
    configuration.pickledarguments = pickle.dumps(args, protocol=0)
    return
def setHttpProxy(url):
    """
    Use frontier to figure out the http_proxies.
    Pick one deterministically based on the url and loadbalance settings.

    An 'http_proxy' already present in the environment is returned as is.
    Otherwise the proxies reported by cmsGetFnConnect are parsed, the
    selected one is exported as 'http_proxy' and returned.

    :returns: the proxy url, or None when the frontier command fails or
        reports no proxy.
    """
    if 'http_proxy' in os.environ:
        return os.environ['http_proxy']

    status, output = getstatusoutput('cmsGetFnConnect frontier://smallfiles')
    if status:
        return None

    # raw string: the pattern is full of backslash escapes
    proxyList = re.findall(r'\(proxyurl=([\w\d\.\-\:\/]+)\)', output)
    if not proxyList:
        # nothing to choose from; treat it like a failed lookup
        return None

    if 'loadbalance=proxies' in output:
        # spread the load, but always map a given url to the same proxy
        urlHash = int(hashlib.sha1(encodeUnicodeToBytes(url)).hexdigest()[:15], 16)
        proxy = proxyList[urlHash % len(proxyList)]
    else:
        proxy = proxyList[0]
    os.environ['http_proxy'] = proxy
    return proxy
def upload(self, url, args, filename):
    """
    _upload_

    Perform a file upload to the dqm server at `url` + '/data/put', using
    HTTPS auth with the service proxy provided (the step upload proxy or,
    failing that, X509_USER_PROXY from the environment).

    Returns a tuple (response headers, response data); the data is
    decompressed when the response is gzip encoded.
    """
    ident = "WMAgent python/%d.%d.%d" % sys.version_info[:3]
    uploadProxy = self.step.upload.proxy or os.environ.get(
        'X509_USER_PROXY', None)
    logging.info("Using proxy file: %s", uploadProxy)
    logging.info("Using CA certificate path: %s",
                 os.environ.get('X509_CERT_DIR'))

    msg = "HTTP POST upload arguments:\n" + "".join(
        " ==> %s: %s\n" % (arg, args[arg]) for arg in args)
    logging.info(msg)

    opener = OpenerDirector()
    opener.add_handler(HTTPSAuthHandler(key=uploadProxy, cert=uploadProxy))

    # setup the request object
    if PY3:
        url = decodeBytesToUnicode(url)
    else:
        url = encodeUnicodeToBytes(url)
    datareq = Request(url + '/data/put')
    datareq.add_header('Accept-encoding', 'gzip')
    datareq.add_header('User-agent', ident)
    self.marshall(args, {'file': filename}, datareq)

    # non-https urls additionally get a ProxyHandler with no proxies
    if 'https://' not in url:
        opener.add_handler(ProxyHandler({}))
    result = opener.open(datareq)

    data = result.read()
    if result.headers.get('Content-encoding', '') == 'gzip':
        data = GzipFile(fileobj=BytesIO(data)).read()
    return (result.headers, data)
def testAttachments(self):
    """
    Test uploading attachments with and without checksumming: default
    upload, explicit content type, explicit name, computed checksum,
    provided checksum and finally a wrong checksum, which must fail.
    """
    doc = self.db.commitOne({'foo': 'bar'}, timestamp=True)[0]
    plainText = "Hello"
    typedText = "How are you today?"
    namedText = "I'm very well, thanks for asking"
    autoChecksumText = "Lovely weather we're having"
    givenChecksumText = "Goodbye"
    # base64 encoded binary md5 of the attachment sent with a checksum
    givenChecksum = base64.b64encode(
        hashlib.md5(encodeUnicodeToBytes(givenChecksumText)).digest())
    badChecksumText = "Good day to you, sir!"
    # TODO: add a binary attachment - e.g. tar.gz
    doc = self.db.addAttachment(doc['id'], doc['rev'], plainText)
    doc = self.db.addAttachment(doc['id'], doc['rev'], typedText,
                                contentType="foo/bar")
    doc = self.db.addAttachment(doc['id'], doc['rev'], namedText,
                                name="my_greeting")
    doc = self.db.addAttachment(doc['id'], doc['rev'], autoChecksumText,
                                add_checksum=True)
    doc = self.db.addAttachment(doc['id'], doc['rev'], givenChecksumText,
                                checksum=givenChecksum)

    self.assertRaises(CouchInternalServerError, self.db.addAttachment,
                      doc['id'], doc['rev'], badChecksumText, checksum='123')
def xml_parser(data, prim_key):
    """
    Generic XML parser, yielding one row per element tagged `prim_key`.

    Each row is a dictionary holding the element attributes under
    `prim_key`, completed by get_children.

    :param data: can be of type "file object", unicode string or bytes string
    :param prim_key: tag of the elements to report
    """
    if isinstance(data, (str, bytes)):
        # wrap in-memory content in a binary stream, positioned at its start
        stream = BytesIO(encodeUnicodeToBytes(data, "ignore"))
    else:
        stream = data

    for event, elem in ET.iterparse(stream):
        tag = elem.tag
        if tag != prim_key:
            continue
        row = {tag: elem.attrib}
        get_children(elem, event, row, tag)
        elem.clear()
        yield row
def __hash__(self):
    """
    Calculate the value of the hash from the run and its events per lumi.

    NOTE: Python2 maxint is:
      > python -c 'import sys; print(sys.maxint)'
      9223372036854775807
    so we cannot use the full range of the hexadecimal hash code
    because it could cause an integer overflow. This is the maximum
    slice/value we can safely use:
      > int(15 * "f", base=16)
      1152921504606846975
    """
    run = self.run
    if not isinstance(run, (newstr, newbytes)):
        run = str(run)
    hashValue = hashlib.sha1(encodeUnicodeToBytesConditional(run, condition=PY3))

    # Generate immutable sorted list of lumis
    lumiItems = sorted(listitems(self.eventsPerLumi), key=lambda item: item[0])
    hashValue.update(encodeUnicodeToBytes(str(lumiItems)))

    return int(hashValue.hexdigest()[:15], 16)
def update(self, val):
    """Process response data `val`: feed it, encoded to bytes, to the
    digest when one is set; do nothing otherwise."""
    if not self.digest:
        return
    self.digest.update(encodeUnicodeToBytes(val))
def testExitCode(self):
    """
    _testExitCode_

    Test and see if we can get an exit code out of a report

    Note: Errors without a return code return 99999
    getStepExitCode: returns the first valid and non-zero exit code
    getExitCode: uses the method above to get an exit code
    getStepExitCodes: returns a set of all exit codes within the step

    The test keeps adding errors with new exit codes and different kinds
    of error details (plain text, non-ASCII text, bytes, dictionaries) and
    checks after each one that the reported exit code stays 102 while the
    set of exit codes and the error count grow.
    """
    # A fresh report has no errors at all.
    report = Report("cmsRun1")
    self.assertEqual(report.getExitCode(), 0)
    self.assertEqual(report.getStepExitCode(stepName="cmsRun1"), 0)
    self.assertItemsEqual(report.getStepExitCodes(stepName="cmsRun1"), {})
    self.assertItemsEqual(report.getStepErrors(stepName="cmsRun1"), {})

    report.addError(stepName="cmsRun1", exitCode=None, errorType="test", errorDetails="test")
    # None is not a valid exitCode, but it will get mapped to 99999
    self.assertEqual(report.getExitCode(), 99999)
    self.assertEqual(report.getStepExitCode(stepName="cmsRun1"), 99999)
    self.assertItemsEqual(report.getStepExitCodes(stepName="cmsRun1"), {99999})
    self.assertEqual(report.getStepErrors(stepName="cmsRun1")['errorCount'], 1)

    # first valid exit code: from now on it is the one reported
    report.addError(stepName="cmsRun1", exitCode=102, errorType="test", errorDetails="test")
    self.assertEqual(report.getExitCode(), 102)
    self.assertEqual(report.getStepExitCode(stepName="cmsRun1"), 102)
    self.assertItemsEqual(report.getStepExitCodes(stepName="cmsRun1"), {99999, 102})
    self.assertEqual(report.getStepErrors(stepName="cmsRun1")['errorCount'], 2)

    # a further exit code does not change the reported one
    report.addError(stepName="cmsRun1", exitCode=103, errorType="test", errorDetails="test")
    self.assertEqual(report.getExitCode(), 102)
    self.assertEqual(report.getStepExitCode(stepName="cmsRun1"), 102)
    self.assertItemsEqual(report.getStepExitCodes(stepName="cmsRun1"), {99999, 102, 103})
    self.assertEqual(report.getStepErrors(stepName="cmsRun1")['errorCount'], 3)

    # now record yet another exit code, with the same type and details as before
    report.addError(stepName="cmsRun1", exitCode=104, errorType="test", errorDetails="test")
    self.assertEqual(report.getExitCode(), 102)
    self.assertEqual(report.getStepExitCode(stepName="cmsRun1"), 102)
    self.assertItemsEqual(report.getStepExitCodes(stepName="cmsRun1"), {99999, 102, 103, 104})
    self.assertEqual(report.getStepErrors(stepName="cmsRun1")['errorCount'], 4)

    # and once again, but different type and details (which does not matter)
    report.addError(stepName="cmsRun1", exitCode=105, errorType="testEE", errorDetails="testAA")
    self.assertEqual(report.getExitCode(), 102)
    self.assertEqual(report.getStepExitCode(stepName="cmsRun1"), 102)
    self.assertItemsEqual(report.getStepExitCodes(stepName="cmsRun1"), {99999, 102, 103, 104, 105})
    self.assertEqual(report.getStepErrors(stepName="cmsRun1")['errorCount'], 5)

    # and once again, but different details - testing unicode handling (non-ASCII text)
    report.addError(stepName="cmsRun1", exitCode=106, errorType="test", errorDetails="1 тℯṧт")
    self.assertEqual(report.getExitCode(), 102)
    self.assertEqual(report.getStepExitCode(stepName="cmsRun1"), 102)
    self.assertItemsEqual(report.getStepExitCodes(stepName="cmsRun1"), {99999, 102, 103, 104, 105, 106})
    self.assertEqual(report.getStepErrors(stepName="cmsRun1")['errorCount'], 6)

    # and once again - testing unicode handling (non-ASCII text plus a \x95 character)
    report.addError(stepName="cmsRun1", exitCode=107, errorType="test", errorDetails="2 тℯṧт \x95")
    self.assertEqual(report.getExitCode(), 102)
    self.assertEqual(report.getStepExitCode(stepName="cmsRun1"), 102)
    self.assertItemsEqual(report.getStepExitCodes(stepName="cmsRun1"), {99999, 102, 103, 104, 105, 106, 107})
    self.assertEqual(report.getStepErrors(stepName="cmsRun1")['errorCount'], 7)

    # and once again - testing unicode handling (details passed through encodeUnicodeToBytes)
    report.addError(stepName="cmsRun1", exitCode=108, errorType="test", errorDetails=encodeUnicodeToBytes("3 тℯṧт"))
    self.assertEqual(report.getExitCode(), 102)
    self.assertEqual(report.getStepExitCode(stepName="cmsRun1"), 102)
    self.assertItemsEqual(report.getStepExitCodes(stepName="cmsRun1"), {99999, 102, 103, 104, 105, 106, 107, 108})
    self.assertEqual(report.getStepErrors(stepName="cmsRun1")['errorCount'], 8)

    # and once again - testing unicode handling (details passed through decodeBytesToUnicode)
    report.addError(stepName="cmsRun1", exitCode=109, errorType="test", errorDetails=decodeBytesToUnicode("4 тℯṧт"))
    self.assertEqual(report.getExitCode(), 102)
    self.assertEqual(report.getStepExitCode(stepName="cmsRun1"), 102)
    self.assertItemsEqual(report.getStepExitCodes(stepName="cmsRun1"), {99999, 102, 103, 104, 105, 106, 107, 108, 109})
    self.assertEqual(report.getStepErrors(stepName="cmsRun1")['errorCount'], 9)

    # and once again - testing unicode handling (dictionary with a non-ASCII key)
    report.addError(stepName="cmsRun1", exitCode=110, errorType="test", errorDetails={"нεʟʟ◎": 3.14159})
    self.assertEqual(report.getExitCode(), 102)
    self.assertEqual(report.getStepExitCode(stepName="cmsRun1"), 102)
    self.assertItemsEqual(report.getStepExitCodes(stepName="cmsRun1"), {99999, 102, 103, 104, 105, 106, 107, 108, 109, 110})
    self.assertEqual(report.getStepErrors(stepName="cmsRun1")['errorCount'], 10)

    # and once again - testing unicode handling (dictionary with non-ASCII key and value)
    report.addError(stepName="cmsRun1", exitCode=111, errorType="test", errorDetails={"нεʟʟ◎ \x95": "ẘøґℓ∂ \x95"})
    self.assertEqual(report.getExitCode(), 102)
    self.assertEqual(report.getStepExitCode(stepName="cmsRun1"), 102)
    self.assertItemsEqual(report.getStepExitCodes(stepName="cmsRun1"), {99999, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111})
    self.assertEqual(report.getStepErrors(stepName="cmsRun1")['errorCount'], 11)
def makeSandbox(self, buildItHere, workload):
    """
    __makeSandbox__

    MakeSandbox creates and archives a sandbox in buildItHere,
    returning the path to the archive and putting it in the task.

    An existing archive (with its workload file) is reused as is.
    Otherwise a directory is created per task and step, the fetcher
    plugins are run for every task, the workload is saved, and everything
    is packed into a tar.bz2 archive -- together with a zipped copy of
    the WMCore code when self.packageWMCore is set, and with the local
    user sandboxes of the steps.

    Returns the path of the sandbox archive.
    """
    workloadName = workload.name()
    # Create path to sandbox
    pileupCachePath = "%s/pileupCache" % buildItHere
    path = "%s/%s/WMSandbox" % (buildItHere, workloadName)
    workloadFile = os.path.join(path, "WMWorkload.pkl")
    archivePath = os.path.join(
        buildItHere,
        "%s/%s-Sandbox.tar.bz2" % (workloadName, workloadName))
    # check if already built
    if os.path.exists(archivePath) and os.path.exists(workloadFile):
        workload.setSpecUrl(workloadFile)  # point to sandbox spec
        return archivePath
    # start from a clean sandbox directory
    if os.path.exists(path):
        shutil.rmtree(path)
    # //
    # // Set up Fetcher plugins, use default list for maintaining
    # // compatibility
    commonFetchers = ["CMSSWFetcher", "URLFetcher", "PileupFetcher"]

    # generate the real path and make it
    self._makePathonPackage(path)

    # Add sandbox path to workload
    workload.setSandbox(archivePath)
    userSandboxes = []
    for topLevelTask in workload.taskIterator():
        for taskNode in topLevelTask.nodeIterator():
            task = WMTask.WMTaskHelper(taskNode)

            # common fetchers first, then the ones the task asks for
            fetcherNames = commonFetchers[:]
            taskFetchers = getattr(task.data, "fetchers", [])
            fetcherNames.extend(taskFetchers)
            fetcherInstances = list(map(getFetcher, fetcherNames))

            taskPath = "%s/%s" % (path, task.name())
            self._makePathonPackage(taskPath)

            # TODO sandbox is property of workload now instead of task
            # but some other places use it as task property (i.e. TaskQueue)
            # so for backward compatibility save it as task attribute as well.
            setattr(task.data.input, 'sandbox', archivePath)

            for s in task.steps().nodeIterator():
                s = WMStep.WMStepHelper(s)
                stepPath = "%s/%s" % (taskPath, s.name())
                self._makePathonPackage(stepPath)
                userSandboxes.extend(s.getUserSandboxes())

            # //
            # // Execute the fetcher plugins
            # //
            for fetcher in fetcherInstances:
                # TODO: when cache directory is set as path, cache is maintained by workflow.
                # In that case, cache will be deleted when workflow is done,
                # but if different workflow can share the same cache.
                # You can set the cache directory somewhere else, but need to have cache refresh (delete) policy
                fetcher.setCacheDirectory(pileupCachePath)
                fetcher.setWorkingDirectory(taskPath)
                fetcher(task)

    # pickle up the workload for storage in the sandbox
    workload.setSpecUrl(workloadFile)
    workload.save(workloadFile)

    # now, tar everything up and put it somewhere special
    tarContent = []  # (path on disk, name in the archive) pairs
    deleteFiles = []  # temporary files to remove once the archive exists
    tarContent.append(("%s/%s/" % (buildItHere, workloadName), '/'))
    if self.packageWMCore:
        wmcorePath = os.path.realpath(
            os.path.join(os.path.dirname(__file__), '..'))
        (zipHandle, zipPath) = tempfile.mkstemp()
        os.close(zipHandle)
        zipFile = zipfile.ZipFile(zipPath,
                                  mode='w',
                                  compression=zipfile.ZIP_DEFLATED)

        for (root, dirnames, filenames) in os.walk(wmcorePath):
            for filename in filenames:
                if not filename.endswith(".svn") and not filename.endswith(
                        ".git"):
                    zipFile.write(
                        filename=os.path.join(root, filename),
                        # the name in the archive is the path relative to WMCore/
                        arcname=os.path.join(
                            root,
                            filename)[len(wmcorePath) - len('WMCore/') + 1:])

        # Add a dummy module for zipimport testing
        (handle, dummyModulePath) = tempfile.mkstemp()
        # os.write on the mkstemp file descriptor expects a bytes object in Python3
        os.write(handle, encodeUnicodeToBytes("#!/usr/bin/env python\n"))
        os.write(handle, encodeUnicodeToBytes("print('ZIPIMPORTTESTOK')\n"))
        os.close(handle)
        zipFile.write(filename=dummyModulePath,
                      arcname='WMCore/ZipImportTestModule.py')

        # Add the wmcore zipball to the sandbox
        zipFile.close()
        tarContent.append((zipPath, '/WMCore.zip'))
        deleteFiles.append(dummyModulePath)
        deleteFiles.append(zipPath)

        psetTweaksPath = PSetTweaks.__path__[0]
        tarContent.append((psetTweaksPath, '/PSetTweaks'))

        utilsPath = Utils.__path__[0]
        tarContent.append((utilsPath, '/Utils'))

    # user sandboxes without a url scheme are local files: pack them too
    for sb in userSandboxes:
        splitResult = urlsplit(sb)
        if not splitResult[0]:
            tarContent.append((sb, os.path.basename(sb)))

    with tarfile.open(archivePath, 'w:bz2') as tar:
        for (name, arcname) in tarContent:
            tar.add(name, arcname, filter=tarFilter)

    # NOTE(review): the temporary files are only removed when everything
    # above succeeded; an exception would leave them behind.
    for deleteFile in deleteFiles:
        os.unlink(deleteFile)

    logging.info("Created sandbox %s with size %d",
                 os.path.basename(archivePath),
                 os.path.getsize(archivePath))

    return archivePath
def set_opts(self, curl, url, params, headers, ckey=None, cert=None, capath=None,
             verbose=None, verb='GET', doseq=True, encode=False, cainfo=None,
             cookie=None):
    """Configure a curl handle for a single HTTP request.

    :arg curl: pycurl handle; it is configured exclusively through ``setopt``.
    :arg str url: target URL. For GET and HEAD the encoded parameters are
        appended to it after a ``?``.
    :arg params: request parameters: a dictionary, an (already encoded)
        string, or None. Encoding is delegated to ``self.encode_params``.
    :arg dict headers: HTTP headers (name -> value), or None. The mapping is
        copied, so the caller's object is never modified.
    :arg ckey: value for pycurl.SSLKEY (skipped when false).
    :arg cert: value for pycurl.SSLCERT (skipped when false).
    :arg capath: value for pycurl.CAPATH. Peer verification is switched on
        only when this is provided; otherwise it is switched off.
    :arg verbose: when true, enable pycurl.VERBOSE and route debug output
        to ``self.debug``.
    :arg str verb: HTTP method, one of GET, HEAD, POST, PUT or DELETE.
    :arg doseq: forwarded to ``self.encode_params``.
    :arg encode: forwarded to ``self.encode_params``.
    :arg cainfo: value for pycurl.CAINFO; only honoured together with capath.
    :arg cookie: optional mapping of URL -> cookie file, used both as
        pycurl.COOKIEFILE and pycurl.COOKIEJAR for a matching url.
    :returns: tuple ``(bbuf, hbuf)`` of BytesIO objects registered as the
        body (WRITEFUNCTION) and header (HEADERFUNCTION) sinks.
    :raises TypeError: if params is neither a dict, a string nor None.
    :raises Exception: if verb is not a supported HTTP method."""
    if not (isinstance(params, (dict, basestring)) or params is None):
        raise TypeError(
            "pycurl parameters should be passed as dictionary or an (encoded) string"
        )

    # Work on a private copy: Content-Length is injected below for POST/PUT
    # and must neither fail when no headers were given (None) nor leak into
    # a dictionary the caller may reuse for other requests.
    headers = dict(headers) if headers else {}

    curl.setopt(pycurl.NOSIGNAL, self.nosignal)
    curl.setopt(pycurl.TIMEOUT, self.timeout)
    curl.setopt(pycurl.CONNECTTIMEOUT, self.connecttimeout)
    curl.setopt(pycurl.FOLLOWLOCATION, self.followlocation)
    curl.setopt(pycurl.MAXREDIRS, self.maxredirs)

    # also accepts encoding/compression algorithms
    accept_encoding = headers.get("Accept-Encoding")
    if accept_encoding:
        if isinstance(accept_encoding, basestring):
            curl.setopt(pycurl.ENCODING, accept_encoding)
        else:
            logging.warning(
                "Wrong data type for header 'Accept-Encoding': %s",
                type(accept_encoding))

    # The cookie lookup uses the URL as given, before any query string is added.
    if cookie and url in cookie:
        curl.setopt(pycurl.COOKIEFILE, cookie[url])
        curl.setopt(pycurl.COOKIEJAR, cookie[url])

    encoded_data = self.encode_params(params, verb, doseq, encode)

    if verb == 'GET':
        if encoded_data:
            url = url + '?' + encoded_data
    elif verb == 'HEAD':
        if encoded_data:
            url = url + '?' + encoded_data
        curl.setopt(pycurl.CUSTOMREQUEST, verb)
        curl.setopt(pycurl.HEADER, 1)
        curl.setopt(pycurl.NOBODY, True)
    elif verb in ('POST', 'PUT', 'DELETE'):
        # Encode the payload once so that the bytes handed to pycurl and the
        # advertised Content-Length always agree, also for non-ASCII text.
        # Assumes encodeUnicodeToBytes returns bytes input unchanged.
        body = encodeUnicodeToBytes(encoded_data)
        if verb == 'POST':
            curl.setopt(pycurl.POST, 1)
        else:
            curl.setopt(pycurl.CUSTOMREQUEST, verb)
            # NOTE(review): this header list is replaced by the HTTPHEADER
            # call further down whenever any header is present, i.e. always
            # for PUT (Content-Length) and for DELETE with caller headers.
            # Kept as-is to preserve what is sent on the wire today.
            curl.setopt(pycurl.HTTPHEADER, ['Transfer-Encoding: chunked'])
        if encoded_data:
            curl.setopt(pycurl.POSTFIELDS, body)
        if verb in ('POST', 'PUT'):
            # only these methods (and PATCH) require this header
            headers["Content-Length"] = str(len(body))
    else:
        raise Exception('Unsupported HTTP method "%s"' % verb)

    # we must pass url as a string data-type, otherwise pycurl will fail with error
    # TypeError: invalid arguments to setopt
    # see https://curl.haxx.se/mail/curlpython-2007-07/0001.html
    curl.setopt(pycurl.URL, encodeUnicodeToBytes(url))
    if headers:
        curl.setopt(pycurl.HTTPHEADER,
                    [encodeUnicodeToBytes("%s: %s" % (k, v))
                     for k, v in viewitems(headers)])

    # Sinks for the response body and the response headers.
    bbuf = BytesIO()
    hbuf = BytesIO()
    curl.setopt(pycurl.WRITEFUNCTION, bbuf.write)
    curl.setopt(pycurl.HEADERFUNCTION, hbuf.write)

    if capath:
        curl.setopt(pycurl.CAPATH, capath)
        curl.setopt(pycurl.SSL_VERIFYPEER, True)
        if cainfo:
            curl.setopt(pycurl.CAINFO, cainfo)
    else:
        # NOTE(review): without capath the server certificate is not verified.
        curl.setopt(pycurl.SSL_VERIFYPEER, False)
    if ckey:
        curl.setopt(pycurl.SSLKEY, ckey)
    if cert:
        curl.setopt(pycurl.SSLCERT, cert)
    if verbose:
        curl.setopt(pycurl.VERBOSE, True)
        curl.setopt(pycurl.DEBUGFUNCTION, self.debug)
    return bbuf, hbuf