Esempio n. 1
0
def _stream_compress_deflate(reply, compress_level, max_chunk):
    """Streaming compressor for the 'deflate' method.

    Generates output that is guaranteed to expand at the exact same chunk
    boundaries as the original reply stream: input chunks are only ever
    consumed whole, and each emitted part ends with a zlib flush.

    :arg reply: iterable of response chunks; each chunk may be a text
      string or a byte string.
    :arg int compress_level: compression level handed to zlib.
    :arg int max_chunk: amount of pending input (sum of `len()` of the
      buffered chunks) at which a compressed part is emitted.
    :returns: generator of compressed byte strings."""

    # Create zlib compression object, with raw data stream (negative window size)
    z = zlib.compressobj(compress_level, zlib.DEFLATED, -zlib.MAX_WBITS,
                         zlib.DEF_MEM_LEVEL, 0)

    # Data pending compression. We only take entire chunks from original
    # reply. Then process reply one chunk at a time. Whenever we have enough
    # data to compress, spit it out flushing the zlib engine entirely, so we
    # respect original chunk boundaries.
    npending = 0
    pending = []
    for chunk in reply:
        # Convert every chunk to bytes on its own, so that byte-string chunks
        # (which cannot be joined with a text separator on python3) and text
        # chunks are both accepted.
        pending.append(encodeUnicodeToBytes(chunk))
        npending += len(chunk)
        if npending >= max_chunk:
            part = z.compress(b"".join(pending)) + z.flush(zlib.Z_FULL_FLUSH)
            pending = []
            npending = 0
            yield part

    # Crank the compressor one more time for remaining output.
    # NOTE(review): when the input ends exactly on a flush boundary nothing
    # is pending here and no Z_FINISH block is emitted -- confirm that
    # clients tolerate a stream terminated by a full flush only.
    if npending:
        yield z.compress(b"".join(pending)) + z.flush(zlib.Z_FINISH)
Esempio n. 2
0
def fake_authz_headers(hmac_key,
                       method='HNLogin',
                       login='******',
                       name='Test User',
                       dn="/test/dn",
                       roles={},
                       format="list"):
    """Build fake authentication and authorisation headers in the form
    the CMSWEB front-ends produce. The caller must supply the HMAC signing
    key the back-end will use to validate the headers.

    :arg str hmac_key: binary key data for signing headers.
    :arg str method: authentication method, one of X509Cert, X509Proxy,
      HNLogin, HostIP, AUCookie or None.
    :arg str login: account login name.
    :arg str name: account user name.
    :arg str dn: account X509 subject.
    :arg dict roles: role dictionary, each role with 'site' and 'group' lists.
    :arg str format: "list" to get header (name, value) pairs, anything
      else to get the header dictionary itself.
    :returns: list of header name, value tuples to add to a HTTP request,
      or the dictionary of headers when `format` is not "list"."""
    headers = {'cms-auth-status': 'OK', 'cms-authn-method': method}

    # Identity headers are only emitted for non-empty values.
    for header, value in (('cms-authn-login', login),
                          ('cms-authn-name', name),
                          ('cms-authn-dn', dn)):
        if value:
            headers[header] = value

    # One 'cms-authz-<role>' header per role, listing its sites and groups.
    for roleName, role in viewitems(roles):
        header = 'cms-authz-' + authz_canonical(roleName)
        entries = []
        for kind in ('site', 'group'):
            if kind in role:
                entries.extend(
                    "%s:%s" % (kind, authz_canonical(v)) for v in role[kind])
        headers[header] = " ".join(entries)

    # The signed message covers every header except the status one, in
    # sorted order: first all the (hex) lengths, then all the contents.
    signed = [hk for hk in sorted(headers) if hk != 'cms-auth-status']
    prefix = "".join(
        "h%xv%x" % (len(hk), len(headers[hk])) for hk in signed)
    suffix = "".join("%s%s" % (hk, headers[hk]) for hk in signed)

    msg = prefix + "#" + suffix
    if PY3:
        hmac_key = encodeUnicodeToBytes(hmac_key)
        msg = encodeUnicodeToBytes(msg)
    headers['cms-authn-hmac'] = hmac.new(hmac_key, msg,
                                         hashlib.sha1).hexdigest()

    return listitems(headers) if format == "list" else headers
Esempio n. 3
0
    def _sanitise_input(self, input_args=[], input_kwargs={}, method=None):
        """
        Collect the input of a REST method call into a dictionary.

        Each argument declared for the method is looked up by name in
        input_kwargs first; when it is not there the next positional value
        from input_args is used, which assumes positional arguments come in
        declaration order. Keyword arguments that get consumed are removed
        from input_kwargs.

        _sanitise_input is called automatically if you use the
        _addMethod/_addDAO convenience functions. If you add your method to
        the methods dictionary by hand you should call _sanitise_input
        explicitly.

        In all but the most basic cases you'll likely want to over-ride this,
        or at least treat its outcome with deep suspicion.

        TODO: Would be nice to lose the method argument and derive it in
        this method.

        Raises HTTPError (400) when more arguments are supplied than the
        method declares, or when unknown keyword arguments are left over.

        Returns whatever _validate_input produces for the collected data.
        """
        verb = request.method.upper()

        def _nativeString(val):
            # Input must reach the database layer as the native string type
            # (unicode in python3, byte string in python2): the cx_Oracle
            # driver places these parameters into bound queries and a
            # string/unicode mismatch between python and Oracle breaks them.
            if PY3:
                return decodeBytesToUnicode(val)
            return encodeUnicodeToBytes(val)

        if len(input_args):
            # work on a private copy, positional values are popped below
            input_args = list(input_args)
        supplied = len(input_args) + len(input_kwargs)
        expected = self.methods[verb][method]['args']
        if supplied > len(expected):
            self.debug('%s to %s expects %s argument(s), got %s' %
                       (verb, method, len(expected), supplied))
            raise HTTPError(
                400, 'Invalid input: Input arguments failed sanitation.')

        input_data = {}
        for argName in expected:
            if argName in input_kwargs:
                input_data[argName] = _nativeString(input_kwargs[argName])
                del input_kwargs[argName]
            elif len(input_args):
                input_data[argName] = _nativeString(input_args.pop(0))

        # Anything still left in the keyword arguments was not declared.
        if input_kwargs:
            raise HTTPError(
                400, 'Invalid input: Input arguments failed sanitation.')

        leftovers = {'args': input_args, 'kwargs': input_kwargs}
        self.debug('%s raw data: %s' % (method, leftovers))
        self.debug('%s sanitised input_data: %s' % (method, input_data))
        return self._validate_input(input_data, verb, method)
Esempio n. 4
0
 def addBasicAuth(self, username, password):
     """Add basic auth headers to request.

     Stores an "Authorization: Basic <credentials>" entry in
     self.additionalHeaders, where <credentials> is the base64 encoding of
     "username:password".

     :arg username: login name, text or byte string.
     :arg password: password, text or byte string.
     """
     username = encodeUnicodeToBytes(username)
     password = encodeUnicodeToBytes(password)
     # base64.b64encode is available in both python2 and python3, whereas
     # base64.encodestring is gone from python 3.9 onwards. It also never
     # inserts line breaks, so long credentials cannot end up with a newline
     # embedded in the header value.
     encodedauth = base64.b64encode(b'%s:%s' % (username, password))
     if PY3:
         encodedauth = decodeBytesToUnicode(encodedauth)
     auth_string = "Basic %s" % encodedauth
     self.additionalHeaders["Authorization"] = auth_string
Esempio n. 5
0
def _etag_tail(head, tail, etag):
    """Generator yielding every chunk of `head` followed by every chunk of
    `tail`, each passed through encodeUnicodeToBytes. Once both are
    exhausted the ETag response header is set to `etag.value()`, provided
    `etag` is defined and that value is non-empty."""
    for source in (head, tail):
        for chunk in source:
            yield encodeUnicodeToBytes(chunk)

    # The header can only be set at the very end, after all output is seen.
    if etag:
        etagval = etag.value()
        if etagval:
            cherrypy.response.headers["ETag"] = etagval
Esempio n. 6
0
    def __setattr__(self, name, value):
        """
        Set attribute `name` to `value`, keeping the section bookkeeping
        up to date.

        - names starting with "_internal_" are stored directly, unchecked;
        - ConfigSection values are registered as children (and settings) and
          get a reference back to this section as their parent;
        - any other value is type-checked via _complexTypeCheck, unless the
          _internal_skipChecks flag is set, and registered as a setting.
          Unicode values are converted with encodeUnicodeToBytes first.
        """
        if name.startswith("_internal_"):
            # internal bookkeeping attribute: no test, no registration
            object.__setattr__(self, name, value)
            return

        if isinstance(value, ConfigSection):
            # child ConfigSection
            self._internal_children.add(name)
            self._internal_settings.add(name)
            value._internal_parent_ref = self
            object.__setattr__(self, name, value)
            return

        if isinstance(value, unicode):
            # Deliberately strict (no "ignore"): if the conversion failed
            # before, it is better to have it fail now as well.
            value = encodeUnicodeToBytes(value)

        # getattr with a default keeps this working for objects on which the
        # _internal_skipChecks flag was never set (backward compatibility)
        skipChecks = getattr(self, '_internal_skipChecks', False)
        if not skipChecks:
            self._complexTypeCheck(name, value)

        object.__setattr__(self, name, value)
        self._internal_settings.add(name)
Esempio n. 7
0
    def testZipEncodeStr(self):
        """
        Test the zipEncodeStr function.
        """
        message = """
%MSG-s CMSException:  AfterFile 02-Jun-2010 14:31:43 CEST PostEndRun
cms::Exception caught in cmsRun
---- EventProcessorFailure BEGIN
EventProcessingStopped
---- ScheduleExecutionFailure BEGIN
ProcessingStopped
---- InvalidReference BEGIN
BadRefCore Attempt to dereference a RefCore containing an invalid
ProductID has been detected. Please modify the calling
code to test validity before dereferencing.
cms::Exception going through module PatMCMatching/analyzePatMCMatching run: 1 lumi: 666672 event: 305
---- InvalidReference END
Exception going through path p
---- ScheduleExecutionFailure END
an exception occurred during current event processing
cms::Exception caught in CMS.EventProcessor and rethrown
---- EventProcessorFailure END
"""
        encodedMessage = \
            'eNp1j8FqwzAMhu95Cl0G2yEhaXvyrU3dkkFHqfcCnq02hkQOtlz6+HM2MrbDdBLS9/1CxdNJHcsI7UnJh8GJnScBsL0yhoMbEOpV+ZqoXNVNDc1GrBuxWUMr1TucfWRJ9pKoMGMU4scHo9OtZ3C5G+O8L3OBvCPxOXiDMfpw0G5IAWEnj91b8Xvn6KbYTxPab0+ZHm0aUD7QpDn/r/qP1dFdD85e8IoBySz0Ts+j1md9y4zjxMAebGYWTsMCGE+sHeVk0JS/+Qqc79lkuNtDryN8IBLAc1VVL5+o0W8i'
        encodedMessage = encodeUnicodeToBytes(encodedMessage)
        self.assertEqual(zipEncodeStr(message, maxLen=300, compressLevel=9, steps=10, truncateIndicator=" (...)"),
                         encodedMessage)
        # Test different maximum lengths
        # Encoded message should always be less than the maximum limit.
        for maxLen in (800, 500, 20):
            self.assertLessEqual(
                    len(zipEncodeStr(message, maxLen=maxLen, compressLevel=9, steps=10, truncateIndicator=" (...)")),
                    maxLen)
Esempio n. 8
0
    def addAttachment(self, id, rev, value, name=None, contentType=None, checksum=None, add_checksum=False):
        """
        Upload `value` as an attachment of the document `id` at revision `rev`.

        The attachment is stored under `name`, or as "attachment" when no
        name is given. If contentType is not set CouchDB will try to determine
        it and default to text/plain.

        A Content-MD5 header is sent along when either:
        - add_checksum is True: the MD5 of the attachment is computed here, or
        - checksum is given: it is passed through as provided.
        CouchDB will refuse the upload if the MD5 checksum doesn't match. The
        checksum should be the base64 encoded binary md5 (as returned by
        hashlib.md5().digest()). add_checksum takes precedence over checksum.
        """
        attachmentName = "attachment" if name is None else name
        req_headers = {}

        if add_checksum:
            # calculate base64 encoded MD5; anything that is not already a
            # string is stringified before hashing
            if isinstance(value, (newstr, newbytes)):
                hashInput = value
            else:
                hashInput = str(value)
            digest = hashlib.md5(encodeUnicodeToBytes(hashInput)).digest()
            content_md5 = base64.b64encode(digest)
            if PY3:
                content_md5 = decodeBytesToUnicode(content_md5)
            req_headers['Content-MD5'] = content_md5
        elif checksum:
            if PY3:
                checksum = decodeBytesToUnicode(checksum)
            req_headers['Content-MD5'] = checksum

        uri = '/%s/%s/%s?rev=%s' % (self.name, id, attachmentName, rev)
        return self.put(uri, value, encode=False,
                        contentType=contentType,
                        incoming_headers=req_headers)
Esempio n. 9
0
def remapDBS3Keys(data, stringify=False, **others):
    """Add DBS2-style keys to a DBS3 record.

    Fields have been renamed between DBS2 and 3: for every known DBS3 field
    present in `data`, its value is also stored under the DBS2 name. The
    original DBS3 keys are left in place.

    :param data: dictionary to update in place.
    :param stringify: when true, values are passed through
      encodeUnicodeToBytes before being stored under the new name.
    :param others: extra (DBS3 name -> DBS2 name) pairs; they extend or
      override the built-in mapping.
    :returns: the same `data` dictionary.
    """
    mapping = {
        'num_file': 'NumberOfFiles',
        'num_files': 'NumberOfFiles',
        'num_event': 'NumberOfEvents',
        'num_block': 'NumberOfBlocks',
        'num_lumi': 'NumberOfLumis',
        'event_count': 'NumberOfEvents',
        'run_num': 'RunNumber',
        'file_size': 'FileSize',
        'block_size': 'BlockSize',
        'file_count': 'NumberOfFiles',
        'open_for_writing': 'OpenForWriting',
        'logical_file_name': 'LogicalFileName',
        'adler32': 'Adler32',
        'check_sum': 'Checksum',
        'md5': 'Md5',
        'block_name': 'BlockName',
        'lumi_section_num': 'LumiSectionNumber'
    }
    mapping.update(others)

    for dbs3Key, dbs2Key in viewitems(mapping):
        if dbs3Key not in data:
            continue
        value = data[dbs3Key]
        if stringify:
            value = encodeUnicodeToBytes(value)
        data[dbs2Key] = value
    return data
Esempio n. 10
0
    def enqueue(self, key, *parameters):
        """
        _enqueue_

        Add a new work item to the queue.
        This may result in threads being spawned if there are threads
        available.

        The work item is stored in the input buffer table with `key` as its
        event and the pickled, base64 encoded `parameters` as its payload.
        The pool lock is held for the whole call and is always released,
        also when storing the work or spawning a thread fails.
        """
        self.lock.acquire()
        try:
            base64_encoder = base64.encodebytes if PY3 else base64.encodestring
            args = {'event': str(key),
                    'component': self.component.config.Agent.componentName,
                    'payload': base64_encoder(encodeUnicodeToBytes(pickle.dumps(parameters))),
                    'thread_pool_id': self.threadPoolId}
            myThread = threading.currentThread()
            myThread.transaction.begin()
            self.query.insertWork(args, self.poolTableBufferIn)
            # we need to commit here otherwise the thread transaction might not
            # see it. check if this buffer needs to be flushed.
            myThread.transaction.commit()
            myThread.transaction.begin()
            bufferSize = self.query.getQueueLength(
                {'componentName': self.component.config.Agent.componentName,
                 'thread_pool_id': self.threadPoolId}, self.poolTableBufferIn)
            if bufferSize > self.bufferSize:
                self.query.moveWorkFromBufferIn(self.poolTableBufferIn,
                                                self.poolTable)
            # FIXME: we should call the msgService finish method here before
            # this commit so we know the event/payload is transferred to a thread.
            myThread.transaction.commit()

            # enqueue the work item
            self.callQueue += 1
            # check if there is a slave in the queue (then reuse it)
            if len(self.slaveQueue):
                # There is an available server: spawn a thread
                slave = self.slaveQueue[0]
                del self.slaveQueue[0]
                # Increment the count of active threads
                self.activeCount += 1
                thread = threading.Thread(target=self.slaveThread,
                                          args=(slave,))
                thread.start()
            # check if we can instantiate more slaves.
            elif self.activeCount < self.nrOfSlaves:
                # we can still create slaves.
                slave = self.slaveFactory.loadObject(self.slaveName)
                slave = self.prepareSlave(slave)
                self.activeCount += 1
                thread = threading.Thread(target=self.slaveThread,
                                          args=(slave,))
                thread.start()
        finally:
            # Without this an exception anywhere above would leave the lock
            # held and block every later enqueue call.
            self.lock.release()
Esempio n. 11
0
def gen_hash(key):
    """Return the hexadecimal md5 digest of `key`.

    Unicode keys are converted to bytes first; a key that is not bytes
    after that conversion raises NotImplementedError."""
    # a key which is not unicode comes back unchanged from the conversion
    rawKey = encodeUnicodeToBytes(key)
    if isinstance(rawKey, bytes):
        return hashlib.md5(rawKey).hexdigest()
    raise NotImplementedError
Esempio n. 12
0
    def retrieveWork(self):
        """
        _retrieveWork_

        If activated this thread slave retrieves work. It retrieves
        it from the persistent thread pool and changes the state
        from queued to process.

        Work is looked for in the output buffer table; when nothing is found
        there, work is first moved in from the pool table and then from the
        input buffer table. An Exception is raised if no work can be found
        after that.

        Returns a tuple of the second field of the retrieved work record and
        the unpickled payload (its third field).
        """
        myThread = threading.currentThread()
        # we only want to initiate thread related issues once per thread.
        # this checks if our thread has the dbi attributes.
        if not hasattr(myThread, "dbi"):
            self.initInThread()
        else:
            # init creates a transaction that will call begin.
            myThread.transaction.begin()

        bufferOut = self.args['thread_pool_table_buffer_out']
        args = {'thread_pool_id': self.args['thread_pool_id'],
                'component': self.args['componentName']}

        result = self.query.selectWork(args, bufferOut)
        # we might need to look into multiple buffers and move work to find it.
        # from keeping track of the number of messages for us we know it
        # is there.
        if result[0] is None:
            self.query.moveWorkToBufferOut(args,
                                           self.args['thread_pool_table'],
                                           bufferOut,
                                           self.args['thread_pool_buffer_size'])
        result = self.query.selectWork(args, bufferOut)
        if result[0] is None:
            self.query.moveWorkToBufferOut(args,
                                           self.args['thread_pool_table_buffer_in'],
                                           bufferOut,
                                           self.args['thread_pool_buffer_size'])
        result = self.query.selectWork(args, bufferOut)

        if result[0] is None:
            # FIXME: make proper exception
            raise Exception("ERROR: How can that be!!")
        myThread.workId = str(result[0])
        logging.debug("THREAD: Retrieved Work with id: %s", myThread.workId)
        # get the actual work now:
        result = self.query.retrieveWork({'id': myThread.workId}, bufferOut)
        self.query.tagWork({'id': myThread.workId}, bufferOut)
        # we commit here because if the component crashes this is where
        # it will look for lost threads (the ones that are in the process state)
        myThread.transaction.commit()

        # NOTE(review): the payload is unpickled as-is. pickle must never be
        # fed untrusted data -- presumably only the pool's own enqueue side
        # writes this column; verify.
        base64_decoder = base64.decodebytes if PY3 else base64.decodestring
        payload = pickle.loads(base64_decoder(encodeUnicodeToBytes(result[2])))
        return (result[1], payload)
Esempio n. 13
0
def genid(kwds):
    """Return the hexadecimal md5 digest identifying `kwds`.

    Dictionaries are hashed through their JSON encoding with sorted keys,
    anything else through its str() representation."""
    if isinstance(kwds, dict):
        text = json.dumps(dict(kwds), sort_keys=True)
    else:
        # str() is fine both in py2 and in py3
        text = str(kwds)
    # text which is not unicode comes back unchanged from the conversion
    return hashlib.md5(encodeUnicodeToBytes(text)).hexdigest()
Esempio n. 14
0
    def _getCacheFilePath(self, stepHelper):
        """
        Return the path of the pileup configuration cache file for a step.

        The file lives in self.cacheDirectory() and is named after the sha1
        of the step's pileup dataset names: per pileup section the datasets
        are joined with "_", and the sections are concatenated.
        """
        pileup = stepHelper.data.pileup
        perSection = [
            "_".join(getattr(getattr(pileup, pileupType), "dataset"))
            for pileupType in pileup.listSections_()
        ]
        fileName = "".join(perSection)
        # TODO cache is not very effective if the dataset combination is different between workflow
        cacheHash = hashlib.sha1(encodeUnicodeToBytes(fileName)).hexdigest()
        return "%s/pileupconf-%s.json" % (self.cacheDirectory(), cacheHash)
Esempio n. 15
0
    def persist(self, filename):
        """
        _persist_

        Pickle self.data and save it to disk as `filename`.

        Under python3 the file is opened in binary mode and the data goes
        through encodeUnicodeToBytes before being pickled; under python2 the
        file is opened in text mode and the data is pickled unchanged.
        """
        mode = 'wb' if PY3 else 'w'
        with open(filename, mode) as handle:
            content = encodeUnicodeToBytes(self.data) if PY3 else self.data
            pickle.dump(content, handle)
Esempio n. 16
0
 def encode(self, args, files):
     """
     Encode form (name, value) and (name, filename, type) elements into
     multi-part/form-data. We don't actually need to know what we are
     uploading here, so just claim it's all text/plain.

     :param args: dictionary of form field names and values.
     :param files: dictionary of form field names and paths of the files
       to upload; the files are read from disk here.
     :returns: tuple of (content type header value, request body), both
       byte strings. The body is terminated by exactly one closing
       boundary delimiter, whatever the number of files.
     """
     boundary = b'----------=_DQM_FILE_BOUNDARY_=-----------'
     (body, crlf) = (b'', b'\r\n')
     for (key, value) in viewitems(args):
         logging.debug("encode value - %s, %s", type(value), value)
         if PY2:
             payload = str(value)
         elif PY3:
             # everything which is not bytes already is stringified first
             payload = value
             if not isinstance(payload, bytes):
                 payload = str(payload)
             payload = encodeUnicodeToBytes(payload)
             key = encodeUnicodeToBytes(key)
         logging.debug("encode payload - %s, %s", type(payload), payload)
         body += b'--' + boundary + crlf
         body += (b'Content-Disposition: form-data; name="%s"' % key) + crlf
         body += crlf + payload + crlf
     for (key, filename) in viewitems(files):
         body += b'--' + boundary + crlf
         key = encodeUnicodeToBytesConditional(key, condition=PY3)
         filepath = encodeUnicodeToBytesConditional(
             os.path.basename(filename), condition=PY3)
         body += (
             b'Content-Disposition: form-data; name="%s"; filename="%s"' %
             (key, filepath)) + crlf
         body += (b'Content-Type: %s' %
                  encodeUnicodeToBytes(self.filetype(filename))) + crlf
         body += (b'Content-Length: %d' % os.path.getsize(filename)) + crlf
         logging.debug("encode body (without binary file) -%s, %s",
                       type(body), body)
         with open(filename, "rb") as fd:
             body += crlf + fd.read() + crlf
     # The closing delimiter ends the whole multipart body, so it is written
     # once after all parts rather than after each file: with a single file
     # the output is unchanged, with none or several it is now well-formed.
     body += b'--' + boundary + b'--' + crlf + crlf
     return (b'multipart/form-data; boundary=' + boundary, body)
Esempio n. 17
0
    def getGlobalTag(self):
        """
        _getGlobalTag_

        Retrieve the global tag.

        The value stored in the "arguments" section of the application
        configuration is preferred; when it is not there the tag is read
        from the pickled arguments instead.
        """
        config = self.data.application.configuration
        if hasattr(config, "arguments") and hasattr(config.arguments,
                                                    "globalTag"):
            return config.arguments.globalTag

        # fall back to the pickled copy of the arguments
        unpickled = pickle.loads(encodeUnicodeToBytes(config.pickledarguments))
        return unpickled['globalTag']
Esempio n. 18
0
 def __hash__(self):
     """
     Hash function for this dict.

     The hash is derived from the sha1 of the string form of the sorted
     (key, value) pairs, truncated to its first 15 hexadecimal digits.
     """
     # Build an immutable, sorted picture of this object.
     # NOTE: the run object is represented by its own hash
     frozen = [
         (keyName,
          hash(self[keyName]) if keyName == "run_lumi" else self[keyName])
         for keyName in sorted(self)
     ]
     digest = hashlib.sha1(encodeUnicodeToBytes(str(frozen))).hexdigest()
     return int(digest[:15], 16)
Esempio n. 19
0
def genkey(query):
    """
    Generate the key-hash of a query: the hexadecimal representation of
    the md5 hash of the query. Dictionary queries are hashed through their
    JSON encoding with sorted keys.
    """
    if isinstance(query, dict):
        query = json.dumps(dict(query), sort_keys=True)
    rawQuery = encodeUnicodeToBytes(query)

    keyhash = hashlib.md5()
    try:
        keyhash.update(rawQuery)
    except TypeError:  # python3
        # last resort for input md5 refuses even after the conversion above
        keyhash.update(rawQuery.encode('ascii'))
    return keyhash.hexdigest()
Esempio n. 20
0
    def setDatasetName(self, datasetName):
        """
        _setDatasetName_

        Set the dataset name, both in the "arguments" section of the
        application configuration and in the pickled arguments.
        """
        config = self.data.application.configuration
        config.section_('arguments')
        config.arguments.datasetName = datasetName

        # Update the pickled copy too, starting from scratch if there is none
        if hasattr(config, "pickledarguments"):
            args = pickle.loads(encodeUnicodeToBytes(config.pickledarguments))
        else:
            args = {}
        args['datasetName'] = datasetName
        # FIXME: once both central services and WMAgent are in Py3, we can remove protocol=0
        config.pickledarguments = pickle.dumps(args, protocol=0)
Esempio n. 21
0
    def setGlobalTag(self, globalTag):
        """
        _setGlobalTag_

        Set the global tag, both in the "arguments" section of the
        application configuration and in the pickled arguments.
        """
        config = self.data.application.configuration
        config.section_('arguments')
        config.arguments.globalTag = globalTag

        # Update the pickled copy too, starting from scratch if there is none
        if hasattr(config, "pickledarguments"):
            args = pickle.loads(encodeUnicodeToBytes(config.pickledarguments))
        else:
            args = {}
        args['globalTag'] = globalTag
        # FIXME: once both central services and WMAgent are in Py3, we can remove protocol=0
        config.pickledarguments = pickle.dumps(args, protocol=0)
Esempio n. 22
0
def setHttpProxy(url):
    """
    Use frontier to figure out the http_proxies.
    Pick one deterministically based on the url and loadbalance settings

    An http_proxy already present in the environment is returned as is.
    Otherwise the proxies are taken from the output of `cmsGetFnConnect`,
    and the selected one is also exported as http_proxy in the environment.

    :param url: url the proxy is wanted for; only used to choose among the
      proxies when the frontier setup asks for load balancing.
    :returns: the proxy url, or None when the command fails or reports no
      proxy at all.
    """
    if 'http_proxy' in os.environ:
        return os.environ['http_proxy']

    status, output = getstatusoutput('cmsGetFnConnect frontier://smallfiles')
    if status:
        return None

    # raw string: the pattern is full of backslashes that are not valid
    # python string escapes
    proxyList = re.findall(r'\(proxyurl=([\w\d\.\-\:\/]+)\)', output)
    if not proxyList:
        # nothing to choose from, same outcome as a failed lookup
        return None

    if 'loadbalance=proxies' in output:
        # the same url always hashes to the same proxy
        urlHash = int(hashlib.sha1(encodeUnicodeToBytes(url)).hexdigest()[:15], 16)
        proxy = proxyList[urlHash % len(proxyList)]
    else:
        proxy = proxyList[0]
    os.environ['http_proxy'] = proxy
    return proxy
Esempio n. 23
0
    def upload(self, url, args, filename):
        """
        _upload_

        Perform a file upload to the dqm server using HTTPS auth with the
        service proxy provided.

        The request is sent to `url` + '/data/put', carrying `args` and the
        file `filename` as prepared by self.marshall. Returns a tuple of the
        response headers and the response body; a gzip encoded body is
        decompressed first.
        """
        ident = "WMAgent python/%d.%d.%d" % sys.version_info[:3]
        # the proxy configured for the step wins over the environment one
        uploadProxy = self.step.upload.proxy or os.environ.get(
            'X509_USER_PROXY', None)
        logging.info("Using proxy file: %s", uploadProxy)
        logging.info("Using CA certificate path: %s",
                     os.environ.get('X509_CERT_DIR'))

        argLines = ["  ==> %s: %s\n" % (arg, args[arg]) for arg in args]
        logging.info("HTTP POST upload arguments:\n" + "".join(argLines))

        opener = OpenerDirector()
        opener.add_handler(HTTPSAuthHandler(key=uploadProxy, cert=uploadProxy))

        # setup the request object
        if PY3:
            url = decodeBytesToUnicode(url)
        else:
            url = encodeUnicodeToBytes(url)
        datareq = Request(url + '/data/put')
        datareq.add_header('Accept-encoding', 'gzip')
        datareq.add_header('User-agent', ident)
        self.marshall(args, {'file': filename}, datareq)

        # anything but https additionally gets a ProxyHandler with no proxies
        if 'https://' not in url:
            opener.add_handler(ProxyHandler({}))
        result = opener.open(datareq)

        data = result.read()
        if result.headers.get('Content-encoding', '') == 'gzip':
            data = GzipFile(fileobj=BytesIO(data)).read()

        return (result.headers, data)
Esempio n. 24
0
    def testAttachments(self):
        """
        Upload attachments with and without checksumming, and check that a
        wrong checksum is refused
        """
        doc = self.db.commitOne({'foo':'bar'}, timestamp=True)[0]

        plain = "Hello"
        withContentType = "How are you today?"
        named = "I'm very well, thanks for asking"
        autoChecksummed = "Lovely weather we're having"
        checksummed = "Goodbye"
        # base64 encoded binary md5 of the attachment uploaded with a checksum
        checksummedMd5 = base64.b64encode(
            hashlib.md5(encodeUnicodeToBytes(checksummed)).digest())
        badChecksum = "Good day to you, sir!"

        #TODO: add a binary attachment - e.g. tar.gz
        # every upload goes to the revision produced by the previous one
        doc = self.db.addAttachment(doc['id'], doc['rev'], plain)
        doc = self.db.addAttachment(doc['id'], doc['rev'], withContentType, contentType="foo/bar")
        doc = self.db.addAttachment(doc['id'], doc['rev'], named, name="my_greeting")
        doc = self.db.addAttachment(doc['id'], doc['rev'], autoChecksummed, add_checksum=True)
        doc = self.db.addAttachment(doc['id'], doc['rev'], checksummed, checksum=checksummedMd5)

        self.assertRaises(CouchInternalServerError, self.db.addAttachment,
                          doc['id'], doc['rev'], badChecksum, checksum='123')
Esempio n. 25
0
def xml_parser(data, prim_key):
    """
    Generic XML parser

    Generator yielding one dictionary per parsed element whose tag equals
    `prim_key`. Each dictionary maps the tag to the element attributes and is
    handed to get_children() before being yielded.

    :param data: can be of type "file object", unicode string or bytes string
    :param prim_key: tag of the elements for which rows are produced
    """
    if not isinstance(data, (str, bytes)):
        # anything that is not a string is handed to the parser untouched
        source = data
    else:
        # in-memory content is exposed to the parser as a binary stream
        source = BytesIO(encodeUnicodeToBytes(data, "ignore"))

    for event, elem in ET.iterparse(source):
        tag = elem.tag
        if tag == prim_key:
            row = {tag: elem.attrib}
            get_children(elem, event, row, tag)
            elem.clear()
            yield row
Esempio n. 26
0
    def __hash__(self):
        """
        Derive the hash from a SHA1 digest fed with the run and with the
        string form of the (sorted by first item) eventsPerLumi items.

        NOTE: Python2 maxint is:
        > python -c 'import sys; print(sys.maxint)'
        9223372036854775807
        Using every hexadecimal digit of the digest could therefore overflow
        an integer, so only the first 15 digits are kept. The largest value
        that slice can produce is:
        > int(15 * "f", base=16)
        1152921504606846975
        """
        runValue = self.run
        if not isinstance(runValue, (newstr, newbytes)):
            # anything that is not already a string goes through str() first
            runValue = str(runValue)
        digest = hashlib.sha1(
            encodeUnicodeToBytesConditional(runValue, condition=PY3))

        # Generate immutable sorted list of lumis
        lumiEvents = sorted(listitems(self.eventsPerLumi),
                            key=lambda item: item[0])
        digest.update(encodeUnicodeToBytes(str(lumiEvents)))

        return int(digest.hexdigest()[:15], 16)
Esempio n. 27
0
 def update(self, val):
     """Feed response data `val`, encoded to bytes, into the digest (if any)."""
     if not self.digest:
         # no digest configured: nothing to do
         return
     self.digest.update(encodeUnicodeToBytes(val))
Esempio n. 28
0
    def testExitCode(self):
        """
        _testExitCode_

        Check the exit code bookkeeping of a report while errors are being
        added to one of its steps

        Note: Errors without a return code return 99999
        getStepExitCode: returns the first valid and non-zero exit code
        getExitCode: uses the method above to get an exit code
        getStepExitCodes: returns a set of all exit codes within the step
        """
        step = "cmsRun1"
        report = Report(step)

        # a fresh report carries no error at all
        self.assertEqual(report.getExitCode(), 0)
        self.assertEqual(report.getStepExitCode(stepName=step), 0)
        self.assertItemsEqual(report.getStepExitCodes(stepName=step), {})
        self.assertItemsEqual(report.getStepErrors(stepName=step), {})

        # Each entry: (exitCode, errorType, errorDetails, exit code that the
        # report is expected to return once the error has been added)
        scenarios = [
            # None is not a valid exitCode, but it will get mapped to 99999
            (None, "test", "test", 99999),
            # first real exit code; it stays the reported one from here on
            (102, "test", "test", 102),
            (103, "test", "test", 102),
            (104, "test", "test", 102),
            # different type and details (which does not matter)
            (105, "testEE", "testAA", 102),
            # all the remaining entries are testing unicode handling
            (106, "test", "1 тℯṧт", 102),
            (107, "test", "2 тℯṧт \x95", 102),
            (108, "test", encodeUnicodeToBytes("3 тℯṧт"), 102),
            (109, "test", decodeBytesToUnicode("4 тℯṧт"), 102),
            (110, "test", {"нεʟʟ◎": 3.14159}, 102),
            (111, "test", {"нεʟʟ◎ \x95": "ẘøґℓ∂ \x95"}, 102),
        ]

        # exit codes recorded so far, as expected from getStepExitCodes
        seenCodes = set()
        for errorCount, (code, errType, details, reported) in enumerate(scenarios, 1):
            report.addError(stepName=step, exitCode=code, errorType=errType, errorDetails=details)
            seenCodes.add(99999 if code is None else code)

            self.assertEqual(report.getExitCode(), reported)
            self.assertEqual(report.getStepExitCode(stepName=step), reported)
            self.assertItemsEqual(report.getStepExitCodes(stepName=step), seenCodes)
            self.assertEqual(report.getStepErrors(stepName=step)['errorCount'], errorCount)
Esempio n. 29
0
    def makeSandbox(self, buildItHere, workload):
        """
        __makeSandbox__

        MakeSandbox creates and archives a sandbox in buildItHere,
        returning the path to the archive and putting it in the
        task

        If both the archive and the saved workload file already exist, the
        workload spec url is pointed at the existing file and the existing
        archive path is returned without rebuilding anything.

        Temporary files created while packaging WMCore are removed even when
        building the sandbox fails half way through.

        :param buildItHere: base directory under which the sandbox tree
            "<workloadName>/WMSandbox", the "pileupCache" directory and the
            archive are located
        :param workload: workload object; its name, tasks and steps define
            the directory layout, and its sandbox and spec url are set here
        :return: path of the "<workloadName>-Sandbox.tar.bz2" archive
        """
        workloadName = workload.name()
        # Create path to sandbox
        pileupCachePath = "%s/pileupCache" % buildItHere
        path = "%s/%s/WMSandbox" % (buildItHere, workloadName)
        workloadFile = os.path.join(path, "WMWorkload.pkl")
        archivePath = os.path.join(
            buildItHere,
            "%s/%s-Sandbox.tar.bz2" % (workloadName, workloadName))
        # check if already built
        if os.path.exists(archivePath) and os.path.exists(workloadFile):
            workload.setSpecUrl(workloadFile)  # point to sandbox spec
            return archivePath
        # a sandbox tree without its archive is stale: rebuild from scratch
        if os.path.exists(path):
            shutil.rmtree(path)
        # //
        # // Set up Fetcher plugins, use default list for maintaining
        # //  compatibility
        commonFetchers = ["CMSSWFetcher", "URLFetcher", "PileupFetcher"]

        # generate the real path and make it
        self._makePathonPackage(path)

        # Add sandbox path to workload
        workload.setSandbox(archivePath)
        userSandboxes = []
        for topLevelTask in workload.taskIterator():
            for taskNode in topLevelTask.nodeIterator():
                task = WMTask.WMTaskHelper(taskNode)

                # default fetchers first, then whatever the task declares
                fetcherNames = commonFetchers[:]
                taskFetchers = getattr(task.data, "fetchers", [])
                fetcherNames.extend(taskFetchers)
                fetcherInstances = list(map(getFetcher, fetcherNames))

                taskPath = "%s/%s" % (path, task.name())
                self._makePathonPackage(taskPath)

                # TODO sandbox is property of workload now instead of task
                # but some other places use it as a task property (i.e. TaskQueue)
                # so for backward compatibility save it as task attribute as well.
                setattr(task.data.input, 'sandbox', archivePath)

                for s in task.steps().nodeIterator():
                    s = WMStep.WMStepHelper(s)
                    stepPath = "%s/%s" % (taskPath, s.name())
                    self._makePathonPackage(stepPath)
                    userSandboxes.extend(s.getUserSandboxes())

                # //
                # // Execute the fetcher plugins
                # //
                for fetcher in fetcherInstances:
                    # TODO: when cache directory is set as path, cache is maintained by workflow.
                    # In that case, cache will be deleted when workflow is done,
                    # but then different workflows cannot share the same cache.
                    # You can set the cache directory somewhere else, but need to have cache refresh (delete) policy
                    fetcher.setCacheDirectory(pileupCachePath)
                    fetcher.setWorkingDirectory(taskPath)
                    fetcher(task)

        # pickle up the workload for storage in the sandbox
        workload.setSpecUrl(workloadFile)
        workload.save(workloadFile)

        # now, tar everything up and put it somewhere special

        # (path on disk, name inside the archive) pairs
        tarContent = [("%s/%s/" % (buildItHere, workloadName), '/')]
        # temporary files to remove once the archive is written (or on failure)
        deleteFiles = []

        try:
            if self.packageWMCore:

                wmcorePath = os.path.realpath(
                    os.path.join(os.path.dirname(__file__), '..'))

                (zipHandle, zipPath) = tempfile.mkstemp()
                os.close(zipHandle)
                # register right away so the file is removed on any failure
                deleteFiles.append(zipPath)

                # the name in the archive is the path relative to WMCore/
                arcStart = len(wmcorePath) - len('WMCore/') + 1

                # the context manager closes the zipball even if a write fails
                with zipfile.ZipFile(zipPath,
                                     mode='w',
                                     compression=zipfile.ZIP_DEFLATED) as zipFile:

                    for (root, _dirnames, filenames) in os.walk(wmcorePath):
                        for filename in filenames:
                            if filename.endswith((".svn", ".git")):
                                continue
                            fullPath = os.path.join(root, filename)
                            zipFile.write(filename=fullPath,
                                          arcname=fullPath[arcStart:])

                    # Add a dummy module for zipimport testing
                    (handle, dummyModulePath) = tempfile.mkstemp()
                    deleteFiles.append(dummyModulePath)
                    try:
                        # Python3 mkstemp file descriptors expects bytes-object
                        os.write(handle,
                                 encodeUnicodeToBytes("#!/usr/bin/env python\n"))
                        os.write(handle,
                                 encodeUnicodeToBytes("print('ZIPIMPORTTESTOK')\n"))
                    finally:
                        # never leak the descriptor, even if a write fails
                        os.close(handle)
                    zipFile.write(filename=dummyModulePath,
                                  arcname='WMCore/ZipImportTestModule.py')

                # Add the wmcore zipball to the sandbox
                tarContent.append((zipPath, '/WMCore.zip'))

                psetTweaksPath = PSetTweaks.__path__[0]
                tarContent.append((psetTweaksPath, '/PSetTweaks'))

                utilsPath = Utils.__path__[0]
                tarContent.append((utilsPath, '/Utils'))

            # only user sandboxes for which urlsplit reports an empty first
            # component (no URL scheme) are added to the archive
            for sb in userSandboxes:
                splitResult = urlsplit(sb)
                if not splitResult[0]:
                    tarContent.append((sb, os.path.basename(sb)))

            with tarfile.open(archivePath, 'w:bz2') as tar:
                for (name, arcname) in tarContent:
                    tar.add(name, arcname, filter=tarFilter)
        finally:
            # mkstemp leaves deletion to the caller; do it on every exit path
            for deleteFile in deleteFiles:
                os.unlink(deleteFile)

        logging.info("Created sandbox %s with size %d",
                     os.path.basename(archivePath),
                     os.path.getsize(archivePath))

        return archivePath
Esempio n. 30
0
    def set_opts(self,
                 curl,
                 url,
                 params,
                 headers,
                 ckey=None,
                 cert=None,
                 capath=None,
                 verbose=None,
                 verb='GET',
                 doseq=True,
                 encode=False,
                 cainfo=None,
                 cookie=None):
        """Set options for given curl object, params should be a dictionary

        :param curl: pycurl object to configure
        :param url: target URL; for GET and HEAD the encoded parameters are
            appended to it as a query string
        :param params: request parameters: a dictionary, an already encoded
            string or None; they are encoded through self.encode_params
        :param headers: dictionary of HTTP headers or None. For POST and PUT
            a "Content-Length" entry is added to it (a dictionary passed by
            the caller is updated in place)
        :param ckey: value for the SSLKEY option, when set
        :param cert: value for the SSLCERT option, when set
        :param capath: value for the CAPATH option; peer verification is
            enabled only when it is set
        :param verbose: when true, enable VERBOSE and route debug output
            to self.debug
        :param verb: HTTP method, one of GET, HEAD, POST, PUT or DELETE
        :param doseq: forwarded to self.encode_params
        :param encode: forwarded to self.encode_params
        :param cainfo: value for the CAINFO option; only used with capath
        :param cookie: optional container keyed by url whose value is used
            for both the COOKIEFILE and COOKIEJAR options
        :return: tuple (bbuf, hbuf) of BytesIO buffers registered as the
            WRITEFUNCTION and HEADERFUNCTION callbacks respectively
        :raises TypeError: if params is not a dictionary, a string or None
        :raises Exception: if verb is not a supported HTTP method
        """
        if not (isinstance(params, (dict, basestring)) or params is None):
            raise TypeError(
                "pycurl parameters should be passed as dictionary or an (encoded) string"
            )
        curl.setopt(pycurl.NOSIGNAL, self.nosignal)
        curl.setopt(pycurl.TIMEOUT, self.timeout)
        curl.setopt(pycurl.CONNECTTIMEOUT, self.connecttimeout)
        curl.setopt(pycurl.FOLLOWLOCATION, self.followlocation)
        curl.setopt(pycurl.MAXREDIRS, self.maxredirs)

        # also accepts encoding/compression algorithms
        if headers and headers.get("Accept-Encoding"):
            if isinstance(headers["Accept-Encoding"], basestring):
                curl.setopt(pycurl.ENCODING, headers["Accept-Encoding"])
            else:
                logging.warning(
                    "Wrong data type for header 'Accept-Encoding': %s",
                    type(headers["Accept-Encoding"]))

        if cookie and url in cookie:
            curl.setopt(pycurl.COOKIEFILE, cookie[url])
            curl.setopt(pycurl.COOKIEJAR, cookie[url])

        encoded_data = self.encode_params(params, verb, doseq, encode)

        if verb == 'GET':
            if encoded_data:
                url = url + '?' + encoded_data
        elif verb == 'HEAD':
            if encoded_data:
                url = url + '?' + encoded_data
            curl.setopt(pycurl.CUSTOMREQUEST, verb)
            curl.setopt(pycurl.HEADER, 1)
            curl.setopt(pycurl.NOBODY, True)
        elif verb == 'POST':
            curl.setopt(pycurl.POST, 1)
            if encoded_data:
                curl.setopt(pycurl.POSTFIELDS, encoded_data)
        elif verb == 'DELETE' or verb == 'PUT':
            curl.setopt(pycurl.CUSTOMREQUEST, verb)
            # NOTE(review): when `headers` is non-empty, the HTTPHEADER option
            # set further down looks like it replaces this list - confirm
            # whether the chunked header is still meant to be sent then
            curl.setopt(pycurl.HTTPHEADER, ['Transfer-Encoding: chunked'])
            if encoded_data:
                curl.setopt(pycurl.POSTFIELDS, encoded_data)
        else:
            raise Exception('Unsupported HTTP method "%s"' % verb)

        if verb in ('POST', 'PUT'):
            # only these methods (and PATCH) require this header
            if headers is None:
                # headers=None is tolerated by the checks above, so do not
                # fail here with a TypeError on item assignment
                headers = {}
            # a request without parameters has no body: report a zero length
            # rather than calling len() on an empty/None payload
            headers["Content-Length"] = str(len(encoded_data) if encoded_data else 0)

        # we must pass url as a string data-type, otherwise pycurl will fail with error
        # TypeError: invalid arguments to setopt
        # see https://curl.haxx.se/mail/curlpython-2007-07/0001.html
        curl.setopt(pycurl.URL, encodeUnicodeToBytes(url))
        if headers:
            curl.setopt(pycurl.HTTPHEADER, \
                [encodeUnicodeToBytes("%s: %s" % (k, v)) for k, v in viewitems(headers)])
        # response body and response headers are collected in memory
        bbuf = BytesIO()
        hbuf = BytesIO()
        curl.setopt(pycurl.WRITEFUNCTION, bbuf.write)
        curl.setopt(pycurl.HEADERFUNCTION, hbuf.write)
        if capath:
            curl.setopt(pycurl.CAPATH, capath)
            curl.setopt(pycurl.SSL_VERIFYPEER, True)
            if cainfo:
                curl.setopt(pycurl.CAINFO, cainfo)
        else:
            # NOTE(review): without capath the peer certificate is not
            # verified at all - confirm this is acceptable for all callers
            curl.setopt(pycurl.SSL_VERIFYPEER, False)
        if ckey:
            curl.setopt(pycurl.SSLKEY, ckey)
        if cert:
            curl.setopt(pycurl.SSLCERT, cert)
        if verbose:
            curl.setopt(pycurl.VERBOSE, True)
            curl.setopt(pycurl.DEBUGFUNCTION, self.debug)
        return bbuf, hbuf