Example #1
def _getMetaData(datafile, dataEncFile, pubkey, encMethod):
    """Return info about an encryption context, as a multiline string"""
    try:
        now = codecs.utf_8_decode(time.strftime("%Y_%b_%d_%H%M", time.localtime()))[0]
    except UnicodeDecodeError:
        # use an all-numeric date (to sidestep the unicode error)
        now = codecs.utf_8_decode(time.strftime("%Y_%m_%d_%H%M", time.localtime()))[0]
    md = []
    md.append("orig file path: " + os.path.abspath(datafile))
    md.append("HMAC-sha256 of encrypted file: %s" % _sha256b64(dataEncFile))
    pubkey = open(pubkey).readlines()
    # line 0 is the PEM '-----BEGIN...' header; line 1 holds the first base64 data
    md.append("public key (first 20): " + pubkey[1][0:20])
    md.append("encryption: " + loggingID + "." + encMethod)
    md.append("RSA padding: " + RSA_PADDING)
    md.append("encrypted on date: " + now)
    md.append("openssl version: " + opensslVersion)
    if sys.platform in ["darwin"]:
        OSXver, _, architecture = platform.mac_ver()
        platInfo = "darwin " + OSXver + " " + architecture
    elif sys.platform.startswith("linux"):
        platInfo = "linux " + platform.release()
    elif sys.platform in ["win32"]:
        platInfo = "windowsversion=" + repr(sys.getwindowsversion())
    else:
        platInfo = "[unknown]"
    md.append("platform: " + platInfo)
    md.append("--- end of meta-data %s ---" % now)
    return "\n".join(md)
Example #2
    def decode(data, errors='strict'):
        """
        Decode strings

        :param bytes data: input bytes
        :param str errors: error level
        :return: str
        """

        output = ''
        try:
            if len(data) < 3:
                if codecs.BOM_UTF8.startswith(data):
                    # not enough data to decide if this is a BOM
                    # => try again on the next call
                    output = ""

            elif data[:3] == codecs.BOM_UTF8:
                (output, sizes) = codecs.utf_8_decode(data[3:], errors)
            elif data[:2] == codecs.BOM_UTF16:
                # BOM_UTF16 is only two bytes long
                output = data[2:].decode('utf-16')
            else:
                # (else) no BOM present
                (output, sizes) = codecs.utf_8_decode(data, errors)
            return output
        except Exception:
            # likely binary content (images, files, etc.), not text
            try:
                return data.decode('cp1251')
            except Exception:
                return ""
Example #3
def _getMetaData(datafile, dataEncFile, pubkey, encMethod):
    """Return info about an encryption context, as a multiline string"""
    try:
        now = codecs.utf_8_decode(time.strftime("%Y_%b_%d_%H%M", time.localtime()))[0]
    except UnicodeDecodeError:
        # use an all-numeric date (to sidestep the unicode error)
        now = codecs.utf_8_decode(time.strftime("%Y_%m_%d_%H%M", time.localtime()))[0]
    md = []
    md.append('orig file path: ' + os.path.abspath(datafile))
    md.append('HMAC-sha256 of encrypted file: %s' % _sha256b64(dataEncFile))
    pubkey = open(pubkey).readlines()
    md.append('public key (first 20): ' + pubkey[1][0:20])
    md.append('encryption: ' + loggingID + '.' + encMethod)
    md.append('RSA padding: ' + RSA_PADDING)
    md.append('encrypted on date: ' + now)
    md.append('openssl version: ' + opensslVersion)
    if sys.platform in ['darwin']:
        OSXver, _, architecture = platform.mac_ver()
        platInfo = 'darwin ' + OSXver + ' ' + architecture
    elif sys.platform.startswith('linux'):
        platInfo = 'linux ' + platform.release()
    elif sys.platform in ['win32']:
        platInfo = 'windowsversion=' + repr(sys.getwindowsversion())
    else:
        platInfo = '[unknown]'
    md.append('platform: ' + platInfo)
    md.append('--- end of meta-data %s ---' % now)
    return '\n'.join(md)
Example #4
    def _buffer_decode_step(self, input, errors, final):
        # If this starts a CESU-8 surrogate sequence
        if input.startswith(b'\xed'):
            if len(input) < 6:
                if final:
                    # There aren't six bytes to decode.
                    return codecs.utf_8_decode(input, errors, final)
                else:
                    # Stream is not done yet
                    return u'', 0
            elif input[3] == 237 or (PY2 and input[3] == b"\xed"):
                if PY2:
                    bytenums = [ord(b) for b in input[:6]]
                else:
                    bytenums = input

                codepoint = (
                    ((bytenums[1] & 0x0f) << 16) +
                    ((bytenums[2] & 0x3f) << 10) +
                    ((bytenums[4] & 0x0f) << 6) +
                    (bytenums[5] & 0x3f) +
                    0x10000
                )
                return unichr(codepoint), 6

        # Fallback to UTF-8
        return codecs.utf_8_decode(input, errors, final)
Example #5
 def _buffer_decode(self, input, errors, final):
     if self.first and codecs.BOM_UTF8.startswith(input[:3]): # might be a BOM
         if len(input) < 3:
             # not enough data to decide if this really is a BOM
             # => try again on the next call
             return (u"", 0)
         (output, consumed) = codecs.utf_8_decode(input[3:], errors, final)
         self.first = False
         return (output, consumed+3)
     self.first = False
     return codecs.utf_8_decode(input, errors, final)
Example #6
 def decode(self, input, errors='strict'):
     if len(input) < 3:
         if codecs.BOM_UTF8.startswith(input):
             # not enough data to decide if this is a BOM
             # => try again on the next call
             return ('', 0)
     elif input[:3] == codecs.BOM_UTF8:
         self.decode = codecs.utf_8_decode
         (output, consumed) = codecs.utf_8_decode(input[3:], errors)
         return (output, consumed + 3)
     # (else) no BOM present
     self.decode = codecs.utf_8_decode
     return codecs.utf_8_decode(input, errors)
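Note the self-rebinding trick above: once a chunk long enough to settle the BOM question has been seen, the method replaces itself on the instance (self.decode = codecs.utf_8_decode), so every later call bypasses the BOM check entirely.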
Example #7
 def _buffer_decode(self, input, errors, final):
     if self.first:
         if len(input) < 3:
             if codecs.BOM_UTF8.startswith(input):
                 return ('', 0)
             self.first = 0
         else:
             self.first = 0
             if input[:3] == codecs.BOM_UTF8:
                 (output, consumed) = codecs.utf_8_decode(input[3:], errors, final)
                 return (output, consumed + 3)
     return codecs.utf_8_decode(input, errors, final)
Example #8
 def decode(self, input, errors='strict'):
     if len(input) < 3:
         if codecs.BOM_UTF8.startswith(input):
             # not enough data to decide if this is a BOM
             # => try again on the next call
             return (u"", 0)
     elif input[:3] == codecs.BOM_UTF8:
         self.decode = codecs.utf_8_decode
         (output, consumed) = codecs.utf_8_decode(input[3:], errors)
         return (output, consumed + 3)
     # (else) no BOM present
     self.decode = codecs.utf_8_decode
     return codecs.utf_8_decode(input, errors)
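A brief driver for the incremental handling above, using the stdlib utf-8-sig codec that these methods implement:

    import codecs
    dec = codecs.getincrementaldecoder('utf-8-sig')()
    assert dec.decode(b'\xef\xbb') == ''              # partial BOM: buffered, nothing emitted
    assert dec.decode(b'\xbfhi', final=True) == 'hi'  # BOM completed and stripped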
Example #9
        def internal_cp34951(sample1):
            self.assertEqual(codecs.utf_8_decode(sample1), (u'12\u20ac\x0a', 6))
            sample1 = sample1[:-1] # 12<euro>
            self.assertEqual(codecs.utf_8_decode(sample1), (u'12\u20ac', 5))
            sample1 = sample1[:-1] # 12<uncomplete euro>
            self.assertEqual(codecs.utf_8_decode(sample1), (u'12', 2))

            sample1 = sample1 + '\x7f' # makes it invalid
            try:
                r = codecs.utf_8_decode(sample1)
                self.assertTrue(False, "expected UnicodeDecodeError not raised")
            except Exception as e:
                self.assertEqual(type(e), UnicodeDecodeError)
Example #10
 def _buffer_decode(self, input, errors, final):
     if self.first:
         if len(input) < 3:
             if codecs.BOM_UTF8.startswith(input):
                 # not enough data to decide if this really is a BOM
                 # => try again on the next call
                 return (u"", 0)
             else:
                 self.first = None
         else:
             self.first = None
             if input[:3] == codecs.BOM_UTF8:
                 (output, consumed) = codecs.utf_8_decode(input[3:], errors, final)
                 return (output, consumed + 3)
     return codecs.utf_8_decode(input, errors, final)
Example #11
def verify_utf8(data):
    """Returns True if data is valid utf8, False otherwise"""
    try:
        # final=True so a truncated trailing multibyte sequence also fails
        codecs.utf_8_decode(data, 'strict', True)
        return True
    except UnicodeError:
        return False
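A quick check of the verifier (illustrative byte strings, assuming the final=True form above):

    assert verify_utf8(b'caf\xc3\xa9')   # well-formed UTF-8 for u'café'
    assert not verify_utf8(b'caf\xc3')   # truncated multibyte sequence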
Example #12
    def write(self, text, line_ending='\n', fg=None):
        if not isinstance(text, unicode):
            try:
                text = codecs.utf_8_decode(text)[0]
            except UnicodeDecodeError:
                # latin-1 never fails, so use it as the fallback
                text = codecs.latin_1_decode(text)[0]
        tags, text = parse_mirc.parse_mirc(text)

        if fg:
            if isinstance(fg, basestring):
                color = '#%s' % fg
            else:
                color = parse_mirc.get_mirc_color(fg)
            tags.append({'data': ("foreground", color), 'from': 0, 'to': len(text)})

        buffer = self.get_buffer()
        
        cc = buffer.get_char_count()

        buffer.insert_with_tags_by_name(
            buffer.get_end_iter(),
            text + line_ending,
            'indent'
            )

        for tag in tags:
            tag_name = str(tag['data'])
   
            if not tag_table.lookup(tag_name):
                buffer.create_tag(tag_name, **prop_to_gtk(self, tag['data']))

            buffer.apply_tag_by_name(
                tag_name, 
                buffer.get_iter_at_offset(tag['from'] + cc),
                buffer.get_iter_at_offset(tag['to'] + cc)
                )
Example #13
def percent_decode(path):

    import struct

    # This is not fast so avoid when we can.
    if '%' not in path:
        return path
    ranges = []
    for m in re.finditer(r'(%[0-9A-F]{2})', path):
        ranges.append((m.start(), m.end()))
    if not len(ranges):
        return path

    # Sorry! Correctness is more important than speed at the moment.
    # Should use a map + lambda eventually.
    result = b''
    skips = 0
    for i, c in enumerate(path):
        if skips > 0:
            skips -= 1
            continue
        c = c.encode('ascii')
        emit = c
        if c == b'%':
            for r in ranges:
                if i == r[0]:
                    emit = struct.pack(
                        "B", hex_octal_to_int(path[i+1])*16 + hex_octal_to_int(path[i+2]))
                    skips = 2
                    break
        if emit:
            result += emit
    return codecs.utf_8_decode(result)[0]
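A usage sketch (hex_octal_to_int is the module's own helper, assumed to map hex digit characters to ints):

    assert percent_decode('/a%20b') == '/a b'
    assert percent_decode('/plain') == '/plain'   # fast path, no '%'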
Example #14
def decode(input, errors='strict'):
    prefix = 0
    if input[:3] == codecs.BOM_UTF8:
        input = input[3:]
        prefix = 3
    (output, consumed) = codecs.utf_8_decode(input, errors, True)
    return (output, consumed + prefix)
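A minimal usage sketch for the BOM-stripping decode above (Python 3 bytes assumed):

    data = codecs.BOM_UTF8 + 'héllo'.encode('utf-8')
    text, consumed = decode(data)
    assert text == 'héllo'
    assert consumed == len(data)   # the 3 BOM bytes count as consumed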
Example #15
    def convert(self, source):
        """
        Convert markdown to serialized XHTML or HTML.

        Keyword arguments:

        * source: Source text as a Unicode string.

        """

        # Fixup the source text
        if not source.strip():
            return u""  # a blank unicode string
        try:
            source = unicode(source)
        except UnicodeDecodeError:
            message(CRITICAL, 'UnicodeDecodeError: Markdown only accepts unicode or ascii input.')
            return u""

        source = source.replace(STX, "").replace(ETX, "")
        source = source.replace("\r\n", "\n").replace("\r", "\n") + "\n\n"
        source = re.sub(r'\n\s+\n', '\n\n', source)
        source = source.expandtabs(TAB_LENGTH)

        # Split into lines and run the line preprocessors.
        self.lines = source.split("\n")
        for prep in self.preprocessors.values():
            self.lines = prep.run(self.lines)

        # Parse the high-level elements.
        root = self.parser.parseDocument(self.lines).getroot()

        # Run the tree-processors
        for treeprocessor in self.treeprocessors.values():
            newRoot = treeprocessor.run(root)
            if newRoot:
                root = newRoot

        # Serialize _properly_.  Strip top-level tags.
        output, length = codecs.utf_8_decode(self.serializer(root, encoding="utf-8"))
        if self.stripTopLevelTags:
            try:
                start = output.index('<%s>'%DOC_TAG)+len(DOC_TAG)+2
                end = output.rindex('</%s>'%DOC_TAG)
                output = output[start:end].strip()
            except ValueError:
                if output.strip().endswith('<%s />'%DOC_TAG):
                    # We have an empty document
                    output = ''
                else:
                    # We have a serious problem
                    message(CRITICAL, 'Failed to strip top level tags.')

        # Run the text post-processors
        for pp in self.postprocessors.values():
            output = pp.run(output)

        return output.strip()
Example #16
def summarize_space(space, level=0):
  prefix = "  " * level
  doc = bool(space.documentation)
  info = u"{}{}{}".format(prefix, "", codecs.utf_8_decode(repr(space)))
  print "{}{}".format(info.ljust(75), doc)
  if not hasattr(space, "members"):
    return
  for member in space.members:
    summarize_space(member, level+1)
Example #17
    def utf_8_decode(data, errors, finish = False):
        if not finish:
            # Trim a possibly incomplete multibyte character from the end:
            # count trailing continuation bytes (0x80-0xbf) ...
            count = 0
            while count < 5 and count < len(data) and '\x80' <= data[-count - 1] <= '\xbf':
                count += 1
            # ... and drop them together with their lead byte (0xc0-0xfd).
            if count < 5 and count < len(data) and '\xc0' <= data[-count - 1] <= '\xfd':
                data = data[:-count - 1]
        return codecs.utf_8_decode(data, errors)
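A hedged illustration of the trimming (Python 2 byte strings assumed): a chunk that ends mid-character has its dangling lead byte stripped before decoding, so only the complete prefix is reported as consumed:

    text, consumed = utf_8_decode('caf' + '\xc3', 'strict')   # '\xc3' opens a 2-byte sequence
    assert (text, consumed) == (u'caf', 3)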
Example #18
File: cesu8.py Project: SAP/PyHDB
    def _buffer_decode_step(self, input, errors, final):
        # If this starts a CESU-8 surrogate sequence
        if input.startswith(SURROGATE_IDENTICATOR_BYTE):
            if len(input) < 6:
                if not final:
                    # Stream is not done yet
                    return u'', 0

                # With fewer than six bytes this can't be a CESU-8 surrogate,
                # so treat it as a plain UTF-8 byte sequence
                return codecs.utf_8_decode(input, errors, final)

            if PY2:
                bytenums = [ord(b) for b in input[:6]]
            else:
                bytenums = input

            # Verify that the 6 bytes are in possible range of a CESU-8 surrogate
            if bytenums[1] >= 0xa0 and bytenums[1] <= 0xbf and \
               bytenums[2] >= 0x80 and bytenums[2] <= 0xbf and \
               bytenums[3] == SURROGATE_IDENTICATOR_INT and \
               bytenums[4] >= 0xb0 and bytenums[4] <= 0xbf and \
               bytenums[5] >= 0x80 and bytenums[5] <= 0xbf:

                codepoint = (
                    ((bytenums[1] & 0x0f) << 16) +
                    ((bytenums[2] & 0x3f) << 10) +
                    ((bytenums[4] & 0x0f) << 6) +
                    (bytenums[5] & 0x3f) +
                    0x10000
                )
                return unichr(codepoint), 6

            # No CESU-8 surrogate but probably a 3 byte UTF-8 sequence
            return codecs.utf_8_decode(input[:3], errors, final)

        cesu8_surrogate_start = input.find(SURROGATE_IDENTICATOR_BYTE)
        if cesu8_surrogate_start > 0:
            # Decode everything until start of cesu8 surrogate pair
            return codecs.utf_8_decode(input[:cesu8_surrogate_start], errors, final)

        # No sign of CESU-8 encoding
        return codecs.utf_8_decode(input, errors, final)
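A hedged sanity check of the surrogate arithmetic above (U+1F600 encodes in CESU-8 as the two 3-byte surrogate sequences ED A0 BD and ED B8 80; Python 3 byte indexing assumed, where bytenums[i] is an int):

    bytenums = b'\xed\xa0\xbd\xed\xb8\x80'
    codepoint = (
        ((bytenums[1] & 0x0f) << 16) +
        ((bytenums[2] & 0x3f) << 10) +
        ((bytenums[4] & 0x0f) << 6) +
        (bytenums[5] & 0x3f) +
        0x10000
    )
    assert codepoint == 0x1F600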
Example #19
 def utf_8_decode(data, errors, finish=False):
     if not finish:
         # We are trying to remove a possible incomplete multibyte character
         # from the suffix of the data.
         # The first byte of a multi-byte sequence is in the range 0xc0 to 0xfd.
         # All further bytes are in the range 0x80 to 0xbf.
         # UTF-8 encoded UCS characters may be up to six bytes long.
         count = 0
         while count < 5 and count < len(data) and "\x80" <= data[-count - 1] <= "\xBF":
             count += 1
         if count < 5 and count < len(data) and "\xC0" <= data[-count - 1] <= "\xFD":
             data = data[: -count - 1]
     return codecs.utf_8_decode(data, errors)
Example #20
    def add_msg_object(self, parent, path, name, obj, obj_type):
        label = name
        
        if hasattr(obj, '__slots__'):
            subobjs = [(slot, getattr(obj, slot)) for slot in obj.__slots__]
        elif type(obj) is list or type(obj) is tuple:
            subobjs = [('[%d]' % i, subobj) for (i, subobj) in enumerate(obj)]
        else:
            subobjs = []
            
            # Ignore any binary data
            obj_repr = codecs.utf_8_decode(str(obj), 'ignore')[0]
            
            # Truncate long representations
            if len(obj_repr) >= 50:
                obj_repr = obj_repr[:50] + '...'
            
            label += ': ' + obj_repr

        if parent is None:
            item = self.msg_tree.AddRoot(label)
        else:
            item = self.msg_tree.AppendItem(parent, label)

        self.msg_tree.SetItemFont(item, self.font)
        self.msg_tree.SetItemPyData(item, (path, obj_type))

        if self.msg_item_is_plottable(item):
            self.msg_tree.SetItemTextColour(item, wx.Colour(0, 0, 0))
        else:
            self.msg_tree.SetItemTextColour(item, wx.Colour(100, 100, 100))

        for subobj_name, subobj in subobjs:
            if subobj is None:
                continue
            
            if path == '':
                subpath = subobj_name                       # root field
            elif subobj_name.startswith('['):
                subpath = '%s%s' % (path, subobj_name)      # list, dict, or tuple
            else:
                subpath = '%s.%s' % (path, subobj_name)     # attribute (prefix with '.')

            if hasattr(subobj, '_type'):
                subobj_type = subobj._type
            else:
                subobj_type = type(subobj).__name__

            self.add_msg_object(item, subpath, subobj_name, subobj, subobj_type)
Example #21
    def test_transport(self):
        in_ = BytesIO(utf_8_encode('{"test_input":1}\n// END\n')[0])
        out = BytesIO()

        xp = TacoTransport(in_, out)

        xp.write({'test_output': 2})

        r = utf_8_decode(out.getvalue())[0]

        self.assertEqual(r, "{\"test_output\": 2}\n// END\n")

        r = xp.read()

        self.assertEqual(r, {'test_input': 1})
Example #22
def extractPDFText(pdf_filename):
    """Given a pdf file name, returns the text of that PDF as a
    unicode string.  Uses the 'pdftotext' utility."""
    # close() waits for pdftotext to finish writing the .txt file
    os.popen("%s %s" % (pdftotext, pdf_filename)).close()

    txtfile = pdf_filename + ".txt"
    txt = getTextFromFile(txtfile)
    try:
        utxt = codecs.utf_8_decode(txt, 'ignore')
        return utxt[0]
    except Exception, e:
        return unicode("Could not capture text from PDF document: %s" % str(e))
Example #23
def include(group):
    assert (group is not None)
    current = ops.env.get(ops.survey.EXCLUDE, addr='')
    if (current is None):
        current = []
    else:
        current = json.loads(current)
    if (str is type(group)):
        group = codecs.utf_8_decode(group)[0]
    if (group in current):
        current.remove(group)
        ops.env.set(ops.survey.EXCLUDE, json.dumps(current, ensure_ascii=False), addr='')
        EXCLUDED_GROUPS = current
        return True
    else:
        return False
Example #24
def countsyll(instring):
    """This function counts the number of characters in a tamil string
        This is done by ignoring the vowel additions. If one uses the len
        function, the sample string has a length of 17 - but there are actually
        only 11 characters"""
    s = codecs.utf_8_encode(instring)
    print s
    x = codecs.utf_8_decode(s[0])[0]    
    print repr(x)
    syllen = 0
    vowels = [u'\u0bbe',u'\u0bbf',u'\u0bc0',
                u'\u0bc1',u'\u0bc2',u'\u0bc6',
                u'\u0bc7',u'\u0bc8',u'\u0bca',
                u'\u0bcb',u'\u0bcc',u'\u0bcd',]
    for y in x:
        if y not in vowels:
            syllen += 1    
    return syllen
Example #25
def decode(input, errors='strict', *args):
    """Finds indent and add a colon on previous line"""
    u, l = codecs.utf_8_decode(input, errors, True)
    out = []
    offset = 0
    for line in u.split('\n'):
        if line.strip():
            indent = len(line) - len(line.lstrip())
            if indent > offset and out:
                # walk back over blank lines to the previous code line
                i = -1
                while not out[i].strip() and len(out) > -i:
                    i -= 1

                if out[i].rstrip() and out[i].rstrip()[-1] != ':':
                    out[i] += ':'
            offset = indent
        out.append(line)
    return '\n'.join(out), l
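A quick demonstration of the colon fix-up (bytes in, since the codec decodes before editing):

    out, length = decode(b'if x\n    y = 1\n')
    assert out == 'if x:\n    y = 1\n'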
Example #26
def internet_decode(input, errors='strict', final=False):
    """The core decoding function"""
    try:
        # First try utf-8. This should be the usual case by far.
        return codecs.utf_8_decode(input, errors, final)
    except UnicodeDecodeError:
        try:
            # If that fails, try windows-1252 (aka cp1252), which defines more characters than latin1,
            # but will fail for five particular bytes: 0x81, 0x8D, 0x8F, 0x90, 0x9D
            return codecs.charmap_decode(input, errors, encodings.cp1252.decoding_table)
        except UnicodeDecodeError:
            # and finally, try latin-1, which never fails, but defines 27 less characters than cp1252.
            return codecs.latin_1_decode(input, errors)
    except UnicodeEncodeError:
        # Was that thing already unicode? Then it's already decoded.
        if isinstance(input, unicode):
            return (input, len(input))
        else:
            raise
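A few illustrative calls (note final=True so that truncated UTF-8 actually raises and triggers the fallback; Python 2 register to match the `unicode` check above):

    assert internet_decode(b'caf\xc3\xa9', 'strict', True)[0] == u'caf\xe9'  # valid UTF-8
    assert internet_decode(b'caf\xe9', 'strict', True)[0] == u'caf\xe9'     # cp1252 fallback
    assert internet_decode(b'\x80', 'strict', True)[0] == u'\u20ac'         # cp1252 euro sign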
Example #27
    def convert (self, source):
        """Convert markdown to serialized XHTML."""

        # Fixup the source text
        if not source:
            return u""  # a blank unicode string
        try:
            source = unicode(source)
        except UnicodeDecodeError:
            message(CRITICAL, 'UnicodeDecodeError: Markdown only accepts unicode or ascii input.')
            return u""

        source = source.replace(STX, "").replace(ETX, "")
        source = source.replace("\r\n", "\n").replace("\r", "\n") + "\n\n"
        source = re.sub(r'\n\s+\n', '\n\n', source)
        source = source.expandtabs(TAB_LENGTH)

        # Split into lines and run the line preprocessors.
        self.lines = source.split("\n")
        for prep in self.preprocessors.values():
            self.lines = prep.run(self.lines)

        # Parse the high-level elements.
        root = self.parser.parseDocument(self.lines).getroot()

        # Run the tree-processors
        for treeprocessor in self.treeprocessors.values():
            newRoot = treeprocessor.run(root)
            if newRoot:
                root = newRoot

        # Serialize _properly_.  Strip top-level tags.
        xml, length = codecs.utf_8_decode(etree.tostring(root, encoding="utf8"))
        if self.stripTopLevelTags:
            # slice off the serialized '<?xml ...?>' header and the
            # top-level document tag wrapper
            xml = xml.strip()[44:-7] + "\n"

        # Run the text post-processors
        for pp in self.postprocessors.values():
            xml = pp.run(xml)

        return xml.strip()
Example #28
    def read(self):
        """Read a message from the input stream.

        The decoded message is returned as a data structure, or
        None is returned if nothing was read.
        """

        text = ''
        while True:
            line = self.in_.readline()
            line = utf_8_decode(line)[0]

            if line == '' or line.startswith('// END'):
                break

            text += line

        if text == '':
            return None

        return self.decoder.decode(text)
Example #29
def getDateStr(format="%Y_%b_%d_%H%M"):
    """Uses ``time.strftime()`` to generate a string of the form
    2012_Apr_19_1531 for 19th April 3.31pm, 2012.
    This is often useful appended to data filenames to provide unique names.
    With the default format, returns e.g. '2011_Mar_16_1307'; depending on
    locale, month names can contain unicode chars, so utf_8_decode them.
    For a date in the format of the current localization, do:
        data.getDateStr(format=locale.nl_langinfo(locale.D_T_FMT))
    """
    now = time.strftime(format, time.localtime())
    if PY3:
        return now
    else:
        try:
            now_decoded = codecs.utf_8_decode(now)[0]
        except UnicodeDecodeError:
            # '2011_03_16_1307'
            now_decoded = time.strftime("%Y_%m_%d_%H%M", time.localtime())

        return now_decoded
Example #31
def get_nice_result(result, sig):
    '''Convert a result that may be a java object into a string'''
    if result is None:
        return None
    env = get_env()
    if sig == 'Ljava/lang/String;':
        return codecs.utf_8_decode(env.get_string_utf(result), 'replace')[0]
    if sig == 'Ljava/lang/Integer;':
        return call(result, 'intValue', '()I')
    if sig == 'Ljava/lang/Long;':
        return call(result, 'longValue', '()J')
    if sig == 'Ljava/lang/Boolean;':
        return call(result, 'booleanValue', '()Z')
    if sig == '[B':
        # Convert a byte array into a numpy array
        return env.get_byte_array_elements(result)
    if isinstance(result, javabridge.JB_Object):
        #
        # Do longhand to prevent recursion
        #
        rklass = env.get_object_class(result)
        m = env.get_method_id(rklass, 'getClass', '()Ljava/lang/Class;')
        rclass = env.call_method(result, m)
        rkklass = env.get_object_class(rclass)
        m = env.get_method_id(rkklass, 'isPrimitive', '()Z')
        is_primitive = env.call_method(rclass, m)
        if is_primitive:
            rc = get_class_wrapper(rclass, True)
            classname = rc.getCanonicalName()
            if classname == 'boolean':
                return to_string(result) == 'true'
            elif classname in ('int', 'byte', 'short', 'long'):
                return int(to_string(result))
            elif classname in ('float', 'double'):
                return float(to_string(result))
            elif classname == 'char':
                return to_string(result)
    return result
Example #33
 def test_help(self):
     """
     validate /management_api/v1/help is correct
     """
     url = ''.join([get_base_URI(), '/management_api/v1/help'])
     with self.subTest(url=url):
         # check response
         result = requests.get(url, verify=False)
         self.assertEqual(result.status_code, 200, "HTTP GET returns Okay")
         # check header
         expected_header = 'Content-Type'
         self.assertIn(expected_header, result.headers)
         expected_content_type = 'text/html; charset=utf-8'
         self.assertEqual(result.headers[expected_header],
                          expected_content_type)
         expected_encoding = 'utf-8'
         self.assertEqual(result.encoding, expected_encoding)
         # check content
         self.assertIsInstance(result.content, bytes)
         response_text, decoded_length = codecs.utf_8_decode(result.content)
         expected_value = 'Management API'
         self.assertIn(expected_value, response_text)
         # check Privacy header
         self.util_check_p3p_header(result)
Example #34
    def convert(self, source):
        """
        Convert markdown to serialized XHTML or HTML.

        Keyword arguments:

        * source: Source text as a Unicode string.

        """

        # Fixup the source text
        if not source.strip():
            return ""  # a blank unicode string
        try:
            source = str(source)
        except UnicodeDecodeError:
            message(
                CRITICAL,
                'UnicodeDecodeError: Markdown only accepts unicode or ascii input.'
            )
            return ""

        source = source.replace(STX, "").replace(ETX, "")
        source = source.replace("\r\n", "\n").replace("\r", "\n") + "\n\n"
        source = re.sub(r'\n\s+\n', '\n\n', source)
        source = source.expandtabs(TAB_LENGTH)

        # Split into lines and run the line preprocessors.
        self.lines = source.split("\n")
        for prep in list(self.preprocessors.values()):
            self.lines = prep.run(self.lines)

        # Parse the high-level elements.
        root = self.parser.parseDocument(self.lines).getroot()

        # Run the tree-processors
        for treeprocessor in list(self.treeprocessors.values()):
            newRoot = treeprocessor.run(root)
            if newRoot:
                root = newRoot

        # Serialize _properly_.  Strip top-level tags.
        output, length = codecs.utf_8_decode(
            self.serializer(root, encoding="utf-8"))
        if self.stripTopLevelTags:
            try:
                start = output.index('<%s>' % DOC_TAG) + len(DOC_TAG) + 2
                end = output.rindex('</%s>' % DOC_TAG)
                output = output[start:end].strip()
            except ValueError:
                if output.strip().endswith('<%s />' % DOC_TAG):
                    # We have an empty document
                    output = ''
                else:
                    # We have a serious problem
                    message(CRITICAL, 'Failed to strip top level tags.')

        # Run the text post-processors
        for pp in list(self.postprocessors.values()):
            output = pp.run(output)

        return output.strip()
Example #35
def main():
    ap = argparse.ArgumentParser()
    ap.add_argument("--user", default='')
    args = ap.parse_args()

    # First connect to metaserver to get server address (which should be 192.168.5.1:8888)
    print("connecting to metaserver")
    metaserver = ("192.168.5.1", 8875)
    s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    s.connect(metaserver)
    server_url = codecs.utf_8_decode(s.recv(1024))[0]
    server = server_url.strip().split(":")
    server = server[0], int(server[1])
    print("metaserver says: " + repr(server))
    assert server == ('192.168.5.1', 8888), "metaserver address changed"

    # Next connect to server
    s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    s.connect(server)
    serv = ServerInteraction(s)

    # Login
    if args.user != '':
        uname = args.user
    else:
        uname = "user%d" % (random.getrandbits(8))
    serv.send(NAP_MKUSER, uname)
    c, _ = serv.recv()
    assert c == NAP_UNOK, "username check failed!"
    client_info = "nooopster-v0.0.0"
    data_port = 8080
    password = "".join(secrets.choice(string.ascii_letters) for i in range(8))
    serv.send(NAP_LOGIN,
              '%s %s %d "%s" 0' % (uname, password, data_port, client_info))
    c, _ = serv.recv()
    if c != NAP_LOGSUCCESS:
        print('PUBLIC: login failed')
        exit(1)
    assert c == NAP_LOGSUCCESS, "login failed"
    print("login ok")

    # Search nooopster's files
    serv.send(NAP_BROWSE, TARGET_USER)
    files = {}
    while True:
        c, m = serv.recv()
        if c == NAP_RBROWSE:
            peer_uname, fname, md5, size, bitrate, freq, time = shlex.split(m)
            files[fname] = (peer_uname, md5, size, bitrate, freq, time)
            # print("browse result: " + repr(m))
            continue
        elif c == NAP_DBROWSE:
            print("end of browse. client nick, IP: " + repr(m))
            break
        else:
            # Other message types are discarded
            pass

    if not (TARGET_FILE in files):
        print('PUBLIC: target file not found in users file list')
        exit(1)
    assert TARGET_FILE in files

    try:
        # Attempt to get file
        serv.send(NAP_DGET, '%s "%s"' % (TARGET_USER, TARGET_FILE))
        c, m = serv.recv_special([NAP_SGET, NAP_NGET])
        assert c == NAP_SGET, "get file failed"
        peer_uname, peer_ip, peer_port, fname, md5, linespeed = shlex.split(m)
        fsize = files[TARGET_FILE][2]  # string of filesize in decimal

        # Connect to peer to download file
        peer_ip_str = int_to_ip(int(peer_ip))
        peer_port = int(peer_port)
        print("server reports ip addr of user is: " + peer_ip_str)
        peer = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        peer.connect((peer_ip_str, peer_port))

        # Receive the '1' byte
        print("receiving ascii '1' byte")
        b = peer.recv(1)
        assert b == b"1"
        peer.send(b"GET")
        peer.send(('%s "%s" 0' % (uname, TARGET_FILE)).encode("utf-8"))

        # Receive file length string
        print("receiving filesize")
        data = recvall(peer, len(fsize)).decode("utf-8")
        assert fsize == data, "invalid file size, expected %s got %s" % (
            repr(fsize),
            repr(data),
        )
        data = recvall(peer, int(fsize))

        # Check md5sum of data
        digest = hashlib.md5(data).hexdigest()
        print("file hash: " + digest)
        assert digest == "cc852cef3cc4bbfc993ba055cca437fc", "invalid file hash!"
    except Exception:
        print('PUBLIC: download failed')
        exit(1)

    print('PUBLIC: ok')
Example #36
class AuthTktCookieHelper(object):
    """
    A helper class for use in third-party authentication policy
    implementations.  See
    :class:`pyramid.authentication.AuthTktAuthenticationPolicy` for the
    meanings of the constructor arguments.
    """
    parse_ticket = staticmethod(parse_ticket) # for tests
    AuthTicket = AuthTicket # for tests
    BadTicket = BadTicket # for tests
    now = None # for tests

    userid_type_decoders = {
        'int':int,
        'unicode':lambda x: utf_8_decode(x)[0], # bw compat for old cookies
        'b64unicode': lambda x: utf_8_decode(b64decode(x))[0],
        'b64str': lambda x: b64decode(x),
        }

    userid_type_encoders = {
        int: ('int', str),
        long: ('int', str),
        text_type: ('b64unicode', lambda x: b64encode(utf_8_encode(x)[0])),
        binary_type: ('b64str', lambda x: b64encode(x)),
        }

    def __init__(self, secret, cookie_name='auth_tkt', secure=False,
                 include_ip=False, timeout=None, reissue_time=None,
                 max_age=None, http_only=False, path="/", wild_domain=True,
                 hashalg='md5', parent_domain=False, domain=None):

        serializer = _SimpleSerializer()

        self.cookie_profile = CookieProfile(
            cookie_name=cookie_name,
            secure=secure,
            max_age=max_age,
            httponly=http_only,
            path=path,
            serializer=serializer
        )

        self.secret = secret
        self.cookie_name = cookie_name
        self.secure = secure
        self.include_ip = include_ip
        self.timeout = timeout if timeout is None else int(timeout)
        self.reissue_time = reissue_time if reissue_time is None else int(reissue_time)
        self.max_age = max_age if max_age is None else int(max_age)
        self.wild_domain = wild_domain
        self.parent_domain = parent_domain
        self.domain = domain
        self.hashalg = hashalg

    def _get_cookies(self, request, value, max_age=None):
        cur_domain = request.domain

        domains = []
        if self.domain:
            domains.append(self.domain)
        else:
            if self.parent_domain and cur_domain.count('.') > 1:
                domains.append('.' + cur_domain.split('.', 1)[1])
            else:
                domains.append(None)
                domains.append(cur_domain)
                if self.wild_domain:
                    domains.append('.' + cur_domain)

        profile = self.cookie_profile(request)

        kw = {}
        kw['domains'] = domains
        if max_age is not None:
            kw['max_age'] = max_age

        headers = profile.get_headers(value, **kw)
        return headers

    def identify(self, request):
        """ Return a dictionary with authentication information, or ``None``
        if no valid auth_tkt is attached to ``request``"""
        environ = request.environ
        cookie = request.cookies.get(self.cookie_name)

        if cookie is None:
            return None

        if self.include_ip:
            remote_addr = environ['REMOTE_ADDR']
        else:
            remote_addr = '0.0.0.0'

        try:
            timestamp, userid, tokens, user_data = self.parse_ticket(
                self.secret, cookie, remote_addr, self.hashalg)
        except self.BadTicket:
            return None

        now = self.now # service tests

        if now is None:
            now = time_mod.time()

        if self.timeout and ( (timestamp + self.timeout) < now ):
            # the auth_tkt data has expired
            return None

        userid_typename = 'userid_type:'
        user_data_info = user_data.split('|')
        for datum in filter(None, user_data_info):
            if datum.startswith(userid_typename):
                userid_type = datum[len(userid_typename):]
                decoder = self.userid_type_decoders.get(userid_type)
                if decoder:
                    userid = decoder(userid)

        reissue = self.reissue_time is not None

        if reissue and not hasattr(request, '_authtkt_reissued'):
            if ( (now - timestamp) > self.reissue_time ):
                # See https://github.com/Pylons/pyramid/issues#issue/108
                tokens = list(filter(None, tokens))
                headers = self.remember(request, userid, max_age=self.max_age,
                                        tokens=tokens)
                def reissue_authtkt(request, response):
                    if not hasattr(request, '_authtkt_reissue_revoked'):
                        for k, v in headers:
                            response.headerlist.append((k, v))
                request.add_response_callback(reissue_authtkt)
                request._authtkt_reissued = True

        environ['REMOTE_USER_TOKENS'] = tokens
        environ['REMOTE_USER_DATA'] = user_data
        environ['AUTH_TYPE'] = 'cookie'

        identity = {}
        identity['timestamp'] = timestamp
        identity['userid'] = userid
        identity['tokens'] = tokens
        identity['userdata'] = user_data
        return identity

    def forget(self, request):
        """ Return a set of expires Set-Cookie headers, which will destroy
        any existing auth_tkt cookie when attached to a response"""
        request._authtkt_reissue_revoked = True
        return self._get_cookies(request, None)

    def remember(self, request, userid, max_age=None, tokens=()):
        """ Return a set of Set-Cookie headers; when set into a response,
        these headers will represent a valid authentication ticket.

        ``max_age``
          The max age of the auth_tkt cookie, in seconds.  When this value is
          set, the cookie's ``Max-Age`` and ``Expires`` settings will be set,
          allowing the auth_tkt cookie to last between browser sessions.  If
          this value is ``None``, the ``max_age`` value provided to the
          helper itself will be used as the ``max_age`` value.  Default:
          ``None``.

        ``tokens``
          A sequence of strings that will be placed into the auth_tkt tokens
          field.  Each string in the sequence must be of the Python ``str``
          type and must match the regex ``^[A-Za-z][A-Za-z0-9+_-]*$``.
          Tokens are available in the returned identity when an auth_tkt is
          found in the request and unpacked.  Default: ``()``.
        """
        max_age = self.max_age if max_age is None else int(max_age)

        environ = request.environ

        if self.include_ip:
            remote_addr = environ['REMOTE_ADDR']
        else:
            remote_addr = '0.0.0.0'

        user_data = ''

        encoding_data = self.userid_type_encoders.get(type(userid))

        if encoding_data:
            encoding, encoder = encoding_data
        else:
            warnings.warn(
                "userid is of type {}, and is not supported by the "
                "AuthTktAuthenticationPolicy. Explicitly converting to string "
                "and storing as base64. Subsequent requests will receive a "
                "string as the userid, it will not be decoded back to the type "
                "provided.".format(type(userid)), RuntimeWarning
            )
            encoding, encoder = self.userid_type_encoders.get(text_type)
            userid = str(userid)

        userid = encoder(userid)
        user_data = 'userid_type:%s' % encoding

        new_tokens = []
        for token in tokens:
            if isinstance(token, text_type):
                try:
                    token = ascii_native_(token)
                except UnicodeEncodeError:
                    raise ValueError("Invalid token %r" % (token,))
            if not (isinstance(token, str) and VALID_TOKEN.match(token)):
                raise ValueError("Invalid token %r" % (token,))
            new_tokens.append(token)
        tokens = tuple(new_tokens)

        if hasattr(request, '_authtkt_reissued'):
            request._authtkt_reissue_revoked = True

        ticket = self.AuthTicket(
            self.secret,
            userid,
            remote_addr,
            tokens=tokens,
            user_data=user_data,
            cookie_name=self.cookie_name,
            secure=self.secure,
            hashalg=self.hashalg
            )

        cookie_value = ticket.cookie_value()
        return self._get_cookies(request, cookie_value, max_age)
Example #37
def _utf8(b: bytes) -> str:
    """Parse bytes we already know are utf-8."""
    return codecs.utf_8_decode(b, None, True)[0]
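For instance (valid input assumed, per the docstring; malformed bytes would raise UnicodeDecodeError because of the final=True flag):

    assert _utf8(b'caf\xc3\xa9') == 'café'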
Example #38
class AuthTktCookieHelper(object):
    """
    A helper class for security policies that obtains data from an "auth
    ticket" cookie.

    Constructor Arguments

    ``secret``

       The secret (a string) used for auth_tkt cookie signing.  This value
       should be unique across all values provided to Pyramid for various
       subsystem secrets (see :ref:`admonishment_against_secret_sharing`).
       Required.

    ``callback``

       Default: ``None``.  A callback passed the userid and the
       request, expected to return ``None`` if the userid doesn't
       exist or a sequence of principal identifiers (possibly empty) if
       the user does exist.  If ``callback`` is ``None``, the userid
       will be assumed to exist with no principals.  Optional.

    ``cookie_name``

       Default: ``auth_tkt``.  The cookie name used
       (string).  Optional.

    ``secure``

       Default: ``False``.  Only send the cookie back over a secure
       conn.  Optional.

    ``include_ip``

       Default: ``False``.  Make the requesting IP address part of
       the authentication data in the cookie.  Optional.

       For IPv6 this option is not recommended. The ``mod_auth_tkt``
       specification does not specify how to handle IPv6 addresses, so using
       this option in combination with IPv6 addresses may cause an
       incompatible cookie. It ties the authentication ticket to that
       individual's IPv6 address.

    ``timeout``

       Default: ``None``.  Maximum number of seconds which a newly
       issued ticket will be considered valid.  After this amount of
       time, the ticket will expire (effectively logging the user
       out).  If this value is ``None``, the ticket never expires.
       Optional.

    ``reissue_time``

       Default: ``None``.  If this parameter is set, it represents the number
       of seconds that must pass before an authentication token cookie is
       automatically reissued as the result of a request which requires
       authentication.  The duration is measured as the number of seconds
       since the last auth_tkt cookie was issued and 'now'.  If this value is
       ``0``, a new ticket cookie will be reissued on every request which
       requires authentication.

       A good rule of thumb: if you want auto-expired cookies based on
       inactivity: set the ``timeout`` value to 1200 (20 mins) and set the
       ``reissue_time`` value to perhaps a tenth of the ``timeout`` value
       (120 or 2 mins).  It's nonsensical to set the ``timeout`` value lower
       than the ``reissue_time`` value, as the ticket will never be reissued
       if so.  However, such a configuration is not explicitly prevented.

       Optional.

    ``max_age``

       Default: ``None``.  The max age of the auth_tkt cookie, in
       seconds.  This differs from ``timeout`` inasmuch as ``timeout``
       represents the lifetime of the ticket contained in the cookie,
       while this value represents the lifetime of the cookie itself.
       When this value is set, the cookie's ``Max-Age`` and
       ``Expires`` settings will be set, allowing the auth_tkt cookie
       to last between browser sessions.  It is typically nonsensical
       to set this to a value that is lower than ``timeout`` or
       ``reissue_time``, although it is not explicitly prevented.
       Optional.

    ``path``

       Default: ``/``. The path for which the auth_tkt cookie is valid.
       May be desirable if the application only serves part of a domain.
       Optional.

    ``http_only``

       Default: ``False``. Hide cookie from JavaScript by setting the
       HttpOnly flag. Not honored by all browsers.
       Optional.

    ``wild_domain``

       Default: ``True``. An auth_tkt cookie will be generated for the
       wildcard domain. If your site is hosted as ``example.com`` this
       will make the cookie available for sites underneath ``example.com``
       such as ``www.example.com``.
       Optional.

    ``parent_domain``

       Default: ``False``. An auth_tkt cookie will be generated for the
       parent domain of the current site. For example if your site is
       hosted under ``www.example.com`` a cookie will be generated for
       ``.example.com``. This can be useful if you have multiple sites
       sharing the same domain. This option supersedes the ``wild_domain``
       option.
       Optional.

    ``domain``

       Default: ``None``. If provided the auth_tkt cookie will only be
       set for this domain. This option is not compatible with ``wild_domain``
       and ``parent_domain``.
       Optional.

    ``hashalg``

       Default: ``sha512`` (the literal string).

       Any hash algorithm supported by Python's ``hashlib.new()`` function
       can be used as the ``hashalg``.

       Cookies generated by different instances of AuthTktAuthenticationPolicy
       using different ``hashalg`` options are not compatible. Switching the
       ``hashalg`` will imply that all existing users with a valid cookie will
       be required to re-login.

       Optional.

    ``debug``

        Default: ``False``.  If ``debug`` is ``True``, log messages to the
        Pyramid debug logger about the results of various authentication
        steps.  The output from debugging is useful for reporting to maillist
        or IRC channels when asking for support.

    ``samesite``

        Default: ``'Lax'``.  The 'samesite' option of the session cookie. Set
        the value to ``None`` to turn off the samesite option.

    """

    parse_ticket = staticmethod(parse_ticket)  # for tests
    AuthTicket = AuthTicket  # for tests
    BadTicket = BadTicket  # for tests
    now = None  # for tests

    userid_type_decoders = {
        'int': int,
        'unicode': lambda x: utf_8_decode(x)[0],  # bw compat for old cookies
        'b64unicode': lambda x: utf_8_decode(b64decode(x))[0],
        'b64str': lambda x: b64decode(x),
    }

    userid_type_encoders = {
        int: ('int', str),
        str: ('b64unicode', lambda x: b64encode(utf_8_encode(x)[0])),
        bytes: ('b64str', lambda x: b64encode(x)),
    }

    def __init__(
        self,
        secret,
        cookie_name='auth_tkt',
        secure=False,
        include_ip=False,
        timeout=None,
        reissue_time=None,
        max_age=None,
        http_only=False,
        path="/",
        wild_domain=True,
        hashalg='md5',
        parent_domain=False,
        domain=None,
        samesite='Lax',
    ):
        self.cookie_profile = CookieProfile(
            cookie_name=cookie_name,
            secure=secure,
            max_age=max_age,
            httponly=http_only,
            path=path,
            serializer=SimpleSerializer(),
            samesite=samesite,
        )

        self.secret = secret
        self.cookie_name = cookie_name
        self.secure = secure
        self.include_ip = include_ip
        self.timeout = timeout if timeout is None else int(timeout)
        self.reissue_time = (
            reissue_time if reissue_time is None else int(reissue_time)
        )
        self.max_age = max_age if max_age is None else int(max_age)
        self.wild_domain = wild_domain
        self.parent_domain = parent_domain
        self.domain = domain
        self.hashalg = hashalg

    def _get_cookies(self, request, value, max_age=None):
        cur_domain = request.domain

        domains = []
        if self.domain:
            domains.append(self.domain)
        else:
            if self.parent_domain and cur_domain.count('.') > 1:
                domains.append('.' + cur_domain.split('.', 1)[1])
            else:
                domains.append(None)
                domains.append(cur_domain)
                if self.wild_domain:
                    domains.append('.' + cur_domain)

        profile = self.cookie_profile(request)

        kw = {}
        kw['domains'] = domains
        if max_age is not None:
            kw['max_age'] = max_age

        headers = profile.get_headers(value, **kw)
        return headers

    def identify(self, request):
        """ Return a dictionary with authentication information, or ``None``
        if no valid auth_tkt is attached to ``request``"""
        environ = request.environ
        cookie = request.cookies.get(self.cookie_name)

        if cookie is None:
            return None

        if self.include_ip:
            remote_addr = environ['REMOTE_ADDR']
        else:
            remote_addr = '0.0.0.0'

        try:
            timestamp, userid, tokens, user_data = self.parse_ticket(
                self.secret, cookie, remote_addr, self.hashalg
            )
        except self.BadTicket:
            return None

        now = self.now  # service tests

        if now is None:
            now = time_mod.time()

        if self.timeout and ((timestamp + self.timeout) < now):
            # the auth_tkt data has expired
            return None

        userid_typename = 'userid_type:'
        user_data_info = user_data.split('|')
        for datum in filter(None, user_data_info):
            if datum.startswith(userid_typename):
                userid_type = datum[len(userid_typename) :]
                decoder = self.userid_type_decoders.get(userid_type)
                if decoder:
                    userid = decoder(userid)

        reissue = self.reissue_time is not None

        if reissue and not hasattr(request, '_authtkt_reissued'):
            if (now - timestamp) > self.reissue_time:
                # See https://github.com/Pylons/pyramid/issues#issue/108
                tokens = list(filter(None, tokens))
                headers = self.remember(
                    request, userid, max_age=self.max_age, tokens=tokens
                )

                def reissue_authtkt(request, response):
                    if not hasattr(request, '_authtkt_reissue_revoked'):
                        for k, v in headers:
                            response.headerlist.append((k, v))

                request.add_response_callback(reissue_authtkt)
                request._authtkt_reissued = True

        environ['REMOTE_USER_TOKENS'] = tokens
        environ['REMOTE_USER_DATA'] = user_data
        environ['AUTH_TYPE'] = 'cookie'

        identity = {}
        identity['timestamp'] = timestamp
        identity['userid'] = userid
        identity['tokens'] = tokens
        identity['userdata'] = user_data
        return identity

    def forget(self, request):
        """ Return a set of expires Set-Cookie headers, which will destroy
        any existing auth_tkt cookie when attached to a response"""
        request._authtkt_reissue_revoked = True
        return self._get_cookies(request, None)

    def remember(self, request, userid, max_age=None, tokens=()):
        """ Return a set of Set-Cookie headers; when set into a response,
        these headers will represent a valid authentication ticket.

        ``max_age``
          The max age of the auth_tkt cookie, in seconds.  When this value is
          set, the cookie's ``Max-Age`` and ``Expires`` settings will be set,
          allowing the auth_tkt cookie to last between browser sessions.  If
          this value is ``None``, the ``max_age`` value provided to the
          helper itself will be used as the ``max_age`` value.  Default:
          ``None``.

        ``tokens``
          A sequence of strings that will be placed into the auth_tkt tokens
          field.  Each string in the sequence must be of the Python ``str``
          type and must match the regex ``^[A-Za-z][A-Za-z0-9+_-]*$``.
          Tokens are available in the returned identity when an auth_tkt is
          found in the request and unpacked.  Default: ``()``.
        """
        max_age = self.max_age if max_age is None else int(max_age)

        environ = request.environ

        if self.include_ip:
            remote_addr = environ['REMOTE_ADDR']
        else:
            remote_addr = '0.0.0.0'

        user_data = ''

        encoding_data = self.userid_type_encoders.get(type(userid))

        if encoding_data:
            encoding, encoder = encoding_data
        else:
            warnings.warn(
                "userid is of type {}, and is not supported by the "
                "AuthTktAuthenticationPolicy. Explicitly converting to string "
                "and storing as base64. Subsequent requests will receive a "
                "string as the userid, it will not be decoded back to the "
                "type provided.".format(type(userid)),
                RuntimeWarning,
            )
            encoding, encoder = self.userid_type_encoders.get(str)
            userid = str(userid)

        userid = encoder(userid)
        user_data = 'userid_type:%s' % encoding

        new_tokens = []
        for token in tokens:
            if isinstance(token, str):
                try:
                    token = ascii_(token)
                except UnicodeEncodeError:
                    raise ValueError("Invalid token %r" % (token,))
            if not (isinstance(token, str) and VALID_TOKEN.match(token)):
                raise ValueError("Invalid token %r" % (token,))
            new_tokens.append(token)
        tokens = tuple(new_tokens)

        if hasattr(request, '_authtkt_reissued'):
            request._authtkt_reissue_revoked = True

        ticket = self.AuthTicket(
            self.secret,
            userid,
            remote_addr,
            tokens=tokens,
            user_data=user_data,
            cookie_name=self.cookie_name,
            secure=self.secure,
            hashalg=self.hashalg,
        )

        cookie_value = ticket.cookie_value()
        return self._get_cookies(request, cookie_value, max_age)
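For reference, the 'b64unicode' encoding that this helper applies to text userids round-trips the value through UTF-8 and base64. A minimal standalone sketch (the names encode_userid/decode_userid are illustrative; the lambdas mirror the encoder/decoder tables shown in Example n. 54 below):

import codecs
from base64 import b64decode, b64encode

encode_userid = lambda x: b64encode(codecs.utf_8_encode(x)[0])  # text -> base64 bytes
decode_userid = lambda x: codecs.utf_8_decode(b64decode(x))[0]  # base64 -> text

assert decode_userid(encode_userid(u'caf\xe9')) == u'caf\xe9'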
Example n. 39
0
class AuthTktCookiePlugin(object):

    userid_type_decoders = {
        'int': int,
        'unicode': lambda x: utf_8_decode(x)[0],
    }

    userid_type_encoders = {
        int: ('int', str),
    }
    try:
        userid_type_encoders[long] = ('int', str)
    except NameError:  #pragma NO COVER Python >= 3.0
        pass
    try:
        userid_type_encoders[unicode] = ('unicode',
                                         lambda x: utf_8_encode(x)[0])
    except NameError:  #pragma NO COVER Python >= 3.0
        pass

    def __init__(self,
                 secret,
                 cookie_name='auth_tkt',
                 secure=False,
                 include_ip=False,
                 timeout=None,
                 reissue_time=None,
                 userid_checker=None):
        self.secret = secret
        self.cookie_name = cookie_name
        self.include_ip = include_ip
        self.secure = secure
        if timeout and ((not reissue_time) or (reissue_time > timeout)):
            raise ValueError('When timeout is specified, reissue_time must '
                             'be set to a lower value')
        self.timeout = timeout
        self.reissue_time = reissue_time
        self.userid_checker = userid_checker

    # IIdentifier
    def identify(self, environ):
        cookies = get_cookies(environ)
        cookie = cookies.get(self.cookie_name)

        if cookie is None or not cookie.value:
            return None

        if self.include_ip:
            remote_addr = environ['REMOTE_ADDR']
        else:
            remote_addr = '0.0.0.0'

        try:
            timestamp, userid, tokens, user_data = auth_tkt.parse_ticket(
                self.secret, cookie.value, remote_addr)
        except auth_tkt.BadTicket:
            return None

        if self.timeout and ((timestamp + self.timeout) < time.time()):
            return None

        userid_typename = 'userid_type:'
        user_data_info = user_data.split('|')
        for datum in filter(None, user_data_info):
            if datum.startswith(userid_typename):
                userid_type = datum[len(userid_typename):]
                decoder = self.userid_type_decoders.get(userid_type)
                if decoder:
                    userid = decoder(userid)

        environ['REMOTE_USER_TOKENS'] = tokens
        environ['REMOTE_USER_DATA'] = user_data
        environ['AUTH_TYPE'] = 'cookie'

        identity = {}
        identity['timestamp'] = timestamp
        identity['repoze.who.plugins.auth_tkt.userid'] = userid
        identity['tokens'] = tokens
        identity['userdata'] = user_data
        return identity

    # IIdentifier
    def forget(self, environ, identity):
        # return a set of expires Set-Cookie headers
        return self._get_cookies(environ, 'INVALID', 0)

    # IIdentifier
    def remember(self, environ, identity):
        if self.include_ip:
            remote_addr = environ['REMOTE_ADDR']
        else:
            remote_addr = '0.0.0.0'

        cookies = get_cookies(environ)
        existing = cookies.get(self.cookie_name)
        old_cookie_value = getattr(existing, 'value', None)
        max_age = identity.get('max_age', None)

        timestamp, userid, tokens, userdata = None, '', (), ''

        if old_cookie_value:
            try:
                timestamp, userid, tokens, userdata = auth_tkt.parse_ticket(
                    self.secret, old_cookie_value, remote_addr)
            except auth_tkt.BadTicket:
                pass
        tokens = tuple(tokens)

        who_userid = identity['repoze.who.userid']
        who_tokens = tuple(identity.get('tokens', ()))
        who_userdata = identity.get('userdata', '')

        encoding_data = self.userid_type_encoders.get(type(who_userid))
        if encoding_data:
            encoding, encoder = encoding_data
            who_userid = encoder(who_userid)
            # XXX we are discarding the userdata passed in the identity?
            who_userdata = 'userid_type:%s' % encoding

        old_data = (userid, tokens, userdata)
        new_data = (who_userid, who_tokens, who_userdata)

        if old_data != new_data or (self.reissue_time and (
            (timestamp + self.reissue_time) < time.time())):
            ticket = auth_tkt.AuthTicket(self.secret,
                                         who_userid,
                                         remote_addr,
                                         tokens=who_tokens,
                                         user_data=who_userdata,
                                         cookie_name=self.cookie_name,
                                         secure=self.secure)
            new_cookie_value = ticket.cookie_value()

            if old_cookie_value != new_cookie_value:
                # return a set of Set-Cookie headers
                return self._get_cookies(environ, new_cookie_value, max_age)

    # IAuthenticator
    def authenticate(self, environ, identity):
        userid = identity.get('repoze.who.plugins.auth_tkt.userid')
        if userid is None:
            return None
        if self.userid_checker and not self.userid_checker(userid):
            return None
        identity['repoze.who.userid'] = userid
        return userid

    def _get_cookies(self, environ, value, max_age=None):
        if max_age is not None:
            max_age = int(max_age)
            later = _now() + datetime.timedelta(seconds=max_age)
            # Wdy, DD-Mon-YY HH:MM:SS GMT
            expires = later.strftime('%a, %d %b %Y %H:%M:%S GMT')
            # the Expires header is *required* at least for IE7 (IE7 does
            # not respect Max-Age)
            max_age = "; Max-Age=%s; Expires=%s" % (max_age, expires)
        else:
            max_age = ''

        secure = ''
        if self.secure:
            secure = '; secure; HttpOnly'

        cur_domain = environ.get('HTTP_HOST', environ.get('SERVER_NAME'))
        cur_domain = cur_domain.split(':')[0]  # drop port
        wild_domain = '.' + cur_domain
        cookies = [('Set-Cookie', '%s="%s"; Path=/%s%s' %
                    (self.cookie_name, value, max_age, secure)),
                   ('Set-Cookie', '%s="%s"; Path=/; Domain=%s%s%s' %
                    (self.cookie_name, value, cur_domain, max_age, secure)),
                   ('Set-Cookie', '%s="%s"; Path=/; Domain=%s%s%s' %
                    (self.cookie_name, value, wild_domain, max_age, secure))]
        return cookies

    def __repr__(self):
        return '<%s %s>' % (self.__class__.__name__, id(self))  #pragma NO COVERAGE
Example n. 40
0
def decode(input, errors='strict'):
    return codecs.utf_8_decode(input, errors, True)
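Note the third positional argument: codecs.utf_8_decode returns a (text, bytes_consumed) pair, and final=True turns a truncated multi-byte sequence at the end of the input into an error instead of leaving it pending. A quick sanity check:

import codecs

text, consumed = codecs.utf_8_decode(b'caf\xc3\xa9', 'strict', True)
assert text == u'caf\xe9' and consumed == 5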
Example n. 41
0
    def __str__(self):
        if self.is_null():
            return '<null>'
        slc = self.utf8()
        s, _ = codecs.utf_8_decode(slc)
        return s
Example n. 42
0
def decode_utf8(x):
    return codecs.utf_8_decode(x)[0]
Example n. 43
0
class AuthTktCookiePlugin(object):

    userid_typename = 'userid_type'
    userid_type_decoders = {'int': int,
                            'unicode': lambda x: utf_8_decode(x)[0],
                           }

    userid_type_encoders = {int: ('int', str),
                           }
    try:
        userid_type_encoders[long] = ('int', str)
    except NameError: #pragma NO COVER Python >= 3.0
        pass
    try:
        userid_type_encoders[unicode] = ('unicode',
                                         lambda x: utf_8_encode(x)[0])
    except NameError: #pragma NO COVER Python >= 3.0
        pass
 
    def __init__(self, secret, cookie_name='auth_tkt',
                 secure=False, include_ip=False,
                 timeout=None, reissue_time=None, userid_checker=None):
        self.secret = secret
        self.cookie_name = cookie_name
        self.include_ip = include_ip
        self.secure = secure
        if timeout and ( (not reissue_time) or (reissue_time > timeout) ):
            raise ValueError('When timeout is specified, reissue_time must '
                             'be set to a lower value')
        self.timeout = timeout
        self.reissue_time = reissue_time
        self.userid_checker = userid_checker

    # IIdentifier
    def identify(self, environ):
        logger.debug('identify: START')
        logger.debug('identify -- Environ: {0}'.format(environ))
        cookies = get_cookies(environ)
        logger.debug('identify -- Cookies: {0} '.format(cookies))
        logger.debug('identify -- Cookie Name: {0} '.format(self.cookie_name))
        cookie = cookies.get(self.cookie_name)
        logger.debug('identify -- Cookie: {0} '.format(cookie))

        if cookie is None or not cookie.value:
            return None

        if self.include_ip:
            remote_addr = environ['REMOTE_ADDR']
        else:
            remote_addr = '0.0.0.0'
        
        try:
            timestamp, userid, tokens, user_data = auth_tkt.parse_ticket(
                self.secret, cookie.value, remote_addr)
        except auth_tkt.BadTicket:
            return None

        if self.timeout and ( (timestamp + self.timeout) < time.time() ):
            return None

        user_data_dict = dict(parse_qsl(user_data))
        userid_type = user_data_dict.get(self.userid_typename)
        if userid_type:
            decoder = self.userid_type_decoders.get(userid_type)
            if decoder:
                userid = decoder(userid)
            
        environ['REMOTE_USER_TOKENS'] = tokens
        environ['REMOTE_USER_DATA'] = user_data
        environ['AUTH_TYPE'] = 'cookie'

        identity = {}
        identity['timestamp'] = timestamp
        identity['repoze.who.plugins.auth_tkt.userid'] = userid
        identity['tokens'] = tokens
        identity['userdata'] = user_data_dict
        return identity

    # IIdentifier
    def forget(self, environ, identity):
        logger.debug('forget: Start')
        # return a set of expires Set-Cookie headers
        return self._get_cookies(environ, 'INVALID', 0)
    
    # IIdentifier
    def remember(self, environ, identity):
        logger.debug('remember: START')
        if self.include_ip:
            remote_addr = environ['REMOTE_ADDR']
        else:
            remote_addr = '0.0.0.0'

        cookies = get_cookies(environ)
        logger.debug('remember -- cookies: %s' % (cookies))
        old_cookie = cookies.get(self.cookie_name)
        logger.debug('remember -- old_cookie: %s' % (old_cookie))
        existing = cookies.get(self.cookie_name)
        logger.debug('remember -- existing cookies: %s' % (existing))
        old_cookie_value = getattr(existing, 'value', None)
        logger.debug('remember -- old_cookie_value: %s' % (old_cookie_value))
        max_age = identity.get('max_age', None)
        logger.debug('remember -- max_age: %s' % (max_age))

        timestamp, userid, tokens, userdata = None, '', (), ''

        if old_cookie_value:
            try:
                timestamp, userid, tokens, userdata = auth_tkt.parse_ticket(
                    self.secret, old_cookie_value, remote_addr)
            except auth_tkt.BadTicket:
                pass
        tokens = tuple(tokens)

        who_userid = identity['repoze.who.userid']
        logger.debug('remember -- who_userid: {0}'.format(who_userid))
        who_tokens = tuple(identity.get('tokens', ()))
        logger.debug('remember -- who_tokens: {0}'.format(who_tokens))
        who_userdata_dict = identity.get('userdata', {})
        logger.debug('remember -- who_userdata_dict: {0}'.format(who_userdata_dict))

        encoding_data = self.userid_type_encoders.get(type(who_userid))
        if encoding_data:
            logger.debug('remember -- Encoding Data')
            encoding, encoder = encoding_data
            who_userid = encoder(who_userid)
            who_userdata_dict[self.userid_typename] = encoding

        who_userdata = urlencode(who_userdata_dict)

        old_data = (userid, tokens, userdata)
        new_data = (who_userid, who_tokens, who_userdata)

        logger.debug('remember -- old_data: {0}'.format(old_data))
        logger.debug('remember -- new_data: {0}'.format(new_data))

        if old_data != new_data or (self.reissue_time and
                ( (timestamp + self.reissue_time) < time.time() )):
            ticket = auth_tkt.AuthTicket(
                self.secret,
                who_userid,
                remote_addr,
                tokens=who_tokens,
                user_data=who_userdata,
                cookie_name=self.cookie_name,
                secure=self.secure)
            new_cookie_value = ticket.cookie_value()

            logger.debug('remember -- old_cookie_value: {0}'.format(old_cookie_value))
            logger.debug('remember -- new_cookie_value: {0}'.format(new_cookie_value))
            if old_cookie_value != new_cookie_value:
                # return a set of Set-Cookie headers
                return self._get_cookies(environ, new_cookie_value, max_age)

    # IAuthenticator
    def authenticate(self, environ, identity):
        logger.debug('authenticate: Start')
        userid = identity.get('repoze.who.plugins.auth_tkt.userid')
        logger.debug('authenticate -- UserID : %s' % userid)
        if userid is None:
            return None
        if self.userid_checker and not self.userid_checker(userid):
            return None
        identity['repoze.who.userid'] = userid
        return userid

    def _get_cookies(self, environ, value, max_age=None):
        logger.debug('_get_cookies: Start')
        if max_age is not None:
            max_age = int(max_age)
            later = _utcnow() + datetime.timedelta(seconds=max_age)
            # Wdy, DD-Mon-YY HH:MM:SS GMT
            expires = "%s, %02d %3s %4d %02d:%02d:%02d GMT" % (
                _weekdayname[later.weekday()],
                later.day,
                _monthname[later.month],
                later.year,
                later.hour,
                later.minute,
                later.second,
            )
            # the Expires header is *required* at least for IE7 (IE7 does
            # not respect Max-Age)
            max_age = "; Max-Age=%s; Expires=%s" % (max_age, expires)
        else:
            max_age = ''

        secure = ''
        if self.secure:
            secure = '; secure; HttpOnly'

        cur_domain = environ.get('HTTP_HOST', environ.get('SERVER_NAME'))
        cur_domain = cur_domain.split(':')[0] # drop port
        wild_domain = '.' + cur_domain
        cookies = [
            ('Set-Cookie', '%s="%s"; Path=/%s%s' % (
            self.cookie_name, value, max_age, secure)),
            ('Set-Cookie', '%s="%s"; Path=/; Domain=%s%s%s' % (
            self.cookie_name, value, cur_domain, max_age, secure)),
            ('Set-Cookie', '%s="%s"; Path=/; Domain=%s%s%s' % (
            self.cookie_name, value, wild_domain, max_age, secure))
            ]
        return cookies

    def __repr__(self):
        return '<%s %s>' % (self.__class__.__name__,
                            id(self)) #pragma NO COVERAGE
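This variant stores userdata as a query string, so the urlencode/parse_qsl pair used in remember() and identify() round-trips it cleanly. A small sketch, assuming the Python 3 urllib.parse names:

from urllib.parse import parse_qsl, urlencode

user_data = urlencode({'userid_type': 'unicode'})
assert dict(parse_qsl(user_data)) == {'userid_type': 'unicode'}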
Example n. 44
0
    def convert(self, source):
        """
        Convert markdown to serialized XHTML or HTML.

        Keyword arguments:

        * source: Source text as a Unicode string.

        Markdown processing takes place in five steps:

        1. A bunch of "preprocessors" munge the input text.
        2. BlockParser() parses the high-level structural elements of the
           pre-processed text into an ElementTree.
        3. A bunch of "treeprocessors" are run against the ElementTree. One 
           such treeprocessor runs InlinePatterns against the ElementTree, 
           detecting inline markup.
        4. Some post-processors are run against the text after the ElementTree 
           has been serialized into text.
        5. The output is written to a string.

        """

        # Fixup the source text
        if not source.strip():
            return u""  # a blank unicode string
        try:
            source = unicode(source)
        except UnicodeDecodeError:
            message(
                CRITICAL,
                'UnicodeDecodeError: Markdown only accepts unicode or ascii input.'
            )
            return u""

        source = source.replace(util.STX, "").replace(util.ETX, "")
        source = source.replace("\r\n", "\n").replace("\r", "\n") + "\n\n"
        source = re.sub(r'\n\s+\n', '\n\n', source)
        source = source.expandtabs(self.tab_length)

        # Split into lines and run the line preprocessors.
        self.lines = source.split("\n")
        for prep in self.preprocessors.values():
            self.lines = prep.run(self.lines)

        # Parse the high-level elements.
        root = self.parser.parseDocument(self.lines).getroot()

        # Run the tree-processors
        for treeprocessor in self.treeprocessors.values():
            newRoot = treeprocessor.run(root)
            if newRoot:
                root = newRoot

        # Serialize _properly_.  Strip top-level tags.
        output, length = codecs.utf_8_decode(
            self.serializer(root, encoding="utf-8"))
        if self.stripTopLevelTags:
            try:
                start = output.index('<%s>' % self.doc_tag) + len(
                    self.doc_tag) + 2
                end = output.rindex('</%s>' % self.doc_tag)
                output = output[start:end].strip()
            except ValueError:
                if output.strip().endswith('<%s />' % self.doc_tag):
                    # We have an empty document
                    output = ''
                else:
                    # We have a serious problem
                    message(CRITICAL, 'Failed to strip top level tags.')

        # Run the text post-processors
        for pp in self.postprocessors.values():
            output = pp.run(output)

        return output.strip()
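Step 5 above hinges on the serializer emitting UTF-8 bytes, which codecs.utf_8_decode turns back into text before the post-processors run. Here is that step in isolation, with ElementTree standing in for the Markdown serializer (an illustrative sketch, not the library's own code):

import codecs
from xml.etree.ElementTree import Element, tostring

root = Element('p')
root.text = u'na\xefve'
xml_bytes = tostring(root, encoding='utf-8')     # serialized UTF-8 bytes
output, length = codecs.utf_8_decode(xml_bytes)  # back to text
assert u'na\xefve' in output and length == len(xml_bytes)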
Example n. 45
0
def decode(input, errors="strict"):
    return codecs.utf_8_decode(input, errors)
Example n. 46
0
def main():

    parser = argparse.ArgumentParser(description='dump output from workbooks')
    parser.add_argument('--server', '-s', required=True, help='server address')
    parser.add_argument('--site', '-S', default='')
    parser.add_argument('--project',
                        required=True,
                        default=None,
                        help='project in which to search workbooks')
    parser.add_argument('--username',
                        '-u',
                        help='username to sign into server')
    parser.add_argument('-p', '--password', default=None)
    parser.add_argument('--filepath',
                        '-f',
                        required=True,
                        help='filepath to save the image(s) returned')
    parser.add_argument('--refresh',
                        '-r',
                        action='store_true',
                        help='refresh the workbook before extracting data')

    parser.add_argument('--logging-level',
                        '-l',
                        choices=['debug', 'info', 'error'],
                        default='error',
                        help='desired logging level (set to error by default)')

    parser.add_argument(
        'workbook',
        help='one or more workbooks to process, "all" means all workbooks',
        nargs='+')

    args = parser.parse_args()

    if args.password is None:
        password = getpass.getpass("Password: ")
    else:
        password = args.password

    logging_level = getattr(logging, args.logging_level.upper())
    logging.basicConfig(level=logging_level)

    # Step 1: Sign in to server
    tableau_auth = TSC.TableauAuth(args.username, password, site_id=args.site)
    server = TSC.Server(args.server)

    with server.auth.sign_in(tableau_auth):
        server.use_server_version()
        # Step 2: Query for the workbook that we want data of
        for wb in TSC.Pager(server.workbooks):
            if (args.workbook[0] == "all" or wb.name
                    in args.workbook) and wb.project_name == args.project:
                server.workbooks.populate_views(wb)
                if args.refresh:
                    try:
                        server.workbooks.refresh(wb.id)
                    except Exception as e:
                        logging.error(
                            "workbook[{0}]: refresh failed [{1}]".format(
                                wb.name, e))
                        filename = os.path.join(
                            args.filepath, urllib.parse.quote(wb.name, ' '))
                        write_failed_file(filename)
                        continue
                for view in wb.views:
                    # Step 3: Query the CSV endpoint and save the data to the specified location
                    server.views.populate_csv(view)
                    filename = os.path.join(
                        args.filepath, urllib.parse.quote(wb.name, ' '),
                        urllib.parse.quote(view.name, ' ')) + ".csv"
                    try:
                        os.makedirs(os.path.dirname(filename), exist_ok=True)
                        with open(filename, "w", newline='',
                                  encoding='utf-8') as csv_file:
                            writer = csv.writer(csv_file, delimiter=';')
                            data = b''
                            for chunk in view.csv:
                                data += chunk
                            data, size = codecs.utf_8_decode(data)
                            reader = csv.reader(data.splitlines())
                            writer.writerows(reader)
                            csv_file.close()
                        logging.info(
                            "workbook[{0}], view[{1}]: CSV saved to [{2}]".
                            format(wb.name, view.name, filename))
                    except Exception as e:
                        logging.error(
                            "workbook[{0}], view[{1}]: CSV could not be retrieved [{2}]"
                            .format(wb.name, view.name, e))
                        write_failed_file(filename)
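The CSV download above joins all chunks into one buffer before decoding because a multi-byte character can straddle a chunk boundary. An incremental decoder handles the same situation without buffering everything; a hypothetical alternative, not part of the sample:

import codecs

decoder = codecs.getincrementaldecoder('utf-8')()
parts = [b'caf\xc3', b'\xa9']        # u'caf\xe9' split mid-character
text = ''.join(decoder.decode(part) for part in parts)
text += decoder.decode(b'', True)    # final flush; raises if bytes are still pending
assert text == u'caf\xe9'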
Example n. 47
0
import codecs

def u(x):
    # Produces a unicode string from a UTF-8 encoded byte string
    return codecs.utf_8_decode(x)[0]
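For example, applied to the UTF-8 bytes of u'caf\xe9':

assert u(b'caf\xc3\xa9') == u'caf\xe9'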
Example n. 48
0
    else:
        labels = [_('Daily'), _('Weekly'), _('Monthly'), _('Yearly')]
    mapping = dict(zip(['daily', 'weekly', 'monthly', 'yearly'], labels))
    return mapping.get(recurrence.unit,
                       recurrence.amount) % dict(frequency=recurrence.amount)


def budget(aBudget):
    ''' render budget (of type date.TimeDelta) as
    "<hours>:<minutes>:<seconds>". '''
    return timeSpent(aBudget)


try:
    dateFormat = '%x'  # Apparently, this may produce invalid utf-8 so test
    codecs.utf_8_decode(datemodule.Now().strftime(dateFormat))
except UnicodeDecodeError:
    dateFormat = '%Y-%m-%d'
timeFormat = '%H:%M'  # Alas, %X includes seconds
dateTimeFormat = ' '.join([dateFormat, timeFormat])


def date(date):
    ''' render a date (of type date.Date) '''
    if str(date) == '':
        return ''
    return date.strftime(dateFormat)


def dateTime(dateTime):
    if not dateTime or dateTime == datemodule.DateTime():
Example n. 49
0
    lastmaxid = maxid

    writelast(Journal, lastsync, lastmaxid)

    if Username == Journal:
        print "Fetching userpics for: %s" % Username
        f = open("%s/userpics.xml" % Username, "w")
        print >>f, """<?xml version="1.0"?>"""
        print >>f, "<userpics>"
        for p in userpics:
            print >>f, """<userpic keyword="%s" url="%s" />""" % (p, userpics[p])
            pic = urllib2.urlopen(userpics[p])
            ext = MimeExtensions.get(pic.info()["Content-Type"], "")
            picfn = re.sub(r'[*?\\/:<>"|]', "_", p)
            try:
                picfn = codecs.utf_8_decode(picfn)[0]
                picf = open("%s/%s%s" % (Username, picfn, ext), "wb")
            except:
                # for installations where the above utf_8_decode doesn't work
                picfn = "".join([ord(x) < 128 and x or "_" for x in picfn])
                picf = open("%s/%s%s" % (Username, picfn, ext), "wb")
            shutil.copyfileobj(pic, picf)
            pic.close()
            picf.close()
        print >>f, "</userpics>"
        f.close()

    if origlastsync:
        print "%d new entries, %d new comments (since %s)" % (newentries, newcomments, origlastsync)
    else:
        print "%d new entries, %d new comments" % (newentries, newcomments)
Example n. 50
0
def render_abstract(outfile, abstract, start_page=None):
    """ Writes the LaTeX string corresponding to one abstract.
    """
    if start_page is not None:
        outfile.write(r"""
\setcounter{page}{%i}
""" % start_page)
    else:
        if hasattr(abstract, 'start_page'):
            start_page = abstract.start_page
        else:
            start_page = 1
    if not abstract.authors:
        author_list = abstract.owners
    else:
        author_list = abstract.authors
    authors = []
    for author in author_list:
        # If the author has no surname, he is not an author
        if author.surname:
            if author.email_address:
                email = r'(\email{%s})' % author.email_address
            else:
                email = ''
            authors.append(
                ur'''\otherauthors{
                            %s %s
                            %s --
                            \address{%s, %s \sc{%s}}
                            }''' %
                (author.first_names, author.surname, email, author.institution,
                 author.address, author.country))
    if authors:
        authors = u'\n'.join(authors)
        authors += r'\addauthorstoc{%s}' % ', '.join(
            '%s. %s' % (author.first_names[0], author.surname)
            for author in author_list)
        author_cite_list = [
            '%s. %s' % (a.first_names[0], a.surname) for a in author_list
        ]
        if len(author_cite_list) > 4:
            author_cite_list = author_cite_list[:3]
            author_cite_list.append('et al.')
        citation = ', '.join(author_cite_list) + \
            ' in Proc. %s %s, %s (Eds) ' \
            % (conf, year, editors)
        copyright = '\\copyright %s, %s' % (year, ', '.join(author_cite_list))
    else:
        authors = ''
        citation = 'Citation'
        copyright = 'Copyright'
    if hasattr(abstract, 'num_pages'):
        citation += 'pp. %i--%i' % (start_page,
                                    start_page + abstract.num_pages)
    else:
        citation += 'p. %i' % start_page
    if hasattr(abstract, 'number'):
        abstract.url = 'http://conference.scipy.org/proceedings/%s/paper_%i' \
        % (conf_name, abstract.number)
        url = r'\url{%s}' % abstract.url
    else:
        url = ''
    paper_text = abstract.paper_text
    if paper_text == '':
        paper_text = abstract.summary
    # XXX: It doesn't seem to be right to be doing this, but I get a
    # nasty UnicodeDecodeError on some rare abstracts, elsewhere.
    paper_text = codecs.utf_8_decode(
        hack_include_graphics(rst2latex(paper_text)))[0]
    paper_abstract = abstract.paper_abstract
    if paper_abstract is None:
        paper_abstract = ''
    if not paper_abstract == '':
        paper_abstract = ur'\begin{abstract}%s\end{abstract}' % \
                    paper_abstract#.encode('utf-8')
    abstract_dict = {
        'text': paper_text.encode('utf-8'),
        'abstract': paper_abstract.encode('utf-8'),
        'authors': authors.encode('utf-8'),
        'title': abstract.title.encode('utf-8'),
        'citation': citation.encode('utf-8'),
        'copyright': copyright.encode('utf-8'),
        'url': url.encode('utf-8'),
    }
    outfile.write(
        codecs.utf_8_decode(ur'''
\phantomsection
\hypertarget{chapter}{} 
\vspace*{-2em}

\resetheadings{%(title)s}{%(citation)s}{%(url)s}{%(copyright)s}
\title{%(title)s}

\begin{minipage}{\linewidth}
%(authors)s
\end{minipage}

\noindent\rule{\linewidth}{0.2ex}
\vspace*{-0.5ex}
\twocolumngrid
%(abstract)s

\sloppy

%(text)s

\fussy
\onecolumngrid
\smallskip
\vfill
\filbreak
\clearpage

'''.encode('utf-8') % abstract_dict)[0])
Example n. 51
0
def parsePyFuncDoc(doc, fallbackCallSig=None, scope="?", funcname="?"):
    """Parse the given Python function/method doc-string into call-signature
    and description bits.

        "doc" is the function doc string.
        "fallbackCallSig" (optional) is a list of call signature lines to
            fallback to if one cannot be determined from the doc string.
        "scope" (optional) is the module/class parent scope name. This
            is just used for better error/log reporting.
        "funcname" (optional) is the function name. This is just used for
            better error/log reporting.

    Examples of doc strings with call-signature info:
        close(): explicitly release resources held.
        x.__repr__() <==> repr(x)
        read([s]) -- Read s characters, or the rest of the string
        recv(buffersize[, flags]) -> data
        replace (str, old, new[, maxsplit]) -> string
        class StringIO([buffer])

    Returns a 2-tuple: (<call-signature-lines>, <description-lines>)
    """
    if doc is None or not doc.strip():
        return ([], [])

    limit = LINE_LIMIT
    if not isinstance(doc, str):
        # try to convert from utf8 to unicode; if we fail, too bad.
        try:
            doc = codecs.utf_8_decode(doc)[0]
        except UnicodeDecodeError:
            pass
    doclines = doc.splitlines(0)
    index = 0
    siglines = []
    desclines = []

    # Skip leading blank lines.
    while index < len(doclines):
        if doclines[index].strip():
            break
        index += 1

    # Parse out the call signature block, if it looks like there is one.
    if index >= len(doclines):
        match = None
    else:
        first = doclines[index].strip()
        match = _gPySigLinePat.match(first)
    if match:
        # The 'doc' looks like it starts with a call signature block.
        for i, line in enumerate(doclines[index:]):
            if len(siglines) >= limit:
                index = i
                break
            stripped = line.strip()
            if not stripped:
                index = i
                break
            match = _gPySigLinePat.match(stripped)
            if not match:
                index = i
                break
            # Now parse off what may be description content on the same line.
            #   ":", "-" or "--" separator: tail is description
            #   "-->" or "->" separator: tail if part of call sig
            #   "<==>" separator: tail if part of call sig
            #   other separtor: leave as part of call sig for now
            descSeps = ("-", "--", ":")
            groupd = match.groupdict()
            retval, head, sep, tail = (
                groupd.get("retval"), groupd.get("head"),
                groupd.get("sep"), groupd.get("tail"))
            if retval:
                siglines.append(head + " -> " + retval)
                if tail and sep in descSeps:
                    desclines.append(tail)
            elif tail and sep in descSeps:
                siglines.append(head)
                desclines.append(tail)
            else:
                siglines.append(stripped)
        else:
            index = len(doclines)
    if not siglines and fallbackCallSig:
        siglines = fallbackCallSig

    # Parse out the description block.
    if desclines:
        # Use what we have already. Just need to wrap it.
        desclines = textwrap.wrap(' '.join(desclines), LINE_WIDTH)
    else:
        doclines = doclines[index:]
        # strip leading empty lines
        while len(doclines) > 0 and not doclines[0].rstrip():
            del doclines[0]
        try:
            skip_first_line = (doclines[0][0] not in (" \t"))
        except IndexError:
            skip_first_line = False  # no lines, or first line is empty
        desclines = dedent("\n".join(
            doclines), skip_first_line=skip_first_line)
        desclines = desclines.splitlines(0)

    ## debug logging
    # f = open("parsePyFuncDoc.log", "a")
    # if 0:
    #    f.write("\n---- %s:\n" % funcname)
    #    f.write(pformat(siglines)+"\n")
    #    f.write(pformat(desclines)+"\n")
    # else:
    #    f.write("\n")
    #    if siglines:
    #        f.write("\n".join(siglines)+"\n")
    #    else:
    #        f.write("<no signature for '%s.%s'>\n" % (scope, funcname))
    #    for descline in desclines:
    #        f.write("\t%s\n" % descline)
    # f.close()

    return (siglines, desclines)
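The bytes-to-text fallback at the top of this function is handy on its own: decode if the input is valid UTF-8, otherwise keep it unchanged. A self-contained sketch of the same pattern (the helper name to_text is illustrative):

import codecs

def to_text(doc):
    if not isinstance(doc, str):
        try:
            doc = codecs.utf_8_decode(doc)[0]
        except UnicodeDecodeError:
            pass
    return doc

assert to_text(b'recv(buffersize[, flags]) -> data') == 'recv(buffersize[, flags]) -> data'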
Example n. 52
0
    def test_utf_8_decode(self):
        # sanity
        new_str, num_processed = codecs.utf_8_decode(b"abc")
        self.assertEqual(new_str, 'abc')
        self.assertEqual(num_processed, 3)
Example n. 53
0
    def _add_msg_object(self, parent, path, name, obj, obj_type):
        label = name

        if hasattr(obj, '__slots__'):
            subobjs = [(slot, getattr(obj, slot)) for slot in obj.__slots__]
        elif type(obj) in [list, tuple]:
            len_obj = len(obj)
            if len_obj == 0:
                subobjs = []
            else:
                w = int(math.ceil(math.log10(len_obj)))
                subobjs = [('[%*d]' % (w, i), subobj)
                           for (i, subobj) in enumerate(obj)]
        else:
            subobjs = []

        if type(obj) in [int, long, float]:
            if type(obj) == float:
                obj_repr = '%.6f' % obj
            else:
                obj_repr = str(obj)

            if obj_repr[0] == '-':
                label += ': %s' % obj_repr
            else:
                label += ':  %s' % obj_repr

        elif type(obj) in [str, bool, int, long, float, complex, rospy.Time]:
            # Ignore any binary data
            obj_repr = codecs.utf_8_decode(str(obj), 'ignore')[0]

            # Truncate long representations
            if len(obj_repr) >= 50:
                obj_repr = obj_repr[:50] + '...'

            label += ': ' + obj_repr
        item = QTreeWidgetItem([label])
        if name == 'msg':
            pass
        elif path.find('.') == -1 and path.find('[') == -1:
            self.addTopLevelItem(item)
        else:
            parent.addChild(item)
        item.setData(0, Qt.UserRole, (path, obj_type))

        for subobj_name, subobj in subobjs:
            if subobj is None:
                continue

            if path == '':
                subpath = subobj_name  # root field
            elif subobj_name.startswith('['):
                subpath = '%s%s' % (path, subobj_name)  # list, dict, or tuple
            else:
                subpath = '%s.%s' % (path, subobj_name)  # attribute (prefix with '.')

            if hasattr(subobj, '_type'):
                subobj_type = subobj._type
            else:
                subobj_type = type(subobj).__name__

            self._add_msg_object(item, subpath, subobj_name, subobj,
                                 subobj_type)
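The errors='ignore' mode used above drops undecodable bytes instead of raising UnicodeDecodeError; a quick check:

import codecs

text, consumed = codecs.utf_8_decode(b'ok\xffok', 'ignore')
assert text == u'okok' and consumed == 5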
Example n. 54
0
class AuthTktCookieHelper(object):
    """
    A helper class for use in third-party authentication policy
    implementations.  See
    :class:`pyramid.authentication.AuthTktAuthenticationPolicy` for the
    meanings of the constructor arguments.
    """
    parse_ticket = staticmethod(parse_ticket)  # for tests
    AuthTicket = AuthTicket  # for tests
    BadTicket = BadTicket  # for tests
    now = None  # for tests

    userid_type_decoders = {
        'int': int,
        'unicode': lambda x: utf_8_decode(x)[0],  # bw compat for old cookies
        'b64unicode': lambda x: utf_8_decode(b64decode(x))[0],
        'b64str': lambda x: b64decode(x),
    }

    userid_type_encoders = {
        int: ('int', str),
        long: ('int', str),
        text_type: ('b64unicode', lambda x: b64encode(utf_8_encode(x)[0])),
        binary_type: ('b64str', lambda x: b64encode(x)),
    }

    def __init__(self,
                 secret,
                 cookie_name='auth_tkt',
                 secure=False,
                 include_ip=False,
                 timeout=None,
                 reissue_time=None,
                 max_age=None,
                 http_only=False,
                 path="/",
                 wild_domain=True):
        self.secret = secret
        self.cookie_name = cookie_name
        self.include_ip = include_ip
        self.secure = secure
        self.timeout = timeout
        self.reissue_time = reissue_time
        self.max_age = max_age
        self.http_only = http_only
        self.path = path
        self.wild_domain = wild_domain

        static_flags = []
        if self.secure:
            static_flags.append('; Secure')
        if self.http_only:
            static_flags.append('; HttpOnly')
        self.static_flags = "".join(static_flags)

    def _get_cookies(self, environ, value, max_age=None):
        if max_age is EXPIRE:
            max_age = "; Max-Age=0; Expires=Wed, 31-Dec-97 23:59:59 GMT"
        elif max_age is not None:
            later = datetime.datetime.utcnow() + datetime.timedelta(
                seconds=int(max_age))
            # Wdy, DD-Mon-YY HH:MM:SS GMT
            expires = later.strftime('%a, %d %b %Y %H:%M:%S GMT')
            # the Expires header is *required* at least for IE7 (IE7 does
            # not respect Max-Age)
            max_age = "; Max-Age=%s; Expires=%s" % (max_age, expires)
        else:
            max_age = ''

        cur_domain = environ.get('HTTP_HOST', environ.get('SERVER_NAME'))

        # While Chrome, IE, and Firefox can cope, Opera (at least) cannot
        # cope with a port number in the cookie domain when the URL it
        # receives the cookie from does not also have that port number in it
        # (e.g via a proxy).  In the meantime, HTTP_HOST is sent with port
        # number, and neither Firefox nor Chrome do anything with the
        # information when it's provided in a cookie domain except strip it
        # out.  So we strip out any port number from the cookie domain
        # aggressively to avoid problems.  See also
        # https://github.com/Pylons/pyramid/issues/131
        if ':' in cur_domain:
            cur_domain = cur_domain.split(':', 1)[0]

        cookies = [
            ('Set-Cookie', '%s="%s"; Path=%s%s%s' %
             (self.cookie_name, value, self.path, max_age, self.static_flags)),
            ('Set-Cookie', '%s="%s"; Path=%s; Domain=%s%s%s' %
             (self.cookie_name, value, self.path, cur_domain, max_age,
              self.static_flags)),
        ]

        if self.wild_domain:
            wild_domain = '.' + cur_domain
            cookies.append(('Set-Cookie', '%s="%s"; Path=%s; Domain=%s%s%s' %
                            (self.cookie_name, value, self.path, wild_domain,
                             max_age, self.static_flags)))

        return cookies

    def identify(self, request):
        """ Return a dictionary with authentication information, or ``None``
        if no valid auth_tkt is attached to ``request``"""
        environ = request.environ
        cookie = request.cookies.get(self.cookie_name)

        if cookie is None:
            return None

        if self.include_ip:
            remote_addr = environ['REMOTE_ADDR']
        else:
            remote_addr = '0.0.0.0'

        try:
            timestamp, userid, tokens, user_data = self.parse_ticket(
                self.secret, cookie, remote_addr)
        except self.BadTicket:
            return None

        now = self.now  # service tests

        if now is None:
            now = time_mod.time()

        if self.timeout and ((timestamp + self.timeout) < now):
            # the auth_tkt data has expired
            return None

        userid_typename = 'userid_type:'
        user_data_info = user_data.split('|')
        for datum in filter(None, user_data_info):
            if datum.startswith(userid_typename):
                userid_type = datum[len(userid_typename):]
                decoder = self.userid_type_decoders.get(userid_type)
                if decoder:
                    userid = decoder(userid)

        reissue = self.reissue_time is not None

        if reissue and not hasattr(request, '_authtkt_reissued'):
            if ((now - timestamp) > self.reissue_time):
                # work around https://github.com/Pylons/pyramid/issues#issue/108
                tokens = list(filter(None, tokens))
                headers = self.remember(request,
                                        userid,
                                        max_age=self.max_age,
                                        tokens=tokens)

                def reissue_authtkt(request, response):
                    if not hasattr(request, '_authtkt_reissue_revoked'):
                        for k, v in headers:
                            response.headerlist.append((k, v))

                request.add_response_callback(reissue_authtkt)
                request._authtkt_reissued = True

        environ['REMOTE_USER_TOKENS'] = tokens
        environ['REMOTE_USER_DATA'] = user_data
        environ['AUTH_TYPE'] = 'cookie'

        identity = {}
        identity['timestamp'] = timestamp
        identity['userid'] = userid
        identity['tokens'] = tokens
        identity['userdata'] = user_data
        return identity

    def forget(self, request):
        """ Return a set of expires Set-Cookie headers, which will destroy
        any existing auth_tkt cookie when attached to a response"""
        environ = request.environ
        request._authtkt_reissue_revoked = True
        return self._get_cookies(environ, '', max_age=EXPIRE)

    def remember(self, request, userid, max_age=None, tokens=()):
        """ Return a set of Set-Cookie headers; when set into a response,
        these headers will represent a valid authentication ticket.

        ``max_age``
          The max age of the auth_tkt cookie, in seconds.  When this value is
          set, the cookie's ``Max-Age`` and ``Expires`` settings will be set,
          allowing the auth_tkt cookie to last between browser sessions.  If
          this value is ``None``, the ``max_age`` value provided to the
          helper itself will be used as the ``max_age`` value.  Default:
          ``None``.

        ``tokens``
          A sequence of strings that will be placed into the auth_tkt tokens
          field.  Each string in the sequence must be of the Python ``str``
          type and must match the regex ``^[A-Za-z][A-Za-z0-9+_-]*$``.
          Tokens are available in the returned identity when an auth_tkt is
          found in the request and unpacked.  Default: ``()``.
        """
        if max_age is None:
            max_age = self.max_age

        environ = request.environ

        if self.include_ip:
            remote_addr = environ['REMOTE_ADDR']
        else:
            remote_addr = '0.0.0.0'

        user_data = ''

        encoding_data = self.userid_type_encoders.get(type(userid))

        if encoding_data:
            encoding, encoder = encoding_data
            userid = encoder(userid)
            user_data = 'userid_type:%s' % encoding

        new_tokens = []
        for token in tokens:
            if isinstance(token, text_type):
                try:
                    token = ascii_native_(token)
                except UnicodeEncodeError:
                    raise ValueError("Invalid token %r" % (token, ))
            if not (isinstance(token, str) and VALID_TOKEN.match(token)):
                raise ValueError("Invalid token %r" % (token, ))
            new_tokens.append(token)
        tokens = tuple(new_tokens)

        if hasattr(request, '_authtkt_reissued'):
            request._authtkt_reissue_revoked = True

        ticket = self.AuthTicket(self.secret,
                                 userid,
                                 remote_addr,
                                 tokens=tokens,
                                 user_data=user_data,
                                 cookie_name=self.cookie_name,
                                 secure=self.secure)

        cookie_value = ticket.cookie_value()
        return self._get_cookies(environ, cookie_value, max_age)
Example n. 55
0
    def test_utf_8_decode(self):
        # sanity
        new_str, size = codecs.utf_8_decode("abc")
        self.assertEqual(new_str, u'abc')
        self.assertEqual(size, 3)
Example n. 56
0
def fromiter(iterable, chunksize=1024, maskmissing=True, references=False):
    if references:
        raise NotImplementedError  # keep all ids in a hashtable to create pointers (IndexedArray)

    tobytes = lambda x: x.tobytes()
    tostring = lambda x: codecs.utf_8_decode(x.tobytes())[0]

    def insert(obj, chunks, offsets, newchunk, ismine, promote, fillobj):
        if len(chunks) == 0 or offsets[-1] - offsets[-2] == len(chunks[-1]):
            chunks.append(newchunk(obj))
            offsets.append(offsets[-1])

        if ismine(obj, chunks[-1]):
            chunks[-1] = promote(obj, chunks[-1])
            fillobj(obj, chunks[-1], offsets[-1] - offsets[-2])
            offsets[-1] += 1

        elif isinstance(chunks[-1], IndexedMaskedArray) and len(
                chunks[-1]._content) == 0:
            chunks[-1]._content = newchunk(obj)

            nextindex = chunks[-1]._nextindex
            chunks[-1]._nextindex += 1
            chunks[-1]._index[offsets[-1] - offsets[-2]] = nextindex

            chunks[-1]._content = promote(obj, chunks[-1]._content)
            fillobj(obj, chunks[-1]._content, nextindex)
            offsets[-1] += 1

        elif isinstance(chunks[-1], IndexedMaskedArray) and ismine(
                obj, chunks[-1]._content):
            nextindex = chunks[-1]._nextindex
            chunks[-1]._nextindex += 1
            chunks[-1]._index[offsets[-1] - offsets[-2]] = nextindex

            chunks[-1]._content = promote(obj, chunks[-1]._content)
            fillobj(obj, chunks[-1]._content, nextindex)
            offsets[-1] += 1

        elif isinstance(chunks[-1], UnionArray) and any(
                isinstance(content, IndexedMaskedArray)
                and ismine(obj, content._content)
                for content in chunks[-1]._contents):
            for tag in range(len(chunks[-1]._contents)):
                if isinstance(chunks[-1]._contents[tag],
                              IndexedMaskedArray) and ismine(
                                  obj, chunks[-1]._contents[tag]._content):
                    nextindex_union = chunks[-1]._nextindex[tag]
                    chunks[-1]._nextindex[tag] += 1

                    nextindex_mask = chunks[-1]._contents[tag]._nextindex
                    chunks[-1]._contents[tag]._nextindex += 1
                    chunks[-1]._contents[tag]._index[
                        nextindex_union] = nextindex_mask

                    chunks[-1]._contents[tag]._content = promote(
                        obj, chunks[-1]._contents[tag]._content)
                    fillobj(obj, chunks[-1]._contents[tag]._content,
                            nextindex_mask)

                    chunks[-1]._tags[offsets[-1] - offsets[-2]] = tag
                    chunks[-1]._index[offsets[-1] -
                                      offsets[-2]] = nextindex_union

                    offsets[-1] += 1
                    break

        else:
            if not isinstance(chunks[-1], UnionArray):
                chunks[-1] = UnionArray(
                    numpy.empty(
                        chunksize,
                        dtype=awkward.array.base.AwkwardArray.INDEXTYPE),
                    numpy.empty(
                        chunksize,
                        dtype=awkward.array.base.AwkwardArray.INDEXTYPE),
                    [chunks[-1]])
                chunks[-1]._nextindex = [offsets[-1] - offsets[-2]]
                chunks[-1]._tags[:offsets[-1] - offsets[-2]] = 0
                chunks[-1]._index[:offsets[-1] - offsets[-2]] = numpy.arange(
                    offsets[-1] - offsets[-2],
                    dtype=awkward.array.base.AwkwardArray.INDEXTYPE)
                chunks[-1]._contents = list(chunks[-1]._contents)

            if not any(
                    ismine(obj, content) for content in chunks[-1]._contents):
                chunks[-1]._nextindex.append(0)
                chunks[-1]._contents.append(newchunk(obj))

            for tag in range(len(chunks[-1]._contents)):
                if ismine(obj, chunks[-1]._contents[tag]):
                    nextindex = chunks[-1]._nextindex[tag]
                    chunks[-1]._nextindex[tag] += 1

                    chunks[-1]._contents[tag] = promote(
                        obj, chunks[-1]._contents[tag])
                    fillobj(obj, chunks[-1]._contents[tag], nextindex)

                    chunks[-1]._tags[offsets[-1] - offsets[-2]] = tag
                    chunks[-1]._index[offsets[-1] - offsets[-2]] = nextindex

                    offsets[-1] += 1
                    break

    def fill(obj, chunks, offsets):
        if obj is None:
            # anything with None -> IndexedMaskedArray

            if len(chunks) == 0 or offsets[-1] - offsets[-2] == len(
                    chunks[-1]):
                chunks.append(
                    IndexedMaskedArray(
                        numpy.empty(
                            chunksize,
                            dtype=awkward.array.base.AwkwardArray.INDEXTYPE),
                        []))
                chunks[-1]._nextindex = 0
                offsets.append(offsets[-1])

            if isinstance(chunks[-1], UnionArray) and any(
                    isinstance(content, IndexedMaskedArray)
                    for content in chunks[-1]._contents):
                for tag in range(len(chunks[-1]._contents)):
                    if isinstance(chunks[-1]._contents[tag],
                                  IndexedMaskedArray):
                        nextindex = chunks[-1]._nextindex[tag]
                        chunks[-1]._nextindex[tag] += 1

                        chunks[-1]._contents[tag]._index[nextindex] = chunks[
                            -1]._contents[tag]._maskedwhen

                        chunks[-1]._tags[offsets[-1] - offsets[-2]] = tag
                        chunks[-1]._index[offsets[-1] -
                                          offsets[-2]] = nextindex

                        offsets[-1] += 1
                        break

            else:
                if not isinstance(chunks[-1], IndexedMaskedArray):
                    chunks[-1] = IndexedMaskedArray(
                        numpy.empty(
                            chunksize,
                            dtype=awkward.array.base.AwkwardArray.INDEXTYPE),
                        chunks[-1])
                    chunks[-1]._index[:offsets[-1] -
                                      offsets[-2]] = numpy.arange(
                                          offsets[-1] - offsets[-2],
                                          dtype=awkward.array.base.
                                          AwkwardArray.INDEXTYPE)
                    chunks[-1]._nextindex = offsets[-1] - offsets[-2]

                chunks[-1]._index[offsets[-1] -
                                  offsets[-2]] = chunks[-1]._maskedwhen
                offsets[-1] += 1

        elif isinstance(obj, (bool, numpy.bool, numpy.bool_)):
            # bool -> Numpy bool_

            def newchunk(obj):
                return numpy.empty(chunksize, dtype=numpy.bool_)

            def ismine(obj, x):
                return isinstance(x, numpy.ndarray) and x.dtype == numpy.dtype(
                    numpy.bool_)

            def promote(obj, x):
                return x

            def fillobj(obj, array, where):
                array[where] = obj

            insert(obj, chunks, offsets, newchunk, ismine, promote, fillobj)

        elif isinstance(obj, (numbers.Integral, numpy.integer)):
            # int -> Numpy int64, float64, or complex128 (promotes to largest)

            def newchunk(obj):
                return numpy.empty(chunksize, dtype=numpy.int64)

            def ismine(obj, x):
                return isinstance(x, numpy.ndarray) and issubclass(
                    x.dtype.type, numpy.number)

            def promote(obj, x):
                return x

            def fillobj(obj, array, where):
                array[where] = obj

            insert(obj, chunks, offsets, newchunk, ismine, promote, fillobj)

        elif isinstance(obj, (numbers.Real, numpy.floating)):
            # float -> Numpy int64, float64, or complex128 (promotes to largest)

            def newchunk(obj):
                return numpy.empty(chunksize, dtype=numpy.int64)

            def ismine(obj, x):
                return isinstance(x, numpy.ndarray) and issubclass(
                    x.dtype.type, numpy.number)

            def promote(obj, x):
                if issubclass(x.dtype.type, numpy.floating):
                    return x
                else:
                    return x.astype(numpy.float64)

            def fillobj(obj, array, where):
                array[where] = obj

            insert(obj, chunks, offsets, newchunk, ismine, promote, fillobj)

        elif isinstance(
                obj, (numbers.Complex, numpy.complex, numpy.complexfloating)):
            # complex -> Numpy int64, float64, or complex128 (promotes to largest)

            def newchunk(obj):
                return numpy.empty(chunksize, dtype=numpy.complex128)

            def ismine(obj, x):
                return isinstance(x, numpy.ndarray) and issubclass(
                    x.dtype.type, numpy.number)

            def promote(obj, x):
                if issubclass(x.dtype.type, numpy.complexfloating):
                    return x
                else:
                    return x.astype(numpy.complex128)

            def fillobj(obj, array, where):
                array[where] = obj

            insert(obj, chunks, offsets, newchunk, ismine, promote, fillobj)

        elif isinstance(obj, bytes):
            # bytes -> ObjectArray of JaggedArray

            def newchunk(obj):
                out = ObjectArray(
                    tobytes,
                    JaggedArray.fromoffsets(
                        numpy.zeros(
                            chunksize + 1,
                            dtype=awkward.array.base.AwkwardArray.INDEXTYPE),
                        AppendableArray.empty(lambda: numpy.empty(
                            chunksize,
                            dtype=awkward.array.base.AwkwardArray.CHARTYPE))))
                out._content._starts[0] = 0
                return out

            def ismine(obj, x):
                return isinstance(
                    x, ObjectArray) and (x._generator is tobytes
                                         or x._generator is tostring)

            def promote(obj, x):
                return x

            def fillobj(obj, array, where):
                array._content._stops[
                    where] = array._content._starts[where] + len(obj)
                array._content._content.extend(
                    numpy.fromstring(
                        obj, dtype=awkward.array.base.AwkwardArray.CHARTYPE))

            insert(obj, chunks, offsets, newchunk, ismine, promote, fillobj)

        elif isinstance(obj, awkward.util.string):
            # str -> ObjectArray of JaggedArray

            def newchunk(obj):
                out = ObjectArray(
                    tostring,
                    JaggedArray.fromoffsets(
                        numpy.zeros(
                            chunksize + 1,
                            dtype=awkward.array.base.AwkwardArray.INDEXTYPE),
                        AppendableArray.empty(lambda: numpy.empty(
                            chunksize,
                            dtype=awkward.array.base.AwkwardArray.CHARTYPE))))
                out._content._starts[0] = 0
                return out

            def ismine(obj, x):
                return isinstance(
                    x, ObjectArray) and (x._generator is tobytes
                                         or x._generator is tostring)

            def promote(obj, x):
                if x._generator is tostring:
                    return x
                else:
                    return ObjectArray(tostring, x._content)

            def fillobj(obj, array, where):
                # avoid shadowing the builtin `bytes`
                encoded = codecs.utf_8_encode(obj)[0]
                array._content._stops[
                    where] = array._content._starts[where] + len(encoded)
                array._content._content.extend(
                    numpy.frombuffer(
                        encoded,
                        dtype=awkward.array.base.AwkwardArray.CHARTYPE))

            insert(obj, chunks, offsets, newchunk, ismine, promote, fillobj)

        elif isinstance(obj, dict):
            # dict keys -> Table columns

            def newchunk(obj):
                return Table(chunksize,
                             collections.OrderedDict((n, []) for n in obj))

            if maskmissing:

                def ismine(obj, x):
                    return isinstance(x, Table)

                def promote(obj, x):
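                    # a key seen for the first time becomes a new masked
                    # column in which every row filled so far is missing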
                    for n in obj:
                        if n not in x._content:
                            x._content[n] = IndexedMaskedArray(
                                numpy.empty(chunksize,
                                            dtype=awkward.array.base.
                                            AwkwardArray.INDEXTYPE), [])
                            x._content[n]._index[:offsets[-1] -
                                                 offsets[-2]] = x._content[
                                                     n]._maskedwhen
                            x._content[n]._nextindex = 0
                    return x

            else:

                def ismine(obj, x):
                    return isinstance(x, Table) and all(n in x._content
                                                        for n in obj)

                def promote(obj, x):
                    return x

            def fillobj(obj, array, where):
                for n in obj:
                    if len(array._content[n]) == 0:
                        subchunks = []
                        suboffsets = [offsets[-2]]
                    else:
                        subchunks = [array._content[n]]
                        suboffsets = [offsets[-2], offsets[-1]]

                    fill(obj[n], subchunks, suboffsets)
                    array._content[n] = subchunks[-1]

            insert(obj, chunks, offsets, newchunk, ismine, promote, fillobj)

        elif isinstance(obj, tuple):
            # tuple items -> Table columns

            def newchunk(obj):
                return Table(
                    chunksize,
                    collections.OrderedDict(
                        ("_" + str(i), []) for i in range(len(obj))))

            def ismine(obj, x):
                return isinstance(x, Table) and list(
                    x._content) == ["_" + str(i) for i in range(len(obj))]

            def promote(obj, x):
                return x

            def fillobj(obj, array, where):
                for i, x in enumerate(obj):
                    n = "_" + str(i)
                    if len(array._content[n]) == 0:
                        subchunks = []
                        suboffsets = [offsets[-2]]
                    else:
                        subchunks = [array._content[n]]
                        suboffsets = [offsets[-2], offsets[-1]]

                    fill(x, subchunks, suboffsets)
                    array._content[n] = subchunks[-1]

            insert(obj, chunks, offsets, newchunk, ismine, promote, fillobj)

        else:
            try:
                it = iter(obj)

            except TypeError:
                # object attributes -> Table columns

                def newchunk(obj):
                    return NamedTable(
                        chunksize, obj.__class__.__name__,
                        collections.OrderedDict((n, []) for n in dir(obj)
                                                if not n.startswith("_")))

                if maskmissing:

                    def ismine(obj, x):
                        return isinstance(
                            x,
                            NamedTable) and obj.__class__.__name__ == x._name

                    def promote(obj, x):
                        for n in dir(obj):
                            if not n.startswith("_") and n not in x._content:
                                x._content[n] = IndexedMaskedArray(
                                    numpy.empty(chunksize,
                                                dtype=awkward.array.base.
                                                AwkwardArray.INDEXTYPE), [])
                                x._content[n]._index[:offsets[-1] -
                                                     offsets[-2]] = x._content[
                                                         n]._maskedwhen
                                x._content[n]._nextindex = 0
                        return x

                else:

                    def ismine(obj, x):
                        return isinstance(
                            x, NamedTable
                        ) and obj.__class__.__name__ == x._name and all(
                            n in x._content
                            for n in dir(obj) if not n.startswith("_"))

                    def promote(obj, x):
                        return x

                def fillobj(obj, array, where):
                    for n in dir(obj):
                        if not n.startswith("_"):
                            if len(array._content[n]) == 0:
                                subchunks = []
                                suboffsets = [offsets[-2]]
                            else:
                                subchunks = [array._content[n]]
                                suboffsets = [offsets[-2], offsets[-1]]

                            fill(getattr(obj, n), subchunks, suboffsets)
                            array._content[n] = subchunks[-1]

                insert(obj, chunks, offsets, newchunk, ismine, promote,
                       fillobj)

            else:
                # iterable -> JaggedArray (and recurse)

                def newchunk(obj):
                    out = JaggedArray.fromoffsets(
                        numpy.zeros(
                            chunksize + 1,
                            dtype=awkward.array.base.AwkwardArray.INDEXTYPE),
                        PartitionedArray([0], []))
                    out._starts[0] = 0
                    out._content._offsets = [0]  # an appendable list, not a Numpy array
                    return out

                def ismine(obj, x):
                    return isinstance(x, JaggedArray)

                def promote(obj, x):
                    return x

                def fillobj(obj, array, where):
                    array._stops[where] = array._starts[where]
                    for x in it:
                        fill(x, array._content._chunks,
                             array._content._offsets)
                        array._stops[where] += 1

                insert(obj, chunks, offsets, newchunk, ismine, promote,
                       fillobj)

    def trim(length, array):
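        # Chunks are preallocated to `chunksize` and only partially filled;
        # recursively copy away the unused tails so the result is exact-length.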
        if isinstance(array, numpy.ndarray):
            if len(array) == length:
                return array  # the length is right: don't copy it
            else:
                return numpy.array(
                    array[:length])  # copy so that the base can be deleted

        elif isinstance(array, PartitionedArray):
            for i in range(len(array._chunks)):
                array._chunks[i] = trim(
                    array._offsets[i + 1] - array._offsets[i],
                    array._chunks[i])
            return array

        elif isinstance(array, IndexedMaskedArray):
            index = trim(length, array._index)
            selection = (index != array._maskedwhen)
            content = trim(index[selection][-1] + 1, array._content)

            if isinstance(content, numpy.ndarray):
                # for simple types, IndexedMaskedArray wastes space; convert to an Arrow-like BitMaskedArray
                mask = numpy.zeros(
                    length, dtype=awkward.array.base.AwkwardArray.MASKTYPE)
                mask[selection] = True

                newcontent = numpy.empty(length, dtype=content.dtype)
                newcontent[selection] = content

                return BitMaskedArray.fromboolmask(mask,
                                                   newcontent,
                                                   maskedwhen=False,
                                                   lsb=True)

            else:
                # for complex types, IndexedMaskedArray saves space; keep it
                return IndexedMaskedArray(index, content)

        elif isinstance(array, UnionArray):
            tags = trim(length, array._tags)
            index = trim(length, array._index)

            contents = []
            for tag, content in enumerate(array._contents):
                sublength = index[tags == tag][-1] + 1
                contents.append(trim(sublength, content))

            return UnionArray(tags, index, contents)

        elif isinstance(array, JaggedArray):
            offsets = array.offsets  # fill creates aliased starts/stops
            if len(offsets) != length + 1:
                offsets = numpy.array(offsets[:length + 1])

            return JaggedArray.fromoffsets(offsets,
                                           trim(offsets[-1], array._content))

        elif isinstance(array, NamedTable):
            return NamedTable(
                length, array._name,
                collections.OrderedDict(
                    (n, trim(length, x)) for n, x in array._content.items()))

        elif isinstance(array, Table):
            return Table(
                length,
                collections.OrderedDict(
                    (n, trim(length, x)) for n, x in array._content.items()))

        elif isinstance(array, ObjectArray):
            return ObjectArray(array._generator, trim(length, array._content))

        else:
            raise AssertionError(array)

    chunks = []
    offsets = [0]
    length = 0
    for x in iterable:
        fill(x, chunks, offsets)
        length += 1

    return trim(length, PartitionedArray(offsets, chunks))
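
The fill/trim machinery above appears to be the tail of awkward-array 0.x's fromiter-style builder. As a hedged usage sketch (assuming the surrounding module exposes it as awkward.fromiter, the 0.x public API):

import awkward  # awkward-array 0.x assumed

# Nested lists become a jagged array; mixed ints and floats promote to float64.
array = awkward.fromiter([[1, 2.5], [], [3, 4, 5]])
print(array.counts)      # [2 0 3]

# Dicts become Table columns, one column per key.
records = awkward.fromiter([{"x": 1, "y": 1.1}, {"x": 2, "y": 2.2}])
print(records["y"])      # [1.1 2.2]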
Example No. 57
#   reader.peek(length=1) - return the next `length` characters
#   reader.forward(length=1) - advance the current position by `length` characters.
#   reader.index - the number of the current character.
#   reader.line, reader.column - the line and the column of the current character.

__all__ = ['Reader', 'ReaderError']

from error import YAMLError, Mark

import codecs, re

# Unfortunately, codec functions in Python 2.3 do not support the `finish`
# argument, so we have to write our own wrappers.

try:
    codecs.utf_8_decode('', 'strict', False)
    from codecs import utf_8_decode, utf_16_le_decode, utf_16_be_decode

except TypeError:

    def utf_16_le_decode(data, errors, finish=False):
        if not finish and len(data) % 2 == 1:
            data = data[:-1]
        return codecs.utf_16_le_decode(data, errors)

    def utf_16_be_decode(data, errors, finish=False):
        if not finish and len(data) % 2 == 1:
            data = data[:-1]
        return codecs.utf_16_be_decode(data, errors)

    def utf_8_decode(data, errors, finish=False):
        if not finish:
            # Defer a possibly incomplete multibyte sequence at the end of
            # `data` to the next call: continuation bytes are 0x80-0xBF and
            # lead bytes are 0xC0-0xFD.
            count = 0
            while count < 5 and count < len(data) \
                    and '\x80' <= data[-count - 1] <= '\xBF':
                count += 1
            if count < 5 and count < len(data) \
                    and '\xC0' <= data[-count - 1] <= '\xFD':
                count += 1
            if count:
                data = data[:-count]
        return codecs.utf_8_decode(data, errors)
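
A quick sanity check of the deferral behaviour (a sketch assuming Python 2 byte-string semantics, matching the snippet; both the builtin and the fallback path behave this way):

# '\xc3\xa9' is UTF-8 for u'\xe9' (e-acute); split it across two reads.
text, consumed = utf_8_decode('caf\xc3', 'strict', False)
assert text == u'caf'                     # the dangling lead byte is held back
text, consumed = utf_8_decode('\xc3\xa9', 'strict', True)
assert text == u'\xe9'                    # completed on the final call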
Example No. 58
 def dataset(self, key):
     raw = self.dbm[_asbytes(self.DATASET + key)]
     return oamap.schema.Dataset.fromjsonstring(codecs.utf_8_decode(raw)[0])
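
For reference, codecs.utf_8_decode returns a (text, bytes_consumed) pair, which is why the method above indexes [0]. With an illustrative payload:

import codecs

text, nbytes = codecs.utf_8_decode(b'{"name": "events"}')
assert text == u'{"name": "events"}'
assert nbytes == 18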
Example No. 59
        # Split into lines and run the line preprocessors.
        self.lines = source.split("\n")
        for prep in self.preprocessors.values():
            self.lines = prep.run(self.lines)

        # Parse the high-level elements.
        root = self.parser.parseDocument(self.lines).getroot()

        # Run the tree-processors
        for treeprocessor in self.treeprocessors.values():
            newRoot = treeprocessor.run(root)
            if newRoot:
                root = newRoot

        # Serialize _properly_.  Strip top-level tags.
        output, length = codecs.utf_8_decode(self.serializer(root, encoding="utf-8"))
        if self.stripTopLevelTags:
            try:
                start = output.index('<%s>' % self.doc_tag) + len(self.doc_tag) + 2
                end = output.rindex('</%s>' % self.doc_tag)
                output = output[start:end].strip()
            except ValueError:
                if output.strip().endswith('<%s />'%self.doc_tag):
                    # We have an empty document
                    output = ''
                else:
                    # We have a serious problem
                    raise ValueError('Markdown failed to strip top-level tags. Document=%r' % output.strip())

        # Run the text post-processors
        for pp in self.postprocessors.values():
            output = pp.run(output)

        return output.strip()
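
This excerpt is the serialization tail of Python-Markdown's Markdown.convert(); the usual entry point that drives it is the module-level helper:

import markdown

html = markdown.markdown("# Title\n\nSome *emphasis*.")
# -> '<h1>Title</h1>\n<p>Some <em>emphasis</em>.</p>'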
Example No. 60
 def outtextf(self, s):
     # byte strings are decoded once; unicode text passes through unchanged
     if isinstance(s, str):
         s = codecs.utf_8_decode(s)[0]
     self.outtext += s
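
A minimal illustration (Python 2 assumed, matching the snippet) of what the guard above accomplishes:

import codecs

buf = 'caf\xc3\xa9'                   # a UTF-8 byte string
if isinstance(buf, str):              # same guard as in outtextf
    buf = codecs.utf_8_decode(buf)[0]
assert buf == u'caf\xe9'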