def _save_to_file_html(self, oldfname, newfname):
        import encodings
        lexer = get_lexer_by_name("python", stripall=True)
        #formatter = HtmlFormatter(linenos=True, cssclass="source",
        #style=get_style_by_name('colorful'))
        formatter = MyHtmlFormatter(linenos='inline', module=self.__module)
        print "MODULE", self.__module
        encodings.search_function('utf8')
        ##next command for win

        source = encodings.utf_8.decode(open(oldfname, 'r').read())[0]
        result = highlight(source, lexer, formatter)
        outfile = open(newfname, 'w')
        outfile.write('''
<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01//EN"
  "http://www.w3.org/TR/html4/strict.dtd">

<html>
<head>
  <title></title>
  <meta http-equiv="content-type" content="text/html; charset=utf-8">
  <style type="text/css">\n''')
        outfile.write(formatter.get_style_defs() + '\n')
        outfile.write('.highlight { line-height: 1.2em }\n')
        outfile.write('''
  </style>
</head>
<body>
<h2></h2>\n''')
        outfile.write(encodings.utf_8.encode(result)[0])
        outfile.close()
Esempio n. 2
0
def correctEncoding(value):
    """Re-encode an HTML page when its declared charset differs from the
    charset mxTidy expects.

    Returns *value* unchanged when mxTidy has no fixed char_encoding,
    when no charset declaration is found, or when the two encodings are
    the same codec.  (Python 2 code: uses `unicode`.)
    """

    # We have nothing to do if mxTidy has no fixed char_encoding.
    # Modernised: `'k' in d` instead of the removed dict.has_key().
    if 'char_encoding' not in MX_TIDY_OPTIONS \
           or (MX_TIDY_OPTIONS['char_encoding'] == 'raw'):
        return value

    match = RE_GET_HTML_ENCODING.search(value)
    if match:
        groups = match.groups()

        # Look up both encodings in the Python codec registry; the
        # returned CodecInfo objects can be compared directly.  Names
        # are normalised (strip + lower) first.
        html_encoding = groups[1].strip().lower()
        char_encoding = MX_TIDY_OPTIONS['char_encoding'].lower().strip()
        h_enc = encodings.search_function(html_encoding)
        c_enc = encodings.search_function(char_encoding)

        # One encoding is missing or they are equal: nothing to convert.
        if not (h_enc and c_enc) or h_enc == c_enc:
            return value
        try:
            return unicode(value, html_encoding).encode(char_encoding)
        except (UnicodeError, LookupError):
            # Narrowed from a bare `except:` which also swallowed
            # KeyboardInterrupt/SystemExit; conversion failures fall
            # through and the page is returned unmodified.
            logger.info("Error correcting encoding from %s to %s" % (html_encoding, char_encoding))
    return value
    def _save_to_file_html(self, oldfname, newfname):
        """Write *oldfname*'s Python source to *newfname* as a UTF-8
        HTML page highlighted with Pygments (inline line numbers)."""
        import encodings
        lexer = get_lexer_by_name("python", stripall=True)
        #formatter = HtmlFormatter(linenos=True, cssclass="source",
                                 #style=get_style_by_name('colorful'))
        formatter = MyHtmlFormatter(linenos='inline',module=self.__module)
        print "MODULE", self.__module
        # Pre-load the utf8 codec module.
        encodings.search_function('utf8')
        ##next command for win

        # NOTE(review): the input file handle is never closed here.
        source = encodings.utf_8.decode(open(oldfname, 'r').read())[0]
        result = highlight(source, lexer, formatter)
        outfile = open(newfname, 'w')
        # Emit a minimal HTML4-strict wrapper with the Pygments CSS inlined.
        outfile.write('''
<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01//EN"
  "http://www.w3.org/TR/html4/strict.dtd">

<html>
<head>
  <title></title>
  <meta http-equiv="content-type" content="text/html; charset=utf-8">
  <style type="text/css">\n''')
        outfile.write(formatter.get_style_defs() + '\n')
        outfile.write('.highlight { line-height: 1.2em }\n')
        outfile.write('''
  </style>
</head>
<body>
<h2></h2>\n''')
        # Encode the highlighted markup back to UTF-8 bytes for the file.
        outfile.write(encodings.utf_8.encode(result)[0])
        outfile.close()
Esempio n. 4
0
def correctEncoding(value):
    """Re-encode an HTML page when its declared charset differs from the
    charset mxTidy expects; return it unchanged otherwise.
    (Python 2 code: uses `unicode` and a bare except.)
    """

    # we have nothing to do if mxTidy has no
    # fixed char_encoding
    if 'char_encoding' not in MX_TIDY_OPTIONS  \
            or (MX_TIDY_OPTIONS['char_encoding'] == 'raw'):
        return value

    match = RE_GET_HTML_ENCODING.search(value)
    if match:
        groups = match.groups()

        # lookup encodings in the Python encodings database
        # returns function-pointers that we can compare
        # need to normalize encodings a bit before
        html_encoding = groups[1].strip().lower()
        char_encoding = MX_TIDY_OPTIONS['char_encoding'].lower().strip()
        h_enc = encodings.search_function(html_encoding)
        c_enc = encodings.search_function(char_encoding)

        # one encoding is missing or they are equal
        if not (h_enc and c_enc) or h_enc == c_enc:
            return value
        else:
            try:
                return unicode(value, html_encoding).encode(char_encoding)
            except:
                # NOTE(review): bare except also hides KeyboardInterrupt;
                # on failure the original page is returned unmodified.
                logger.info("Error correcting encoding from %s to %s" %
                            (html_encoding, char_encoding))
    return value
Esempio n. 5
0
def search_function(coding):
    """Codec lookup hook for the preprocessor encoding.

    Accepts either the bare preprocessor name or "<name>.<encoding>";
    any other name returns None so other codecs can handle it.
    """
    if not coding.lower().startswith(__pp_name__):
        return None

    prefix, sep, suffix = coding.partition('.')
    if sep:
        # "<name>.<encoding>": the part before the dot must be exactly
        # the preprocessor name, nothing more.
        if len(prefix) != len(__pp_name__):
            return None
        base_codec = encodings.search_function(suffix)
        if base_codec is None:
            # Unknown <encoding> suffix.
            return None
        reader_base = base_codec.streamreader
    else:
        if len(coding) != len(__pp_name__):
            return None
        # No explicit encoding given: default to UTF-8.
        reader_base = utf_8.StreamReader

    utf8_codec = encodings.search_function('utf8')
    return codecs.CodecInfo(
        name=__pp_name__,
        encode=utf8_codec.encode,
        decode=utf8_codec.decode,
        incrementalencoder=utf8_codec.incrementalencoder,
        incrementaldecoder=utf8_codec.incrementaldecoder,
        streamreader=yuppReaderFactory(reader_base),
        streamwriter=utf8_codec.streamwriter,
    )
Esempio n. 6
0
 def __init__(self, callbacks=None, encoding='utf-8'):
     '''Initialise the JSON-building object.'''
     if callbacks:
         assert isinstance(callbacks, dict), 'Неверный тип параметра'
     self.encoding = encoding
     # root of the tree
     self.root = None
     # current node the parser is positioned on
     self.current = None
     self.path = None
     # subscription callback handlers
     self.callbacks = callbacks
     self.kwargs = None
     self.parser = None
     self.error = None
     # map for expanding short names into full names
     self.alias_map = None
     # variables that will be passed into the callbacks
     # (NOTE(review): kwargs is also assigned a few lines above)
     self.kwargs = None
     # dict holding the request tree, used to determine the PK
     self.schema = None
     # schema cache (path -> entity)
     self.schema_map = dict()
     # cache bound encode/decode functions for the chosen codec
     self.encode = encodings.search_function(encoding).encode
     self.decode = encodings.search_function(encoding).decode
Esempio n. 7
0
class TestWininstUtils(moves.unittest.TestCase):
    """Tests for the wininst helpers (inidata / exe creation)."""

    def setUp(self):
        # Build a temporary source tree with a "build" subdirectory.
        self.src_root = tempfile.mkdtemp()
        self.bld_root = op.join(self.src_root, "build")

        root = create_root_with_source_tree(self.src_root, self.bld_root)
        self.top_node = root.find_node(self.src_root)

    def tearDown(self):
        # Remove the temporary tree created in setUp.
        shutil.rmtree(self.top_node.abspath())

    def test_get_inidata_run(self):
        """Simply execute get_inidata."""
        # FIXME: do a real test here
        meta, sections, nodes = create_simple_ipkg_args(self.top_node)
        ipkg = BuildManifest(sections, meta, {})
        get_inidata(ipkg)

    # Patch the MSVC version and stub the "mbcs" codec (only available
    # on Windows) with ascii so the test can run on any platform.
    @mock.patch('distutils.msvccompiler.get_build_version', lambda: 9.0)
    @mock.patch('encodings._cache',
                {"mbcs": encodings.search_function("ascii")})
    def test_create_exe(self):
        # FIXME: do a real test here
        meta, sections, nodes = create_simple_ipkg_args(self.top_node)
        ipkg = BuildManifest(sections, meta, {})

        fid, arcname = tempfile.mkstemp(prefix="zip")
        try:
            create_exe(ipkg, arcname, "some-name.exe")
        finally:
            os.close(fid)
Esempio n. 8
0
    def index(self, path=b'', new_encoding=None):
        """
        Update repository encoding via Ajax.
        """
        self.assertIsInstance(path, bytes)
        self.assertTrue(new_encoding)

        _logger.debug("update repo [%r] settings [%r]", path, new_encoding)

        # Check user permissions.
        repo_obj = self.validate_user_path(path)[0]

        # Validate the requested encoding against the codec registry.
        codec = encodings.search_function(new_encoding.lower())
        if not codec:
            raise cherrypy.HTTPError(400, _("invalid encoding value"))

        # Use the codec's canonical name; on Python 2 it is a byte string.
        canonical = codec.name
        if not isinstance(canonical, str):
            # Python 2
            canonical = canonical.decode('ascii')

        # Persist the new encoding on the repository.
        _logger.info("updating repository [%s] encoding [%s]", repo_obj, canonical)
        repo_obj.set_encoding(canonical)

        return _("Updated")
Esempio n. 9
0
def search_function(encoding):
    """Codec search hook that exposes the 'interpy' source encoding.

    Returns None for any other name.  The codec decodes UTF-8 and runs
    the decoded text through interpy_transform_string before handing it
    to the interpreter.
    """
    if encoding != 'interpy': return None
    # Assume utf8 encoding
    from encodings import utf_8
    utf8 = encodings.search_function('utf8')

    def interpy_decode(input, errors='strict'):
        # codecs may hand us a memoryview; normalise it first.
        if isinstance(input, memoryview):
            input = input.tobytes().decode("utf-8")
        return utf8.decode(interpy_transform_string(input), errors)

    class InterpyIncrementalDecoder(utf_8.IncrementalDecoder):
        def decode(self, input, final=False):
            self.buffer += input
            if final:
                buff = self.buffer
                # Bug fix: the buffer holds bytes; resetting it to the
                # str '' broke any subsequent use of the decoder.
                self.buffer = b''
                return super(InterpyIncrementalDecoder, self).decode(
                    interpy_transform_string(buff), final=True)
            # Bug fix: an incremental decoder must return a str on every
            # call; the original implicitly returned None for non-final
            # chunks, breaking codecs.iterdecode and stream reading.
            return ''

    class InterpyStreamReader(utf_8.StreamReader):
        def __init__(self, *args, **kwargs):
            codecs.StreamReader.__init__(self, *args, **kwargs)
            self.stream = StringIO(interpy_transform(self.stream))


    return codecs.CodecInfo(
        name = 'interpy',
        encode = utf8.encode,
        decode = interpy_decode,
        incrementalencoder = utf8.incrementalencoder,
        incrementaldecoder = InterpyIncrementalDecoder,
        streamreader = InterpyStreamReader,
        streamwriter = utf8.streamwriter)
Esempio n. 10
0
def processmetadata(metadata, messages):
    """Process one PO header line ("Key: value").

    Records the declared charset (as its canonical codec name) under
    messages['encoding'] and the language under messages['language'].
    Raises RuntimeError for charsets unknown to Python and ValueError
    for charsets not in supported_charset or conflicting with the
    previously seen one.
    """
    k, v = metadata.split(':', 1)
    k = k.strip().lower()
    v = v.strip()
    if k == 'content-type':
        # Bug fix: the original `v.split('charset=')[1]` raised
        # IndexError when the Content-Type had no charset parameter.
        _, sep, charset = v.partition('charset=')
        if not sep or not charset:
            return
        encoding = encodings.search_function(charset.lower())
        encoding = encoding and encoding.name
        if encoding is None:
            raise RuntimeError('The {} charset is not supported by current '
                               'Python. If you want to go on process, '
                               'please change it to UTF-8.'.format(charset))
        if encoding not in supported_charset:
            raise ValueError('The {} charset is not supported by PO files. '
                             'If you want to go on process, '
                             'please change it to UTF-8.'.format(charset))
        encoding_last = messages['encoding_last']
        if encoding_last and encoding != encoding_last:
            raise ValueError(
                'input files has different charset: {} and {}, '
                'could not be merged. If you want to go on process'
                ', please make them same, '
                'or change both of them to UTF-8.'.format(
                    supported_charset[encoding_last], charset))
        messages['encoding'] = encoding
    elif k == 'language':
        if v:
            messages['language'] = v
Esempio n. 11
0
        def decode(text, *args):
            """Used by pypy and pylint to deal with a spec file"""
            buffered = cStringIO.StringIO(text)

            # Determine if we need to have imports for this string
            # It may be a fragment of the file
            has_spec = regexes['encoding_matcher'].search(buffered.readline())
            no_imports = not has_spec
            buffered.seek(0)

            # Translate the text (reading line by line through a UTF-8
            # stream reader, fed to the surrounding object's dealwith).
            utf8 = encodings.search_function('utf8')  # Assume utf8 encoding
            reader = utf8.streamreader(buffered)
            data = self.dealwith(reader.readline, no_imports=no_imports)

            # If nothing was changed, then we want to use the original file/line
            # Also have to replace indentation of original line with indentation of new line
            # To take into account nested describes
            if text and not regexes['only_whitespace'].match(text):
                if regexes['whitespace'].sub(
                        '', text) == regexes['whitespace'].sub('', data):
                    bad_indentation = regexes['leading_whitespace'].search(
                        text).groups()[0]
                    good_indentation = regexes['leading_whitespace'].search(
                        data).groups()[0]
                    data = '%s%s' % (good_indentation,
                                     text[len(bad_indentation):])

            # If text is empty and data isn't, then we should return text
            if len(text) == 0 and len(data) == 1:
                return unicode(text), 0

            # Return translated version and it's length
            return unicode(data), len(data)
Esempio n. 12
0
    def _load_hints(self):
        """For different purpose, a repository may contains an "rdiffweb" file
        to provide hint to rdiffweb related to locale. At first, it's used to
        define an encoding."""

        hint_file = os.path.join(self.data_path, b"rdiffweb")
        if not os.access(hint_file, os.F_OK) or os.path.isdir(hint_file):
            return

        # Read the rdiffweb file as a configuration file.
        config = Configuration(hint_file)
        name = config.get_config('encoding', default=FS_ENCODING)
        self.encoding = encodings.search_function(name.lower())
        if not self.encoding:
            # Bug fix: the original called search_function(FS_ENCODING)
            # but discarded the result, so self.encoding stayed falsy and
            # the assert below always fired.  Assign the fallback codec.
            self.encoding = encodings.search_function(FS_ENCODING)
        assert self.encoding
Esempio n. 13
0
 def decode(text, *args):
     """Used by pypy and pylint to deal with a spec file"""
     buffered = cStringIO.StringIO(text)

     # Determine if we need to have imports for this string
     # It may be a fragment of the file
     has_spec = regexes['encoding_matcher'].search(buffered.readline())
     no_imports = not has_spec
     buffered.seek(0)

     # Translate the text through a UTF-8 stream reader, line by line.
     utf8 = encodings.search_function('utf8') # Assume utf8 encoding
     reader = utf8.streamreader(buffered)
     data = self.dealwith(reader.readline, no_imports=no_imports)

     # If nothing was changed, then we want to use the original file/line
     # Also have to replace indentation of original line with indentation of new line
     # To take into account nested describes
     if text and not regexes['only_whitespace'].match(text):
         if regexes['whitespace'].sub('', text) == regexes['whitespace'].sub('', data):
             bad_indentation = regexes['leading_whitespace'].search(text).groups()[0]
             good_indentation = regexes['leading_whitespace'].search(data).groups()[0]
             data = '%s%s' % (good_indentation, text[len(bad_indentation):])

     # If text is empty and data isn't, then we should return text
     if len(text) == 0 and len(data) == 1:
         return unicode(text), 0

     # Return translated version and it's length
     return unicode(data), len(data)
Esempio n. 14
0
def search_function(encoding):
    """Codec hook for 'portpy' encodings.

    Names containing 'portpy' select a translated-keyword codec; an
    optional '-<lang>' suffix picks the language (default 'pt').  Any
    other name yields None.
    """
    if 'portpy' not in encoding:
        return None
    parts = encoding.split('-')
    lang = Language(parts[-1] if len(parts) > 1 else 'pt')

    # The underlying byte encoding is assumed to be UTF-8.
    utf_8 = encodings.utf_8
    base = encodings.search_function('utf8')

    def portpy_decode(data, errors='strict'):
        if isinstance(data, memoryview):
            data = data.tobytes()
        data = BytesIO(data).read()
        return base.decode(lang.translate(data), errors)

    class PortpyIncrementalDecoder(utf_8.IncrementalDecoder):
        def decode(self, input, final=False):
            return super(PortpyIncrementalDecoder, self).decode(
                    lang.translate(input), final=final)

    class PortpyStreamReader(utf_8.StreamReader):
        def __init__(self, *args, **kwargs):
            codecs.StreamReader.__init__(self, *args, **kwargs)
            raw = self.stream.read()
            self.stream = StringIO(lang.translate(raw))

    return codecs.CodecInfo(
        name='portpy',
        encode=base.encode,
        decode=portpy_decode,
        incrementalencoder=base.incrementalencoder,
        incrementaldecoder=PortpyIncrementalDecoder,
        streamreader=PortpyStreamReader,
        streamwriter=base.streamwriter,
    )
Esempio n. 15
0
def _installFutureFStrings():
    """Register a fake UTF-8 codec under the "future-fstrings" names.

    This unbreaks code that declares the future-fstrings source
    encoding even on Python versions where it is unnecessary.
    """

    # Singleton, pylint: disable=global-statement
    global _fstrings_installed

    if _fstrings_installed:
        return

    # TODO: Not supporting anything before that.
    if python_version >= 0x360:
        import codecs

        # Trick for "future_fstrings" PyPI package support: register the
        # aliases only when no real codec is already installed.
        try:
            codecs.lookup("future-fstrings")
        except LookupError:
            import encodings

            utf8_codec = encodings.search_function("utf8")
            codecs.register(
                {"future-fstrings": utf8_codec, "future_fstrings": utf8_codec}.get
            )
    else:
        # Older Python: delegate to the real package when present.
        try:
            import future_fstrings
        except ImportError:
            pass
        else:
            future_fstrings.register()

    _fstrings_installed = True
Esempio n. 16
0
def guess_encoding(buffer):
    """Better guess encoding method

    It checks if python supports the encoding.  Returns the detected
    encoding name, or None when nothing usable was detected.
    (Python 2 code: uses `basestring`.)
    """
    encoding = _guess_encoding(buffer)
    # The detection must yield a non-empty string before we can look it
    # up; Python uses lower-case names for encodings.
    if not (encoding and isinstance(encoding, basestring)):
        return None
    # Try to find an encoding function for the encoding.  If None is
    # returned or an exception is raised the encoding is invalid.
    try:
        result = encodings.search_function(encoding.lower())
    except Exception:
        # Narrowed from a bare except, which also swallowed
        # KeyboardInterrupt/SystemExit.  XXX log
        result = None

    # Return the original (detected) spelling when a codec exists.
    return encoding if result else None
Esempio n. 17
0
def search_function(encoding):
    """Codec search hook for the 'interpy' source encoding.

    Returns None for any other name.  Decodes UTF-8 and runs the text
    through interpy_transform_string before execution.
    """
    if encoding != 'interpy': return None
    # Assume utf8 encoding
    from encodings import utf_8
    utf8 = encodings.search_function('utf8')

    def interpy_decode(input, errors='strict'):
        # codecs may pass a memoryview; normalise it first.
        if isinstance(input, memoryview):
            input = input.tobytes().decode("utf-8")
        return utf8.decode(interpy_transform_string(input), errors)

    class InterpyIncrementalDecoder(utf_8.IncrementalDecoder):
        def decode(self, input, final=False):
            self.buffer += input
            if final:
                buff = self.buffer
                # Bug fix: the buffer holds bytes; resetting it to the
                # str '' broke any later reuse of the decoder.
                self.buffer = b''
                return super(InterpyIncrementalDecoder,
                             self).decode(interpy_transform_string(buff),
                                          final=True)
            # Bug fix: an incremental decoder must return a str on every
            # call; the original implicitly returned None for non-final
            # chunks, breaking codecs.iterdecode and stream reading.
            return ''

    class InterpyStreamReader(utf_8.StreamReader):
        def __init__(self, *args, **kwargs):
            codecs.StreamReader.__init__(self, *args, **kwargs)
            self.stream = StringIO(interpy_transform(self.stream))

    return codecs.CodecInfo(name='interpy',
                            encode=utf8.encode,
                            decode=interpy_decode,
                            incrementalencoder=utf8.incrementalencoder,
                            incrementaldecoder=InterpyIncrementalDecoder,
                            streamreader=InterpyStreamReader,
                            streamwriter=utf8.streamwriter)
Esempio n. 18
0
    def index(self, path=b'', new_encoding=None):
        """
        Update repository encoding via Ajax.

        *path* is the repository path as bytes; *new_encoding* is the
        requested encoding name.  Raises HTTP 400 for unknown codecs.
        """
        self.assertIsInstance(path, bytes)
        self.assertTrue(new_encoding)

        _logger.debug("update repo [%r] settings [%r]", path, new_encoding)

        # Check user permissions
        repo_obj = self.validate_user_path(path)[0]

        # Validate the encoding value against the Python codec registry.
        new_codec = encodings.search_function(new_encoding.lower())
        if not new_codec:
            raise cherrypy.HTTPError(400, _("invalid encoding value"))

        # Use the codec's canonical name; on Python 2 it is a byte string.
        new_encoding = new_codec.name
        if not isinstance(new_encoding, str):
            # Python 2
            new_encoding = new_encoding.decode('ascii')

        # Update the repository encoding
        _logger.info("updating repository [%s] encoding [%s]", repo_obj,
                     new_encoding)
        repo_obj.set_encoding(new_encoding)

        return _("Updated")
Esempio n. 19
0
def guess_encoding(buffer):
    """Better guess encoding method

    It checks if python supports the encoding.
    Returns the detected encoding name, or None when detection fails or
    the codec is unknown.  (Python 2 code: uses `basestring`.)
    """
    encoding = _guess_encoding(buffer)
    # step 1: if the encoding was detected, use the lower() because python
    # is using lower case names for encodings
    if encoding and isinstance(encoding, basestring):
        # encoding = encoding.lower()
        pass
    else:
        return None
    # try to find an encoding function for the encoding
    # if None is returned or an exception is raised the encoding is invalid
    try:
        result = encodings.search_function(encoding.lower())
    except:
        # NOTE(review): bare except also hides KeyboardInterrupt.
        # XXX log
        result = None

    if result:
        # got a valid encoding
        return encoding
    else:
        return None
Esempio n. 20
0
    def get_payload(
            self,
            index: Optional[Any] = None,
            decode: bool = False) -> Union[str, "UMessage", List["UMessage"]]:
        """Wrap email.message.Message.get_payload.

        With index=None, multipart payloads come back as a list of
        UMessage wrappers and text payloads are decoded to str; with an
        index, the selected sub-payload is wrapped in a UMessage.
        """
        message = self.message

        if index is None:
            # mypy: Argument 1 to "get_payload" of "Message" has incompatible type "None";
            # mypy: expected "int"
            # email.message.Message.get_payload has type signature:
            # Message.get_payload(self, i=None, decode=False)
            # so None seems to be totally acceptable, I don't understand mypy here
            payload = message.get_payload(index, decode)  # type: ignore

            if isinstance(payload, list):
                # Multipart: wrap each part.
                return [UMessage(msg) for msg in payload]

            if message.get_content_maintype() != "text":
                # Non-text payloads (e.g. bytes attachments) pass through.
                return payload

            if isinstance(payload, str):
                return payload

            # Decode text payload bytes; fall back to latin-1 when the
            # declared charset is missing or unknown to Python.
            charset = message.get_content_charset() or "iso-8859-1"
            if search_function(charset) is None:
                charset = "iso-8859-1"

            return str(payload or b"", charset, "replace")
        else:
            payload = UMessage(message.get_payload(index, decode))

        return payload
Esempio n. 21
0
 def __init__(self):
     """Set up a fake repository rooted at the packaged test data."""
     # Cache the utf-8 CodecInfo used for all name decoding.
     self.encoding = encodings.search_function('utf-8')
     assert self.encoding
     # Locate the bundled test tree; paths are kept as bytes.
     self.repo_root = bytes(pkg_resources.resource_filename(
         'rdiffweb', 'tests'),
                            encoding='utf-8')  # @UndefinedVariable
     self.data_path = os.path.join(self.repo_root, b'rdiff-backup-data')
     self.root_path = MockRdiffPath(self)
Esempio n. 22
0
def normalize_encoding(enc):
    """Return the canonical codec name for *enc*.

    ``None`` maps to the interpreter's default encoding.  Unknown
    names raise ``ValueError``.
    """
    if enc is None:
        return sys.getdefaultencoding()
    codec = encodings.search_function(enc)
    if codec:
        return codec.name
    raise ValueError(
        "{} is not a valid text encoding; see encodings.aliases.aliases for the set of legal "
        "values".format(enc))
Esempio n. 23
0
def search_function(encoding):
    """Resolve the 'my_truth' codec; any other name yields None."""
    if encoding == "my_truth":
        base = encodings.search_function("utf8")
        return codecs.CodecInfo(
            name="my_truth",
            encode=base.encode,
            decode=my_truth_decode,
        )
    return None
def find_encoding_by_country(country):
    """Best-effort mapping of a locale name to a canonical codec name.

    Looks the name up in locale.locale_alias (entries look like
    "en_US.ISO8859-1" — the codec is the part after the last dot) and
    falls back to the platform's preferred encoding when the alias is
    unknown or names no usable codec.
    """
    alias_entry = locale.locale_alias.get(country, None)
    if alias_entry:
        encoding_name = alias_entry.split(".")[-1].lower()
        codec = encodings.search_function(encoding_name)
        if codec:
            return codec.name

    return locale.getpreferredencoding()
Esempio n. 25
0
def search_function(s):
    """Resolve the 'GDASyntax' codec (UTF-8 with a custom stream reader)."""
    if s == 'GDASyntax':
        base = encodings.search_function('utf8')  # assuming utf8 encoding
        return codecs.CodecInfo(
            name='GDASyntax',
            encode=base.encode,
            decode=base.decode,
            incrementalencoder=base.incrementalencoder,
            incrementaldecoder=base.incrementaldecoder,
            streamreader=StreamReader,
            streamwriter=base.streamwriter,
        )
    return None
Esempio n. 26
0
def search_function(encoding):
    """Resolve the 'match-python' codec; any other name yields None."""
    if encoding == 'match-python':
        base = encodings.search_function('utf8')
        return codecs.CodecInfo(
            name='match-python',
            encode=base.encode,
            decode=base.decode,
            incrementalencoder=base.incrementalencoder,
            incrementaldecoder=base.incrementaldecoder,
            streamreader=MatchPythonStreamReader,
            streamwriter=base.streamwriter,
        )
    return None
Esempio n. 27
0
def search_function(s):
    """Resolve the 'blocks' codec (UTF-8 with a custom stream reader)."""
    if s == 'blocks':
        base = encodings.search_function('utf8')
        return codecs.CodecInfo(
            name='blocks',
            encode=base.encode,
            decode=base.decode,
            incrementalencoder=base.incrementalencoder,
            incrementaldecoder=base.incrementaldecoder,
            streamreader=StreamReader,
            streamwriter=base.streamwriter,
        )
    return None
def search_function(s):
    """Resolve the 'tilde' codec; any other name yields None."""
    if s == 'tilde':
        base = encodings.search_function('utf8')  # Assume utf8 encoding
        return codecs.CodecInfo(
            name='tilde',
            encode=base.encode,
            decode=tilde_decode,
            incrementalencoder=base.incrementalencoder,
            incrementaldecoder=base.incrementaldecoder,
            streamreader=StreamReader,
            streamwriter=base.streamwriter,
        )
    return None
Esempio n. 29
0
def search_function(s):
    """Codec search hook: return the 'GDASyntax' CodecInfo, else None.

    Everything delegates to utf-8 except the stream reader, which is
    the module's custom StreamReader.
    """
    if s != 'GDASyntax': return None
    utf8 = encodings.search_function('utf8')  # assuming utf8 encoding
    return codecs.CodecInfo(name='GDASyntax',
                            encode=utf8.encode,
                            decode=utf8.decode,
                            incrementalencoder=utf8.incrementalencoder,
                            incrementaldecoder=utf8.incrementaldecoder,
                            streamreader=StreamReader,
                            streamwriter=utf8.streamwriter)
Esempio n. 30
0
def search_function(s):
    """Resolve the 'acodec' codec; any other name yields None."""
    if s == "acodec":
        base = encodings.search_function("utf8")
        return codecs.CodecInfo(
            name='acodec',
            encode=base.encode,
            decode=base.decode,
            incrementalencoder=base.incrementalencoder,
            incrementaldecoder=base.incrementaldecoder,
            streamreader=aStreamReader,        # acodec StreamReader
            streamwriter=base.streamwriter,
        )
    return None
Esempio n. 31
0
    def _set_encoding(self, value):
        """Change the repository encoding.

        *value* is validated against the Python codec registry; the
        codec's canonical name is what gets persisted.  Raises
        ValueError for unknown encodings.
        """
        # Validate if the value is a valid encoding before updating the database.
        codec = encodings.search_function(value.lower())
        if not codec:
            raise ValueError(_('invalid encoding %s') % value)

        logger.info("updating repository %s encoding %s", self, codec.name)
        # Persist the canonical name and cache the CodecInfo object.
        self._set_attr('encoding', codec.name)
        self._encoding = codec
Esempio n. 32
0
def read_header(f):
    """Parse the dictionary-file header from file object *f*.

    Verifies the magic bytes, reads the UUID, text encoding and
    compression name, then the tag and content-type tables and the
    store layout offsets.  Returns a populated Header.
    Raises UnknownFileFormat / UnknownEncoding / UnknownCompression.
    """
    f.seek(0)

    # The file must begin with the expected magic bytes.
    magic = f.read(len(MAGIC))
    if (magic != MAGIC):
        raise UnknownFileFormat()

    uuid = UUID(bytes=f.read(16))
    # The declared text encoding must exist in the Python codec registry.
    encoding = read_byte_string(f, U_CHAR).decode(UTF8)
    if encodings.search_function(encoding) is None:
        raise UnknownEncoding(encoding)

    # All further reads go through a StructReader bound to that encoding.
    f = StructReader(f, encoding)
    compression = f.read_tiny_text()
    if compression not in COMPRESSIONS:
        raise UnknownCompression(compression)

    def read_tags():
        # Tag table: a byte count followed by key/value tiny-text pairs.
        tags = {}
        count = f.read_byte()
        for _ in range(count):
            key = f.read_tiny_text()
            value = f.read_tiny_text()
            tags[key] = value
        return tags

    tags = read_tags()

    def read_content_types():
        # Content-type table: a byte count followed by text entries.
        content_types = []
        count = f.read_byte()
        for _ in range(count):
            content_type = f.read_text()
            content_types.append(content_type)
        return tuple(content_types)

    content_types = read_content_types()

    # Store layout: blob count, offsets and total size.
    blob_count = f.read_int()
    store_offset = f.read_long()
    size = f.read_long()
    refs_offset = f.tell()

    return Header(
        magic=magic,
        uuid=uuid,
        encoding=encoding,
        compression=compression,
        tags=MappingProxyType(tags),
        content_types=content_types,
        blob_count=blob_count,
        store_offset=store_offset,
        refs_offset=refs_offset,
        size=size,
    )
Esempio n. 33
0
 def search_function(s):
     """Determine if a file is of spec encoding and return special CodecInfo if it is"""
     if s != 'spec': return None
     # Everything delegates to utf-8 except decode and the stream
     # reader, which apply the spec translation.
     utf8 = encodings.search_function('utf8')  # Assume utf8 encoding
     return codecs.CodecInfo(name='spec',
                             encode=utf8.encode,
                             decode=decode,
                             streamreader=StreamReader,
                             streamwriter=utf8.streamwriter,
                             incrementalencoder=utf8.incrementalencoder,
                             incrementaldecoder=utf8.incrementaldecoder)
Esempio n. 34
0
def scubapy_codec(s):
    """Resolve the 'scubapy' codec; any other name yields None."""
    if s == 'scubapy':
        base = encodings.search_function('utf8')  # Assume utf8 encoding
        return codecs.CodecInfo(
            name='scubapy',
            encode=base.encode,
            decode=base.decode,
            incrementalencoder=base.incrementalencoder,
            incrementaldecoder=base.incrementaldecoder,
            streamreader=StreamReader,
            streamwriter=base.streamwriter,
        )
    return None
Esempio n. 35
0
def search_function(encoding):
    """Resolve the 'pytwister' codec; any other name yields None."""
    if encoding == 'pytwister':
        # Underlying byte encoding is assumed to be UTF-8.
        base = encodings.search_function('utf8')
        return codecs.CodecInfo(
            name='pytwister',
            encode=base.encode,
            decode=pytwister_decode,
            incrementalencoder=base.incrementalencoder,
            incrementaldecoder=PytwisterIncrementalDecoder,
            streamreader=PytwisterStreamReader,
            streamwriter=base.streamwriter,
        )
    return None
Esempio n. 36
0
def search_function(s):
    ''' Allows "FoxDot" files to be imported properly '''
    if s == 'foxdot':
        base = encodings.search_function('utf8')  # Assume utf8 encoding
        return codecs.CodecInfo(
            name='foxdot',
            encode=base.encode,
            decode=base.decode,
            incrementalencoder=base.incrementalencoder,
            incrementaldecoder=base.incrementaldecoder,
            streamreader=StreamReader,
            streamwriter=base.streamwriter,
        )
    return None
Esempio n. 37
0
def search_function(s):
    """Codec search hook: return the 'blocks' CodecInfo, else None.

    Everything delegates to utf-8 except the stream reader, which is
    the module's custom StreamReader.
    """
    if s!='blocks': return None
    utf8=encodings.search_function('utf8')
    return codecs.CodecInfo(
        name='blocks',
        encode = utf8.encode,
        decode = utf8.decode,
        incrementalencoder=utf8.incrementalencoder,
        incrementaldecoder=utf8.incrementaldecoder,
        streamreader=StreamReader,
        streamwriter=utf8.streamwriter)
Esempio n. 38
0
def getImportedDict( mainscript ):
    """Run *mainscript* through modulefinder and return a dict of every
    module it needs, merged with the currently loaded sys.modules.

    Side effects: chdirs into the script's directory and prepends it to
    sys.path so relative imports inside the script resolve.
    """
    # importing a lot of stuff, just because they are dependencies.
    # pylint: disable=W0612,R0914

    # chdir to mainscript directory and add it to sys.path
    main_dir = os.path.dirname( mainscript )
    os.chdir( main_dir )
    sys.path.insert( 0, main_dir )

    # import modules needed but not listed in sys.modules
    import imp, zipimport, site, io, marshal, pickle
    import encodings, encodings.aliases, codecs
    import zlib, inspect, threading, traceback
    import ctypes
    if sys.version_info < ( 3, 0, 0 ):
        # Python-2-only standard modules.
        import StringIO, cStringIO
        import cPickle
        import thread
    # Force-load every known codec module so they end up in sys.modules;
    # some aliases point at platform-specific codecs that may be missing,
    # hence the swallowed ImportError/AttributeError.
    for code in encodings.aliases.aliases.keys():
        try:
            encodings.search_function( code )
        except ( ImportError, AttributeError ):
            pass

    # get modules from main script
    import modulefinder
    finder = modulefinder.ModuleFinder()
    finder.run_script( mainscript )

    # resolve dependency
    # dependency_resolver maps a module name to (callable, *args); invoke
    # the registered hook for every module the finder discovered.
    for name in finder.modules:
        method = dependency_resolver.get( name )
        if not method:
            continue
        method[0]( *method[1:] )

    # merge sys.modules
    # finder results take precedence over already-loaded modules.
    imported_dict = {}
    imported_dict.update( sys.modules )
    imported_dict.update( finder.modules )
    return imported_dict
Esempio n. 39
0
def getImportedDict(mainscript):
    """Collect the modules *mainscript* depends on (via modulefinder) and
    return them merged with the interpreter's current sys.modules.

    Note: mutates process state — chdirs to the script's directory and
    prepends that directory to sys.path before scanning.
    """
    # importing a lot of stuff, just because they are dependencies.
    # pylint: disable=W0612,R0914

    # chdir to mainscript directory and add it to sys.path
    main_dir = os.path.dirname(mainscript)
    os.chdir(main_dir)
    sys.path.insert(0, main_dir)

    # import modules needed but not listed in sys.modules
    import imp, zipimport, site, io, marshal, pickle
    import encodings, encodings.aliases, codecs
    import zlib, inspect, threading, traceback
    import ctypes
    if sys.version_info < (3, 0, 0):
        # Python-2-only standard modules.
        import StringIO, cStringIO
        import cPickle
        import thread
    # Pull in every registered codec so the modules land in sys.modules;
    # missing platform codecs are tolerated.
    for code in encodings.aliases.aliases.keys():
        try:
            encodings.search_function(code)
        except (ImportError, AttributeError):
            pass

    # get modules from main script
    import modulefinder
    finder = modulefinder.ModuleFinder()
    finder.run_script(mainscript)

    # resolve dependency
    # dependency_resolver: module name -> (callable, *args) hook.
    for name in finder.modules:
        method = dependency_resolver.get(name)
        if not method:
            continue
        method[0](*method[1:])

    # merge sys.modules
    # entries found by the finder override already-loaded ones.
    imported_dict = {}
    imported_dict.update(sys.modules)
    imported_dict.update(finder.modules)
    return imported_dict
Esempio n. 40
0
def search_function(encoding):
    """Codec hook for 'braces' source files; returns None for other names."""
    if encoding != 'braces':
        return None
    base = encodings.search_function('utf8')  # assume UTF-8 encoding
    return codecs.CodecInfo(
        name='braces',
        encode=base.encode,
        decode=base.decode,
        incrementalencoder=base.incrementalencoder,
        incrementaldecoder=base.incrementaldecoder,
        streamreader=BracesStreamReader,  # brace-aware reader, defined elsewhere
        streamwriter=base.streamwriter,
    )
Esempio n. 41
0
def search_function(encoding):
    """Codec hook for the 'pyxl' encoding; None for everything else.

    Encoding/writing delegate to UTF-8; the decode paths are swapped for
    the pyxl-aware implementations defined elsewhere in this module.
    """
    if encoding != 'pyxl':
        return None
    base = encodings.search_function('utf8')  # assume UTF-8 underneath
    return codecs.CodecInfo(
        name='pyxl',
        encode=base.encode,
        decode=pyxl_decode,
        incrementalencoder=base.incrementalencoder,
        incrementaldecoder=PyxlIncrementalDecoder,
        streamreader=PyxlStreamReader,
        streamwriter=base.streamwriter,
    )
Esempio n. 42
0
    def _get_encoding(self):
        """Return the repository encoding as a codecs.CodecInfo.

        Lookup order: the 'encoding' attribute stored in the database,
        then the obsolete per-repo 'rdiffweb' config file, and finally
        DEFAULT_REPO_ENCODING.
        """
        # For backward compatibility, look into the database and fallback to
        # the rdiffweb config file in the repo.
        encoding = self._get_attr('encoding')
        if encoding:
            return encodings.search_function(encoding.lower())

        # Read encoding value from obsolete config file.
        try:
            conf_file = os.path.join(self._data_path, b'rdiffweb')
            if os.access(conf_file, os.F_OK) and os.path.isfile(conf_file):
                config = read_config(conf_file)
                encoding = config.get('encoding')
                if encoding:
                    return encodings.search_function(encoding)
        except Exception:
            # Best effort: a broken/unreadable legacy config file must not
            # prevent the fallback below.  Narrowed from a bare `except:` so
            # KeyboardInterrupt/SystemExit are no longer swallowed.
            logger.exception("fail to get repo encoding from file")

        # Fallback to default encoding.
        return encodings.search_function(DEFAULT_REPO_ENCODING)
Esempio n. 43
0
def search_function(s):
    """Codec hook for the "pyspec" encoding; any other name yields None."""
    if s != "pyspec":
        return None

    base = encodings.search_function("utf8")  # UTF-8 semantics throughout
    return codecs.CodecInfo(
        name="pyspec",
        encode=base.encode,
        decode=base.decode,
        incrementalencoder=base.incrementalencoder,
        incrementaldecoder=base.incrementaldecoder,
        streamreader=StreamReader,  # pyspec-aware reader, defined elsewhere
        streamwriter=base.streamwriter,
    )
Esempio n. 44
0
 def search_function(s):
     """Codec search hook: recognise the 'spec' encoding, None otherwise.

     Everything except decoding is delegated to the stock UTF-8 codec;
     `decode` and `StreamReader` (defined elsewhere) perform the
     spec -> python translation.
     """
     if s != 'spec':
         return None
     base = encodings.search_function('utf8')  # assume UTF-8 underneath
     return codecs.CodecInfo(
         name='spec',
         encode=base.encode,
         decode=decode,
         streamreader=StreamReader,
         streamwriter=base.streamwriter,
         incrementalencoder=base.incrementalencoder,
         incrementaldecoder=base.incrementaldecoder,
     )
Esempio n. 45
0
def search_function(encoding):
    """Codec hook for the 'inlinec' encoding (None otherwise).

    Encoding is plain UTF-8; the decode paths are swapped for the
    inlinec-aware implementations defined elsewhere in this module.
    """
    if encoding != "inlinec":
        return None
    base = encodings.search_function("utf8")
    return codecs.CodecInfo(
        name="inlinec",
        encode=base.encode,
        decode=inlinec_decode,
        incrementalencoder=base.incrementalencoder,
        incrementaldecoder=InlinecIncrementalDecoder,
        streamreader=InlinecStreamReader,
        streamwriter=base.streamwriter,
    )
Esempio n. 46
0
def search_function(s):
    """Codec hook for the 'token_myprint' source encoding.

    NOTE(review): the returned CodecInfo is registered under the name
    'mylang', which differs from the lookup key 'token_myprint' --
    confirm this mismatch is intentional.
    """
    if s != 'token_myprint':
        return None
    base = encodings.search_function('utf8')  # assume UTF-8 underneath
    return codecs.CodecInfo(
        name='mylang',
        encode=base.encode,
        decode=base.decode,
        incrementalencoder=base.incrementalencoder,
        incrementaldecoder=base.incrementaldecoder,
        streamreader=StreamReader,  # defined elsewhere in this module
        streamwriter=base.streamwriter,
    )
def tarantool_encoding_builder(encoding_name):
    """Return an encoding that pre-processes the input and rewrites it to
    be pure python; None for any name other than "tarantool"."""
    if encoding_name != "tarantool":
        return None
    base = encodings.search_function("utf8")  # delegate everything to UTF-8
    return codecs.CodecInfo(
        name="tarantool",
        encode=base.encode,
        decode=base.decode,
        incrementalencoder=base.incrementalencoder,
        incrementaldecoder=base.incrementaldecoder,
        streamreader=TarantoolStreamReader,  # pre-processing reader, defined elsewhere
        streamwriter=base.streamwriter,
    )
Esempio n. 48
0
def search_function(codec_string):
    """Codec hook: return this module's CodecInfo when *codec_string*
    matches the module-level ``codec_name``; otherwise None."""
    if codec_string != codec_name:
        return None

    base = encodings.search_function('utf8')  # UTF-8 under the hood
    return codecs.CodecInfo(
        name=codec_name,
        encode=base.encode,
        decode=base.decode,
        incrementalencoder=base.incrementalencoder,
        incrementaldecoder=base.incrementaldecoder,
        streamreader=StreamReader,  # defined elsewhere in this module
        streamwriter=base.streamwriter,
    )
Esempio n. 49
0
def search_function(encoding):
    """Codec hook for 'mypy' source files; None for any other encoding.

    On Python 3 the stock UTF-8 CodecInfo is returned unchanged; on
    Python 2 the decode paths are replaced with the mypy-aware ones.
    """
    if encoding != 'mypy':
        return None
    base = encodings.search_function('utf8')  # assume utf8 encoding
    if sys.version_info[0] == 3:
        # Python 3: no translation layer needed.
        return base
    # Python 2: translate mypy syntax while decoding.
    from .mypy_codec import mypy_decode, MyPyIncrementalDecoder, MyPyStreamReader
    return codecs.CodecInfo(
        name='mypy',
        encode=base.encode,
        decode=mypy_decode,
        incrementalencoder=base.incrementalencoder,
        incrementaldecoder=MyPyIncrementalDecoder,
        streamreader=MyPyStreamReader,
        streamwriter=base.streamwriter,
    )
def processor(coding):
    """Codec hook for the 'pypatt' encoding; None for any other name."""
    if coding != 'pypatt':
        return None

    logging.debug('CODING: PYPATT')

    base = encodings.search_function('utf8')  # plain UTF-8 underneath
    return codecs.CodecInfo(
        name='pypatt',
        encode=base.encode,
        decode=base.decode,
        incrementalencoder=base.incrementalencoder,
        incrementaldecoder=base.incrementaldecoder,
        streamreader=StreamReader,  # defined elsewhere in this module
        streamwriter=base.streamwriter,
    )
Esempio n. 51
0
    def get_payload(self, index=None, decode=False):
        """Return the message payload wrapped for unicode-safe use.

        With an explicit *index*, the selected sub-message is wrapped in
        UMessage.  With index=None: multipart payloads come back as a
        list of UMessage, non-text payloads unchanged, and text payloads
        decoded to unicode using the declared charset (falling back to
        iso-8859-1 when missing or unknown).
        """
        message = self.message
        if index is not None:
            return UMessage(message.get_payload(index, decode))

        payload = message.get_payload(index, decode)
        if isinstance(payload, list):
            return [UMessage(msg) for msg in payload]
        if message.get_content_maintype() != 'text':
            return payload

        charset = message.get_content_charset() or 'iso-8859-1'
        # Unknown charsets fall back to latin-1, which never fails.
        if search_function(charset) is None:
            charset = 'iso-8859-1'
        return unicode(payload or '', charset, "replace")
Esempio n. 52
0
def search_function(name):
    """Codec hook mapping any 'dsl-<lang>' encoding onto a DSL parser.

    Names not starting with "dsl-" are rejected with None.  The returned
    codec is UTF-8 in every respect except stream reading, where a parser
    generated for the requested DSL rewrites the source.
    """
    log.debug("Catching '%s' codec name"%(name))

    # Only names of the form "dsl-<something>" are ours.
    if not name.startswith("dsl-"):
        return None
    log.debug("'%s' codec name accepted as DSL '%s' invoking"%(name, name[len("dsl-"):]))

    base = encodings.search_function('utf8')  # build on plain UTF-8
    log.debug("Original UTF8 encoding found, creating new codec with wrapper")
    return codecs.CodecInfo(
        name=name,
        encode=base.encode,
        decode=base.decode,
        incrementalencoder=base.incrementalencoder,
        incrementaldecoder=base.incrementaldecoder,
        streamreader=generate_parser(name[4:]),  # strip the "dsl-" prefix
        streamwriter=base.streamwriter,
    )
Esempio n. 53
0
def validate_charset(charset):
    """Return True if *charset* is a non-empty name Python's codec registry resolves.

    Fix: the original returned the falsy *charset* itself ('' or None)
    instead of False; the result is now always a proper bool.
    """
    return bool(charset) and encodings.search_function(charset) is not None
Esempio n. 54
0
 def __init__(self):
     """Build a mock repository rooted at rdiffweb's bundled test data."""
     # Filesystem paths are kept as bytes, matching rdiff-backup's on-disk names.
     self.repo_root = bytes(pkg_resources.resource_filename('rdiffweb', 'tests'), encoding='utf-8')  # @UndefinedVariable
     self.data_path = os.path.join(self.repo_root, b'rdiff-backup-data')
     # CodecInfo used to decode repository path names; must always resolve.
     self.encoding = encodings.search_function('utf-8')
     assert self.encoding
     self.root_path = MockRdiffPath(self)
Esempio n. 55
0
                      help="Add a semicolon at the end of each code cell. "
                            "This assumes E703 isn't ignored and applies only "
                            "if -n is chosen.")
    PARSER.add_option('--autopep8-options', dest='autopep8_options', action='store',
                      type='string', default='--ignore=E501',
                      help="(in quotes) passed to autopep8. "
                           "pep8 arguments can be passed via its config file.")
    PARSER.add_option('-e', '--encoding', dest='encoding', action='store',
                      type="string", default="utf-8",
                      help="Specifies the encoding of the original files."
                           "The encoding will be preserved")
    (OPTIONS, ARGS) = PARSER.parse_args()

    encoding = OPTIONS.encoding

    # Reject unknown encodings up front: encodings.search_function() returns
    # None for names the codec registry cannot resolve.  A leading "-" means
    # the user most likely forgot the argument to --encoding.
    if encodings.search_function(encoding) is None:
        if encoding.startswith("-"):
            print "error: --encoding option requires an argument"
            exit(1)
        else:
            print "Specified encoding not found"
            exit(1)

    # No positional args: process the current directory tree.
    if len(ARGS) == 0:
        SUCCESSES = process_files(".", OPTIONS)
    else:
        SUCCESSES = sum((process_files(arg, OPTIONS) for arg in ARGS), [])
    # Exit status reflects whether every file was processed successfully.
    if all(SUCCESSES):
        exit(0)
    else:
        exit(1)
Esempio n. 56
0
class IncrementalDecoder(utf_8.IncrementalDecoder):
    # Decoder for the 'pyfu' pseudo-encoding: the file's actual bytes are
    # ignored, and the source is made to decode to a single bootstrap
    # statement that hands control to pyfu.magic().
    def decode(self, input, final=False):
        # NOTE(review): nothing is returned until final=True — intermediate
        # chunks decode to None rather than ''; confirm callers tolerate this.
        if final:
            return super(IncrementalDecoder, self).decode(
                b"import pyfu ; pyfu.magic(__file__, __name__); del pyfu", final=True
            )


class StreamReader(utf_8.StreamReader):
    # Stream-based counterpart of the incremental decoder: the real stream
    # is discarded and replaced by an in-memory stream containing only the
    # pyfu bootstrap statement.
    def __init__(self, *args, **kwargs):
        # Deliberately bypass utf_8.StreamReader.__init__: set up the base
        # codecs.StreamReader, then swap in the synthetic source.
        codecs.StreamReader.__init__(self, *args, **kwargs)
        self.stream = StringIO.StringIO(b"import pyfu ; pyfu.magic(__file__, __name__); del pyfu")


# CodecInfo for the 'pyfu' pseudo-encoding: encoding/writing is plain UTF-8,
# while the decode paths are overridden so that importing a 'pyfu'-encoded
# file executes the bootstrap statement instead of the file's own bytes.
utf8_codec_info = encodings.search_function("utf8")
codec_info = codecs.CodecInfo(
    name="pyfu",
    encode=utf8_codec_info.encode,
    decode=decode,  # `decode` is defined elsewhere in this module
    incrementalencoder=utf8_codec_info.incrementalencoder,
    incrementaldecoder=IncrementalDecoder,
    streamwriter=utf8_codec_info.streamwriter,
    streamreader=StreamReader,
)


def search_function(encoding):
    """codecs.register() hook: return the pyfu codec for 'pyfu', else None."""
    if encoding != "pyfu":
        return None
    return codec_info
Esempio n. 57
0
    def register(self):
        """Register the 'spec' codec with Python's codec registry."""
        # Assume utf8 encoding
        utf8 = encodings.search_function('utf8')

        class StreamReader(utf_8.StreamReader):
            """Used by cPython to deal with a spec file"""
            # The first parameter is deliberately named `sr` (the reader
            # instance) so that `self` keeps referring to the enclosing
            # registrar object, whose dealwith() performs the translation.
            def __init__(sr, stream, *args, **kwargs):
                codecs.StreamReader.__init__(sr, stream, *args, **kwargs)
                data = self.dealwith(sr.stream.readline)
                sr.stream = StringIO(data)

        def decode(text, *args, **kwargs):
            """Used by pypy and pylint to deal with a spec file"""
            # When return_tuple is True (the codec protocol), return
            # (translated_text, length) instead of just the text.
            return_tuple = kwargs.get("return_tuple", True)

            if six.PY3:
                # pylint may hand us a memoryview; normalise to str first.
                if hasattr(text, 'tobytes'):
                    text = text.tobytes().decode('utf8')
                else:
                    text = text.decode('utf8')

            buffered = StringIO(text)

            # Determine if we need to have imports for this string
            # It may be a fragment of the file
            has_spec = regexes['encoding_matcher'].search(buffered.readline())
            no_imports = not has_spec
            buffered.seek(0)

            # Translate the text
            if six.PY2:
                utf8 = encodings.search_function('utf8') # Assume utf8 encoding
                reader = utf8.streamreader(buffered)
            else:
                reader = buffered

            data = self.dealwith(reader.readline, no_imports=no_imports)

            # If nothing was changed, then we want to use the original file/line
            # Also have to replace indentation of original line with indentation of new line
            # To take into account nested describes
            if text and not regexes['only_whitespace'].match(text):
                if regexes['whitespace'].sub('', text) == regexes['whitespace'].sub('', data):
                    bad_indentation = regexes['leading_whitespace'].search(text).groups()[0]
                    good_indentation = regexes['leading_whitespace'].search(data).groups()[0]
                    data = '%s%s' % (good_indentation, text[len(bad_indentation):])

            # If text is empty and data isn't, then we should return text
            if len(text) == 0 and len(data) == 1:
                if return_tuple:
                    return "", 0
                else:
                    return ""

            # Return translated version and it's length
            if return_tuple:
                return data, len(data)
            else:
                return data

        # Python 3's incremental decoder must return plain text, so wrap
        # decode() to strip the (data, length) tuple.
        incrementaldecoder = utf8.incrementaldecoder
        if six.PY3:
            def incremental_decode(decoder, *args, **kwargs):
                """Wrapper for decode from IncrementalDecoder"""
                kwargs["return_tuple"] = False
                return decode(*args, **kwargs)
            incrementaldecoder = type("IncrementalDecoder", (utf8.incrementaldecoder, ), {"decode": incremental_decode})

        def search_function(s):
            """Determine if a file is of spec encoding and return special CodecInfo if it is"""
            if s != 'spec': return None
            return codecs.CodecInfo(
                  name='spec'
                , encode=utf8.encode
                , decode=decode
                , streamreader=StreamReader
                , streamwriter=utf8.streamwriter
                , incrementalencoder=utf8.incrementalencoder
                , incrementaldecoder=incrementaldecoder
                )

        # Do the register
        codecs.register(search_function)