예제 #1
0
 def check_mime(self, ctx):
     """Check the MIME header fields and derive the declared encoding.

     Inspects the ``MIME-Version``, ``Content-Transfer-Encoding`` and
     ``Content-Type`` values found in ``ctx.metadata`` and reports every
     problem through ``self.tag``.  ``ctx.encoding`` is reset to ``None``
     up front and is only set again when the Content-Type values leave
     exactly one distinct encoding name after all checks.
     """
     def collapse(values, duplicate_tag):
         # A repeated header field is reported once; its values are then
         # de-duplicated and sorted before being checked one by one.
         if len(values) > 1:
             self.tag(duplicate_tag)
             return sorted(set(values))
         return values

     ctx.encoding = None

     # MIME-Version:
     versions = collapse(ctx.metadata['MIME-Version'],
                         'duplicate-header-field-mime-version')
     if len(versions) == 0:
         self.tag('no-mime-version-header-field',
                  tags.safestr('MIME-Version: 1.0'))
     for version in versions:
         if version != '1.0':
             self.tag('invalid-mime-version', version, '=>', '1.0')

     # Content-Transfer-Encoding:
     transfer_encodings = collapse(
         ctx.metadata['Content-Transfer-Encoding'],
         'duplicate-header-field-content-transfer-encoding')
     if len(transfer_encodings) == 0:
         self.tag('no-content-transfer-encoding-header-field',
                  tags.safestr('Content-Transfer-Encoding: 8bit'))
     for transfer_encoding in transfer_encodings:
         if transfer_encoding != '8bit':
             self.tag('invalid-content-transfer-encoding',
                      transfer_encoding, '=>', '8bit')

     # Content-Type:
     hint_template = 'text/plain; charset=<encoding>'
     content_types = collapse(ctx.metadata['Content-Type'],
                              'duplicate-header-field-content-type')
     if len(content_types) == 0:
         # Without a Content-Type there is nothing left to analyse.
         self.tag('no-content-type-header-field',
                  tags.safestr('Content-Type: ' + hint_template))
         return

     found = set()
     for ct in content_types:
         match = re.search(r'(\Atext/plain; )?\bcharset=([^\s;]+)\Z', ct)
         if match is None:
             self.tag('invalid-content-type', ct, '=>', hint_template)
             continue
         # ``prefix`` is None when the value does not start with the
         # expected "text/plain; " media type.
         prefix, encoding = match.groups()
         try:
             ascii_compatible = encinfo.is_ascii_compatible_encoding(
                 encoding, missing_ok=False)
         except encinfo.EncodingLookupError:
             if encoding != 'CHARSET':
                 self.tag('unknown-encoding', encoding)
             elif not ctx.is_template:
                 # The literal placeholder is only reported outside of
                 # templates.
                 self.tag('boilerplate-in-content-type', ct)
             encoding = None
         else:
             if not ascii_compatible:
                 self.tag('non-ascii-compatible-encoding', encoding)
             elif not encinfo.is_portable_encoding(encoding):
                 replacement = encinfo.propose_portable_encoding(encoding)
                 if replacement is None:
                     self.tag('non-portable-encoding', encoding)
                 else:
                     self.tag('non-portable-encoding', encoding, '=>',
                              replacement)
                     # Continue with the proposed portable name.
                     encoding = replacement
             if ctx.language is not None:
                 unrepresentable = (
                     ctx.language.get_unrepresentable_characters(encoding))
                 if unrepresentable:
                     if len(unrepresentable) > 5:
                         # Keep the first four entries and replace the
                         # rest with a single ellipsis marker (in place).
                         unrepresentable[4:] = ['...']
                     self.tag('unrepresentable-characters', encoding,
                              *unrepresentable)
         if prefix is None:
             if encoding is None:
                 hint = hint_template
             else:
                 hint = hint_template.replace('<encoding>', encoding)
             self.tag('invalid-content-type', ct, '=>', hint)
         if encoding is not None:
             found.add(encoding)

     if len(found) == 1:
         ctx.encoding = next(iter(found))
예제 #2
0
 def t(encoding):
     """Assert that *encoding* is reported as not ASCII-compatible.

     The check is made twice: once with the default arguments and once
     with ``missing_ok=False``.
     """
     for extra in ({}, {'missing_ok': False}):
         assert_false(E.is_ascii_compatible_encoding(encoding, **extra))
예제 #3
0
 def _test_missing(self, encoding):
     """Check how a lookup of *encoding* behaves in both lookup modes.

     With default arguments the result must be falsy; with
     ``missing_ok=False`` the call must raise ``E.EncodingLookupError``.
     """
     default_result = E.is_ascii_compatible_encoding(encoding)
     assert_false(default_result)
     expected_error = E.EncodingLookupError
     with assert_raises(expected_error):
         E.is_ascii_compatible_encoding(encoding, missing_ok=False)
예제 #4
0
 def _parse_entry(self, i, msgid_offset, msgstr_offset):
     view = self._view
     [length, offset] = self._read_ints(at=msgid_offset, n=2)
     msgid = view[offset:offset + length].tobytes()
     try:
         if view[offset + length] != b'\0':
             raise SyntaxError('msgid is not null-terminated')
     except IndexError:
         raise SyntaxError('truncated file')
     msgids = msgid.split(b'\0', 2)
     msgid = msgids[0]
     if len(msgids) > 2:
         raise SyntaxError('unexpected null byte in msgid')
     [length, offset] = self._read_ints(at=msgstr_offset, n=2)
     msgstr = view[offset:offset + length].tobytes()
     try:
         if view[offset + length] != b'\0':
             raise SyntaxError('msgstr is not null-terminated')
     except IndexError:
         raise SyntaxError('truncated file')
     msgstrs = msgstr.split(b'\0')
     if len(msgids) == 1 and len(msgstrs) > 1:
         raise SyntaxError('unexpected null byte in msgstr')
     encoding = self._encoding
     if i == 0:
         if encoding is None and msgid == b'':
             # http://git.savannah.gnu.org/cgit/gettext.git/tree/gettext-runtime/intl/dcigettext.c?id=v0.18.3#n1106
             match = re.search(b'charset=([^ \t\n]+)', msgstr)
             if match is not None:
                 try:
                     encoding = match.group(1).decode('ASCII')
                 except UnicodeError:
                     pass
         if encoding is None:
             encoding = 'ASCII'
         else:
             if not encodings.is_ascii_compatible_encoding(encoding):
                 encoding = 'ASCII'
         self._encoding = encoding
     else:
         if msgids == self._last_msgid:
             raise SyntaxError('duplicate message definition')
         elif msgid < self._last_msgid:
             raise SyntaxError('messages are not sorted')
     self._last_msgid = msgid  # pylint: disable=attribute-defined-outside-init
     assert encoding is not None
     msgid, *msgctxt = msgid.split(b'\x04', 1)
     kwargs = dict(msgid=msgid.decode(encoding))
     if msgctxt:
         [msgctxt] = msgctxt
         kwargs.update(msgctxt=msgctxt.decode(encoding))
     if len(msgids) == 1:
         assert [msgstr] == msgstrs
         kwargs.update(msgstr=msgstr.decode(encoding))
     else:
         assert len(msgids) == 2
         assert len(msgstrs) >= 1
         kwargs.update(msgid_plural=msgids[1].decode(encoding))
         kwargs.update(msgstr_plural={
             i: s.decode(encoding)
             for i, s in enumerate(msgstrs)
         })
     entry = polib.MOEntry(**kwargs)
     entry.comment = None
     entry.occurrences = ()
     entry.flags = ()  # https://bitbucket.org/izi/polib/issues/47
     entry.translated = lambda: True
     entry.previous_msgctxt = None
     entry.previous_msgid = None
     entry.previous_msgid_plural = None
     return entry