def check_mime(self, ctx):
    """Check the MIME-related header fields and record the file encoding.

    Inspects the ``MIME-Version``, ``Content-Transfer-Encoding`` and
    ``Content-Type`` entries of ``ctx.metadata`` and emits a tag (via
    ``self.tag``) for every missing, duplicated or unexpected value.

    ``ctx.encoding`` is reset to ``None`` on entry and is only set again
    when the Content-Type fields yield exactly one usable encoding.
    """
    ctx.encoding = None

    # MIME-Version:
    versions = ctx.metadata['MIME-Version']
    if not versions:
        self.tag('no-mime-version-header-field',
                 tags.safestr('MIME-Version: 1.0'))
    elif len(versions) > 1:
        self.tag('duplicate-header-field-mime-version')
        # Report each distinct value only once, in a stable order.
        versions = sorted(set(versions))
    for version in versions:
        if version != '1.0':
            self.tag('invalid-mime-version', version, '=>', '1.0')

    # Content-Transfer-Encoding:
    transfer_encodings = ctx.metadata['Content-Transfer-Encoding']
    if not transfer_encodings:
        self.tag('no-content-transfer-encoding-header-field',
                 tags.safestr('Content-Transfer-Encoding: 8bit'))
    elif len(transfer_encodings) > 1:
        self.tag('duplicate-header-field-content-transfer-encoding')
        transfer_encodings = sorted(set(transfer_encodings))
    for transfer_encoding in transfer_encodings:
        if transfer_encoding != '8bit':
            self.tag('invalid-content-transfer-encoding',
                     transfer_encoding, '=>', '8bit')

    # Content-Type:
    hint_template = 'text/plain; charset=<encoding>'
    content_types = ctx.metadata['Content-Type']
    if not content_types:
        self.tag('no-content-type-header-field',
                 tags.safestr('Content-Type: ' + hint_template))
        return
    if len(content_types) > 1:
        self.tag('duplicate-header-field-content-type')
        content_types = sorted(set(content_types))

    usable_encodings = set()
    for content_type in content_types:
        match = re.search(r'(\Atext/plain; )?\bcharset=([^\s;]+)\Z',
                          content_type)
        if match is None:
            # No trailing charset parameter at all.
            self.tag('invalid-content-type', content_type, '=>',
                     hint_template)
            continue
        media_prefix, encoding = match.group(1, 2)
        try:
            ascii_compatible = encinfo.is_ascii_compatible_encoding(
                encoding, missing_ok=False)
        except encinfo.EncodingLookupError:
            if encoding != 'CHARSET':
                self.tag('unknown-encoding', encoding)
            elif not ctx.is_template:
                # The 'CHARSET' placeholder is only tagged outside
                # templates.
                self.tag('boilerplate-in-content-type', content_type)
            encoding = None
        else:
            if not ascii_compatible:
                self.tag('non-ascii-compatible-encoding', encoding)
            elif not encinfo.is_portable_encoding(encoding):
                replacement = encinfo.propose_portable_encoding(encoding)
                if replacement is None:
                    self.tag('non-portable-encoding', encoding)
                else:
                    self.tag('non-portable-encoding', encoding, '=>',
                             replacement)
                    # Continue with the proposed portable name.
                    encoding = replacement
            if ctx.language is not None:
                unrepresentable = (
                    ctx.language.get_unrepresentable_characters(encoding))
                if unrepresentable:
                    if len(unrepresentable) > 5:
                        # Keep the first four items and an ellipsis marker.
                        unrepresentable[4:] = ['...']
                    self.tag('unrepresentable-characters', encoding,
                             *unrepresentable)
        if media_prefix is None:
            # A charset was found, but not in the exact
            # "text/plain; charset=..." form.
            hint = hint_template
            if encoding is not None:
                hint = hint.replace('<encoding>', encoding)
            self.tag('invalid-content-type', content_type, '=>', hint)
        if encoding is not None:
            usable_encodings.add(encoding)

    if len(usable_encodings) == 1:
        [ctx.encoding] = usable_encodings
def t(encoding):
    """Check that *encoding* is not reported as ASCII-compatible.

    The lookup is done twice: once with the default arguments and once
    with ``missing_ok=False``; both results are passed to ``assert_false``.
    """
    default_result = E.is_ascii_compatible_encoding(encoding)
    assert_false(default_result)
    strict_result = E.is_ascii_compatible_encoding(
        encoding, missing_ok=False)
    assert_false(strict_result)
def _test_missing(self, encoding):
    """Check how an unknown *encoding* is handled by the lookup.

    With default arguments the result is passed to ``assert_false``;
    with ``missing_ok=False`` the call is expected to raise
    ``E.EncodingLookupError``.
    """
    default_result = E.is_ascii_compatible_encoding(encoding)
    assert_false(default_result)
    with assert_raises(E.EncodingLookupError):
        E.is_ascii_compatible_encoding(encoding, missing_ok=False)
def _parse_entry(self, i, msgid_offset, msgstr_offset):
    """Parse one message and return it as a ``polib.MOEntry``.

    Parameters:
        i: index of the entry; entry 0 is used to settle the encoding,
            later entries are checked for ordering and duplicates.
        msgid_offset: position passed to ``self._read_ints`` to obtain
            the (length, offset) pair of the original string.
        msgstr_offset: position passed to ``self._read_ints`` to obtain
            the (length, offset) pair of the translation.

    Raises:
        SyntaxError: if a string is not null-terminated, the data ends
            early, a null byte appears where it is not allowed, or the
            entry duplicates / sorts before the previous one.

    Side effects: updates ``self._last_msgid`` and, for entry 0,
    ``self._encoding``.
    """
    view = self._view

    # --- msgid (optionally "singular\0plural") ---
    [length, offset] = self._read_ints(at=msgid_offset, n=2)
    msgid = view[offset:offset + length].tobytes()
    try:
        # NOTE(review): this comparison assumes that indexing self._view
        # yields a length-1 bytes object - confirm against how the view
        # is created.
        if view[offset + length] != b'\0':
            raise SyntaxError('msgid is not null-terminated')
    except IndexError:
        raise SyntaxError('truncated file')
    msgids = msgid.split(b'\0', 2)
    msgid = msgids[0]
    if len(msgids) > 2:
        raise SyntaxError('unexpected null byte in msgid')

    # --- msgstr (null-separated plural forms, if any) ---
    [length, offset] = self._read_ints(at=msgstr_offset, n=2)
    msgstr = view[offset:offset + length].tobytes()
    try:
        if view[offset + length] != b'\0':
            raise SyntaxError('msgstr is not null-terminated')
    except IndexError:
        raise SyntaxError('truncated file')
    msgstrs = msgstr.split(b'\0')
    if len(msgids) == 1 and len(msgstrs) > 1:
        # Several translations only make sense for a plural message.
        raise SyntaxError('unexpected null byte in msgstr')

    encoding = self._encoding
    if i == 0:
        if encoding is None and msgid == b'':
            # http://git.savannah.gnu.org/cgit/gettext.git/tree/gettext-runtime/intl/dcigettext.c?id=v0.18.3#n1106
            match = re.search(b'charset=([^ \t\n]+)', msgstr)
            if match is not None:
                try:
                    encoding = match.group(1).decode('ASCII')
                except UnicodeError:
                    # Non-ASCII charset name: fall through to the
                    # ASCII default below.
                    pass
        if encoding is None:
            encoding = 'ASCII'
        else:
            if not encodings.is_ascii_compatible_encoding(encoding):
                encoding = 'ASCII'
        self._encoding = encoding
    else:
        # Compare the singular msgid (bytes) with the previous one.
        # Comparing the ``msgids`` list against the stored bytes value
        # could never be equal, so duplicates went unnoticed.
        if msgid == self._last_msgid:
            raise SyntaxError('duplicate message definition')
        elif msgid < self._last_msgid:
            raise SyntaxError('messages are not sorted')
    self._last_msgid = msgid  # pylint: disable=attribute-defined-outside-init
    assert encoding is not None

    # A b'\x04' byte separates the part stored as msgctxt from the msgid.
    msgid, *msgctxt = msgid.split(b'\x04', 1)
    kwargs = dict(msgid=msgid.decode(encoding))
    if msgctxt:
        [msgctxt] = msgctxt
        kwargs.update(msgctxt=msgctxt.decode(encoding))
    if len(msgids) == 1:
        assert [msgstr] == msgstrs
        kwargs.update(msgstr=msgstr.decode(encoding))
    else:
        assert len(msgids) == 2
        assert len(msgstrs) >= 1
        kwargs.update(msgid_plural=msgids[1].decode(encoding))
        kwargs.update(msgstr_plural={
            index: form.decode(encoding)
            for index, form in enumerate(msgstrs)
        })
    entry = polib.MOEntry(**kwargs)
    entry.comment = None
    entry.occurrences = ()
    entry.flags = ()  # https://bitbucket.org/izi/polib/issues/47
    entry.translated = lambda: True
    entry.previous_msgctxt = None
    entry.previous_msgid = None
    entry.previous_msgid_plural = None
    return entry