def check_mime(self, ctx):
    """Check the MIME-related header fields and work out the encoding.

    Looks at the ``MIME-Version``, ``Content-Transfer-Encoding`` and
    ``Content-Type`` entries of ``ctx.metadata`` and reports every problem
    found through ``self.tag()``.

    ``ctx.encoding`` is reset to ``None`` on entry and is only assigned when
    the Content-Type value(s) yield exactly one usable encoding.  If there
    is no Content-Type field at all, the method returns right after
    reporting that.
    """
    ctx.encoding = None

    # MIME-Version: the only accepted value is "1.0".
    versions = ctx.metadata['MIME-Version']
    if len(versions) > 1:
        self.tag('duplicate-header-field-mime-version')
        # Collapse repeats so each distinct value is reported only once.
        versions = sorted(set(versions))
    for version in versions:
        if version == '1.0':
            continue
        self.tag('invalid-mime-version', version, '=>', '1.0')
    if not versions:
        self.tag(
            'no-mime-version-header-field',
            tags.safestr('MIME-Version: 1.0'),
        )

    # Content-Transfer-Encoding: the only accepted value is "8bit".
    transfer_encodings = ctx.metadata['Content-Transfer-Encoding']
    if len(transfer_encodings) > 1:
        self.tag('duplicate-header-field-content-transfer-encoding')
        transfer_encodings = sorted(set(transfer_encodings))
    for transfer_encoding in transfer_encodings:
        if transfer_encoding == '8bit':
            continue
        self.tag(
            'invalid-content-transfer-encoding',
            transfer_encoding, '=>', '8bit',
        )
    if not transfer_encodings:
        self.tag(
            'no-content-transfer-encoding-header-field',
            tags.safestr('Content-Transfer-Encoding: 8bit'),
        )

    # Content-Type: expected form is "text/plain; charset=<encoding>".
    hint_template = 'text/plain; charset=<encoding>'
    content_types = ctx.metadata['Content-Type']
    if len(content_types) > 1:
        self.tag('duplicate-header-field-content-type')
        content_types = sorted(set(content_types))
    elif not content_types:
        self.tag(
            'no-content-type-header-field',
            tags.safestr('Content-Type: ' + hint_template),
        )
        return

    usable_encodings = set()
    for content_type in content_types:
        found = re.search(
            r'(\Atext/plain; )?\bcharset=([^\s;]+)\Z', content_type)
        if not found:
            # No trailing charset parameter at all.
            self.tag('invalid-content-type', content_type, '=>', hint_template)
            continue
        charset = found.group(2)
        try:
            ascii_compatible = encinfo.is_ascii_compatible_encoding(
                charset, missing_ok=False)
        except encinfo.EncodingLookupError:
            if charset != 'CHARSET':
                self.tag('unknown-encoding', charset)
            elif not ctx.is_template:
                # The literal "CHARSET" is only reported for non-templates.
                self.tag('boilerplate-in-content-type', content_type)
            # The lookup failed, so this value contributes no encoding.
            charset = None
        else:
            if not ascii_compatible:
                self.tag('non-ascii-compatible-encoding', charset)
            elif not encinfo.is_portable_encoding(charset):
                replacement = encinfo.propose_portable_encoding(charset)
                if replacement is None:
                    self.tag('non-portable-encoding', charset)
                else:
                    self.tag(
                        'non-portable-encoding', charset, '=>', replacement)
                    # Carry on with the proposed replacement.
                    charset = replacement
            if ctx.language is not None:
                unrepresentable = ctx.language.get_unrepresentable_characters(
                    charset)
                if unrepresentable:
                    if len(unrepresentable) > 5:
                        # Keep the first four entries and put a single
                        # '...' marker in place of the rest (in place).
                        unrepresentable[4:] = ['...']
                    self.tag(
                        'unrepresentable-characters',
                        charset, *unrepresentable)
        if found.group(1) is None:
            # A charset was found, but the value does not consist of the
            # "text/plain; " prefix directly followed by it.
            hint = hint_template
            if charset is not None:
                hint = hint_template.replace('<encoding>', charset)
            self.tag('invalid-content-type', content_type, '=>', hint)
        if charset is not None:
            usable_encodings.add(charset)

    # Only an unambiguous result is stored on the context.
    if len(usable_encodings) == 1:
        (ctx.encoding,) = usable_encodings
def test_notfound(self):
    # No portable replacement is expected to be proposed for ISO-8859-16.
    assert_is_none(E.propose_portable_encoding('ISO-8859-16'))
def test_identity(self):
    # For ISO-8859-2 the proposed portable encoding is the name itself.
    name = 'ISO-8859-2'
    assert_equal(E.propose_portable_encoding(name), name)
def t(encoding, expected_portable_encoding):
    # Helper: the proposal for `encoding` must equal the expected name.
    assert_equal(
        E.propose_portable_encoding(encoding),
        expected_portable_encoding,
    )
def test_8859(self):
    # With the extra encodings installed, the bare name '8859-2' should be
    # mapped to its 'ISO-'-prefixed form.
    E.install_extra_encodings()
    bare_name = '8859-2'
    proposed = E.propose_portable_encoding(bare_name)
    assert_equal('ISO-' + bare_name, proposed)