def test_decode(self):
    """Exercise idna.decode() with valid inputs and inputs expected to raise IDNAError."""
    expected_domain = u'\u30c6\u30b9\u30c8.\u30c6\u30b9\u30c8'

    # A-label, mixed and U-label spellings must all decode to the same text.
    for spelling in (
        'xn--zckzah.xn--zckzah',
        u'\u30c6\u30b9\u30c8.xn--zckzah',
        u'\u30c6\u30b9\u30c8.\u30c6\u30b9\u30c8',
    ):
        self.assertEqual(idna.decode(spelling), expected_domain)

    self.assertEqual(idna.decode('abc.abc'), u'abc.abc')
    self.assertEqual(
        idna.decode('xn---------90gglbagaar.aa'),
        u'\u0521\u0525\u0523-\u0523\u0523-----\u0521\u0523\u0523\u0523.aa')

    # These inputs must be rejected, regardless of letter case.
    for rejected in ('XN---------90GGLBAGAAC.AA', 'xn---------90gglbagaac.aa'):
        self.assertRaises(idna.IDNAError, idna.decode, rejected)
def normalize(self, hostname):
    """Validate *hostname* and store its IDNA and decoded forms.

    Sets ``self.idna`` (result of idna.encode) and ``self.value`` (result of
    idna.decode), both computed from the lower-cased hostname with one
    trailing dot removed.

    Raises ObservableValidationError when is_hostname() rejects the input.
    """
    if not is_hostname(hostname):
        message = "Invalid Hostname (is_hostname={}): {}".format(is_hostname(hostname), hostname)
        raise ObservableValidationError(message)

    # Drop a single trailing dot before converting.
    trimmed = hostname[:-1] if hostname.endswith('.') else hostname
    lowered = trimmed.lower()
    self.idna = unicode(idna.encode(lowered))
    self.value = unicode(idna.decode(lowered))
def run_domain_checks_on_domain(domain, rounded_time, env, dns_domains, dns_zonefiles, mail_domains, web_domains, domains_with_a_records, ssl_certificates):
    """Run every check that applies to *domain* and collect the output.

    Which checks run depends on whether the domain is the primary hostname
    and on its membership in *dns_domains*, *mail_domains* and *web_domains*.

    Returns a ``(domain, output)`` tuple, where *output* is the
    BufferedOutput holding the heading and everything the checks reported.
    """
    output = BufferedOutput()

    # The database holds the IDNA-encoded name; the heading shows Unicode.
    try:
        heading = idna.decode(domain.encode('ascii'))
        output.add_heading(heading)
    except (ValueError, UnicodeError, idna.IDNAError) as e:
        # The stored name is not valid: show it raw and report the problem.
        output.add_heading(domain)
        output.print_error("Domain name is invalid: " + str(e))

    if domain == env["PRIMARY_HOSTNAME"]:
        check_primary_hostname_dns(domain, env, output, dns_domains, dns_zonefiles)

    if domain in dns_domains:
        check_dns_zone(domain, env, output, dns_zonefiles)

    if domain in mail_domains:
        check_mail_domain(domain, env, output)

    if domain in web_domains:
        check_web_domain(domain, rounded_time, ssl_certificates, env, output)

    # Suggestions are emitted last, after all the other checks.
    if domain in dns_domains:
        check_dns_zone_suggestions(domain, env, output, dns_zonefiles, domains_with_a_records)

    return (domain, output)
def get_domain(emailaddr, as_unicode=True):
    """Return the domain part of an email address.

    :param emailaddr: address containing at least one "@"; everything after
        the first "@" is treated as the domain.
    :param as_unicode: when true (the default) the IDNA (ASCII) domain is
        decoded to Unicode for display.
    :returns: the domain; if IDNA decoding fails the domain is returned
        exactly as it appears in the address.
    :raises IndexError: if *emailaddr* contains no "@".
    """
    ret = emailaddr.split("@", 1)[1]
    if as_unicode:
        try:
            ret = idna.decode(ret.encode("ascii"))
        except (ValueError, UnicodeError, idna.IDNAError):
            # An undecodable domain should not crash a display helper;
            # fall back to the raw value.
            pass
    return ret
def decode_punycode(label):
    """Return the decoded form of an ``xn--`` label.

    Labels without the ``xn--`` prefix, and labels whose decoding raises
    UnicodeError, are returned unchanged.
    """
    if not label.startswith("xn--"):
        return label
    try:
        decoded = idna.decode(label.encode('ascii'))
    except UnicodeError:
        return label
    return decoded
def can_provision_for_domain(domain):
    """Tell whether a certificate can be provisioned for *domain*.

    Returns True when the domain is not internationalized and public DNS
    resolves it to exactly this machine's configured address(es). Otherwise
    stores an explanation in ``problems[domain]`` and returns False.
    """
    # Domains are stored in IDNA (ASCII) form. If decoding changes the name,
    # it is an internationalized domain, which is reported as unsupported.
    decoded = idna.decode(domain.encode("ascii"))
    if decoded != domain:
        problems[domain] = "Let's Encrypt does not yet support provisioning certificates for internationalized domains."
        return False

    # Domain control validation needs public DNS to point here. When IPv6 is
    # configured, both address families are checked.
    import dns.resolver

    expected_records = [("A", env["PUBLIC_IP"]), ("AAAA", env.get("PUBLIC_IPV6"))]
    for rtype, value in expected_records:
        if not value:
            # No address configured for this record type (IPv6 not set up).
            continue

        try:
            # The trailing period makes the name absolute, which prevents a
            # fall-back lookup with a search domain appended.
            response = dns.resolver.query(domain + ".", rtype)
        except (dns.resolver.NoNameservers, dns.resolver.NXDOMAIN, dns.resolver.NoAnswer) as e:
            # NoAnswer's str is empty, hence the repr() fallback.
            detail = str(e) or repr(e)
            problems[domain] = "DNS isn't configured properly for this domain: DNS resolution failed (%s: %s)." % (rtype, detail)
            return False
        except Exception as e:
            problems[domain] = "DNS isn't configured properly for this domain: DNS lookup had an error: %s." % str(e)
            return False

        points_here = len(response) == 1 and str(response[0]) == value
        if not points_here:
            listing = ", ".join(str(r) for r in response)
            problems[domain] = "Domain control validation cannot be performed for this domain because DNS points the domain to another machine (%s %s)." % (rtype, listing)
            return False

    return True
def normalize(self, hostname):
    """Validate *hostname* and store its IDNA and decoded forms.

    The value returned by ``Hostname.check_type`` is lower-cased and used to
    set ``self.idna`` (idna.encode) and ``self.value`` (idna.decode).

    Raises ObservableValidationError when ``Hostname.check_type`` returns a
    falsy value; the message reports that result together with the original
    input.
    """
    # Keep the caller's input intact so that the error message can show what
    # was actually rejected instead of the falsy check result.
    checked = Hostname.check_type(hostname)
    if not checked:
        raise ObservableValidationError(
            "Invalid Hostname (check_type={}): {}".format(checked, hostname))

    lowered = checked.lower()
    self.idna = unicode(idna.encode(lowered))
    self.value = unicode(idna.decode(lowered))
def decode_punycode(label):
    """Decode *label* when it matches PUNY_RE.

    Labels that do not match, and labels whose decoding raises UnicodeError,
    are returned unchanged.
    """
    if not PUNY_RE.match(label):
        return label
    try:
        decoded = idna.decode(label.encode('ascii'))
    except UnicodeError:
        return label
    return decoded
def _getPageByProtocol(self, href): """ Возвращает страницу, если href - протокол вида page://, и None в противном случае """ protocol = u"page://" page = None # Если есть якорь, то отсечем его anchorpos = href.rfind("/#") if anchorpos != -1: href = href[:anchorpos] if href.startswith(protocol): uid = href[len(protocol):] try: uid = idna.decode(uid) except UnicodeError: # Под IE ссылки не преобразуются в кодировку IDNA pass if uid.endswith("/"): uid = uid[:-1] page = Application.pageUidDepot[uid] return page
def _build_general_name(backend, gn):
    """Convert the general name structure *gn* into the matching x509 object.

    Handles DNS, URI, registered ID and IP address names. Any other type
    raises x509.UnsupportedGeneralNameType.
    """
    lib = backend._lib
    ffi = backend._ffi
    name_type = gn.type

    if name_type == lib.GEN_DNS:
        dns_bytes = ffi.buffer(gn.d.dNSName.data, gn.d.dNSName.length)[:]
        return x509.DNSName(idna.decode(dns_bytes))

    if name_type == lib.GEN_URI:
        uri_bytes = ffi.buffer(
            gn.d.uniformResourceIdentifier.data,
            gn.d.uniformResourceIdentifier.length
        )[:]
        parsed = urllib_parse.urlparse(uri_bytes.decode("ascii"))
        hostname = idna.decode(parsed.hostname)
        netloc = hostname
        if parsed.port:
            netloc = hostname + u":" + six.text_type(parsed.port)

        # Rebuilding the URL this way should give something semantically
        # indistinguishable from the original, but not necessarily an
        # identical string.
        rebuilt = urllib_parse.urlunparse((
            parsed.scheme,
            netloc,
            parsed.path,
            parsed.params,
            parsed.query,
            parsed.fragment
        ))
        return x509.UniformResourceIdentifier(rebuilt)

    if name_type == lib.GEN_RID:
        oid = _obj2txt(backend, gn.d.registeredID)
        return x509.RegisteredID(x509.ObjectIdentifier(oid))

    if name_type == lib.GEN_IPADD:
        packed = ffi.buffer(gn.d.iPAddress.data, gn.d.iPAddress.length)[:]
        return x509.IPAddress(ipaddress.ip_address(packed))

    # otherName, x400Address or ediPartyName
    type_name = x509._GENERAL_NAMES.get(gn.type, gn.type)
    raise x509.UnsupportedGeneralNameType(
        "{0} is not a supported type".format(type_name),
        gn.type
    )
def __init__(self, domain_name):
    """
    Parse a domain name given in either Unicode (IDN) or punycode form.

    The name is detected as an IDN when any label carries the punycode
    prefix ``xn--`` or when the name contains non-ASCII characters. The
    ASCII (punycode) form is always extracted into ``self._domain``; for an
    IDN the Unicode form is extracted into ``self._idn``, otherwise
    ``self._idn`` is None.

    :param domain_name: The domain name to parse (can be punycode or IDN),
        as text or as UTF-8 encoded bytes
    :raises idna.IDNAError: if the name cannot be decoded or encoded
    """
    # Proper handling for Py2/3, convert from bytes into Unicode
    if isinstance(domain_name, bytes):
        domain_name = domain_name.decode('utf-8')

    # Only a label that starts with the ACE prefix is punycode; an ordinary
    # ASCII label that merely contains "xn-" (e.g. "foxn-bar") is not an IDN.
    is_idn = any(
        label.lower().startswith('xn--')
        for label in domain_name.split('.')
    )

    domain = domain_name
    # Decoding the whole name yields the Unicode form and validates each label.
    idn = idna.decode(domain)

    # Encoding to ASCII fails when there are characters outside the ASCII
    # charset; in that case the input is already the Unicode form.
    try:
        domain_name.encode('ascii')
    except UnicodeEncodeError:
        is_idn = True
        domain = idna.encode(domain_name).decode('utf-8')
        idn = domain_name

    self._domain = tldextract.extract(domain)
    self._is_idn = is_idn
    self._idn = tldextract.extract(idn) if is_idn else None
def can_provision_for_domain(domain):
    """Tell whether a certificate can be provisioned for *domain*.

    Returns True when the domain is not internationalized and public DNS
    resolves it to exactly this machine's configured address(es). Otherwise
    stores an explanation in ``problems[domain]`` and returns False.
    """

    def record_text(record):
        # Workaround: the textual form of a record may come back as bytes
        # (seen for AAAA answers), so convert it manually. See
        # https://github.com/rthalley/dnspython/issues/204
        text = record.to_text()
        if isinstance(text, bytes):
            text = text.decode("utf-8")
        return text

    # Domains are stored in IDNA (ASCII) form. If decoding changes the name,
    # it is an internationalized domain, which is reported as unsupported.
    decoded = idna.decode(domain.encode("ascii"))
    if decoded != domain:
        problems[domain] = "Let's Encrypt does not yet support provisioning certificates for internationalized domains."
        return False

    # Domain control validation needs public DNS to point here. When IPv6 is
    # configured, both address families are checked.
    import dns.resolver

    expected_records = [("A", env["PUBLIC_IP"]), ("AAAA", env.get("PUBLIC_IPV6"))]
    for rtype, value in expected_records:
        if not value:
            # No address configured for this record type (IPv6 not set up).
            continue

        try:
            # The trailing period makes the name absolute, which prevents a
            # fall-back lookup with a search domain appended.
            response = dns.resolver.query(domain + ".", rtype)
        except (dns.resolver.NoNameservers, dns.resolver.NXDOMAIN, dns.resolver.NoAnswer) as e:
            # NoAnswer's str is empty, hence the repr() fallback.
            detail = str(e) or repr(e)
            problems[domain] = "DNS isn't configured properly for this domain: DNS resolution failed (%s: %s)." % (rtype, detail)
            return False
        except Exception as e:
            problems[domain] = "DNS isn't configured properly for this domain: DNS lookup had an error: %s." % str(e)
            return False

        points_here = len(response) == 1 and record_text(response[0]) == value
        if not points_here:
            listing = ", ".join(record_text(r) for r in response)
            problems[domain] = (
                "Domain control validation cannot be performed for this domain because DNS points the domain to another machine (%s %s)."
                % (rtype, listing)
            )
            return False

    return True
def prettify_idn_email_address(email):
    """Return *email* with its IDNA (ASCII) domain decoded to Unicode.

    This is the opposite of sanitize_idn_email_address: domain names are
    stored in IDNA form but shown to the user in Unicode.

    The address is returned unchanged when it does not contain exactly one
    "@" or when the domain cannot be decoded.
    """
    # Without exactly one "@" there is no single domain part to decode.
    if email.count("@") != 1:
        return email

    localpart, domainpart = email.split("@")
    try:
        domainpart = idna.decode(domainpart.encode("ascii"))
    except (ValueError, UnicodeError, idna.IDNAError):
        # Failed to decode IDNA. Should never happen.
        return email
    return localpart + "@" + domainpart
def get_domain(emailaddr, as_unicode=True):
    """Return the domain part of an email address.

    With *as_unicode* true, the IDNA domain is decoded to Unicode for
    display; when decoding fails the domain is returned as found in the
    address.
    """
    domain = emailaddr.split('@', 1)[1]
    if not as_unicode:
        return domain
    try:
        return idna.decode(domain.encode('ascii'))
    except (ValueError, UnicodeError, idna.IDNAError):
        # The address is invalid, but this is not the place to complain
        # about it.
        return domain
def runTest(self):
    """Check idna.decode() and idna.encode() against one conformance-test row.

    ``self.fields`` holds the row: types, source, expected Unicode result,
    expected ASCII result and, optionally, a fifth flag. An expected result
    starting with "[" means that an error is required.
    """
    fields = self.fields
    if not fields:
        return

    too_new = ("Test requires support for a newer"
               " version of Unicode than this Python supports")
    conversion_errors = (idna.IDNAError, UnicodeError, ValueError)

    try:
        types, source, to_unicode, to_ascii = [
            unicode_fixup(field) for field in fields[:4]]
        if (unicode_fixup(u"\\uD804\\uDC39") in source and
                sys.version_info[0] < 3):
            raise unittest.SkipTest(
                "Python 2's Unicode support is too old for this test")
    except ValueError:
        raise unittest.SkipTest(
            "Test requires Python wide Unicode support")

    if source in _SKIP_TESTS:
        return

    # Empty expectation columns fall back to the previous column.
    to_unicode = to_unicode or source
    to_ascii = to_ascii or to_unicode
    nv8 = (len(fields) > 4 and fields[4])

    try:
        output = idna.decode(source, uts46=True, strict=True)
        if to_unicode[0] == u"[":
            self.fail("decode() did not emit required error {0} for {1}".format(to_unicode, repr(source)))
        self.assertEqual(output, to_unicode, "unexpected decode() output")
    except conversion_errors as exc:
        if unicode(exc).startswith(u"Unknown"):
            raise unittest.SkipTest(too_new)
        # An error is acceptable only when the row expects one or is flagged.
        if to_unicode[0] != u"[" and not nv8:
            raise

    transitional_modes = {
        u"B": (True, False),
        u"T": (True,),
        u"N": (False,),
    }
    for transitional in transitional_modes[types]:
        try:
            output = idna.encode(source, uts46=True, strict=True,
                                 transitional=transitional).decode("ascii")
            if to_ascii[0] == u"[":
                self.fail(
                    "encode(transitional={0}) did not emit required error {1} for {2}".
                    format(transitional, to_ascii, repr(source)))
            self.assertEqual(output, to_ascii,
                             "unexpected encode(transitional={0}) output".
                             format(transitional))
        except conversion_errors as exc:
            if unicode(exc).startswith(u"Unknown"):
                raise unittest.SkipTest(too_new)
            if to_ascii[0] != u"[" and not nv8:
                raise
def runTest(self):
    """Check idna.decode() and idna.encode() against one conformance-test row.

    ``self.fields`` holds the row: types, source, expected Unicode result,
    expected ASCII result and, optionally, a fifth flag. An expected result
    starting with "[" means that an error is required.
    """
    fields = self.fields
    if not fields:
        return

    conversion_errors = (idna.IDNAError, UnicodeError, ValueError)

    try:
        types, source, to_unicode, to_ascii = [
            unicode_fixup(field) for field in fields[:4]]
        if (unicode_fixup(u"\\uD804\\uDC39") in source and
                sys.version_info[0] < 3):
            raise unittest.SkipTest(
                "Python 2's Unicode support is too old for this test")
    except ValueError:
        raise unittest.SkipTest(
            "Test requires Python wide Unicode support")

    # Empty expectation columns fall back to the previous column.
    to_unicode = to_unicode or source
    to_ascii = to_ascii or to_unicode
    # Errors are tolerated when the fifth field is set or when the line is
    # listed in _MISSING_NV8.
    nv8 = (len(fields) > 4 and fields[4] or
           self.lineno in _MISSING_NV8)

    try:
        output = idna.decode(source, uts46=True, strict=True)
        if to_unicode[0] == u"[":
            self.fail("decode() did not emit required error")
        self.assertEqual(output, to_unicode, "unexpected decode() output")
    except conversion_errors as exc:
        if unicode(exc).startswith(u"Unknown directionality"):
            raise unittest.SkipTest(
                "Test requires support for a newer"
                " version of Unicode than this Python supports")
        if to_unicode[0] != u"[" and not nv8:
            raise

    transitional_modes = {
        u"B": (True, False),
        u"T": (True, ),
        u"N": (False, ),
    }
    for transitional in transitional_modes[types]:
        try:
            output = idna.encode(source, uts46=True, strict=True,
                                 transitional=transitional).decode("ascii")
            if to_ascii[0] == u"[":
                self.fail(
                    "encode(transitional={0}) did not emit required error".
                    format(transitional))
            self.assertEqual(
                output, to_ascii,
                "unexpected encode(transitional={0}) output".format(
                    transitional))
        except conversion_errors:
            if to_ascii[0] != u"[" and not nv8:
                raise
def host(self):
    """Decoded host part of URL.

    None for relative URLs.

    """
    raw_host = self.raw_host
    if raw_host is None:
        return None

    ascii_host = raw_host.encode('ascii')
    try:
        return idna.decode(ascii_host)
    except UnicodeError:
        # e.g. '::1' - fall back to the built-in "idna" codec
        return raw_host.encode('ascii').decode('idna')
def test_decode(self):
    """Exercise idna.decode() with valid inputs and inputs expected to raise IDNAError."""
    expected_domain = '\u30c6\u30b9\u30c8.\u30c6\u30b9\u30c8'

    # A-label, mixed and U-label spellings must all decode to the same text.
    for spelling in (
        'xn--zckzah.xn--zckzah',
        '\u30c6\u30b9\u30c8.xn--zckzah',
        '\u30c6\u30b9\u30c8.\u30c6\u30b9\u30c8',
    ):
        self.assertEqual(idna.decode(spelling), expected_domain)

    self.assertEqual(idna.decode('abc.abc'), 'abc.abc')
    self.assertEqual(
        idna.decode('xn---------90gglbagaar.aa'),
        '\u0521\u0525\u0523-\u0523\u0523-----\u0521\u0523\u0523\u0523.aa')

    # Each of these inputs must be rejected with IDNAError.
    rejected_inputs = (
        'XN---------90GGLBAGAAC.AA',
        'xn---------90gglbagaac.aa',
        'xn--',
        b'\x8d\xd2',
        b'A.A.0.a.a.A.0.a.A.A.0.a.A.0A.2.a.A.A.0.a.A.0.A.a.A0.a.a.A.0.a.fB.A.A.a.A.A.B.A.A.a.A.A.B.A.A.a.A.A.0.a.A.a.a.A.A.0.a.A.0.A.a.A0.a.a.A.0.a.fB.A.A.a.A.A.B.0A.A.a.A.A.B.A.A.a.A.A.a.A.A.B.A.A.a.A.0.a.B.A.A.a.A.B.A.a.A.A.5.a.A.0.a.Ba.A.B.A.A.a.A.0.a.Xn--B.A.A.A.a',
    )
    for rejected in rejected_inputs:
        self.assertRaises(idna.IDNAError, idna.decode, rejected)
def host(self):
    """Decoded host part of URL.

    None for relative URLs.

    """
    raw_host = self.raw_host
    if raw_host is None:
        return None

    ascii_host = raw_host.encode('ascii')
    try:
        decoded = idna.decode(ascii_host)
    except idna.core.InvalidCodepoint:
        # e.g. '::1' - keep the raw host as is
        return raw_host
    return decoded
def decode_punycode(label):
    """Helper: decode one section of the netloc from punycode.

    Returns *label* unchanged when decoding raises UnicodeError. A ValueError
    mentioning a "narrow Python build" is turned into a UnicodeWarning and
    the label is returned unchanged; any other ValueError propagates.
    """
    try:
        return idna.decode(label.encode('ascii'))
    except UnicodeError:
        return label
    except ValueError as exc:
        # see https://github.com/john-kurkowski/tldextract/issues/122
        reason = exc.args[0]
        if "narrow Python build" not in reason:
            raise
        warnings.warn("can not decode punycode: %s" % exc.args[0],
                      UnicodeWarning, stacklevel=2)
        return label
def pretty_domain_name(dname):
    """
    Return a pretty printable domain name.

    If *dname* is in punycode, decode it; if it cannot be decoded, return it
    unchanged.
    """
    try:
        return idna.decode(dname.encode("ascii"))
    except (UnicodeError, idna.IDNAError):
        return dname
def kick(env, mail_result=None):
    """Refresh the automatic aliases and trigger the DNS and web updates.

    *mail_result*, when given, is placed first in the returned text. Returns
    the concatenation of all non-empty result strings.
    """
    results = []

    # Lead with the result of the operation that triggered this call.
    if mail_result is not None:
        results.append(mail_result + "\n")

    # Every required alias forwards to the administrator alias. The
    # administrator alias itself is skipped: it must be created manually
    # and must not point at itself.
    administrator = get_system_administrator(env)
    required_aliases = get_required_aliases(env)
    auto_aliases = {}
    for alias in required_aliases:
        if alias != administrator:
            auto_aliases[alias] = administrator

    # Map the Unicode form of each IDNA domain to the ASCII form that is
    # stored in the alias table.
    for domain in get_mail_domains(env):
        try:
            domain_unicode = idna.decode(domain.encode("ascii"))
        except (ValueError, UnicodeError, idna.IDNAError):
            continue
        if domain != domain_unicode:
            auto_aliases["@" + domain_unicode] = "@" + domain

    add_auto_aliases(auto_aliases, env)

    # postmaster/admin/abuse aliases that are no longer required, forward to
    # the administrator and are not marked auto are removed from the main
    # alias table.
    for address, forwards_to, permitted_senders, auto in get_mail_aliases(env):
        user, domain = address.split("@")
        removable = (
            user in ("postmaster", "admin", "abuse")
            and address not in required_aliases
            and forwards_to == get_system_administrator(env)
            and not auto
        )
        if not removable:
            continue
        remove_mail_alias(address, env, do_kick=False)
        results.append(
            "removed alias %s (was to %s; domain no longer used for email)\n"
            % (address, forwards_to))

    # Domains may have been added or removed, so update DNS and nginx.
    from dns_update import do_dns_update
    results.append(do_dns_update(env))

    from web_update import do_web_update
    results.append(do_web_update(env))

    return "".join(s for s in results if s != "")
def create_cli(domains=None):
    """Format *domains* as colored, column-aligned text, one line per domain.

    :param domains: iterable of dicts with at least the keys ``fuzzer`` and
        ``domain-name``; the optional keys (``dns-a``, ``dns-ns``, ...) add
        details to the line. When stdout is UTF-8, each dict's
        ``domain-name`` is replaced in place by its IDNA-decoded form.
    :returns: the lines joined with newlines; an empty string when there are
        no domains.
    """
    domains = list(domains) if domains is not None else []
    # Nothing to print; this also avoids calling max() on an empty sequence.
    if not domains:
        return ''

    # stdout.encoding can be None (e.g. when output is redirected).
    if (sys.stdout.encoding or '').lower() == 'utf-8':
        for domain in domains:
            name = domain['domain-name']
            domain['domain-name'] = idna.decode(name)

    # Column widths: longest value plus one space of padding.
    width_fuzzer = max(len(x['fuzzer']) for x in domains) + 1
    width_domain = max(len(x['domain-name']) for x in domains) + 1

    cli = []
    for domain in domains:
        info = []

        if 'dns-a' in domain:
            if 'geoip-country' in domain:
                info.append(';'.join(domain['dns-a']) + FG_CYA + '/' + domain['geoip-country'].replace(' ', '') + FG_RST)
            else:
                info.append(';'.join(domain['dns-a']))

        if 'dns-aaaa' in domain:
            info.append(';'.join(domain['dns-aaaa']))

        if 'dns-ns' in domain:
            info.append(FG_YEL + 'NS:' + FG_CYA + ';'.join(domain['dns-ns']) + FG_RST)

        if 'dns-mx' in domain:
            if 'mx-spy' in domain:
                info.append(FG_YEL + 'SPYING-MX:' + FG_CYA + ';'.join(domain['dns-mx']) + FG_RST)
            else:
                info.append(FG_YEL + 'MX:' + FG_CYA + ';'.join(domain['dns-mx']) + FG_RST)

        if 'banner-http' in domain:
            info.append(FG_YEL + 'HTTP:' + FG_CYA + domain['banner-http'] + FG_RST)

        if 'banner-smtp' in domain:
            info.append(FG_YEL + 'SMTP:' + FG_CYA + domain['banner-smtp'] + FG_RST)

        if 'whois-registrar' in domain:
            info.append(FG_YEL + 'REGISTRAR:' + FG_CYA + domain['whois-registrar'] + FG_RST)

        if 'whois-created' in domain:
            info.append(FG_YEL + 'CREATED:' + FG_CYA + domain['whois-created'] + FG_RST)

        if domain.get('ssdeep-score', 0) > 0:
            info.append(FG_YEL + 'SSDEEP:' + str(domain['ssdeep-score']) + FG_RST)

        # A lone dash marks a domain without any details.
        if not info:
            info = ['-']

        cli.append(' '.join([
            FG_BLU + domain['fuzzer'].ljust(width_fuzzer) + FG_RST,
            domain['domain-name'].ljust(width_domain),
            ' '.join(info)
        ]))

    return '\n'.join(cli)
def __init__(self, raw_display_name=None, raw_addr_spec=None, _display_name=None, _mailbox=None, _hostname=None):
    """Build an address from raw strings or from already separated parts.

    Accepted argument combinations:

    * *raw_addr_spec*, optionally with *raw_display_name*: the addr-spec is
      parsed and the display name is taken as given (empty if absent);
    * *raw_display_name* alone: parsed as a complete mailbox;
    * *_mailbox* and *_hostname*, optionally with *_display_name*: used
      without parsing.

    Raises SyntaxError for any other combination.
    """
    raw_display_name = _to_parser_input(raw_display_name)
    raw_addr_spec = _to_parser_input(raw_addr_spec)

    if raw_addr_spec:
        parsed = addr_spec_parser.parse(raw_addr_spec, lexer.clone())
        if raw_display_name:
            self._display_name = _to_text(raw_display_name)
        else:
            self._display_name = u''
        self._mailbox = _to_text(parsed.local_part)
        self._hostname = _to_text(parsed.domain)
    elif raw_display_name:
        parsed = mailbox_parser.parse(raw_display_name, lexer.clone())
        self._display_name = _to_text(parsed.display_name)
        self._mailbox = _to_text(parsed.local_part)
        self._hostname = _to_text(parsed.domain)
    elif _mailbox and _hostname:
        self._display_name = _display_name or u''
        self._mailbox = _mailbox
        self._hostname = _hostname
    else:
        raise SyntaxError('failed to create EmailAddress: bad parameters')

    # Convert display name to decoded unicode string.
    display_name = self._display_name
    if display_name.startswith('=?') and display_name.endswith('?='):
        display_name = mime_to_unicode(display_name)
    is_quoted = (display_name.startswith('"') and
                 display_name.endswith('"') and
                 len(display_name) > 2)
    if is_quoted:
        display_name = smart_unquote(display_name)
    self._display_name = display_name

    # Convert hostname to lowercase unicode string.
    hostname = self._hostname.lower()
    self._hostname = hostname
    if hostname.startswith('xn--') or '.xn--' in hostname:
        self._hostname = idna.decode(hostname)
    if not is_pure_ascii(self._hostname):
        # Called only so that it raises for a name that cannot be encoded;
        # the encoded value is not kept.
        idna.encode(self._hostname)

    assert isinstance(self._display_name, six.text_type)
    assert isinstance(self._mailbox, six.text_type)
    assert isinstance(self._hostname, six.text_type)
def can_provision_for_domain(domain):
    """Tell whether a certificate can be provisioned for *domain*.

    Returns True when the domain is not internationalized and public DNS
    resolves it to exactly this machine's configured address(es). Otherwise
    stores an explanation in ``problems[domain]`` and returns False.
    """

    def rdata_text(record):
        # The textual form of a record may come back as bytes instead of a
        # string (seen for AAAA answers), so convert it manually until this
        # is fixed: https://github.com/rthalley/dnspython/issues/204
        text = record.to_text()
        if isinstance(text, bytes):
            text = text.decode('utf-8')
        return text

    # Let's Encrypt doesn't yet support IDNA domains.
    # We store domains in IDNA (ASCII). To see if this domain is IDNA,
    # we'll see if its IDNA-decoded form is different.
    if idna.decode(domain.encode("ascii")) != domain:
        problems[domain] = "Let's Encrypt does not yet support provisioning certificates for internationalized domains."
        return False

    # Does the domain resolve to this machine in public DNS? If not,
    # we can't do domain control validation. If IPv6 is configured,
    # make sure both IPv4 and IPv6 are correct because we don't know
    # how Let's Encrypt will connect.
    import dns.resolver
    for rtype, value in [("A", env["PUBLIC_IP"]), ("AAAA", env.get("PUBLIC_IPV6"))]:
        if not value:
            continue  # IPv6 is not configured
        try:
            # Must make the qname absolute to prevent a fall-back lookup with a
            # search domain appended, by adding a period to the end.
            response = dns.resolver.query(domain + ".", rtype)
        except (dns.resolver.NoNameservers, dns.resolver.NXDOMAIN, dns.resolver.NoAnswer) as e:
            # NoAnswer's str is empty
            problems[domain] = "DNS isn't configured properly for this domain: DNS resolution failed (%s: %s)." % (
                rtype, str(e) or repr(e))
            return False
        except Exception as e:
            problems[domain] = "DNS isn't configured properly for this domain: DNS lookup had an error: %s." % str(e)
            return False

        # Convert every record, not only the first one: collapsing the answer
        # to its first record would hide additional records that point
        # somewhere else and defeat the "exactly one record" check.
        records = [rdata_text(r) for r in response]
        if len(records) != 1 or records[0] != value:
            problems[domain] = "Domain control validation cannot be performed for this domain because DNS points the domain to another machine (%s %s)." % (
                rtype, ", ".join(records))
            return False

    return True
def decode_punycode(domain):
    """
    Check whether the given domain is punycode-encoded and, if so, convert
    it to the corresponding internationalized (e.g. Chinese) domain name.

    :return: the decoded domain, or the domain unchanged when it does not
        contain "xn--"
    """
    # Only names containing the punycode marker need converting
    if "xn--" not in domain:
        return domain
    return idna.decode(domain)
def _build_general_name(backend, gn):
    """Convert the general name structure *gn* into the matching x509 object.

    Handles DNS names and registered IDs. Any other type raises
    x509.UnsupportedGeneralNameType.
    """
    lib = backend._lib
    name_type = gn.type

    if name_type == lib.GEN_DNS:
        dns_bytes = backend._ffi.buffer(gn.d.dNSName.data, gn.d.dNSName.length)[:]
        return x509.DNSName(idna.decode(dns_bytes))

    if name_type == lib.GEN_RID:
        oid = _obj2txt(backend, gn.d.registeredID)
        return x509.RegisteredID(x509.ObjectIdentifier(oid))

    # otherName, x400Address or ediPartyName
    type_name = x509._GENERAL_NAMES.get(gn.type, gn.type)
    raise x509.UnsupportedGeneralNameType(
        "{0} is not a supported type".format(type_name),
        gn.type
    )
def value(self):
    """Deprecated: return the URI with its host name IDNA-decoded.

    Emits a deprecation warning on every call; ``bytes_value`` holds the raw
    bytes and should be used instead.
    """
    warnings.warn(
        "UniformResourceIdentifier.bytes_value should be used instead of "
        "UniformResourceIdentifier.value; it contains the name as raw "
        "bytes, instead of as an idna-decoded unicode string. "
        "UniformResourceIdentifier.value will be removed in a future "
        "version.",
        utils.DeprecatedIn21,
        stacklevel=2
    )
    parsed = urllib_parse.urlparse(self.bytes_value)

    if not parsed.hostname:
        netloc = ""
    else:
        port = parsed.port
        netloc = idna.decode(parsed.hostname)
        if port:
            netloc = netloc + ":{0}".format(port)

    # Rebuilding the URL this way should give something semantically
    # indistinguishable from the original, but not necessarily an identical
    # string.
    components = (
        parsed.scheme.decode('utf8'),
        netloc,
        parsed.path.decode('utf8'),
        parsed.params.decode('utf8'),
        parsed.query.decode('utf8'),
        parsed.fragment.decode('utf8'),
    )
    return urllib_parse.urlunparse(components)
def netloc_unicode(self) -> str:
    """Return the lower-cased Unicode form of the netloc, computing it once.

    A netloc that already contains non-ASCII characters is only lower-cased.
    Otherwise it is IDNA-decoded, falling back to the built-in "idna" codec
    when the idna package rejects it. The result is cached in
    ``self._netloc_unicode``.
    """
    if self._netloc_unicode is not None:
        return self._netloc_unicode

    netloc = self.split_value.netloc
    if any(ord(char) >= 128 for char in netloc):
        # Already Unicode; nothing to decode.
        decoded = netloc
    else:
        try:
            decoded = idna.decode(netloc)
        except idna.core.IDNAError:
            decoded = netloc.encode("utf-8", errors="ignore").decode("idna")

    self._netloc_unicode = decoded.lower()
    return self._netloc_unicode
def parse_domain(url: str) -> str:
    """Return the last two labels of the URL's host name, or the IP address.

    The port is stripped first. A host matching RE_IP_ADDR is returned as
    is. A host that starts with ``xn--`` is IDNA-decoded before the labels
    are taken.

    Raises InvalidUrl when the host has fewer than two labels.
    """
    netloc = urlparse(url).netloc
    # Cut off the port, e.g. "https://example.com:8080"
    hostname = RE_HOST_WITH_PORT.sub('', netloc)

    # An IP address has no domain to extract
    if RE_IP_ADDR.match(hostname):
        return hostname

    # IDNA decode (e.g. xn----7sbgbbkedzyymg.xn--p1ai)
    if hostname.startswith('xn--'):
        hostname = idna.decode(hostname)

    labels = hostname.split('.')
    try:
        second_level, top_level = labels[-2], labels[-1]
    except IndexError:
        raise InvalidUrl(url)
    return f'{second_level}.{top_level}'
def get_website_link():
    """Return the link to the main page of the site."""

    def site_host():
        # Text of the <b> element inside the <div class="path"> of `link`.
        return link.find("div", {"class": "path"}).find("b").get_text()

    try:
        # Domains in the .рф zone are converted from IDNA.
        return "https://" + str(idna.decode(site_host()))
    except (InvalidCodepoint, IDNAError):
        # Not decodable: use the text as it is.
        return "https://" + site_host()
def _translate(domain): # 对domain进行处理:转成小写、去掉前后的多余字符、中文域名的转换等 # 将传入的域名转换成小写 domain = domain.lower() # 将传入的域名去掉多余的字符 domain = domain.strip() domain = domain.expandtabs() # 将tab替换为空格 domain = domain.replace(" ", "") # 将空格删除 # 对中文域名进行转换 if "xn--" in domain: # 将punycode转换成中文 domain = idna.decode(domain) return domain
def decodeIDNA(self, link):
    """
    Decode link like protocol://xn--80afndtacjikc

    Returns None for None, and the link unchanged when it has no known
    protocol or cannot be decoded.
    """
    if link is None:
        return None

    protocol = self._getLinkProtocol(link)
    if protocol is None:
        return link

    encoded_part = link[len(protocol):]
    try:
        return u"{}{}".format(protocol, idna.decode(encoded_part))
    except UnicodeError:
        # Under IE links are not converted to the IDNA encoding
        return link
def normalize_name(name):
    '''
    Clean the fully qualified name, as defined in ENS `EIP-137
    <https://github.com/ethereum/EIPs/blob/master/EIPS/eip-137.md#name-syntax>`_

    This does *not* enforce whether ``name`` is a label or fully qualified domain.

    A falsy ``name`` is returned unchanged; bytes are decoded as UTF-8 first.

    :param str name: the dot-separated ENS name
    :raises InvalidName: if ``name`` has invalid syntax
    '''
    if not name:
        return name

    if isinstance(name, (bytes, bytearray)):
        name = name.decode('utf-8')

    try:
        normalized = idna.decode(name, uts46=True, std3_rules=True)
    except idna.IDNAError as exc:
        raise InvalidName("%s is an invalid name, because %s" % (name, exc)) from exc
    return normalized
def _idnaText(octets): """ Convert some IDNA-encoded octets into some human-readable text. Currently only used by the tests. @param octets: Some bytes representing a hostname. @type octets: L{bytes} @return: A human-readable domain name. @rtype: L{unicode} """ try: import idna except ImportError: return octets.decode("idna") else: return idna.decode(octets)
def host(self):
    """Decoded host part of URL.

    None for relative URLs.

    """
    raw_host = self.raw_host
    if raw_host is None:
        return None

    # Hack for scoped IPv6 addresses like fe80::2%eth0: a '%' sign is taken
    # to mean an IPv6 address, for which IDNA decoding is useless.
    if "%" in raw_host:
        return raw_host

    ascii_host = raw_host.encode("ascii")
    try:
        return idna.decode(ascii_host)
    except UnicodeError:
        # e.g. '::1' - fall back to the built-in "idna" codec
        return raw_host.encode("ascii").decode("idna")
def value(self):
    """Deprecated: return the address with its domain IDNA-decoded.

    Emits a deprecation warning on every call; ``bytes_value`` holds the raw
    bytes and should be used instead.
    """
    warnings.warn(
        "RFC822Name.bytes_value should be used instead of RFC822Name.value"
        "; it contains the name as raw bytes, instead of as an idna-"
        "decoded unicode string. RFC822Name.value will be removed in a "
        "future version.",
        utils.DeprecatedIn21,
        stacklevel=2
    )
    _, address = parseaddr(self.bytes_value.decode("ascii"))
    parts = address.split(u"@")

    # Single label email name. This is valid for local delivery. There is no
    # domain component, so no IDNA decoding is needed.
    if len(parts) == 1:
        return address

    # A normal email of the form user@domain: decode the domain component
    # and reconstruct the address.
    local_part, domain = parts[0], parts[1]
    return local_part + u"@" + idna.decode(domain)
def shorten(target, code=None, hidden=None, item_type="link", internal=False):
    """Store a short link for *target* and return its external URL.

    :param target: URL the short link points to.
    :param code: caller-chosen short code; when None a code is generated
        with ``util.get_code``.
    :param hidden: passed to ``util.get_code`` as ``do_random``.
    :param item_type: passed to ``util.get_code``.
    :param internal: allow targets without a host or on this service's own
        host.
    :returns: the external URL of the new link, or None when the target is
        not allowed, when a caller-chosen code is already taken, or when no
        free generated code was found after five attempts.
    """
    target_parse = urlparse(target)
    if target_parse.netloc in (urlparse(request.base_url).netloc, "") and not internal:
        return None
    target = urlunparse(target_parse)

    is_custom_code = code is not None
    if not is_custom_code:
        code = util.get_code(item_type, do_random=hidden)

    for _attempt in range(5):
        data = {
            "type": "url",
            "code": code,
            "url": target,
            "ts": str(time.time()),
            "clicked": 0
        }
        try:
            links.insert_one(data)
            break
        except DuplicateKeyError:
            # A caller-chosen code cannot be replaced by another one.
            if is_custom_code:
                return None
            # Re-inserting the same code can never succeed; draw a new one.
            # NOTE(review): assumes util.get_code yields a different code on
            # a later call - confirm.
            code = util.get_code(item_type, do_random=hidden)
    else:
        # Every attempt collided: nothing was stored, so there is no link.
        return None

    parsed = urlparse(url_for("link", _external=True, code=code))
    if config["link_shortener"]["convert_punycode"]:
        try:
            hostname = parsed.hostname
            if hostname:
                # "hostname" is a read-only property, not a tuple field, so
                # the host has to be swapped inside netloc (keeping any
                # user info and port), and the new tuple has to be kept.
                userinfo, at_sign, _host_and_port = parsed.netloc.rpartition("@")
                netloc = userinfo + at_sign + idna.decode(hostname)
                if parsed.port is not None:
                    netloc += ":%d" % parsed.port
                parsed = parsed._replace(netloc=netloc)
        except ValueError:
            # Undecodable host (IDNAError is a ValueError) or invalid port:
            # keep the URL as generated.
            pass
    return urlunparse(parsed)
def __call__(self, url):
    """
    Takes a string URL and splits it into its subdomain, domain, and
    suffix (effective TLD, gTLD, ccTLD, etc.) component.

    >>> extract = TLDExtract()
    >>> extract('http://forums.news.cnn.com/')
    ExtractResult(subdomain='forums.news', domain='cnn', suffix='com')
    >>> extract('http://forums.bbc.co.uk/')
    ExtractResult(subdomain='forums', domain='bbc', suffix='co.uk')
    """

    def translate(label):
        # Punycode labels are decoded when possible; every label is
        # lower-cased.
        if label.startswith("xn--"):
            try:
                label = idna.decode(label.encode('ascii'))
            except UnicodeError:
                pass
        return label.lower()

    # Strip the scheme, then everything from the first path, query or
    # fragment separator onwards.
    netloc = SCHEME_RE.sub("", url)
    for separator in ("/", "?", "#"):
        netloc = netloc.partition(separator)[0]
    # Drop user info and port, surrounding whitespace and trailing dots.
    netloc = netloc.split("@")[-1].partition(":")[0].strip().rstrip(".")

    labels = netloc.split(".")
    translations = [translate(label) for label in labels]
    suffix_index = self._get_tld_extractor().suffix_index(translations)

    registered_domain = ".".join(labels[:suffix_index])
    tld = ".".join(labels[suffix_index:])

    if not tld and netloc and looks_like_ip(netloc):
        return ExtractResult('', netloc, '')

    subdomain, _, domain = registered_domain.rpartition('.')
    return ExtractResult(subdomain, domain, tld)
def _decodeIDNA(self, link): """ Decode link like protocol://xn--80afndtacjikc """ if link is None: return None protocol = self._getLinkProtocol(link) if protocol is not None: url = link[len(protocol):] try: link = u"{}{}".format( protocol, idna.decode(url)) except UnicodeError: # Под IE ссылки не преобразуются в кодировку IDNA pass return link
def TestOneInput(input_bytes):
    """Fuzz entry point: encode a fuzzed string with idna and cross-check it.

    The NFC-normalized input is encoded with ``idna.encode(strict=True)``.
    Failures are tolerated for inputs that ShouldFail() marks as invalid and
    for the error classes listed below; any other failure is re-raised after
    the input is written to stderr. A successful encoding of an input that
    should have failed raises RuntimeError. Otherwise the result is decoded
    again and both directions are compared with libidn2.
    """
    fdp = atheris.FuzzedDataProvider(input_bytes)
    original = fdp.ConsumeUnicode(253)
    original = unicodedata.normalize("NFC", original)
    should_fail = ShouldFail(original)

    def report_input():
        codepoints = [ord(x) for x in original.lower()]
        sys.stderr.write("Input: %s\nCodepoints: %s\n" % (original, codepoints))

    try:
        encoded = idna.encode(original, strict=True).lower()
    # These errors are very complex and would essentially require the idna
    # package to be reimplemented in order to predict them correctly, so
    # inputs raising them are assumed to be handled correctly.
    except idna.core.InvalidCodepoint:
        return
    except idna.core.InvalidCodepointContext:
        return
    except idna.core.IDNAError as e:
        if str(e).startswith("Unknown codepoint adjacent to"):
            return
        if should_fail:
            return
        report_input()
        raise
    except BaseException:
        if should_fail:
            return
        report_input()
        raise

    # This check must live outside the try block: raised inside it, the
    # error would be swallowed by the handlers above, which return silently
    # whenever should_fail is true.
    if should_fail:
        raise RuntimeError(
            ("Input '%s' is invalid, should have failed; "
             "however, actually encoded to '%s'") % (original, encoded))

    decoded = idna.decode(encoded)
    CompareEncodedWithLibidn2(original, encoded)
    CompareDecodedWithLibidn2(original, encoded, decoded)
async def hello():
    """Render the home page, greeting the visitor by the first host label.

    The host name comes from the ``X-Forwarded-Host`` header, falling back
    to ``Host``. A ``Host`` of ``localhost`` or one starting with ``127.``
    is replaced by ``local-user.theinternet.lol``; when neither header is
    present, ``unknown.theinternet.lol`` is used. Host names containing
    ``--`` are IDNA-decoded on a best-effort basis. The first label is then
    title-cased, with hyphens turned into spaces, and passed to the template
    as ``name``.
    """
    try:
        hostname = request.headers['X-Forwarded-Host']
    except KeyError:
        try:
            hostname = request.headers['Host']
            if hostname.startswith('127.') or hostname == 'localhost':
                hostname = 'local-user.theinternet.lol'
        except KeyError:
            hostname = 'unknown.theinternet.lol'

    # Test the host name here: `name` is only assigned further down, so
    # checking it at this point raised UnboundLocalError on every request.
    if '--' in hostname:
        try:
            hostname = idna.decode(hostname)
        except Exception:
            # Best effort: log the failure and fall back to the raw host.
            traceback.print_exc()

    name = hostname.split('.')[0]
    return home.render(name=name.title().replace('-', ' '))
async def _download_tlds_list(self):
    """Fetch the IANA TLD list and store it on the instance as a set.

    Every non-empty, non-comment line contributes two entries, each with a
    leading dot: the lower-cased line as downloaded and its IDNA-decoded
    form. The finished set replaces ``self.__tlds``.
    """
    source_url = 'https://data.iana.org/TLD/tlds-alpha-by-domain.txt'
    collected = set()

    async with self._session.get(source_url) as response:
        body = await response.text()
        for raw_line in body.split('\n'):
            entry = raw_line.strip().lower()
            # Blank lines and '#' comment lines carry no TLD.
            if not entry or entry[0] == '#':
                continue
            collected.update(("." + entry, "." + idna.decode(entry)))

    self.__tlds = collected
def create_cli(domains=None):
    """Render scan results as coloured, column-aligned text, one line each.

    :param domains: iterable of result dicts. ``fuzzer`` and ``domain`` form
        the two aligned columns; the optional keys ``dns_a``, ``geoip``,
        ``dns_aaaa``, ``dns_ns``, ``dns_mx``, ``mx_spy``, ``banner_http``,
        ``banner_smtp``, ``whois_registrar``, ``whois_created``, ``ssdeep``
        and ``phash`` add details after them.
    :return: the lines joined with newlines; an empty string when there is
        nothing to render.

    When stdout is UTF-8 the ``domain`` value of every dict is replaced in
    place by its IDNA-decoded form, so the caller's dicts are modified.
    """
    domains = [] if domains is None else list(domains)
    # Nothing to render. This also keeps max() below from being called on an
    # empty sequence, which raises ValueError.
    if not domains:
        return ''

    # sys.stdout may have been replaced by an object whose encoding is
    # missing or None; treat that as "not UTF-8".
    encoding = getattr(sys.stdout, 'encoding', None) or ''
    if encoding.lower() == 'utf-8':
        for domain in domains:
            domain['domain'] = idna.decode(domain['domain'])

    # Column widths: longest value plus one space of padding.
    wfuz = max(len(x.get('fuzzer', '')) for x in domains) + 1
    wdom = max(len(x.get('domain', '')) for x in domains) + 1

    def kv(k, v):
        """Colour a value, optionally preceded by a coloured key."""
        if k:
            return FG_YEL + k + FG_CYA + v + FG_RST
        return FG_CYA + v + FG_RST

    cli = []
    for domain in domains:
        inf = []
        if 'dns_a' in domain:
            inf.append(';'.join(domain['dns_a']) + (kv('/', domain['geoip'].replace(' ', '')) if 'geoip' in domain else ''))
        if 'dns_aaaa' in domain:
            inf.append(';'.join(domain['dns_aaaa']))
        if 'dns_ns' in domain:
            inf.append(kv('NS:', ';'.join(domain['dns_ns'])))
        if 'dns_mx' in domain:
            inf.append(kv('SPYING-MX:' if domain.get('mx_spy') else 'MX:', ';'.join(domain['dns_mx'])))
        if 'banner_http' in domain:
            inf.append(kv('HTTP:', domain['banner_http']))
        if 'banner_smtp' in domain:
            inf.append(kv('SMTP:', domain['banner_smtp']))
        if 'whois_registrar' in domain:
            inf.append(kv('REGISTRAR:', domain['whois_registrar']))
        if 'whois_created' in domain:
            inf.append(kv('CREATED:', domain['whois_created']))
        if domain.get('ssdeep', 0) > 0:
            inf.append(kv('SSDEEP:', '{}%'.format(domain['ssdeep'])))
        if domain.get('phash', 0) > 0:
            inf.append(kv('PHASH:', '{}%'.format(domain['phash'])))
        cli.append('{}{[fuzzer]:<{}}{} {[domain]:<{}} {}'.format(
            FG_BLU, domain, wfuz, FG_RST, domain, wdom, ' '.join(inf or ['-'])))
    return '\n'.join(cli)
def TextToIdna(yourfile):
    """Print every line of a text file with its punycode labels decoded.

    Each line is split on '.', labels starting with 'xn--' are passed
    through ``idna.decode``, and the labels are joined back with '.' and
    printed, one output line per input line.
    """
    # Read every line of the text into a list.
    with open(yourfile, 'r') as f:
        lines = [line.strip() for line in f]

    # Split each line on '.', giving one small list of labels per line.
    rows = [l.encode('raw_unicode_escape').split('.') for l in lines]

    for labels in rows:
        # Handle one line of text: decode only the punycode labels.
        decoded = [
            idna.decode(label) if label.startswith('xn--') else label
            for label in labels
        ]
        # Join the labels of the line back into a single string.
        print('.'.join(decoded))
def run_domain_checks_on_domain(domain, rounded_time, env, dns_domains, dns_zonefiles, mail_domains, web_domains):
    """Run every status check that applies to one domain.

    The heading shows the Unicode form of the IDNA-encoded domain. Which
    checks run depends on whether the domain is the primary hostname and on
    its membership in ``dns_domains``, ``mail_domains`` and ``web_domains``.

    Returns a ``(domain, output)`` tuple, where ``output`` is the
    ``BufferedOutput`` holding everything the checks reported.
    """
    output = BufferedOutput()

    # The domain is IDNA-encoded, but for display use Unicode.
    try:
        domain_display = idna.decode(domain.encode("ascii"))
    except (ValueError, UnicodeError, idna.IDNAError) as e:
        # Invalid stored data must not abort the whole run: show the raw
        # name, report the problem, and carry on with the checks.
        output.add_heading(domain)
        output.print_error("Domain name is invalid: " + str(e))
    else:
        output.add_heading(domain_display)

    if domain == env["PRIMARY_HOSTNAME"]:
        check_primary_hostname_dns(domain, env, output, dns_domains, dns_zonefiles)

    if domain in dns_domains:
        check_dns_zone(domain, env, output, dns_zonefiles)

    if domain in mail_domains:
        check_mail_domain(domain, env, output)

    if domain in web_domains:
        check_web_domain(domain, rounded_time, env, output)

    if domain in dns_domains:
        check_dns_zone_suggestions(domain, env, output, dns_zonefiles)

    return (domain, output)
def TestOneInput(input_bytes):
    """Fuzz entry point: compare idna's UTS46 results against libidn2.

    A fuzzed name (always prefixed with "a.") is encoded by both libraries
    using fuzzed ``transitional`` and ``std3`` flags. If either encoder
    raises, the input is skipped. Disagreement on the encoding, or on the
    decoding of that encoding, raises ``RuntimeError``.
    """
    global total_iters
    global comparison_iters

    provider = atheris.FuzzedDataProvider(input_bytes)
    transitional = provider.ConsumeBool()
    std3 = provider.ConsumeBool()
    original = "a." + provider.ConsumeUnicode(253)

    try:
        nfc_original = unicodedata.normalize("NFC", original)
        libidn2_encoded = libidn2.encode(
            original,
            uts46=True,
            transitional=transitional,
            nfc=True,
            std3=std3)
        idna_encoded = idna.encode(
            original,
            strict=False,
            uts46=True,
            transitional=transitional,
            std3_rules=std3).lower()
    except Exception:
        # Inputs that either library rejects are not compared.
        return

    if idna_encoded != libidn2_encoded:
        hex_points = [hex(ord(x))[2:] for x in original]
        sys.stderr.write("Transitional=%s, std3=%s\n" % (transitional, std3))
        sys.stderr.write("Input codepoints: %s\n" % hex_points)
        raise RuntimeError(
            "IDNA encoding disagrees with libidn2 encoding.\n"
            "Input: %s\nIDNA encoding: %s\nlibidn2 encoding: %s\n"
            % (original, idna_encoded, libidn2_encoded))

    idna_decoded = idna.decode(idna_encoded, uts46=True, std3_rules=std3)
    libidn2_decoded = libidn2.decode(idna_encoded, uts46=True, std3=std3)

    if idna_decoded != libidn2_decoded:
        raise RuntimeError(
            "IDNA decoding disagrees with libidn2 decoding.\n"
            "Input: %s\nEncoding: %s\nIDNA decoding: %s\nlibidn2 decoding: %s"
            % (original, idna_encoded, idna_decoded, libidn2_decoded))
def _load_cached_tlds(self): """ Loads TLDs from cached file to set. :return: Set of current TLDs :rtype: set """ list_of_tlds = set() with open(self.tld_list_path, 'r') as f: for line in f: tld = line.strip().lower() # skip empty lines if len(tld) <= 0: continue # skip comments if tld[0] == '#': continue list_of_tlds.add("." + tld) list_of_tlds.add("." + idna.decode(tld)) return list_of_tlds
def _load_cached_tlds(self): """ Loads TLDs from cached file to set. :return: Set of current TLDs :rtype: set """ set_of_tlds = set() with open(self._tld_list_path, 'r') as f_cache_tld: for line in f_cache_tld: tld = line.strip().lower() # skip empty lines if not tld: continue # skip comments if tld[0] == '#': continue set_of_tlds.add("." + tld) set_of_tlds.add("." + idna.decode(tld)) return set_of_tlds
def idna_to_domain(passed_domain):
    """
    Change idna domain to unicode. Should only be done right before display!

    Values that are not ``bytes`` are returned untouched. Bytes are decoded
    with the ``idna`` package first, then with the built-in 'idna' codec
    (older, non-compatible IDNA 2003); if both fail, the input is returned
    as is.

    :param passed_domain: domain name, IDNA-encoded bytes or already text
    :return: the decoded domain, or ``passed_domain`` itself when it cannot
        or need not be decoded
    """
    if not isinstance(passed_domain, bytes):
        # already a unicode domain. Just return.
        return passed_domain

    # try to decode idna2008
    try:
        return idna.decode(passed_domain)
    except Exception:
        # Deliberately broad, this is best effort. Unlike a bare except, it
        # lets KeyboardInterrupt and SystemExit through.
        pass

    # on fail, fall back to older (non compatible) IDNA 2003
    try:
        return passed_domain.decode('idna')
    except Exception:
        # could not decode, return string as is.
        return passed_domain
def _decode_general_name(backend, gn):
    """Convert a GENERAL_NAME structure into the matching ``x509`` object.

    Handles dNSName, uniformResourceIdentifier, registeredID, iPAddress,
    directoryName and rfc822Name. Host names inside DNS, URI and email
    values are IDNA-decoded.

    :raises ValueError: for a malformed rfc822Name.
    :raises x509.UnsupportedGeneralNameType: for otherName, x400Address and
        ediPartyName.
    """
    if gn.type == backend._lib.GEN_DNS:
        data = backend._ffi.buffer(gn.d.dNSName.data, gn.d.dNSName.length)[:]
        if not data:
            # Nothing to decode; do not hand an empty name to idna.
            decoded = u""
        elif data.startswith(b"*."):
            # This is a wildcard name. We need to remove the leading wildcard,
            # IDNA decode, then re-add the wildcard. Wildcard characters should
            # always be left-most (RFC 2595 section 2.4).
            decoded = u"*." + idna.decode(data[2:])
        else:
            # Not a wildcard, decode away. If the string has a * in it anywhere
            # invalid this will raise an InvalidCodePoint
            decoded = idna.decode(data)
            if data.startswith(b"."):
                # Name constraints may start with a period, and idna does
                # not return it (depending on version it is dropped or
                # rejected), so put it back when decoding succeeded.
                decoded = u"." + decoded
        return x509.DNSName(decoded)
    elif gn.type == backend._lib.GEN_URI:
        data = backend._ffi.buffer(
            gn.d.uniformResourceIdentifier.data,
            gn.d.uniformResourceIdentifier.length
        )[:].decode("ascii")
        parsed = urllib_parse.urlparse(data)
        # urlparse reports hostname as None for URIs without a network
        # location; idna.decode cannot take None.
        if parsed.hostname:
            hostname = idna.decode(parsed.hostname)
        else:
            hostname = u""
        if parsed.port:
            netloc = hostname + u":" + six.text_type(parsed.port)
        else:
            netloc = hostname

        # Note that building a URL in this fashion means it should be
        # semantically indistinguishable from the original but is not
        # guaranteed to be exactly the same.
        uri = urllib_parse.urlunparse((
            parsed.scheme,
            netloc,
            parsed.path,
            parsed.params,
            parsed.query,
            parsed.fragment
        ))
        return x509.UniformResourceIdentifier(uri)
    elif gn.type == backend._lib.GEN_RID:
        oid = _obj2txt(backend, gn.d.registeredID)
        return x509.RegisteredID(x509.ObjectIdentifier(oid))
    elif gn.type == backend._lib.GEN_IPADD:
        return x509.IPAddress(
            ipaddress.ip_address(
                backend._ffi.buffer(
                    gn.d.iPAddress.data, gn.d.iPAddress.length
                )[:]
            )
        )
    elif gn.type == backend._lib.GEN_DIRNAME:
        return x509.DirectoryName(
            _decode_x509_name(backend, gn.d.directoryName)
        )
    elif gn.type == backend._lib.GEN_EMAIL:
        data = backend._ffi.buffer(
            gn.d.rfc822Name.data, gn.d.rfc822Name.length
        )[:].decode("ascii")
        name, address = parseaddr(data)
        parts = address.split(u"@")
        if name or len(parts) > 2 or not address:
            # parseaddr has found a name (e.g. Name <email>) or the split
            # has found more than 2 parts (which means more than one @ sign)
            # or the entire value is an empty string.
            raise ValueError("Invalid rfc822name value")
        elif len(parts) == 1:
            # Single label email name. This is valid for local delivery. No
            # IDNA decoding can be done since there is no domain component.
            return x509.RFC822Name(address)
        else:
            # A normal email of the form user@domain.com. Let's attempt to
            # decode the domain component and return the entire address.
            return x509.RFC822Name(
                parts[0] + u"@" + idna.decode(parts[1])
            )
    else:
        # otherName, x400Address or ediPartyName
        raise x509.UnsupportedGeneralNameType(
            "{0} is not a supported type".format(
                x509._GENERAL_NAMES.get(gn.type, gn.type)
            ),
            gn.type
        )
def _decode_general_name(backend, gn):
    """Convert a GENERAL_NAME structure into the matching ``x509`` object.

    Handles dNSName, uniformResourceIdentifier, registeredID, iPAddress
    (single addresses and address/netmask pairs), directoryName, rfc822Name
    and otherName. Host names inside DNS, URI and email values are
    IDNA-decoded.

    :raises ValueError: for a malformed rfc822Name or a non-contiguous
        netmask.
    :raises x509.UnsupportedGeneralNameType: for x400Address and
        ediPartyName.
    """
    if gn.type == backend._lib.GEN_DNS:
        data = backend._asn1_string_to_bytes(gn.d.dNSName)
        if not data:
            decoded = u""
        elif data.startswith(b"*."):
            # This is a wildcard name. We need to remove the leading wildcard,
            # IDNA decode, then re-add the wildcard. Wildcard characters should
            # always be left-most (RFC 2595 section 2.4).
            decoded = u"*." + idna.decode(data[2:])
        else:
            # Not a wildcard, decode away. If the string has a * in it anywhere
            # invalid this will raise an InvalidCodePoint
            decoded = idna.decode(data)
            if data.startswith(b"."):
                # idna strips leading periods. Name constraints can have that
                # so we need to re-add it. Sigh.
                decoded = u"." + decoded
        return x509.DNSName(decoded)
    elif gn.type == backend._lib.GEN_URI:
        data = backend._asn1_string_to_ascii(gn.d.uniformResourceIdentifier)
        parsed = urllib_parse.urlparse(data)
        if parsed.hostname:
            hostname = idna.decode(parsed.hostname)
        else:
            hostname = ""
        if parsed.port:
            netloc = hostname + u":" + six.text_type(parsed.port)
        else:
            netloc = hostname

        # Note that building a URL in this fashion means it should be
        # semantically indistinguishable from the original but is not
        # guaranteed to be exactly the same.
        uri = urllib_parse.urlunparse((
            parsed.scheme,
            netloc,
            parsed.path,
            parsed.params,
            parsed.query,
            parsed.fragment
        ))
        return x509.UniformResourceIdentifier(uri)
    elif gn.type == backend._lib.GEN_RID:
        oid = _obj2txt(backend, gn.d.registeredID)
        return x509.RegisteredID(x509.ObjectIdentifier(oid))
    elif gn.type == backend._lib.GEN_IPADD:
        data = backend._asn1_string_to_bytes(gn.d.iPAddress)
        data_len = len(data)
        if data_len == 8 or data_len == 32:
            # This is an IPv4 or IPv6 Network and not a single IP. This
            # type of data appears in Name Constraints. Unfortunately,
            # ipaddress doesn't support packed bytes + netmask. Additionally,
            # IPv6Network can only handle CIDR rather than the full 16 byte
            # netmask. To handle this we convert the netmask to integer, then
            # find the first 0 bit, which will be the prefix. If another 1
            # bit is present after that the netmask is invalid.
            base = ipaddress.ip_address(data[:data_len // 2])
            netmask = ipaddress.ip_address(data[data_len // 2:])
            # bin() drops leading zero bits, so pad back to the full mask
            # width (half of data is the mask: data_len // 2 bytes, i.e.
            # data_len * 4 bits). Without this, a mask such as 0.0.0.255
            # would be read as eight 1 bits and accepted as a /8.
            bits = bin(int(netmask))[2:].zfill(data_len * 4)
            prefix = bits.find('0')
            # If no 0 bits are found it is a /32 or /128
            if prefix == -1:
                prefix = len(bits)

            if "1" in bits[prefix:]:
                raise ValueError("Invalid netmask")

            ip = ipaddress.ip_network(base.exploded + u"/{0}".format(prefix))
        else:
            ip = ipaddress.ip_address(data)

        return x509.IPAddress(ip)
    elif gn.type == backend._lib.GEN_DIRNAME:
        return x509.DirectoryName(
            _decode_x509_name(backend, gn.d.directoryName)
        )
    elif gn.type == backend._lib.GEN_EMAIL:
        data = backend._asn1_string_to_ascii(gn.d.rfc822Name)
        name, address = parseaddr(data)
        parts = address.split(u"@")
        if name or not address:
            # parseaddr has found a name (e.g. Name <email>) or the entire
            # value is an empty string.
            raise ValueError("Invalid rfc822name value")
        elif len(parts) == 1:
            # Single label email name. This is valid for local delivery. No
            # IDNA decoding can be done since there is no domain component.
            return x509.RFC822Name(address)
        else:
            # A normal email of the form user@domain.com. Let's attempt to
            # decode the domain component and return the entire address.
            return x509.RFC822Name(
                parts[0] + u"@" + idna.decode(parts[1])
            )
    elif gn.type == backend._lib.GEN_OTHERNAME:
        type_id = _obj2txt(backend, gn.d.otherName.type_id)
        value = backend._asn1_to_der(gn.d.otherName.value)
        return x509.OtherName(x509.ObjectIdentifier(type_id), value)
    else:
        # x400Address or ediPartyName
        raise x509.UnsupportedGeneralNameType(
            "{0} is not a supported type".format(
                x509._GENERAL_NAMES.get(gn.type, gn.type)
            ),
            gn.type
        )
def process_result_value(self, value, dialect):
    """Decode an IDNA-encoded value loaded from the database to Unicode.

    :param value: the stored value, or None for a NULL column
    :param dialect: the dialect in use (not consulted here)
    :return: the decoded text, or None when ``value`` is None
    """
    # NULL columns arrive as None, which idna.decode cannot handle.
    if value is None:
        return None
    return idna.decode(value)
def process_result_value(self, value, dialect):
    """Decode the domain part of a stored e-mail address to Unicode.

    :param value: the stored ``localpart@domain`` string, or None for a
        NULL column
    :param dialect: the dialect in use (not consulted here)
    :return: the address with its domain IDNA-decoded, or None when
        ``value`` is None
    :raises ValueError: if ``value`` contains no '@'
    """
    # NULL columns arrive as None and have nothing to decode.
    if value is None:
        return None
    # Split on the last '@' only: the domain never contains one, while a
    # local part can, and a plain split would then fail to unpack.
    localpart, domain_name = value.rsplit('@', 1)
    return "{0}@{1}".format(
        localpart,
        idna.decode(domain_name),
    )