def test_get_sld_from_builtin_full_publicsuffix_org_list_with_two_level_rules(
        self):
    """Check ``get_sld`` for ``com`` names with a list built from ``None``."""
    psl = publicsuffix.PublicSuffixList(None)
    # (queried domain, expected result)
    cases = (
        ('com', 'com'),
        ('example.com', 'example.com'),
        ('b.example.com', 'example.com'),
        ('a.b.example.com', 'example.com'),
    )
    for domain, expected in cases:
        assert psl.get_sld(domain) == expected
def test_get_sld_from_builtin_full_publicsuffix_org_list_with_one_rule(
        self):
    """Check ``get_sld`` for ``biz`` names with a list built from ``None``."""
    psl = publicsuffix.PublicSuffixList(None)
    # (queried domain, expected result)
    cases = (
        ('biz', 'biz'),
        ('domain.biz', 'domain.biz'),
        ('b.domain.biz', 'domain.biz'),
        ('a.b.domain.biz', 'domain.biz'),
    )
    for domain, expected in cases:
        assert psl.get_sld(domain) == expected
def test_get_sld_from_builtin_full_publicsuffix_org_list_with_wildcard_rule(
        self):
    """Check ``get_sld`` for ``er`` names with a list built from ``None``."""
    psl = publicsuffix.PublicSuffixList(None)
    # (queried domain, expected result)
    cases = (
        ('er', 'er'),
        ('c.er', 'c.er'),
        ('b.c.er', 'b.c.er'),
        ('a.b.c.er', 'b.c.er'),
    )
    for domain, expected in cases:
        assert psl.get_sld(domain) == expected
def test_get_sld_from_builtin_full_publicsuffix_org_list_with_listed_ut_non_internet_tld(
        self):
    """Every ``local`` name is expected to resolve to ``'local'``."""
    psl = publicsuffix.PublicSuffixList(None)
    domains = (
        'local',
        'example.local',
        'b.example.local',
        'a.b.example.local',
    )
    for domain in domains:
        assert psl.get_sld(domain) == 'local'
def test_utf8_encoded(self):
    """Check ``get_public_suffix`` on non-ASCII names with ``idna=False``."""
    # idna=False: uses the list provided utf-8 defaults
    psl = publicsuffix.PublicSuffixList(idna=False)
    # (queried domain, expected result)
    cases = (
        (u'食狮.com.cn', u'食狮.com.cn'),
        (u'食狮.公司.cn', u'食狮.公司.cn'),
        (u'www.食狮.公司.cn', u'食狮.公司.cn'),
        (u'shishi.公司.cn', u'shishi.公司.cn'),
    )
    for domain, expected in cases:
        assert psl.get_public_suffix(domain) == expected
def test_get_sld_from_builtin_full_publicsuffix_org_list_with_leading_dot(
        self):
    """Check ``get_sld`` on names that start with a dot."""
    psl = publicsuffix.PublicSuffixList(None)
    # (queried domain, expected result)
    cases = (
        ('.com', 'com'),
        ('.example', 'example'),
        ('.example.com', 'example.com'),
        ('.example.example', 'example'),
    )
    for domain, expected in cases:
        assert psl.get_sld(domain) == expected
def test_get_sld_from_builtin_full_publicsuffix_org_list_with_unlisted_tld(
        self):
    """Every name under ``example`` is expected to resolve to ``'example'``."""
    psl = publicsuffix.PublicSuffixList(None)
    domains = (
        'example',
        'example.example',
        'b.example.example',
        'a.b.example.example',
    )
    for domain in domains:
        assert psl.get_sld(domain) == 'example'
def test_convenience_functions(self):
    """``get_sld`` and ``get_public_suffix`` must return the same value."""
    psl = publicsuffix.PublicSuffixList()
    # these functions should be identical
    for domain in ('www.google.com', 'www.test.ak.us'):
        from_sld = psl.get_sld(domain)
        from_public_suffix = psl.get_public_suffix(domain)
        assert from_sld == from_public_suffix
def test_get_public_suffix_from_empty_list(self):
    """With an empty rule list every queried name should yield ``'com'``."""
    psl = publicsuffix.PublicSuffixList([])
    for domain in ('com', 'COM', '.com', 'a.example.com'):
        assert psl.get_public_suffix(domain) == 'com'
def test_get_sld_from_list(self):
    """With the single rule ``com`` each name should yield ``'example.com'``."""
    psl = publicsuffix.PublicSuffixList(['com'])
    domains = (
        'a.example.com',
        'a.a.example.com',
        'a.a.a.example.com',
        'A.example.com',
        '.a.a.example.com',
    )
    for domain in domains:
        assert psl.get_sld(domain) == 'example.com'
def test_get_sld_backward_compatibility(self):
    """Each of these names is expected to come back from ``get_sld`` unchanged."""
    psl = publicsuffix.PublicSuffixList()
    unchanged = ('com', 'foo.com', 'foo.co.jp', 'co.jp', 'jp')
    for domain in unchanged:
        assert psl.get_sld(domain) == domain
def test_no_wildcard(self):
    """Check ``get_public_suffix`` results when called with ``wildcard=False``."""
    psl = publicsuffix.PublicSuffixList()
    # test completion when no wildcards should be processed
    cases = (
        ('telinet.com.pg', 'com.pg'),
        ('blah.ap-southeast-1.elb.amazonaws.com',
         'ap-southeast-1.elb.amazonaws.com'),
    )
    for domain, expected in cases:
        assert psl.get_public_suffix(domain, wildcard=False) == expected
def test_tld_function(self):
    """Check ``get_tld`` with and without the ``wildcard``/``strict`` flags."""
    psl = publicsuffix.PublicSuffixList()
    # checks that the eTLD or TLD is produced
    # (queried domain, keyword options, expected result)
    cases = (
        ('com', {}, 'com'),
        ('telinet.com.pg', {'wildcard': True}, 'com.pg'),
        ('telinet.com.pg', {'wildcard': False}, 'pg'),
        ('telinet.co.uk', {'wildcard': False}, 'co.uk'),
    )
    for domain, options, expected in cases:
        assert psl.get_tld(domain, **options) == expected

    strict_result = psl.get_tld('blah.local', strict=True)
    assert strict_result is None
def test_get_sld_from_list_with_unicode(self):
    """Check ``get_sld`` against a one-rule non-ASCII list with ``idna=False``."""
    psl = publicsuffix.PublicSuffixList([u'\u0440\u0444'], idna=False)
    # (queried domain, expected result)
    cases = (
        (u'\u0440\u0444', u'\u0440\u0444'),
        (u'example.\u0440\u0444', u'example.\u0440\u0444'),
        (u'a.example.\u0440\u0444', u'example.\u0440\u0444'),
        (u'a.a.example.\u0440\u0444', u'example.\u0440\u0444'),
    )
    for domain, expected in cases:
        assert psl.get_sld(domain) == expected
def test_get_sld_backward_compatibility_strict_and_wildcard_flags(self):
    """Check ``get_sld`` on ``local`` names with the ``strict``/``wildcard`` flags.

    With ``strict=True`` the lookups are expected to return ``None``; all
    other calls are expected to return ``'local'``.
    """
    psl = publicsuffix.PublicSuffixList()
    assert psl.get_sld('local') == 'local'
    assert psl.get_sld('foo.local') == 'local'
    # None is a singleton: test identity rather than equality.
    assert psl.get_sld('local', strict=True) is None
    assert psl.get_sld('foo.local', strict=True) is None
    assert psl.get_sld('local', wildcard=False) == 'local'
    assert psl.get_sld('foo.local', strict=False) == 'local'
def test_get_sld_top_convenience_function_is_the_same_as_PublicSuffixList_method(
        self):
    """Module-level ``get_sld`` must agree with ``PublicSuffixList.get_sld``."""
    psl = publicsuffix.PublicSuffixList()
    # these functions should be identical
    for domain in ('www.google.com', 'www.test.ak.us'):
        from_method = psl.get_sld(domain)
        from_module = publicsuffix.get_sld(domain)
        assert from_method == from_module
def test_get_sld_from_builtin_full_publicsuffix_org_list_with_us_k12(self):
    """Check ``get_sld`` for ``ak.us`` and ``k12.ak.us`` names."""
    psl = publicsuffix.PublicSuffixList(None)
    # (queried domain, expected result)
    cases = (
        ('ak.us', 'ak.us'),
        ('test.ak.us', 'test.ak.us'),
        ('www.test.ak.us', 'test.ak.us'),
        ('k12.ak.us', 'k12.ak.us'),
        ('test.k12.ak.us', 'test.k12.ak.us'),
        ('www.test.k12.ak.us', 'test.k12.ak.us'),
    )
    for domain, expected in cases:
        assert psl.get_sld(domain) == expected
def test_get_sld_idna_encoded(self):
    """Check ``get_sld`` on punycode (``xn--``) names with ``idna=True``."""
    # actually the default
    psl = publicsuffix.PublicSuffixList(idna=True)
    # (queried domain, expected result)
    cases = (
        ('xn--85x722f.com.cn', 'xn--85x722f.com.cn'),
        ('xn--85x722f.xn--55qx5d.cn', 'xn--85x722f.xn--55qx5d.cn'),
        ('www.xn--85x722f.xn--55qx5d.cn', 'xn--85x722f.xn--55qx5d.cn'),
        ('shishi.xn--55qx5d.cn', 'shishi.xn--55qx5d.cn'),
    )
    for domain, expected in cases:
        assert psl.get_sld(domain) == expected
def test_get_public_suffix_from_list_with_unicode(self):
    """Check ``get_public_suffix`` against a one-rule non-ASCII list.

    All text values go through the ``u`` helper, exactly as in the list
    passed to the constructor.
    """
    psl = publicsuffix.PublicSuffixList([u('\u0440\u0444')], idna=False)
    # (queried domain, expected result)
    cases = (
        (u('\u0440\u0444'), u('\u0440\u0444')),
        (u('example.\u0440\u0444'), u('example.\u0440\u0444')),
        (u('a.example.\u0440\u0444'), u('example.\u0440\u0444')),
        (u('a.a.example.\u0440\u0444'), u('example.\u0440\u0444')),
    )
    for domain, expected in cases:
        assert psl.get_public_suffix(domain) == expected
def _validate(self) -> dict:
    """Validate every entry of ``self.mx_records``.

    Each record is split on a single space and must yield exactly two
    parts, ``<preference> <exchange>``. A record that fails a check
    contributes one error and is skipped; the remaining records are still
    processed. ``POSSIBLE_BAD_EXCHANGE`` is recorded as a warning only: the
    record is still accepted.

    Side effects:
        Sets ``self.errors`` and ``self.valid_mx_records`` to the lists
        held in the returned dict (on every path, including the early
        return for missing records).

    Returns:
        dict: ``{'errors': [...], 'records': [...]}`` where ``errors``
        holds ``MXErrors`` members (plus whatever ``validate_numbers``
        reports) and ``records`` holds one ``MxRecord`` per accepted entry.
    """
    errors = []
    records = []
    result = {'errors': errors, 'records': records}

    # Publish the (still empty) lists up front so that every return path
    # leaves both attributes in a consistent state.
    self.errors = errors
    self.valid_mx_records = records

    if not self.mx_records:
        errors.append(MXErrors.NO_MX_RECORDS)
        return result

    # Built lazily, at most once per call: constructing a PublicSuffixList
    # for every record is needlessly expensive.
    known_tlds = None

    for record in self.mx_records:
        if not record:
            errors.append(MXErrors.BLANK_MX_RECORD)
            continue

        parts = record.split(' ')
        if len(parts) > 2:
            errors.append(MXErrors.TOO_MANY_PARTS)
            continue
        if len(parts) < 2:
            errors.append(MXErrors.TOO_FEW_PARTS)
            continue
        preference, exchange = parts

        # Check preference to be an unsigned 16 bit int, RFC 974 (Page 2)
        preference, preference_errors = validate_numbers(
            preference,
            MXErrors.INVALID_PREFERENCE,
            MXErrors.PREFERENCE_OUT_OF_RANGE,
            MaxValue.USIXTEEN)
        errors.extend(preference_errors)
        if preference_errors:
            continue

        if is_an_ip(exchange):
            errors.append(MXErrors.EXCHANGE_IS_AN_IP)
            continue

        if not validate_domain(exchange):
            errors.append(MXErrors.INVALID_EXCHANGE)
            continue

        # Look the TLD up once and reuse it for both checks below.
        tld = publicsuffix2.get_tld(exchange)
        if known_tlds is None:
            known_tlds = frozenset(publicsuffix2.PublicSuffixList().tlds)
        if tld not in known_tlds:
            errors.append(MXErrors.NOT_PUBLIC_DOMAIN)
            continue

        # No trailing dot on the exchange: flag it, but keep the record.
        if not exchange.endswith('.') and exchange.endswith(tld):
            errors.append(MXErrors.POSSIBLE_BAD_EXCHANGE)

        records.append(MxRecord(preference, exchange))

    return result
def test_get_sld_from_list_with_exception_rule(self):
    """Check ``get_sld`` with the rules ``*.example.com`` and ``!b.example.com``."""
    rules = ['*.example.com', '!b.example.com']
    psl = publicsuffix.PublicSuffixList(rules)
    # (queried domain, expected result)
    cases = (
        ('a.example.com', 'a.example.com'),
        ('a.a.example.com', 'a.a.example.com'),
        ('a.a.a.example.com', 'a.a.example.com'),
        ('a.a.a.a.example.com', 'a.a.example.com'),
        ('b.example.com', 'b.example.com'),
        ('b.b.example.com', 'b.example.com'),
        ('b.b.b.example.com', 'b.example.com'),
        ('b.b.b.b.example.com', 'b.example.com'),
    )
    for domain, expected in cases:
        assert psl.get_sld(domain) == expected
def test_get_sld_from_builtin_full_publicsuffix_org_list_with_japanese_domain(
        self):
    """Check ``get_sld`` for names under ``jp``, ``ac.jp`` and ``kobe.jp``."""
    psl = publicsuffix.PublicSuffixList(None)
    # (queried domain, expected result)
    cases = (
        ('jp', 'jp'),
        ('test.jp', 'test.jp'),
        ('www.test.jp', 'test.jp'),
        ('ac.jp', 'ac.jp'),
        ('test.ac.jp', 'test.ac.jp'),
        ('www.test.ac.jp', 'test.ac.jp'),
        ('kobe.jp', 'kobe.jp'),
        ('c.kobe.jp', 'c.kobe.jp'),
        ('b.c.kobe.jp', 'b.c.kobe.jp'),
        ('a.b.c.kobe.jp', 'b.c.kobe.jp'),
    )
    for domain, expected in cases:
        assert psl.get_sld(domain) == expected
def test_get_tld_returns_correct_tld_or_etld(self):
    """Check ``get_tld`` across the ``wildcard`` and ``strict`` flags.

    String results are compared by equality; the ``blah.local`` lookups
    that are expected to find nothing are checked with ``is None``.
    """
    psl = publicsuffix.PublicSuffixList()
    assert psl.get_tld('com') == 'com'
    assert psl.get_tld('city.kobe.jp') == 'kobe.jp'
    assert psl.get_tld('kobe.jp') == 'kobe.jp'
    assert psl.get_tld('amazonaws.com') == 'amazonaws.com'
    assert psl.get_tld('telinet.com.pg', wildcard=True) == 'com.pg'
    assert psl.get_tld('telinet.com.pg', wildcard=False) == 'pg'
    assert psl.get_tld('com.pg', wildcard=True) == 'com.pg'
    assert psl.get_tld('com.pg', wildcard=False) == 'pg'
    assert psl.get_tld('telinet.co.uk', wildcard=False) == 'co.uk'
    assert psl.get_tld('co.uk', wildcard=True) == 'co.uk'
    assert psl.get_tld('co.uk', wildcard=False) == 'co.uk'
    # None is a singleton: test identity rather than equality.
    assert psl.get_tld('blah.local', strict=True) is None
    assert psl.get_tld('blah.local', wildcard=False) is None
    assert psl.get_tld('blah.local') == 'local'
def get_base_domain(domain, use_fresh_psl=False):
    """
    Gets the base domain name for the given domain

    .. note::
        Results are based on a list of public domain suffixes at
        https://publicsuffix.org/list/public_suffix_list.dat.

    Args:
        domain (str): A domain or subdomain
        use_fresh_psl (bool): Download a fresh Public Suffix List

    Returns:
        str: The base domain of the given domain

    Raises:
        DownloadError: If a fresh list is needed and cannot be downloaded
            or saved (this includes HTTP error responses and timeouts)
    """
    psl_path = os.path.join(tempdir, "public_suffix_list.dat")

    def download_psl():
        """Fetch the current list and store it at ``psl_path``."""
        url = "https://publicsuffix.org/list/public_suffix_list.dat"
        # Use a browser-like user agent string to bypass some proxy blocks
        headers = {"User-Agent": USER_AGENT}
        try:
            # Without a timeout requests can block forever on a stalled
            # connection.
            response = requests.get(url, headers=headers, timeout=30)
            # Never cache an HTTP error page as if it were the list.
            response.raise_for_status()
            with open(psl_path, "w", encoding="utf-8") as fresh_psl_file:
                fresh_psl_file.write(response.text)
        except Exception as error:
            raise DownloadError(
                "Failed to download an updated PSL {0}".format(
                    error)) from error

    if not use_fresh_psl:
        # Fall back to the list bundled with publicsuffix2.
        return publicsuffix2.get_sld(domain)

    if not os.path.exists(psl_path):
        download_psl()
    else:
        # Refresh the cached copy once it is more than a day old.
        psl_age = datetime.now() - datetime.fromtimestamp(
            os.stat(psl_path).st_mtime)
        if psl_age > timedelta(hours=24):
            download_psl()

    with open(psl_path, encoding="utf-8") as psl_file:
        psl = publicsuffix2.PublicSuffixList(psl_file)

    return psl.get_public_suffix(domain)
def is_external_domain(domain):
    """
    Test if the domain is an "external" domain.

    An external domain is classified as any child of a public suffix.
    Only the right-most label of ``domain`` is examined, and it is
    compared case-insensitively.

    Examples:

    >>> is_external_domain('google.com')
    True
    >>> is_external_domain('tehunoth.com')
    True
    >>> is_external_domain('localhost')
    False
    >>> is_external_domain('tneohu')
    False
    >>> is_external_domain('test.cluster.local')
    False

    Domain names are case-insensitive

    >>> is_external_domain('GOOGLE.COM')
    True

    Controversial, but thanks to Google this is a thing

    >>> is_external_domain('web.dev')
    True
    """
    global _PSL

    # Ignore leading/trailing dots, keep the last label, and lowercase it
    # so that e.g. 'COM' is treated the same as 'com'.
    suffix = domain.strip('.').rsplit('.', 1)[-1].lower()

    # Build the suffix list once and reuse it on later calls.
    _PSL = _PSL or publicsuffix2.PublicSuffixList()

    # root[1] maps each top-level label of the list to its sub-rules.
    return suffix in _PSL.root[1]
def test_get_public_suffix_from_list_with_fqdn(self):
    """A name with a trailing dot should yield the result without the dot."""
    psl = publicsuffix.PublicSuffixList(['com'])
    result = psl.get_public_suffix('example.com.')
    assert result == 'example.com'
def test_get_sld_from_empty_list_in_strict_mode(self):
    """With an empty rule list, a strict lookup is expected to return ``None``."""
    psl = publicsuffix.PublicSuffixList([])
    # None is a singleton: test identity rather than equality.
    assert psl.get_sld('com', strict=True) is None
def test_get_sld_from_builtin_full_publicsuffix_org_list_with_mixed_case(
        self):
    """Mixed-case input is expected to come back lower-cased."""
    psl = publicsuffix.PublicSuffixList(None)
    # (queried domain, expected result)
    cases = (
        ('COM', 'com'),
        ('example.COM', 'example.com'),
        ('WwW.example.COM', 'example.com'),
    )
    for domain, expected in cases:
        assert psl.get_sld(domain) == expected
def test_get_public_suffix_from_builtin_full_publicsuffix_org(self):
    """Run ``get_public_suffix`` over a grouped table of sample domains.

    Each group below lists ``(queried domain, expected result)`` pairs; the
    groups are checked in the order they are written.
    """
    psl = publicsuffix.PublicSuffixList(None)

    groups = (
        # Mixed case.
        (
            ('COM', 'com'),
            ('example.COM', 'example.com'),
            ('WwW.example.COM', 'example.com'),
        ),
        # Leading dot.
        (
            ('.com', 'com'),
            ('.example', 'example'),
            ('.example.com', 'example.com'),
            ('.example.example', 'example'),
        ),
        # Unlisted TLD.
        (
            ('example', 'example'),
            ('example.example', 'example'),
            ('b.example.example', 'example'),
            ('a.b.example.example', 'example'),
        ),
        # Listed, but non-Internet, TLD.
        (
            ('local', 'local'),
            ('example.local', 'local'),
            ('b.example.local', 'local'),
            ('a.b.example.local', 'local'),
        ),
        # TLD with only one rule.
        (
            ('biz', 'biz'),
            ('domain.biz', 'domain.biz'),
            ('b.domain.biz', 'domain.biz'),
            ('a.b.domain.biz', 'domain.biz'),
        ),
        # TLD with some two-level rules.
        (
            ('com', 'com'),
            ('example.com', 'example.com'),
            ('b.example.com', 'example.com'),
            ('a.b.example.com', 'example.com'),
            ('uk.com', 'uk.com'),
            ('example.uk.com', 'example.uk.com'),
            ('b.example.uk.com', 'example.uk.com'),
            ('a.b.example.uk.com', 'example.uk.com'),
            ('test.ac', 'test.ac'),
        ),
        # TLD with only one wildcard rule.
        (
            ('er', 'er'),
            ('c.er', 'c.er'),
            ('b.c.er', 'b.c.er'),
            ('a.b.c.er', 'b.c.er'),
        ),
        # More complex TLD.
        (
            ('jp', 'jp'),
            ('test.jp', 'test.jp'),
            ('www.test.jp', 'test.jp'),
            ('ac.jp', 'ac.jp'),
            ('test.ac.jp', 'test.ac.jp'),
            ('www.test.ac.jp', 'test.ac.jp'),
            ('kobe.jp', 'kobe.jp'),
            ('c.kobe.jp', 'c.kobe.jp'),
            ('b.c.kobe.jp', 'b.c.kobe.jp'),
            ('a.b.c.kobe.jp', 'b.c.kobe.jp'),
        ),
        # Exception rule.
        (
            ('city.kobe.jp', 'city.kobe.jp'),
            ('www.city.kobe.jp', 'city.kobe.jp'),
        ),
        # US K12.
        (
            ('us', 'us'),
            ('test.us', 'test.us'),
            ('www.test.us', 'test.us'),
            ('ak.us', 'ak.us'),
            ('test.ak.us', 'test.ak.us'),
            ('www.test.ak.us', 'test.ak.us'),
            ('k12.ak.us', 'k12.ak.us'),
            ('test.k12.ak.us', 'test.k12.ak.us'),
            ('www.test.k12.ak.us', 'test.k12.ak.us'),
        ),
    )

    for group in groups:
        for domain, expected in group:
            assert psl.get_public_suffix(domain) == expected
def test_exceptions(self):
    """Check ``get_public_suffix`` for two names under ``ck``."""
    psl = publicsuffix.PublicSuffixList()
    # (queried domain, expected result)
    cases = (
        ('www.www.ck', 'www.ck'),  # www is the exception
        ('this.that.ck', 'this.that.ck'),
    )
    for domain, expected in cases:
        assert psl.get_public_suffix(domain) == expected