def test_get_domain():
    """Check urls.get_domain against a fixed list of sample URLs."""
    # (url, expected result) pairs; they are checked in the order listed.
    expectations = [
        ('http://www.bbc.co.uk', 'bbc.co.uk'),
        ('http://www.nhs.uk', 'www.nhs.uk'),  # nhs.uk is a public suffix, surprise
        ('http://sub.nhs.uk', 'sub.nhs.uk'),  # ditto
        ('http://www.example.com', 'example.com'),
        ('http://sub.example.com', 'example.com'),
        # We want this behavior for blogspot.
        # If the blogspot case doesn't work, try this from the shell:
        #   "tldextract -u -p"
        # Unfortunately, all tldextract users use the same cache.
        ('http://sub.blogspot.com', 'sub.blogspot.com'),
        ('http://www.com', 'www.com'),
    ]

    for url, expected in expectations:
        assert urls.get_domain(url) == expected
def test_get_domain():
    """Check urls.get_domain against a fixed list of sample URLs."""
    # Cases that carry no assertion message, checked in the order listed.
    plain_cases = (
        ('http://www.bbc.co.uk', 'bbc.co.uk'),
        ('http://www.nhs.uk', 'www.nhs.uk'),  # nhs.uk is a public suffix, surprise
        ('http://sub.nhs.uk', 'sub.nhs.uk'),  # ditto
        ('http://www.example.com', 'example.com'),
        ('http://sub.example.com', 'example.com'),
    )
    for url, expected in plain_cases:
        assert urls.get_domain(url) == expected

    # If the blogspot check doesn't work, try this from the shell:
    #   "tldextract -u -p"
    # Unfortunately, all tldextract users use the same cache:
    # https://github.com/john-kurkowski/tldextract/issues/66
    blogspot_domain = urls.get_domain('http://sub.blogspot.com')
    assert blogspot_domain == 'sub.blogspot.com', "make sure private domains are included"

    bare_suffix_domain = urls.get_domain('http://www.com')
    assert bare_suffix_domain == 'www.com'
def test_get_domain():
    """Check urls.get_domain against a fixed list of sample URLs."""

    def expect(url, domain):
        # Single message-less comparison of get_domain's result for one URL.
        assert urls.get_domain(url) == domain

    expect('http://www.bbc.co.uk', 'bbc.co.uk')
    expect('http://www.nhs.uk', 'www.nhs.uk')  # nhs.uk is a public suffix, surprise
    expect('http://sub.nhs.uk', 'sub.nhs.uk')  # ditto
    expect('http://www.example.com', 'example.com')
    expect('http://sub.example.com', 'example.com')

    # This check keeps its own message, so it is written out in full.
    # If it doesn't work, try this from the shell: "tldextract -u -p"
    # Unfortunately, all tldextract users use the same cache:
    # https://github.com/john-kurkowski/tldextract/issues/66
    private_result = urls.get_domain('http://sub.blogspot.com')
    assert private_result == 'sub.blogspot.com', "make sure private domains are included"

    expect('http://www.com', 'www.com')