def process(self):
    """Print one comma-separated line per distinct stored domain.

    Every distinct ``domaine`` value of the ``new_domaines`` collection is
    decoded as ``http://<domain>`` and printed as ``tld,domain,labels``,
    where ``labels`` are the subdomain labels in reverse order joined by
    commas. Every occurrence of ``www`` is removed from the labels and any
    doubled comma that results is collapsed into one.
    """
    list_domains = self.db['new_domaines'].distinct('domaine')
    fex = Faup()
    for domain in list_domains:
        url = 'http://' + str(domain)
        fex.decode(url, False)
        # NOTE(review): pyfaup appears to return None when the host has no
        # subdomain -- confirm. Fall back to '' so such hosts are printed
        # instead of crashing on None.split().
        subdomain = fex.get_subdomain() or ''
        labels = ','.join(subdomain.split('.')[::-1]).replace('www', '')
        line = fex.get_tld() + ',' + fex.get_domain() + ',' + labels
        # The comma clean-up must be applied to the string itself: print()
        # returns None on Python 3, so chaining .replace() onto the print
        # call raised AttributeError there.
        print(line.replace(',,', ','))
def __post_init__(self):
    """Derive the domain fields from ``self.address`` when none was given.

    Does nothing if ``self.domain`` is already set. Otherwise the text that
    follows the last ``@`` in ``self.address`` is decoded with Faup, and the
    resulting TLD, domain and subdomain are stored on the instance.
    """
    if self.domain is not None:
        return

    # Example code at https://programtalk.com/python-examples-amp/pyfaup.faup.Faup/
    parser = Faup()
    # Everything after the last "@"; the whole address if it contains none.
    host = self.address.rpartition("@")[2]
    parser.decode(host)

    self.top_level_domain = parser.get_tld()
    self.domain = parser.get_domain()
    self.subdomain = parser.get_subdomain()
def process(self):
    """Print ``tld,domain,labels`` for each distinct domain in the database.

    The domains are the distinct ``domaine`` values of the ``new_domaines``
    collection. Each one is decoded as ``http://<domain>``. ``labels`` is
    the subdomain split on dots, reversed and comma-joined, with every
    occurrence of ``www`` removed; doubled commas in the final line are
    collapsed into a single comma.
    """
    fex = Faup()
    for domain in self.db['new_domaines'].distinct('domaine'):
        fex.decode('http://' + str(domain), False)

        # NOTE(review): pyfaup appears to return None for a host without a
        # subdomain -- confirm. Treat that as an empty subdomain rather
        # than failing on None.split().
        reversed_labels = (fex.get_subdomain() or '').split('.')[::-1]
        fields = [
            fex.get_tld(),
            fex.get_domain(),
            ','.join(reversed_labels).replace('www', ''),
        ]

        # Clean up the commas before printing. The original chained
        # .replace() onto print(...), which is None on Python 3 and
        # therefore raised AttributeError.
        print(','.join(fields).replace(',,', ','))
def __post_init__(self):
    """Split ``self.url`` into its components and store them on the instance.

    The URL is decoded with Faup; its scheme, TLD, domain, subdomain and
    resource path are then written to ``scheme``, ``top_level_domain``,
    ``domain``, ``subdomain`` and ``path`` respectively.
    """
    # Example code at https://programtalk.com/python-examples-amp/pyfaup.faup.Faup/
    parser = Faup()
    parser.decode(self.url)

    # (attribute to set, Faup getter providing its value), in assignment order.
    components = (
        ("scheme", parser.get_scheme),
        ("top_level_domain", parser.get_tld),
        ("domain", parser.get_domain),
        ("subdomain", parser.get_subdomain),
        ("path", parser.get_resource_path),
    )
    for attribute, getter in components:
        setattr(self, attribute, getter())
#!/usr/bin/python
"""Decode every URL listed in a file with Faup and request its TLD.

Usage: pass the path of a text file containing one URL per line as the
first command-line argument. The script prints nothing.
"""
from pyfaup.faup import Faup
import sys
import codecs
import binascii

f = Faup()

# Read as ASCII and silently drop any byte that is not (errors='ignore').
# The context manager closes the file, which the script previously left open.
with codecs.open(sys.argv[1], 'r', 'ascii', errors='ignore') as file_urls:
    urls = file_urls.readlines()

for url in urls:
    url = url.replace('\n', '')
    # Python 3 form of the call: the URL is handed to decode() as UTF-8 bytes.
    f.decode(bytes(url, 'utf-8'), False)
    # NOTE(review): the TLD is computed and discarded -- presumably this
    # script only exercises the parser (e.g. for timing); confirm.
    f.get_tld()
#!/usr/bin/python
"""Print each URL listed in a file together with the TLD Faup extracts.

Usage: pass the path of a text file containing one URL per line as the
first command-line argument.
"""
from pyfaup.faup import Faup
import sys
import codecs
import binascii

f = Faup()

# Read as ASCII and silently drop any byte that is not (errors='ignore').
# The context manager closes the file, which the script previously left open.
with codecs.open(sys.argv[1], 'r', 'ascii', errors='ignore') as file_urls:
    urls = file_urls.readlines()

for url in urls:
    # Strip the line terminator so it does not become part of the URL.
    url = url.replace('\n', '')
    print("URL:[%s]" % (url))
    f.decode(url)
    print("-----> Extracted TLD:%s" % f.get_tld())