Example #1
0
 def process(self):
     """Print one comma-separated line per distinct ``domaine`` value.

     The values come from ``self.db['new_domaines'].distinct('domaine')``.
     Each one is prefixed with ``http://`` and decoded with Faup; the
     printed line is ``tld,domain,<subdomain labels in reverse order>``
     with every ``www`` substring removed from the subdomain part and
     doubled commas collapsed to a single comma.
     """
     fex = Faup()
     for domain in self.db['new_domaines'].distinct('domaine'):
         fex.decode('http://' + str(domain), False)
         # NOTE(review): get_subdomain() appears to return None when the
         # URL has no subdomain; fall back to '' so .split() cannot fail.
         subdomain = fex.get_subdomain() or ''
         reversed_labels = ','.join(subdomain.split('.')[::-1]).replace('www', '')
         line = fex.get_tld() + ',' + fex.get_domain() + ',' + reversed_labels
         # Collapse the commas *before* printing: calling .replace() on the
         # result of print() only worked as a Python 2 print statement.
         print(line.replace(',,', ','))
Example #2
0
 def __post_init__(self):
     """Fill in the domain fields from ``self.address`` when ``domain`` is unset.

     Nothing happens if ``self.domain`` is already set. Otherwise the text
     after the last ``@`` in ``self.address`` is decoded with Faup and the
     resulting TLD, domain and subdomain are stored on the instance.
     """
     if self.domain is not None:
         return

     parser = Faup()
     host_part = self.address.split("@")[-1]
     parser.decode(host_part)

     self.top_level_domain = parser.get_tld()
     self.domain = parser.get_domain()
     self.subdomain = parser.get_subdomain()
Example #3
0
 def process(self):
     """Print one comma-separated line per distinct ``domaine`` value.

     The values come from ``self.db['new_domaines'].distinct('domaine')``.
     Each one is prefixed with ``http://`` and decoded with Faup; the
     printed line is ``tld,domain,<subdomain labels in reverse order>``
     with every ``www`` substring removed from the subdomain part and
     doubled commas collapsed to a single comma.
     """
     fex = Faup()
     for domain in self.db['new_domaines'].distinct('domaine'):
         url = 'http://' + str(domain)
         fex.decode(url, False)
         # NOTE(review): get_subdomain() appears to return None when the
         # URL has no subdomain; fall back to '' so .split() cannot fail.
         labels = (fex.get_subdomain() or '').split('.')
         subdomain_part = ','.join(labels[::-1]).replace('www', '')
         line = fex.get_tld() + ',' + fex.get_domain() + ',' + subdomain_part
         # The comma collapse must be applied to the string, not to the
         # return value of print() (which is None under Python 3).
         print(line.replace(',,', ','))
Example #4
0
    def __post_init__(self):
        """Decode ``self.url`` with Faup and store its parts on the instance.

        Sets ``scheme``, ``top_level_domain``, ``domain``, ``subdomain`` and
        ``path`` (the latter from Faup's resource path), in that order.
        """
        parser = Faup()
        parser.decode(self.url)

        # Attribute name -> Faup getter; each getter is called right before
        # its value is assigned, in the order listed.
        field_getters = (
            ("scheme", parser.get_scheme),
            ("top_level_domain", parser.get_tld),
            ("domain", parser.get_domain),
            ("subdomain", parser.get_subdomain),
            ("path", parser.get_resource_path),
        )
        for attr_name, getter in field_getters:
            setattr(self, attr_name, getter())
Example #5
0
#!/usr/bin/python

from pyfaup.faup import Faup
import sys
import codecs
import binascii

# Decode every URL listed (one per line) in the file named by the first
# command-line argument and extract its TLD. Nothing is printed.
if len(sys.argv) < 2:
    sys.exit("usage: %s <file-with-one-url-per-line>" % sys.argv[0])

f = Faup()
# Non-ASCII characters are dropped on read (errors='ignore'); the context
# manager guarantees the file handle is closed even if decoding raises.
with codecs.open(sys.argv[1], 'r', 'ascii', errors='ignore') as file_urls:
    for url in file_urls:
        url = url.replace('\n', '')
        # Python 3 form: Faup is handed the URL as UTF-8 bytes.
        f.decode(bytes(url, 'utf-8'), False)
        # The TLD is extracted but intentionally not used or printed.
        f.get_tld()
Example #6
0
File: test.py Project: aguinet/faup
#!/usr/bin/python

from pyfaup.faup import Faup
import sys
import codecs
import binascii

# Print every URL listed (one per line) in the file named by the first
# command-line argument, followed by the TLD that Faup extracts from it.
if len(sys.argv) < 2:
    sys.exit("usage: %s <file-with-one-url-per-line>" % sys.argv[0])

f = Faup()
# Non-ASCII characters are dropped on read (errors='ignore'); the context
# manager guarantees the file handle is closed even if decoding raises.
with codecs.open(sys.argv[1], 'r', 'ascii', errors='ignore') as file_urls:
    for url in file_urls:
        url = url.replace('\n', '')
        print("URL:[%s]" % (url))
        f.decode(url)
        print("-----> Extracted TLD:%s" % f.get_tld())