# UrlHelper is provided elsewhere in this codebase.
from tld import parse_tld


def get_url_level(url):
    """Classify a URL: 1 = main site (no subdomain, or "www"), 2 = subdomain, 0 = unparseable."""
    url = UrlHelper.add_schema(url)
    subdomain = parse_tld(url)[2]  # parse_tld returns (tld, domain, subdomain); parse once
    if subdomain == "" or subdomain == "www":
        return 1
    elif subdomain:
        return 2
    else:
        return 0
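# A minimal demo of the tld.parse_tld contract that all of these helpers rely on:
# it returns a (tld, domain, subdomain) tuple. The URLs below are illustrative.
from tld import parse_tld

print(parse_tld("http://www.example.com"))          # ('com', 'example', 'www')
print(parse_tld("http://mail.example.co.uk"))       # ('co.uk', 'example', 'mail')
print(parse_tld("example.com", fix_protocol=True))  # ('com', 'example', '')
print(parse_tld("not a url", fail_silently=True))   # (None, None, None)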
import time

import tld


def benchmark_tld(domains):
    """Time tld.parse_tld over a list of domains."""
    start = time.perf_counter()
    for domain in domains:
        tld.parse_tld(domain)
    end = time.perf_counter()
    print(f'tld: {end - start}s')
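# A minimal driver for benchmark_tld; the URL list below is a made-up sample, not
# data from the source.
if __name__ == "__main__":
    sample = ["http://www.example.com", "http://mail.example.co.uk"] * 10000
    benchmark_tld(sample)  # prints something like: tld: 1.23s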
import tld

# Config is provided elsewhere in this codebase; the blacklists are loaded lazily.
blackdomain_list = None
blackhexie_list = None


def check_domain_black(domain):
    """Return True if the domain hits either blacklist."""
    global blackdomain_list
    global blackhexie_list
    if blackdomain_list is None:
        with open(Config.black_domain_path) as f:
            blackdomain_list = f.readlines()
    for item in blackdomain_list:
        item = item.strip()
        if item and domain.endswith(item):
            return True
    if blackhexie_list is None:
        with open(Config.black_heixie_path) as f:
            blackhexie_list = f.readlines()
    # parse_tld returns (tld, domain, subdomain); with fail_silently=True it yields
    # (None, None, None) on failure, so the unpacking is safe. The domain does not
    # change inside the loop, so parse it once instead of once per item.
    _, _, subdomain = tld.parse_tld(domain, fix_protocol=True, fail_silently=True)
    for item in blackhexie_list:
        item = item.strip()
        if subdomain and item and item in subdomain:
            return True
    return False
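# Usage sketch for check_domain_black, assuming the blacklist files are plain
# newline-separated lists (the file contents below are hypothetical):
#
#   # with "evil.com" in Config.black_domain_path:
#   check_domain_black("sub.evil.com")            # True  (suffix match on the domain list)
#   # with "phish" in Config.black_heixie_path:
#   check_domain_black("phish-mail.example.com")  # True  ("phish" occurs in the subdomain)
#   check_domain_black("example.com")             # False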
def audit(self):
    method = self.requests.command  # request method: GET or POST
    headers = self.requests.get_headers()  # request headers (dict)
    url = self.build_url()  # full request URL
    resp_data = self.response.get_body_data()  # response body (bytes)
    resp_str = self.response.get_body_str()  # response body (str, auto-decoded)
    resp_headers = self.response.get_headers()  # response headers (dict)
    p = self.requests.urlparse
    params = self.requests.params
    netloc = self.requests.netloc
    domain = "{}://{}/".format(p.scheme, p.netloc)
    # parse_tld returns (tld, domain, subdomain); each non-empty part is tried
    # as a candidate backup-archive base name.
    payloads = parse_tld(domain, fix_protocol=True, fail_silently=True)
    if not payloads:
        return
    for payload in payloads:
        if not payload:  # skip None/empty parts (e.g. a missing subdomain)
            continue
        for i in ['.rar', '.zip']:
            test_url = domain + payload + i
            r = requests.get(test_url, headers=headers, allow_redirects=False, stream=True)
            content = r.raw.read(10)  # fixed: was r.raw2, which does not exist on a Response
            if r.status_code == 200 and self._check(content):
                rarsize = int(r.headers.get('Content-Length', 0)) // 1024 // 1024
                out.success(test_url, self.name, size="{}M".format(rarsize))
def get_fake_domain(self, seed):
    """
    Given a real domain, generate a fake version of it by doing some weird permutations
    """
    # tld_parser is `import tld as tld_parser`; parse_tld returns (tld, domain, subdomain).
    seed = seed.lower()
    seed_with_http = "http://" + seed
    seed_domain_after_tld_parsing = tld_parser.parse_tld(seed_with_http)
    tld, domain, _ = seed_domain_after_tld_parsing  # ignoring subdomain for now
    permutable_chars_in_word = [
        c for c in domain if c in self.permutations_dictionay
    ]
    # Note: assumes the domain contains at least one permutable character;
    # randint(1, 0) would raise otherwise.
    if len(permutable_chars_in_word) == 1:
        n_of_obfuscations = 1
    else:
        n_of_obfuscations = np.random.randint(1, len(permutable_chars_in_word))
    chars_to_obfuscate = np.random.choice(permutable_chars_in_word,
                                          n_of_obfuscations,
                                          replace=False)
    # print("chars_to_obfuscate = ", chars_to_obfuscate)
    new_domain = ""
    for c in domain:
        if c in chars_to_obfuscate:
            new_char = np.random.choice(self.permutations_dictionay[c], 1)[0]
            new_domain += new_char
        else:
            new_domain += c
    return "{new_domain}.{tld}".format(new_domain=new_domain, tld=tld)
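# A tiny harness for get_fake_domain. FakeDomainGenerator and its homoglyph map are
# assumptions for illustration; the original class (which carries the
# permutations_dictionay attribute, original spelling kept) is not shown here.
import numpy as np
import tld as tld_parser


class FakeDomainGenerator:
    permutations_dictionay = {"o": ["0"], "l": ["1"], "e": ["3"]}
    get_fake_domain = get_fake_domain  # reuse the function above as a method


print(FakeDomainGenerator().get_fake_domain("google.com"))  # e.g. "g00gle.com"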
def audit(self):
    headers = self.requests.headers
    url = self.requests.url
    p = urlparse(url)
    domain = "{}://{}/".format(p.scheme, p.netloc)
    try:
        payloads = parse_tld(domain, fix_protocol=True, fail_silently=True)
    except AttributeError:
        payloads = None
    if not payloads:
        return
    for payload in payloads:
        if not payload:  # skip None/empty parts (e.g. a missing subdomain)
            continue
        for i in ['.rar', '.zip']:
            test_url = domain + payload + i
            r = requests.get(test_url, headers=headers, allow_redirects=False, stream=True)
            try:
                content = r.raw.read(10)
            except Exception:
                continue
            if r.status_code == 200 and self._check(content):
                rarsize = int(r.headers.get('Content-Length', 0)) // 1024 // 1024
                result = self.new_result()
                result.init_info(self.requests.url, "backup file download", VulType.BRUTE_FORCE)
                result.add_detail("payload request", r.reqinfo, content.decode(errors='ignore'),
                                  "backup file size: {}M".format(rarsize), "", "", PLACE.GET)
                self.success(result)
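# Neither audit() above shows _check; a plausible sketch (an assumption, not the
# project's actual code) matches the leading magic bytes of RAR and ZIP archives
# against the first bytes read from the response:
def _check(self, content):
    signatures = (b"Rar!\x1a\x07", b"PK\x03\x04")  # RAR and ZIP file headers
    return any(content.startswith(sig) for sig in signatures)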
def __domain_tld(self, domain):
    try:
        from tld import parse_tld
    except ImportError:
        # Fallback when the tld package is unavailable: split on dots and use a
        # short list of common country-code second-level labels.
        ctld = ["org", "com", "net", "gov", "edu", "co", "mil", "nom", "ac", "info", "biz"]
        d = domain.rsplit(".", 3)
        if len(d) == 2:
            return "", d[0], d[1]
        if len(d) > 2:
            if d[-2] in ctld:
                return ".".join(d[:-3]), d[-3], ".".join(d[-2:])
            else:
                return ".".join(d[:-2]), d[-2], d[-1]
    else:
        # parse_tld returns (tld, domain, subdomain); reverse to (subdomain, domain, tld).
        d = parse_tld(domain, fix_protocol=True)[::-1]
        if d[1:] == d[:-1] and None in d:
            # All three parts are None: parsing failed, fall back to a plain rsplit.
            d = tuple(domain.rsplit(".", 2))
        d = ("",) * (3 - len(d)) + d
        return d
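# Behavior sketch for __domain_tld when the tld package is importable (values are
# illustrative):
#
#   __domain_tld("www.example.co.uk")  -> ("www", "example", "co.uk")
#   __domain_tld("example.com")        -> ("", "example", "com")
#
# When the import fails, the ctld fallback produces the same shape from plain splitting.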
def get_main_url(url):
    """Rebuild a host name from its parsed parts, dropping a leading "www"."""
    url = UrlHelper.get_absolute_url(url)
    if url:
        lurl = parse_tld(url)  # (tld, domain, subdomain)
        if lurl[2]:
            if lurl[2] == "www":
                return lurl[1] + "." + lurl[0]
            else:
                return lurl[2] + "." + lurl[1] + "." + lurl[0]
        elif lurl[1]:
            return lurl[1] + "." + lurl[0]
        else:
            return lurl[0]
def get_domain_root(url):
    from tld import parse_tld
    domain_root = ""
    try:
        # If the URL does not start with http/https, prepend a scheme so the
        # parser accepts it.
        if len(url.split("://")) <= 1 and url[0:4] != "http" and url[0:5] != "https":
            url = "http://" + url
        # parse_tld returns (tld, domain, subdomain); reverse and join to get
        # subdomain.domain.tld.
        domain_root = '.'.join(list(parse_tld(url))[::-1])
    except Exception:
        domain_root = "-"
    return domain_root
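# Expected results for get_domain_root (illustrative):
print(get_domain_root("www.example.com"))  # "www.example.com"
print(get_domain_root("example.com"))      # ".example.com" (leading dot: empty subdomain)
print(get_domain_root("not a url"))        # "-"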
import pwd
import re

from tld import parse_tld


def get_user_name(site_name):
    site_name = '.'.join(
        # Strip the TLD: [:-3:-1] walks the (tld, domain, subdomain) tuple
        # backwards, keeping (subdomain, domain).
        parse_tld(site_name, fix_protocol=True)[:-3:-1]
    )
    user_name = site_name.strip(".").replace('.', '_')

    def does_exist(user):
        try:
            pwd.getpwnam(user)
            return True
        except KeyError:  # user not found
            return False

    def trunc_user_name(user_name, postfix):
        # Truncate so that the name plus the postfix fits in 8 characters.
        return f"{user_name[:8 - len(str(postfix))]}{postfix}"

    postfix = ""
    while does_exist(trunc_user_name(user_name, postfix)):
        postfix = (postfix or 0) + 1
    user_name = trunc_user_name(user_name, postfix)
    user_name = re.sub(r"[^a-z0-9_-]+", "", user_name)
    return user_name.rstrip('-')
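# Usage sketch for get_user_name: derive a unix-friendly user name (at most 8
# characters) from a site name, appending a numeric postfix while pwd reports a
# collision.
print(get_user_name("www.example.com"))  # "www_exam" (or "www_exa1" if already taken)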
def get_top_level_url(url):
    lurl = parse_tld(UrlHelper.add_schema(url))  # (tld, domain, subdomain)
    return lurl[1] + "." + lurl[0]  # domain.tld, e.g. "example.com"
from copy import deepcopy
from queue import Queue

import tld

# BakScan is a Thread subclass defined elsewhere that consumes URLs from the queue.


def dispatcher(url_file=None, url=None, max_thread=1, dic=None):
    urllist = []
    if url_file is not None and url is None:
        with open(str(url_file)) as f:
            while True:
                line = str(f.readline()).strip()
                if line:
                    urllist.append(line)
                else:
                    break
    elif url is not None and url_file is None:
        urllist.append(url)
    else:
        pass
    with open('success.txt', 'w'):
        pass
    q = Queue()
    for u in urllist:
        # Given a URL such as www.test.gov.cn, automatically derive candidate
        # backup file names from the domain, e.g.:
        # www.test.gov.cn.rar www.test.gov.cn.zip
        # wwwtestgovcn.rar wwwtestgovcn.zip
        # testgovcn.rar testgovcn.zip
        # test.gov.cn.rar test.gov.cn.zip
        # www.rar www.zip
        if "http" not in u:
            u = 'http://' + u
        u1 = u
        u = u.split(':')[0] + ":" + u.split(':')[1]  # drop any port/path after the host
        www1 = u.split('.')
        wwwlen = len(www1)
        wwwhost = ''
        for i in range(1, wwwlen):
            wwwhost += www1[i]
        current_info_dic = deepcopy(dic)  # deep copy so each URL extends its own list
        host = u.replace('http://', '').replace('https://', '')
        current_info_dic.extend([
            host.replace('.', '_') + '.rar',
            host.replace('.', '_') + '.zip',
            host.replace('.', '_') + '.tar.gz'
        ])
        current_info_dic.extend([host + '.rar', host + '.zip', host + '.tar.gz'])
        domain_part = tld.parse_tld(u)[1]  # (tld, domain, subdomain)[1]: the bare domain
        if domain_part:
            current_info_dic.extend([
                domain_part + ".rar",
                domain_part + ".zip",
                domain_part + ".tar.gz"
            ])
        current_info_dic.extend([
            host.replace('.', '') + '.rar',
            host.replace('.', '') + '.zip',
            host.replace('.', '') + '.tar.gz'
        ])
        current_info_dic.extend([
            u.split('.', 1)[-1] + '.rar',
            u.split('.', 1)[-1] + '.zip',
            u.split('.', 1)[-1] + '.tar.gz'
        ])
        current_info_dic.extend(
            [wwwhost + '.rar', wwwhost + '.zip', wwwhost + '.tar.gz'])
        # print(current_info_dic)
        """
        The final per-URL slice of the scan dictionary looks like this:
        ['web.rar', 'web.zip', 'backup.rar', 'www.rar', 'bak.rar', 'wwwroot.zip',
         'bak.zip', 'www.zip', 'wwwroot.rar', 'backup.zip', 'www.test.gov.cn.rar',
         'www.test.gov.cn.zip', 'wwwtestgovcn.rar', 'wwwtestgovcn.zip',
         'testgovcn.rar', 'testgovcn.zip', 'test.gov.cn.rar', 'test.gov.cn.zip']
        ['web.rar', 'web.zip', 'backup.rar', 'www.rar', 'bak.rar', 'wwwroot.zip',
         'bak.zip', 'www.zip', 'wwwroot.rar', 'backup.zip', 'www.baidu.com.rar',
         'www.baidu.com.zip', 'wwwbaiducom.rar', 'wwwbaiducom.zip',
         'baiducom.rar', 'baiducom.zip', 'baidu.com.rar', 'baidu.com.zip']
        """
        for info in current_info_dic:
            url = str(u1) + '/' + str(info)
            q.put(url)
    # print(q.__dict__['queue'])  # leave commented out: with many URLs this floods
    # the screen with every queued address
    print('Queue size: ' + str(q.qsize()))
    threadl = [BakScan(q) for _ in range(max_thread)]
    for t in threadl:
        t.start()
    for t in threadl:
        t.join()
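# Invocation sketch for dispatcher; the base dictionary below is illustrative,
# mirroring the entries shown in the comments above:
base_dic = ['web.rar', 'web.zip', 'backup.rar', 'backup.zip', 'wwwroot.rar', 'wwwroot.zip']
dispatcher(url='www.test.gov.cn', max_thread=4, dic=base_dic)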
from typing import Tuple

import tld


def key_domain(dom: str) -> Tuple[str, str, str]:
    # parse_tld returns (tld, domain, subdomain); reorder to (domain, tld, subdomain)
    # so the tuple works as a sort/group key. On failure parse_tld with
    # fail_silently=True returns the truthy tuple (None, None, None), so check the
    # first element rather than the tuple itself before falling back to the raw string.
    t = tld.parse_tld(dom, fail_silently=True, fix_protocol=True)
    return (t[1], t[0], t[2]) if t and t[0] else (dom, "", "")
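# key_domain is shaped like a sort key: it groups hosts by registered domain first,
# then TLD, then subdomain. A minimal usage sketch:
hosts = ["mail.example.org", "example.com", "www.example.org"]
print(sorted(hosts, key=key_domain))
# ['example.com', 'mail.example.org', 'www.example.org']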