# UrlHelper is provided elsewhere in this codebase.
from tld import parse_tld


def get_url_level(url):
    """Classify a URL: 1 = main site (no subdomain, or "www"), 2 = subdomain, 0 = unparseable."""
    url = UrlHelper.add_schema(url)
    subdomain = parse_tld(url)[2]  # parse_tld returns (tld, domain, subdomain); parse once
    if subdomain == "" or subdomain == "www":
        return 1
    elif subdomain:
        return 2
    else:
        return 0
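# A minimal demo of the tld.parse_tld contract that all of these helpers rely on:
# it returns a (tld, domain, subdomain) tuple. The URLs below are illustrative.
from tld import parse_tld

print(parse_tld("http://www.example.com"))          # ('com', 'example', 'www')
print(parse_tld("http://mail.example.co.uk"))       # ('co.uk', 'example', 'mail')
print(parse_tld("example.com", fix_protocol=True))  # ('com', 'example', '')
print(parse_tld("not a url", fail_silently=True))   # (None, None, None)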
import time

import tld


def benchmark_tld(domains):
    """Time tld.parse_tld over a list of domains."""
    start = time.perf_counter()
    for domain in domains:
        tld.parse_tld(domain)
    end = time.perf_counter()
    print(f'tld: {end - start}s')
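# A minimal driver for benchmark_tld; the URL list below is a made-up sample, not
# data from the source.
if __name__ == "__main__":
    sample = ["http://www.example.com", "http://mail.example.co.uk"] * 10000
    benchmark_tld(sample)  # prints something like: tld: 1.23s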
import tld

# Config is provided elsewhere in this codebase; the blacklists are loaded lazily.
blackdomain_list = None
blackhexie_list = None


def check_domain_black(domain):
    """Return True if the domain hits either blacklist."""
    global blackdomain_list
    global blackhexie_list
    if blackdomain_list is None:
        with open(Config.black_domain_path) as f:
            blackdomain_list = f.readlines()
    for item in blackdomain_list:
        item = item.strip()
        if item and domain.endswith(item):
            return True
    if blackhexie_list is None:
        with open(Config.black_heixie_path) as f:
            blackhexie_list = f.readlines()
    # parse_tld returns (tld, domain, subdomain); with fail_silently=True it yields
    # (None, None, None) on failure, so the unpacking is safe. The domain does not
    # change inside the loop, so parse it once instead of once per item.
    _, _, subdomain = tld.parse_tld(domain, fix_protocol=True, fail_silently=True)
    for item in blackhexie_list:
        item = item.strip()
        if subdomain and item and item in subdomain:
            return True
    return False
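# Usage sketch for check_domain_black, assuming the blacklist files are plain
# newline-separated lists (the file contents below are hypothetical):
#
#   # with "evil.com" in Config.black_domain_path:
#   check_domain_black("sub.evil.com")            # True  (suffix match on the domain list)
#   # with "phish" in Config.black_heixie_path:
#   check_domain_black("phish-mail.example.com")  # True  ("phish" occurs in the subdomain)
#   check_domain_black("example.com")             # False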
def audit(self):
    method = self.requests.command  # request method: GET or POST
    headers = self.requests.get_headers()  # request headers (dict)
    url = self.build_url()  # full request URL
    resp_data = self.response.get_body_data()  # response body (bytes)
    resp_str = self.response.get_body_str()  # response body (str, auto-decoded)
    resp_headers = self.response.get_headers()  # response headers (dict)
    p = self.requests.urlparse
    params = self.requests.params
    netloc = self.requests.netloc
    domain = "{}://{}/".format(p.scheme, p.netloc)
    # parse_tld returns (tld, domain, subdomain); each non-empty part is tried
    # as a candidate backup-archive base name.
    payloads = parse_tld(domain, fix_protocol=True, fail_silently=True)
    if not payloads:
        return
    for payload in payloads:
        if not payload:  # skip None/empty parts (e.g. a missing subdomain)
            continue
        for i in ['.rar', '.zip']:
            test_url = domain + payload + i
            r = requests.get(test_url, headers=headers, allow_redirects=False, stream=True)
            content = r.raw.read(10)  # fixed: was r.raw2, which does not exist on a Response
            if r.status_code == 200 and self._check(content):
                rarsize = int(r.headers.get('Content-Length', 0)) // 1024 // 1024
                out.success(test_url, self.name, size="{}M".format(rarsize))
def get_fake_domain(self, seed):
    """
    Given a real domain, generate a fake version of it by doing some weird permutations
    """
    # tld_parser is `import tld as tld_parser`; parse_tld returns (tld, domain, subdomain).
    seed = seed.lower()
    seed_with_http = "http://" + seed
    seed_domain_after_tld_parsing = tld_parser.parse_tld(seed_with_http)
    tld, domain, _ = seed_domain_after_tld_parsing  # ignoring subdomain for now
    permutable_chars_in_word = [
        c for c in domain if c in self.permutations_dictionay
    ]
    # Note: assumes the domain contains at least one permutable character;
    # randint(1, 0) would raise otherwise.
    if len(permutable_chars_in_word) == 1:
        n_of_obfuscations = 1
    else:
        n_of_obfuscations = np.random.randint(1, len(permutable_chars_in_word))
    chars_to_obfuscate = np.random.choice(permutable_chars_in_word,
                                          n_of_obfuscations,
                                          replace=False)
    # print("chars_to_obfuscate = ", chars_to_obfuscate)
    new_domain = ""
    for c in domain:
        if c in chars_to_obfuscate:
            new_char = np.random.choice(self.permutations_dictionay[c], 1)[0]
            new_domain += new_char
        else:
            new_domain += c
    return "{new_domain}.{tld}".format(new_domain=new_domain, tld=tld)
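# A tiny harness for get_fake_domain. FakeDomainGenerator and its homoglyph map are
# assumptions for illustration; the original class (which carries the
# permutations_dictionay attribute, original spelling kept) is not shown here.
import numpy as np
import tld as tld_parser


class FakeDomainGenerator:
    permutations_dictionay = {"o": ["0"], "l": ["1"], "e": ["3"]}
    get_fake_domain = get_fake_domain  # reuse the function above as a method


print(FakeDomainGenerator().get_fake_domain("google.com"))  # e.g. "g00gle.com"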
def audit(self):
    headers = self.requests.headers
    url = self.requests.url
    p = urlparse(url)
    domain = "{}://{}/".format(p.scheme, p.netloc)
    try:
        payloads = parse_tld(domain, fix_protocol=True, fail_silently=True)
    except AttributeError:
        payloads = None
    if not payloads:
        return
    for payload in payloads:
        if not payload:  # skip None/empty parts (e.g. a missing subdomain)
            continue
        for i in ['.rar', '.zip']:
            test_url = domain + payload + i
            r = requests.get(test_url, headers=headers, allow_redirects=False, stream=True)
            try:
                content = r.raw.read(10)
            except Exception:
                continue
            if r.status_code == 200 and self._check(content):
                rarsize = int(r.headers.get('Content-Length', 0)) // 1024 // 1024
                result = self.new_result()
                result.init_info(self.requests.url, "backup file download", VulType.BRUTE_FORCE)
                result.add_detail("payload request", r.reqinfo, content.decode(errors='ignore'),
                                  "backup file size: {}M".format(rarsize), "", "", PLACE.GET)
                self.success(result)
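# Neither audit() above shows _check; a plausible sketch (an assumption, not the
# project's actual code) matches the leading magic bytes of RAR and ZIP archives
# against the first bytes read from the response:
def _check(self, content):
    signatures = (b"Rar!\x1a\x07", b"PK\x03\x04")  # RAR and ZIP file headers
    return any(content.startswith(sig) for sig in signatures)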
def __domain_tld(self, domain):
    try:
        from tld import parse_tld
    except ImportError:
        # Fallback when the tld package is unavailable: split on dots and use a
        # short list of common country-code second-level labels.
        ctld = ["org", "com", "net", "gov", "edu", "co", "mil", "nom", "ac", "info", "biz"]
        d = domain.rsplit(".", 3)
        if len(d) == 2:
            return "", d[0], d[1]
        if len(d) > 2:
            if d[-2] in ctld:
                return ".".join(d[:-3]), d[-3], ".".join(d[-2:])
            else:
                return ".".join(d[:-2]), d[-2], d[-1]
    else:
        # parse_tld returns (tld, domain, subdomain); reverse to (subdomain, domain, tld).
        d = parse_tld(domain, fix_protocol=True)[::-1]
        if d[1:] == d[:-1] and None in d:
            # All three parts are None: parsing failed, fall back to a plain rsplit.
            d = tuple(domain.rsplit(".", 2))
        d = ("",) * (3 - len(d)) + d
        return d
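# Behavior sketch for __domain_tld when the tld package is importable (values are
# illustrative):
#
#   __domain_tld("www.example.co.uk")  -> ("www", "example", "co.uk")
#   __domain_tld("example.com")        -> ("", "example", "com")
#
# When the import fails, the ctld fallback produces the same shape from plain splitting.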
def get_main_url(url):
    """Rebuild a host name from its parsed parts, dropping a leading "www"."""
    url = UrlHelper.get_absolute_url(url)
    if url:
        lurl = parse_tld(url)  # (tld, domain, subdomain)
        if lurl[2]:
            if lurl[2] == "www":
                return lurl[1] + "." + lurl[0]
            else:
                return lurl[2] + "." + lurl[1] + "." + lurl[0]
        elif lurl[1]:
            return lurl[1] + "." + lurl[0]
        else:
            return lurl[0]
def get_domain_root(url):
    from tld import parse_tld
    domain_root = ""
    try:
        # If the URL does not start with http/https, prepend a scheme so the
        # parser accepts it.
        if len(url.split("://")) <= 1 and url[0:4] != "http" and url[0:5] != "https":
            url = "http://" + url
        # parse_tld returns (tld, domain, subdomain); reverse and join to get
        # subdomain.domain.tld.
        domain_root = '.'.join(list(parse_tld(url))[::-1])
    except Exception:
        domain_root = "-"
    return domain_root
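# Expected results for get_domain_root (illustrative):
print(get_domain_root("www.example.com"))  # "www.example.com"
print(get_domain_root("example.com"))      # ".example.com" (leading dot: empty subdomain)
print(get_domain_root("not a url"))        # "-"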
import pwd
import re

from tld import parse_tld


def get_user_name(site_name):
    site_name = '.'.join(
        # Strip the TLD: [:-3:-1] walks the (tld, domain, subdomain) tuple
        # backwards, keeping (subdomain, domain).
        parse_tld(site_name, fix_protocol=True)[:-3:-1]
    )
    user_name = site_name.strip(".").replace('.', '_')

    def does_exist(user):
        try:
            pwd.getpwnam(user)
            return True
        except KeyError:  # user not found
            return False

    def trunc_user_name(user_name, postfix):
        # Truncate so that the name plus the postfix fits in 8 characters.
        return f"{user_name[:8 - len(str(postfix))]}{postfix}"

    postfix = ""
    while does_exist(trunc_user_name(user_name, postfix)):
        postfix = (postfix or 0) + 1
    user_name = trunc_user_name(user_name, postfix)
    user_name = re.sub(r"[^a-z0-9_-]+", "", user_name)
    return user_name.rstrip('-')
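# Usage sketch for get_user_name: derive a unix-friendly user name (at most 8
# characters) from a site name, appending a numeric postfix while pwd reports a
# collision.
print(get_user_name("www.example.com"))  # "www_exam" (or "www_exa1" if already taken)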
def get_top_level_url(url):
    lurl = parse_tld(UrlHelper.add_schema(url))  # (tld, domain, subdomain)
    return lurl[1] + "." + lurl[0]  # domain.tld, e.g. "example.com"
from copy import deepcopy
from queue import Queue

import tld

# BakScan is a Thread subclass defined elsewhere that consumes URLs from the queue.


def dispatcher(url_file=None, url=None, max_thread=1, dic=None):
    urllist = []
    if url_file is not None and url is None:
        with open(str(url_file)) as f:
            while True:
                line = str(f.readline()).strip()
                if line:
                    urllist.append(line)
                else:
                    break
    elif url is not None and url_file is None:
        urllist.append(url)
    else:
        pass
    with open('success.txt', 'w'):
        pass
    q = Queue()
    for u in urllist:
        # Given a URL such as www.test.gov.cn, automatically derive candidate
        # backup file names from the domain, e.g.:
        # www.test.gov.cn.rar www.test.gov.cn.zip
        # wwwtestgovcn.rar wwwtestgovcn.zip
        # testgovcn.rar testgovcn.zip
        # test.gov.cn.rar test.gov.cn.zip
        # www.rar www.zip
        if "http" not in u:
            u = 'http://' + u
        u1 = u
        u = u.split(':')[0] + ":" + u.split(':')[1]  # drop any port/path after the host
        www1 = u.split('.')
        wwwlen = len(www1)
        wwwhost = ''
        for i in range(1, wwwlen):
            wwwhost += www1[i]
        current_info_dic = deepcopy(dic)  # deep copy so each URL extends its own list
        host = u.replace('http://', '').replace('https://', '')
        current_info_dic.extend([
            host.replace('.', '_') + '.rar',
            host.replace('.', '_') + '.zip',
            host.replace('.', '_') + '.tar.gz'
        ])
        current_info_dic.extend([host + '.rar', host + '.zip', host + '.tar.gz'])
        domain_part = tld.parse_tld(u)[1]  # (tld, domain, subdomain)[1]: the bare domain
        if domain_part:
            current_info_dic.extend([
                domain_part + ".rar",
                domain_part + ".zip",
                domain_part + ".tar.gz"
            ])
        current_info_dic.extend([
            host.replace('.', '') + '.rar',
            host.replace('.', '') + '.zip',
            host.replace('.', '') + '.tar.gz'
        ])
        current_info_dic.extend([
            u.split('.', 1)[-1] + '.rar',
            u.split('.', 1)[-1] + '.zip',
            u.split('.', 1)[-1] + '.tar.gz'
        ])
        current_info_dic.extend(
            [wwwhost + '.rar', wwwhost + '.zip', wwwhost + '.tar.gz'])
        # print(current_info_dic)
        """
        The final per-URL slice of the scan dictionary looks like this:
        ['web.rar', 'web.zip', 'backup.rar', 'www.rar', 'bak.rar', 'wwwroot.zip',
         'bak.zip', 'www.zip', 'wwwroot.rar', 'backup.zip', 'www.test.gov.cn.rar',
         'www.test.gov.cn.zip', 'wwwtestgovcn.rar', 'wwwtestgovcn.zip',
         'testgovcn.rar', 'testgovcn.zip', 'test.gov.cn.rar', 'test.gov.cn.zip']
        ['web.rar', 'web.zip', 'backup.rar', 'www.rar', 'bak.rar', 'wwwroot.zip',
         'bak.zip', 'www.zip', 'wwwroot.rar', 'backup.zip', 'www.baidu.com.rar',
         'www.baidu.com.zip', 'wwwbaiducom.rar', 'wwwbaiducom.zip',
         'baiducom.rar', 'baiducom.zip', 'baidu.com.rar', 'baidu.com.zip']
        """
        for info in current_info_dic:
            url = str(u1) + '/' + str(info)
            q.put(url)
    # print(q.__dict__['queue'])  # leave commented out: with many URLs this floods
    # the screen with every queued address
    print('Queue size: ' + str(q.qsize()))
    threadl = [BakScan(q) for _ in range(max_thread)]
    for t in threadl:
        t.start()
    for t in threadl:
        t.join()
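# Invocation sketch for dispatcher; the base dictionary below is illustrative,
# mirroring the entries shown in the comments above:
base_dic = ['web.rar', 'web.zip', 'backup.rar', 'backup.zip', 'wwwroot.rar', 'wwwroot.zip']
dispatcher(url='www.test.gov.cn', max_thread=4, dic=base_dic)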
from typing import Tuple

import tld


def key_domain(dom: str) -> Tuple[str, str, str]:
    # parse_tld returns (tld, domain, subdomain); reorder to (domain, tld, subdomain)
    # so the tuple works as a sort/group key. On failure parse_tld with
    # fail_silently=True returns the truthy tuple (None, None, None), so check the
    # first element rather than the tuple itself before falling back to the raw string.
    t = tld.parse_tld(dom, fail_silently=True, fix_protocol=True)
    return (t[1], t[0], t[2]) if t and t[0] else (dom, "", "")
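# key_domain is shaped like a sort key: it groups hosts by registered domain first,
# then TLD, then subdomain. A minimal usage sketch:
hosts = ["mail.example.org", "example.com", "www.example.org"]
print(sorted(hosts, key=key_domain))
# ['example.com', 'mail.example.org', 'www.example.org']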