def get_rootdomains_by_domain(cls, domain):
    """
    Return the domains that share a "zt" record with a known domain.

    The input is normalised with ``Domain.get_domain``. Its "zt" name is
    looked up on a fresh ``cls()`` instance, and the domains registered
    under that name are fetched on a second fresh instance. The normalised
    input itself is appended when ``Domain.get_root_domain`` yields a
    truthy value for it.

    NOTE(review): "zt" presumably refers to the registering entity of an
    ICP record -- confirm against ``query_zt_by_domain``.

    :param domain: domain (or URL-like string) to start from.
    :return: whatever ``query_domains_by_zt`` returned, possibly extended
             with the normalised input domain.
    """
    known = Domain.get_domain(domain)
    zt_name = cls().query_zt_by_domain(known)
    related = cls().query_domains_by_zt(zt_name)
    if Domain.get_root_domain(known):
        related.append(known)
    # todo: Judge the domain/ip format
    return related
def __classify_result(self, domain_list):
    """
    Split a list of hosts into sub-domains, root domains and IPs.

    Each entry is normalised with ``Domain.get_domain`` and classified by
    ``get_domain_type``. Entries whose type is none of ``'ip'``,
    ``'root_domain'`` or ``'domain'`` are dropped. Duplicates are skipped
    and first-seen order is kept within each bucket.

    :param domain_list: iterable of raw host strings.
    :return: tuple ``(domains, root_domains, ips)`` of lists.
    """
    ips, root_domains, domains = [], [], []
    buckets = {
        'ip': ips,
        'root_domain': root_domains,
        'domain': domains,
    }
    for raw in domain_list:
        host = Domain.get_domain(raw)
        bucket = buckets.get(get_domain_type(host))
        # Unknown types have no bucket and are ignored.
        if bucket is not None and host not in bucket:
            bucket.append(host)
    return domains, root_domains, ips
def __classify_result(self, domain_list):
    """
    Sort normalised hosts into three de-duplicated lists.

    Every item is passed through ``Domain.get_domain`` and then typed by
    ``get_domain_type``. Only the types ``"ip"``, ``"root_domain"`` and
    ``"domain"`` are collected; anything else is skipped. Each list keeps
    the order in which its members were first encountered.

    :param domain_list: iterable of raw host strings.
    :return: ``(domains, root_domains, ips)``.
    """
    domains = []
    root_domains = []
    ips = []
    for entry in domain_list:
        name = Domain.get_domain(entry)
        kind = get_domain_type(name)
        if kind == "ip":
            target = ips
        elif kind == "root_domain":
            target = root_domains
        elif kind == "domain":
            target = domains
        else:
            continue
        if name not in target:
            target.append(name)
    return domains, root_domains, ips
def get_domains_by_ip(self, ip):
    """
    Collect the hosts Bing lists for an IP via its ``ip:`` search operator.

    A first request with a very large ``first`` offset is issued and the
    result counter is parsed out of that page. The result pages are then
    requested one at a time, pausing 20 seconds after each request.

    :param ip: IP address placed after ``ip:`` in the Bing query.
    :return: list of unique, truthy ``Domain.get_domain`` values of all
             hosts found (order is not preserved).
    """
    print_status('Start get domain by ip through bing...')
    url = 'http://cn.bing.com/search?q=ip:%s&first=999999991&FORM=PERE' % ip
    html = request(url, 'GET')
    # Verbose-mode pattern: the surrounding whitespace is ignored by re.X.
    domain_regx = r'''
        <h2><a\shref="https?://([^"]*?)"\starget="_blank"\sh="ID=[^"]*?">[^<]*?</a></h2>
    '''
    domain_list = re.findall(domain_regx, html, re.X)

    total_page_regx = r'''<span\sclass="sb_count">\d*?\s-\s\d*?\s[^\(]*?\([^\s]*?\s(\d*?)\s[^\)]*?\)</span>'''
    result = re.search(total_page_regx, html)
    try:
        total_num = int(result.group(1).replace(',', ''))
    except (AttributeError, ValueError):
        # No counter on the page (``result`` is None) or an empty capture:
        # assume a single page worth of results.
        total_num = 9

    # Ceiling division with ``//`` so the page count stays an integer on
    # both Python 2 and Python 3 (9 results per page is assumed here).
    page_count = (total_num + 8) // 9
    print_status('Total pages: %s, Total domains:%s' % (page_count, total_num))

    # Iterate over pages, not over individual results: the previous
    # ``range(total_num-1)`` issued one request (and one 20 s sleep) per
    # reported domain, running far past the last result page.
    for n in range(page_count):
        print_status('Get page %s domains...' % str(n+1))
        # ``%s1`` turns the page index into the offsets 01, 11, 21, ...
        url = 'http://cn.bing.com/search?q=ip:%s&first=%s1&FORM=PERE3' % (ip, n)
        html = request(url, 'GET')
        new_domain_list = re.findall(domain_regx, html, re.X)
        domain_list.extend(new_domain_list)
        time.sleep(20)  # throttle between requests

    # Normalise each host once and drop entries that normalise to a
    # falsy value.
    normalized = (Domain.get_domain(domain) for domain in domain_list)
    return list(set(name for name in normalized if name))
def start(self, domain, domain_type, level):
    """
    Run the lookup for ``domain`` and store the classified result.

    After the parent ``start`` hook, the normalised target domain is used
    to look up its "zt" name and the domains registered under it. Those
    domains, plus the target itself, are split into IPs (``is_ip``) and
    root domains (``is_url``) without duplicates. The ``'domain'`` bucket
    is always left empty. The parent ``end`` hook runs before returning.

    :param domain: target domain, normalised via ``Domain.get_domain``.
    :param domain_type: forwarded unchanged to the parent ``start``.
    :param level: forwarded unchanged to the parent ``start``.
    :return: ``self.result``, a dict with the keys ``'root_domain'``,
             ``'ip'`` and ``'domain'``.
    """
    super(icp, self).start(domain, domain_type, level)

    target_domain = Domain.get_domain(domain)
    zt_name = self.query_zt_by_domain(target_domain)
    candidates = self.query_domains_by_zt(zt_name)
    candidates.append(target_domain)

    found_ips, found_roots = [], []
    for candidate in candidates:
        if is_ip(candidate) and candidate not in found_ips:
            found_ips.append(candidate)
        elif is_url(candidate) and candidate not in found_roots:
            found_roots.append(candidate)

    self.result = {'root_domain': found_roots, 'ip': found_ips, 'domain': []}
    super(icp, self).end()
    return self.result
def get_subdomain_by_links(self, domain, level=4):
    """
    Query i.links.cn for the sub-domains of ``domain``.

    The normalised domain is POSTed to the service and every host found in
    the returned HTML is collected. The normalised input domain is always
    part of the result. The call sleeps 3 seconds after the request.

    :param domain: domain to look up, normalised via ``Domain.get_domain``.
    :param level: controls the ``b3``/``b4`` form flags, which are set to 1
                  when ``level`` is at least 3 / 4 (presumably the
                  sub-domain depth to search -- confirm against the
                  service).
    :return: list of unique hosts (order is not preserved).
    """
    domain = Domain.get_domain(domain)
    url = 'http://i.links.cn/subdomain/'
    data = {
        'domain': domain,
        'b2': 1,
        'b3': 1 if level >= 3 else 0,
        'b4': 1 if level >= 4 else 0,
    }
    html = request(url, 'POST', data=data)
    # Raw string: ``\s`` must reach the regex engine untouched and is an
    # invalid escape sequence in a normal string literal.
    regex = r'''<a\shref="http://[^"]*?"\srel=nofollow\starget=_blank>http://([^"]*?)</a></div>'''
    try:
        result = re.findall(regex, html)
    except TypeError:
        # ``html`` was not text (e.g. the request produced no body);
        # treat it as "no sub-domains found" instead of failing.
        result = []
    time.sleep(3)
    result.append(domain)
    return list(set(result))
def get_subdomain_by_links(self, domain, level=4):
    """
    Look up sub-domains of ``domain`` through the i.links.cn service.

    The domain is normalised, submitted in a POST form, and the hosts
    linked in the response HTML are extracted with a regular expression.
    The normalised domain itself is added to the result, and the method
    waits 3 seconds after the request before returning.

    :param domain: domain to look up, normalised via ``Domain.get_domain``.
    :param level: the form flag ``b3`` is 1 when ``level >= 3`` and ``b4``
                  is 1 when ``level >= 4`` (presumably selecting how deep
                  the service searches -- confirm against the service).
    :return: list of unique hosts (order is not preserved).
    """
    domain = Domain.get_domain(domain)
    url = 'http://i.links.cn/subdomain/'
    data = {
        'domain': domain,
        'b2': 1,
        'b3': 1 if level >= 3 else 0,
        'b4': 1 if level >= 4 else 0,
    }
    html = request(url, 'POST', data=data)
    # Raw string so ``\s`` is handed to ``re`` as written rather than
    # being parsed as an (invalid) string escape.
    regex = r'''<a\shref="http://[^"]*?"\srel=nofollow\starget=_blank>http://([^"]*?)</a></div>'''
    try:
        result = re.findall(regex, html)
    except TypeError:
        # Non-text response (e.g. None): fall back to an empty match list.
        result = []
    time.sleep(3)
    result.append(domain)
    return list(set(result))
def start(self, domain, domain_type, level):
    """
    Execute the lookup and publish the outcome on ``self.result``.

    The parent ``start`` hook runs first. The normalised target domain is
    then used to fetch its "zt" name and the domains listed under it; the
    target is appended to that list. Entries accepted by ``is_ip`` go to
    the ``'ip'`` list and entries accepted by ``is_url`` to the
    ``'root_domain'`` list, each without duplicates. ``'domain'`` stays
    empty. The parent ``end`` hook is called before the result is
    returned.

    :param domain: target domain, normalised via ``Domain.get_domain``.
    :param domain_type: passed through to the parent ``start``.
    :param level: passed through to the parent ``start``.
    :return: ``self.result``.
    """
    super(icp, self).start(domain, domain_type, level)

    target_domain = Domain.get_domain(domain)
    related = self.query_domains_by_zt(self.query_zt_by_domain(target_domain))
    related.append(target_domain)

    collected = {
        'root_domain': [],
        'ip': [],
        'domain': []
    }
    ip_bucket = collected['ip']
    root_bucket = collected['root_domain']
    for item in related:
        if is_ip(item) and item not in ip_bucket:
            ip_bucket.append(item)
            continue
        if is_url(item) and item not in root_bucket:
            root_bucket.append(item)

    self.result = collected
    super(icp, self).end()
    return self.result