def get_content(self, url):
    """Fetch *url* and return its body once the anti-robot check in
    ``human_act`` passes; otherwise retry recursively.

    NOTE(review): the recursion has no depth limit -- a page that never
    passes ``human_act`` would recurse until the stack overflows; confirm
    upstream guarantees eventual success.
    """
    logging.info('request: {0}'.format(url))
    r = http_request_get(url).text
    if self.human_act(r) is True:
        return r
    # Bug fix: the retry's result was previously dropped (the original
    # implicitly returned None on this path); propagate it to the caller.
    return self.get_content(url)
def get_sign_alexa_cn(self):
    """Scrape alexa.cn for the signature triple embedded in the page.

    Returns the comma-split parts of the first ``showHint('...')``
    argument found in the HTML, or ``None`` when no such call is present.
    """
    page = http_request_get(
        'http://www.alexa.cn/index.php?url={0}'.format(self.domain)).text
    matches = re.compile(r'(?<=showHint\(\').*?(?=\'\);)').findall(page)
    if not matches:
        return None
    return matches[0].split(',')
def run(self):
    """Fetch the site-contains listing for self.domain (using a freshly
    obtained cookie) and hand the page to the parser.

    Returns the deduplicated subdomain list; on any failure returns
    whatever has been gathered so far.
    """
    try:
        self.cookie = self.get_cookie().get('cookie')
        url = '{0}/?restriction=site+contains&position=limited&host=.{1}'.format(
            self.site, self.domain)
        r = http_request_get(url, custom_cookie=self.cookie)
        self.parser(r.text)
        return list(set(self.subset))
    except Exception as e:
        # Consistency fix: record the failure instead of silently
        # swallowing it (matches the other run() methods in this file).
        logging.info(str(e))
        return self.subset
def fetch_chinaz(self):
    """Collect subdomains listed on alexa.chinaz.com for self.domain."""
    page = http_request_get(
        'http://alexa.chinaz.com/?domain={0}'.format(self.domain)).content
    list_item = re.compile(r'(?<="\>\r\n<li>).*?(?=</li>)')
    self.subset.extend(
        candidate for candidate in list_item.findall(page)
        if is_domain(candidate))
def fetch_chinaz(self):
    """Harvest the <li>-wrapped subdomain entries from alexa.chinaz.com."""
    query = 'http://alexa.chinaz.com/?domain={0}'.format(self.domain)
    html = http_request_get(query).text
    entry_pattern = re.compile(r'(?<="\>\r\n<li>).*?(?=</li>)')
    for entry in entry_pattern.findall(html):
        if is_domain(entry):
            self.subset.append(entry)
def fetch_ip138(self):
    """Collect subdomains of self.domain from site.ip138.com's link list."""
    url = 'http://site.ip138.com/{0}/domain.htm'.format(self.domain)
    r = http_request_get(url).content
    # Bug fix: escape the domain so its dots match literally (previously
    # '.' in the domain matched any character), and use non-greedy
    # quantifiers so several <a> tags on one line do not collapse into a
    # single over-long match.
    regx = r'<a.*?>(.*?\.%s)</a>' % re.escape(self.domain)
    result = re.findall(regx, r)
    for sub in result:
        if is_domain(sub):
            self.subset.append(sub)
def run(self):
    """Drain the shared queue of candidate subdomain URLs, request each,
    and print those whose status passes the filter and whose body does not
    match the not-found pattern; stop after ``result_cnt`` hits (treated
    as likely false positives).

    NOTE(review): the membership test reads a global named
    ``exclude_status`` but uses it as an *allow* list -- confirm the
    intended semantics with its definition.
    """
    try:
        while not self.queue.empty():
            sub = self.queue.get_nowait()
            ret = http_request_get(sub)
            if ret.status_code in exclude_status and not re.findall(not_find_reg, ret.text):
                print(sub)  # parenthesized form is valid on Python 2 and 3
                self.count += 1
                if self.count >= result_cnt:
                    print('可能误报,请手动检查!')
                    break
    except Exception as e:
        # Robustness fix: the original `except Exception, e: pass` (Py2-only
        # syntax) silently discarded all errors; log them instead, matching
        # the style used by the other run() methods in this file.
        logging.info(str(e))
def run(self):
    """Query the domain-report API for self.domain and collect the
    entries of its 'subdomains' field that look like real domains.

    Returns the deduplicated subdomain list; on any failure returns
    whatever has been gathered so far.
    """
    try:
        url = "{0}/searchApi/v2/domain/report/?domain={1}".format(
            self.website, self.domain)
        content = http_request_get(url).text
        for sub in json.loads(content).get('subdomains'):
            if is_domain(sub):
                self.subset.append(sub)
        return list(set(self.subset))
    except Exception as e:
        # Consistency fix: log instead of silently swallowing the error
        # (matches the sibling implementation that reads .content).
        logging.info(str(e))
        return self.subset
def run(self):
    """Fetch the domain-report API response and harvest its 'subdomains'.

    Returns the deduplicated list collected so far; logs and returns the
    partial list on any error.
    """
    try:
        api = "{0}/searchApi/v2/domain/report/?domain={1}".format(
            self.website, self.domain)
        body = http_request_get(api).content
        hits = [s for s in json.loads(body).get('subdomains') if is_domain(s)]
        self.subset.extend(hits)
        return list(set(self.subset))
    except Exception as e:
        logging.info(str(e))
        return self.subset
def parser(self, response):
    """Accumulate valid subdomains from *response* into self.subset and,
    when a "Next page" link is present, fetch it (after a 3s pause) and
    recurse.
    """
    # The per-page harvesting is identical on both branches, so do it once
    # up front before deciding whether to follow a pagination link.
    for candidate in self.get_subdomains(response):
        if is_domain(candidate):
            self.subset.append(candidate)
    next_link = re.search('<A href="(.*?)"><b>Next page</b></a>', response)
    if next_link:
        follow = '{0}{1}'.format(self.site, next_link.group(1))
        reply = http_request_get(follow, custom_cookie=self.cookie)
        time.sleep(3)
        self.parser(reply.text)
def run(self):
    """Query the getData.php subdomain endpoint for self.domain and
    collect the linked domains that validate.

    Returns the deduplicated subdomain list; on any failure returns
    whatever has been gathered so far.
    """
    try:
        url = "{0}/getData.php?e=subdomains_container&q={1}&t=0&rt=10&p=1".format(
            self.website, self.domain)
        content = http_request_get(url).text
        _regex = re.compile(r'(?<=<a href\="domain.php\?q=).*?(?=">)')
        for sub in _regex.findall(content):
            if is_domain(sub):
                self.subset.append(sub)
        return list(set(self.subset))
    except Exception as e:
        # Consistency fix: log the error instead of discarding it silently
        # (matches the sibling .content-based implementation).
        logging.info(str(e))
        return self.subset
def execute(self):
    """Harvest domains for self.domain from crt.sh certificate-transparency
    search results, adding each cleaned name to self.result."""
    url = "https://crt.sh/?Identity=%%.%s" % self.domain
    try:
        r = http_request_get(url)
        if r.status_code == 200:
            root = etree.HTML(r.text)
            td_info = root.xpath(r'.//td[@class="outer"]/table/tr/td[4]')
            for td in td_info:
                if td.text:  # skip empty cells
                    # skip e-mail addresses and the 'SingleDomain' marker
                    if "@" not in td.text and 'SingleDomain' not in td.text:
                        domain = td.text.split("=")[-1].replace("*.", "")
                        self.result.add(domain)
    except Exception as e:
        # Bug fix: Exception.message is deprecated since Python 2.6 and
        # removed in Python 3; print the exception itself instead.
        print(e)
def parser_dnsname(self):
    """Resolve each stored certificate hash via the JSONP cert endpoint,
    collecting subjects into self.subjects and dnsNames into
    self.dns_names; sleeps a random interval between requests."""
    for cert_hash in self.hashs:
        try:
            cb = random_str()
            endpoint = '{0}/cert?hash={1}&c={2}'.format(
                self.website, quote(cert_hash), cb)
            raw = http_request_get(endpoint).content
            # strip the fixed-width JSONP wrapper before decoding
            payload = json.loads(raw[27:-3])
            info = payload.get('result')
            if info.get('subject'):
                self.subjects.append(info.get('subject'))
            if info.get('dnsNames'):
                self.dns_names.extend(info.get('dnsNames'))
        except Exception as e:
            logging.info(str(e))
        random_sleep()
def run(self):
    """Scrape the getData.php subdomain listing and return unique,
    validated hits; logs and returns the partial list on error."""
    try:
        endpoint = "{0}/getData.php?e=subdomains_container&q={1}&t=0&rt=10&p=1".format(
            self.website, self.domain)
        body = http_request_get(endpoint).content
        link_re = re.compile(r'(?<=<a href\="domain.php\?q=).*?(?=">)')
        self.subset.extend(
            hit for hit in link_re.findall(body) if is_domain(hit))
        return list(set(self.subset))
    except Exception as e:
        logging.info(str(e))
        return self.subset
def parser_dnsname(self):
    """Look up every collected hash against the cert endpoint and record
    the certificate subject and dnsNames fields; pauses randomly between
    lookups."""
    for h in self.hashs:
        try:
            url = '{0}/cert?hash={1}&c={2}'.format(
                self.website, quote(h), random_str())
            # response is JSONP; drop the fixed-length wrapper, then decode
            data = json.loads(http_request_get(url).content[27:-3])
            result = data.get('result')
            subject = result.get('subject')
            if subject:
                self.subjects.append(subject)
            names = result.get('dnsNames')
            if names:
                self.dns_names.extend(names)
        except Exception as e:
            logging.info(str(e))
        random_sleep()
def parser_subject(self):
    """Page through the JSONP search endpoint for self.domain, collecting
    certificate subjects (into self.dns_names) and hashes (into
    self.hashs); recurses while the response carries a nextPageToken.

    NOTE(review): if the request or decode fails, self.token keeps its
    previous value and the method recurses with the same token -- a
    persistent failure would recurse indefinitely; confirm this is the
    intended retry behavior.
    """
    try:
        callback = random_str()
        url = '{0}/search?domain={1}&incl_exp=true&incl_sub=true&token={2}&c={3}'.format(
            self.website, self.domain, quote(self.token), callback)
        content = http_request_get(url).content
        # response is JSONP: strip the fixed-length wrapper before decoding
        result = json.loads(content[27:-3])
        self.token = result.get('nextPageToken')
        for subject in result.get('results'):
            if subject.get('subject'):
                self.dns_names.append(subject.get('subject'))
            if subject.get('hash'):
                self.hashs.append(subject.get('hash'))
    except Exception as e:
        logging.info(str(e))
    if self.token:
        self.parser_subject()
def get_content(self, url):
    """Fetch *url* and return the body once the anti-robot check in
    ``human_act`` passes; retries recursively otherwise.

    NOTE(review): no recursion depth limit -- a page that never passes
    ``human_act`` would recurse until the stack overflows.
    """
    r = http_request_get(url).text
    if self.human_act(r) is True:
        return r
    # Bug fix: propagate the retry's result instead of dropping it
    # (the original implicitly returned None on the retry path).
    return self.get_content(url)