def ipc_record(simple_url):
    """Query icp.aizhan.com for *simple_url* and print the filing fields found.

    The page is fetched with ``get_req`` and scraped with regular
    expressions; every field whose pattern matches is printed through the
    "green" template returned by ``current_time``. If the page contains
    "未找到", an error line is printed instead and the function returns.

    Args:
        simple_url: Domain string appended to ``https://icp.aizhan.com/``.

    Returns:
        None. All output goes to stdout.
    """
    ipc_url = "https://icp.aizhan.com/" + simple_url
    print(current_time("yellow").format("站长工具备案查询中..."))
    print(current_time("sep_prefix"))
    result = get_req(ipc_url)
    if result:
        result_text = result.text
        if re.search("未找到", result_text):
            print(current_time("red").format("url有误,请重试:" + ipc_url))
            print(current_time("sep_suffix"))
            return

        # (label template, scrape pattern, skip values starting with "'"),
        # listed in the order they are printed.
        fields = (
            ("主办单位名称:{}", "主办单位名称.*?<td>(.*?)</td>", False),
            ("行业:{}", "行业.*?<span>(.*?)</span>", False),
            ("成立时间:{}", "成立时间.*?<span>(.*?)</span>", False),
            ("主办单位性质:{}", "主办单位性质.*?<td>(.*?)</td>", False),
            ("网站备案/许可证号:{}", "网站备案/许可证号.*?<span>(.*?)</span>", False),
            ("网站名称:{}", "网站名称.*?<td>(.*?)</td>", False),
            ("网站负责人:{}", "网站负责人.*?<td>(.*?)</td>", True),
            ("法定代表人:{}", "法定代表人.*?<span>(.*?)</span>", False),
            ("公司地址:{}", '公司地址.*?title="(.*?)">', False),
        )
        for label, pattern, skip_quoted in fields:
            matches = re.findall(pattern, result_text, re.S)
            if not matches:
                continue
            value = matches[0]
            # startswith() instead of value[0]: an empty owner cell used to
            # raise IndexError here.
            if skip_quoted and value.startswith("'"):
                continue
            # Two-step format: the first call drops the label (with its own
            # "{}") into the colour template, the second fills in the value.
            print(current_time("green").format(label).format(value))
    # Always close the separator opened by "sep_prefix" above.
    print(current_time("sep_suffix"))
def ssl_domain(simple_url):
    """Fetch the crt.sh search page for *simple_url* and hand it to ``reg_match``.

    The response body is passed to ``reg_match`` together with a compiled
    pattern that captures dotted names found inside ``<TD>`` cells.

    Args:
        simple_url: Domain string substituted into the crt.sh query URL.

    Returns:
        None.
    """
    query_url = "https://crt.sh/?q={}".format(simple_url)
    result = get_req(query_url)
    # Other callers in this file treat a None return from get_req as a
    # failed request; dereferencing .text on it would raise AttributeError.
    if result is None:
        return
    content = result.text
    # Raw string: "\w" and "\." are not valid string escapes in a plain literal.
    pattern = re.compile(
        r"<TD>(\w{0,100}\.\w{0,100}\.?\w{0,100}\.?\w{0,100}\.?\w{0,100}\.?\w{0,100}\.?\w{0,100}\.?\w{0,100})</TD>",
        re.S)
    if content:
        reg_match(content, pattern)
def searchdns_netcraft_com(simple_url):
    """Fetch the Netcraft site-search page for *simple_url* and hand it to ``reg_match``.

    The request is sent with a hard-coded pair of ``netcraft_js_verification_*``
    cookies. The response body is passed to ``reg_match`` together with a
    compiled pattern that captures text following ``rel="nofollow">`` up to
    a closing ``</b>``.

    Args:
        simple_url: Host fragment substituted into the Netcraft query URL.

    Returns:
        None.
    """
    query_url = "https://searchdns.netcraft.com/?restriction=site+contains&host={}&position=limited".format(
        simple_url)
    # NOTE(review): these cookie values are fixed constants; presumably they
    # expire server-side -- confirm they are still accepted.
    cookie = "netcraft_js_verification_challenge=djF8RUlTNmw3d3c3M2c1WlNOOWZ0UHBMOTNPZ3YzWmVjY0tXWDBnUm9xSEFid1c4dEtOcjNoTHRp%0AaTRUWTBxUTBZOHo2NkFUbEtGdlpnSQovanQ5eStTditRPT0KfDE1ODI4MTU5Mzc%3D%0A%7Cab7e7165c79edb9371c422b504bf4cdd28713046; netcraft_js_verification_response=957f5bb05b956dbd8a1790ae4529e9ad0e0b6eb2"
    cookie_dict = {}
    for pair in cookie.split("; "):
        # Split on the first "=" only so a value containing "=" stays intact.
        name, _, value = pair.partition("=")
        cookie_dict[name] = value
    result = get_req(query_url, cookies=cookie_dict)
    # Other callers in this file treat a None return from get_req as a
    # failed request; dereferencing .text on it would raise AttributeError.
    if result is None:
        return
    content = result.text
    # Raw string: "\w" and "\." are not valid string escapes in a plain literal.
    pattern = re.compile(r"""rel="nofollow">.*?(\w{0,100}\..*?<b>.*?)</b>""", re.S)
    if content:
        reg_match(content, pattern)
def get_url_result(html_file):
    """Drain the module-level ``queue`` and append one report line per URL.

    Each URL taken from the queue is recorded in ``ip_list`` and requested
    with ``get_req``. When a response comes back, its encoding is set from
    ``get_encode``, the title is extracted with ``get_title``, and an HTML
    line containing a link, the status code and the title is written via
    ``write_file``.

    Args:
        html_file: Passed through unchanged as the second argument of
            ``write_file``.

    Returns:
        None.
    """
    while not queue.empty():
        url = queue.get()
        ip_list.append(url)
        result = get_req(url)
        if result is None:
            # Request failed; nothing to report for this URL.
            continue
        response_code = result.status_code
        # Set the encoding before reading .text so the title decodes with it.
        result.encoding = get_encode(result)
        response_title = get_title(result.text)
        # NOTE(review): url and title are interpolated into HTML unescaped --
        # confirm get_title sanitises its output before trusting this report.
        report_line = """<a href="{}">{}</a>  {}  {}<br>""".format(
            url, url, response_code, response_title)
        write_file(report_line, html_file)
def aizhan_whois(simple_url):
    """Look up *simple_url* on whois.aizhan.com and print the WHOIS fields found.

    The page is fetched with ``get_req`` and scraped with regular
    expressions. A page without a usable domain row (missing, blank, or
    starting with a single quote) is reported as a bad URL. Any other field
    that is missing, blank, or "-" is skipped.

    Args:
        simple_url: Domain string substituted into the whois.aizhan.com URL.

    Returns:
        None. All output goes to stdout.
    """
    query_url = "https://whois.aizhan.com/{}".format(simple_url) + "/"
    print(current_time("yellow").format("爱站whois查询中..."))
    print(current_time("sep_prefix"))
    result = get_req(query_url)
    if not result:
        print(current_time("red").format(" 请求错误..."))
        print(current_time("sep_suffix"))
        return

    result_text = result.text

    def first(pattern):
        """Return the first capture of *pattern* in the page, or "" if absent."""
        found = re.findall(pattern, result_text, re.S)
        return found[0] if found else ""

    domain = first("""<td class="thead">域名</td>.*?<td>(.*?)<""")
    if not domain.strip() or domain.strip().startswith("'"):
        print(current_time("red").format("url有误,请重试:" + query_url))
        # Close the separator on every error path, not just one of them.
        print(current_time("sep_suffix"))
        return

    # Single-valued rows, in print order. A missing row yields "" and is
    # skipped below instead of raising IndexError.
    text_rows = (
        ("域名:", domain),
        ("注册商:", first("""<td class="thead">注册商</td>.*?<td>(.*?)<""")),
        ("域名持有人/机构名称:",
         first("""<td class="thead">域名持有人/机构名称</td>.*?<td>(.*?)<""")),
        ("创建时间:", first("""<td class="thead">创建时间</td>.*?<span>(.*?)<""")),
        ("更新时间:",
         first("""<td class="thead">更新时间</td>.*?<td>.*?<span>(.*?)<""")),
        ("过期时间:", first("""<td class="thead">过期时间</td>.*?<span>(.*?)<""")),
    )
    for label, value in text_rows:
        if value.strip() and value.strip() != "-":
            print(current_time("green").format(label + value))

    # The two server rows keep the full match list and are printed via str();
    # the Tech fields keep only the first match. Any non-empty value prints.
    # Raw strings: "\w" is not a valid escape in a plain literal.
    other_rows = (
        ("域名服务器:",
         re.findall(r"""<td class="thead">域名服务器</td>.*?<td>(\w.*?)<""",
                    result_text, re.S)),
        ("dns服务器:",
         re.findall(r"""<td class="thead">DNS服务器</td>.*?<td>(\w.*?)<""",
                    result_text, re.S)),
        ("注册人邮箱:", first("Tech Email.*?>(.*?)<")),
        ("城市:", first("Tech City.*?>(.*?)<")),
        ("街道:", first("Tech Street.*?>(.*?)<")),
    )
    for label, value in other_rows:
        if value:
            print(current_time("green").format(label + str(value)))

    print(current_time("sep_suffix"))