def cms_discern(target_url, write=False, output=None):
    """Identify the CMS of *target_url* via whatweb.bugscaner.com.

    When *write* is falsy, print a PrettyTable of fingerprint key/value
    pairs; otherwise append them to the file at *output*.
    """
    cms_discern_table = PrettyTable(['Searching', 'Result'])
    print_info('正在识别CMS')
    url = 'http://whatweb.bugscaner.com/what.go/'
    data = {'url': target_url, 'location_capcha': 'no'}
    response = requests.post(url, data=data, headers=get_user_agent()).text
    # SECURITY BUGFIX: the response is untrusted remote data. The original
    # parsed it with eval(), which would execute arbitrary code embedded in
    # the reply; json.loads parses the same JSON payload safely.
    json_data = json.loads(response)
    if write == False:
        for key, value in json_data.items():
            cms_discern_table.add_row([key, value])
        print(cms_discern_table)
    else:
        # Context manager guarantees the file is closed even if a write fails.
        with open(output, 'w') as f:
            f.write(target_url + '\n\n')
            for key, value in json_data.items():
                f.write(str(key) + ' ' + str(value) + '\n')
            print_info('成功获取CMS指纹信息')
            print_info('保存路径为 ' + color.yellow(output))
def request_url(url):
    """Probe *url* with a GET request.

    Returns ``(1, url)`` when the response status is 200, and ``(0, url)``
    for any other status or on a request failure.
    """
    try:
        status = requests.get(url, headers=get_user_agent()).status_code
    # BUGFIX: narrowed from a bare ``except:`` (which also swallowed
    # KeyboardInterrupt/SystemExit) to the requests failure hierarchy.
    except requests.RequestException:
        return (0, url)
    if status == 200:
        return (1, url)
    return (0, url)
def chinaz_icp_search(target_url, output=None):
    """Scrape ICP filing information for *target_url* from icp.chinaz.com.

    Always prints a PrettyTable of the fields found; when *output* is not
    None the same fields are also appended to that file.
    """
    print_info('从icp.chinaz.com获取ICP备案信息')
    url = 'http://icp.chinaz.com/'
    info_table = PrettyTable(['Searching', 'Result'])
    # BUGFIX: the original called f.write() even when output was None; the
    # resulting NameError was silently swallowed by bare except clauses.
    f = open(output, 'a') if output is not None else None
    if f is not None:
        f.write('\n\nicp.chinaz.com\n\n')
    response = requests.get(url + target_url, headers=get_user_agent()).text

    def _record(label, value):
        # Add one row to the table and mirror it to the file when requested.
        info_table.add_row([label, value])
        if f is not None:
            f.write(label + ' ' + value + '\n')

    try:
        sponsor_name_re = r'<a target="_blank" href="(.*?)">(.*?)</a>'
        _record('主办单位名称', re.findall(sponsor_name_re, response)[0][1])
    except IndexError:
        pass  # field absent from the page -- skip it, as before
    try:
        sponsor_quality_re = r'<p><strong class="fl fwnone">(.*?)</strong></p>'
        _record('主办单位性质', re.findall(sponsor_quality_re, response)[0])
    except IndexError:
        pass
    try:
        _record('网站备案/许可证号', re.findall('<p><font>(.*?)</font>', response)[0])
    except IndexError:
        pass
    try:
        # Index [2] of the generic <p> matches is the site name on this page
        # layout -- presumably fragile; verify against live markup.
        _record('网站名称', re.findall('<p>(.*?)</p>', response)[2])
    except IndexError:
        pass
    try:
        _record('网站首页地址', re.findall('<p class="Wzno">(.*?)</p>', response)[0])
    except IndexError:
        pass
    try:
        # The last generic <p> match holds the audit timestamp.
        _record('审核时间', re.findall(r'<p>(.*?)</p>', response)[-1])
    except IndexError:
        pass
    print(info_table)
    if f is not None:
        print_info('从chinaz获取ICP信息并保存到' + color.yellow(output))
        f.close()
def request_get_url(pool_get_url):
    """Check whether *pool_get_url* responds with HTTP 200.

    Uses a (3s connect, 7s read) timeout. Returns ``(1, url)`` on 200,
    ``(0, url)`` otherwise or on any request failure.
    """
    try:
        status = requests.request(method='GET',
                                  url=pool_get_url,
                                  headers=get_user_agent(),
                                  timeout=(3, 7)).status_code
    # BUGFIX: narrowed from a bare ``except:`` to the requests failure
    # hierarchy (covers timeouts, connection errors, invalid URLs).
    except requests.RequestException:
        return 0, pool_get_url
    if status == 200:
        return 1, pool_get_url
    return 0, pool_get_url
def c_segment_search(c_segment_url, write=False, output=None):
    """Query webscan.cc for same-IP ("side") sites of *c_segment_url*.

    Prints two PrettyTables (host info and sibling sites) when *write* is
    falsy; otherwise writes the same data to the file at *output*.
    """
    ip_list = PrettyTable(['Searching', 'Result'])
    print_info('查询旁站和C段')
    url = 'http://webscan.cc/site_'
    result_url = url + c_segment_url + '/'
    response = requests.get(result_url, headers=get_user_agent(),
                            verify=False).text
    domain_ip = re.findall(r'<h1>(.*?)</h1>', response, re.S)[0]
    company = re.findall(r'<h2>(.*?)</h2>', response, re.S)[0]
    # Index [1]: the second <td><p> cell holds the server container name.
    container = re.findall(r'<td><p>(.*?)</p></td>', response, re.S)[1]
    # Hoisted: the original duplicated these two extractions in both branches.
    title = re.findall(r'<li class="J_link"><span>(.*?)</span>', response)
    domain_result = re.findall(r'target="_blank">(.*?)</a></li>', response)
    if write == False:
        ip_list.add_row(['IP地址', domain_ip])
        ip_list.add_row(['公司', company])
        ip_list.add_row(['站点容器', container])
        print_info('输出站点信息表')
        time.sleep(0.5)
        print(ip_list)
        same_table = PrettyTable(['title', 'url'])
        for site_title, site_url in zip(title, domain_result):
            same_table.add_row([site_title, site_url])
        print_info('同服IP站点列表')
        time.sleep(0.5)
        print(same_table)
    else:
        # Context manager replaces the original's manual open/close pair.
        with open(output, 'w') as f:
            f.write(c_segment_url + '\n\n')
            f.write('IP地址 ' + domain_ip + '\n')
            f.write('公司 ' + company + '\n')
            f.write('站点容器 ' + container + '\n\n\n')
            for site_title, site_url in zip(title, domain_result):
                f.write(site_title + ' ' + site_url + '\n')
            print_info('成功获取C段和旁站信息')
            print_info('保存路径为 ' + color.yellow(output))
def crawlergo_crawl(target):
    """Run the crawlergo spider against *target* and return its parsed output.

    Writes crawlergo's JSON result to ./output/<sanitized-target>_crawlergo.json
    (deleting any stale copy first), then loads it via get_crawlergo_dir.
    """
    # Build the output filename: dots become underscores, scheme is stripped.
    sanitized = target.replace('.', '_')
    for scheme in ('http://', 'https://'):
        sanitized = sanitized.replace(scheme, '')
    crawlergo_output_json = './output/' + sanitized + '_crawlergo.json'
    judge_file_delete(crawlergo_output_json)
    print_info('使用CrawLergo扫描')
    cmd = [
        "./crawlergo",
        "-c", chrome_path,
        "-t", crawlergo_threads,
        "--robots-path",
        "--fuzz-path",
        "--custom-headers", json.dumps(get_user_agent()),
        "--output-json", crawlergo_output_json,
        target,
    ]
    proc = subprocess.Popen(cmd)
    proc.communicate()  # block until the crawl finishes
    return get_crawlergo_dir(file=crawlergo_output_json)
def chinaz_whois_search(whois_url, write=False, output=None):
    """Scrape whois fields for *whois_url* from whois.aizhan.com.

    Prints a PrettyTable when *write* is falsy, otherwise writes the same
    label/value pairs to the file at *output*.
    """
    def _clean(raw):
        # Normalize one scraped cell: drop <span> wrappers, replace
        # image-only cells with '-', and cut any trailing <a> link.
        value = raw.replace('<span>', '').replace('</span>', '')
        if re.match('<img src="https:', value):
            return '-'
        anchor = value.find('<a href="')
        # BUGFIX: the original tested ``'<a href' in value`` but sliced at
        # find('<a href="'); when only '<a href' (no ``="``) was present,
        # find() returned -1 and the slice silently dropped the last char.
        if anchor != -1:
            value = value[:anchor]
        return value

    url = 'http://whois.aizhan.com/'
    response = requests.get(url + whois_url + '/',
                            headers=get_user_agent()).text
    # The final 11 table cells are page chrome, not whois data -- drop them.
    define_name = re.findall(r'<td class="thead">(.*?)</td>', response)[:-11]
    result = re.findall(r'<td>(.*?)</td>', response)[:-11]
    if write == False:
        chinaz_whois_table = PrettyTable(['information', 'result'])
        for label, raw in zip(define_name, result):
            chinaz_whois_table.add_row([label, _clean(raw)])
        print_info('从www.aizhan.com获取whois信息成功 输出whois信息表')
        time.sleep(0.5)
        print(chinaz_whois_table)
    else:
        # Context manager replaces the original's manual open/close pair.
        with open(output, 'w') as f:
            f.write('www.aizhan.com查询到的信息如下' + '\n\n')
            for label, raw in zip(define_name, result):
                f.write(label + ' ' + _clean(raw) + '\n')
            print_info('www.aizhan.com成功获取到whois信息')
            print_info('成功将文件写入到' + color.yellow(output))
def subdomain_crawl(subdomain_url, write=False):
    """Enumerate subdomains of *subdomain_url* via tool.chinaz.com.

    Prints the subdomains in a PrettyTable when *write* is falsy, otherwise
    writes them to ./output/<domain>_subdomain.txt.
    """
    url = 'http://tool.chinaz.com/subdomain/?domain='
    response = requests.post(url + subdomain_url,
                             headers=get_user_agent()).text
    re_page = r'</a><span class="col-gray02">共(.*?)页,到第</span>'
    page = int(re.findall(re_page, response, re.S)[0])

    def _extract(html):
        # Pull every subdomain name out of one result page.
        found = []
        for chunk in re.findall(
                r'<div class="w23-0 subdomain">(.*?)</a></div>', html):
            found.extend(
                re.findall(r'domain=(.*?)" target="_blank">', chunk, re.S))
        return found

    if page == 1:
        subdomains = _extract(response)
    else:
        subdomains = []
        # BUGFIX: the original iterated range(1, page) and therefore never
        # fetched the final results page.
        for page_no in range(1, page + 1):
            page_url = ('http://tool.chinaz.com/subdomain?domain=' +
                        subdomain_url + '&page=' + str(page_no))
            page_html = requests.post(page_url,
                                      headers=get_user_agent()).text
            subdomains.extend(_extract(page_html))
    if write == False:
        print_info('查询' + color.green(subdomain_url) + '的子域名')
        subdomain_table = PrettyTable(['子域名'])
        for sub in subdomains:
            # BUGFIX: the original called subdomain_table.add(), which does
            # not exist on PrettyTable (AttributeError); add_row is correct.
            subdomain_table.add_row([sub])
        print(subdomain_table)
    else:
        # BUGFIX: the original single-page write branch referenced an
        # undefined table and never wrote the results to the file.
        with open('./output/' + subdomain_url + '_subdomain.txt', 'w') as f:
            for sub in subdomains:
                f.write(sub + '\n')
        print_info('写入完成')
        print_info('写入路径为' + color.green(sys.path[0]) +
                   color.green('\\output\\' + subdomain_url +
                               '_subdomain.txt'))
def multi_dir_scan_all(url, threads, dirtype, cookie):
    """Brute-force web paths on *url* with a worker pool and print hits.

    *threads* is the pool size (string or int), *dirtype* selects which
    wordlists under ./dir_dict/ to use ('dir', 'php', 'asp', 'jsp', 'mdb',
    or anything else for all of them), *cookie* is an optional Cookie
    header value.
    """
    print_info('对' + url + '进行目录扫描')
    # Normalize the scheme: https is deliberately downgraded to http,
    # bare hosts get http:// prepended (mirrors the original behavior).
    if url.startswith('http://'):
        pass
    elif url.startswith('https://'):
        url = url.replace('https://', 'http://')
    else:
        url = 'http://' + url
    scan_result_table = PrettyTable(['网站目录', '状态码'])
    print_info('扫描' + dirtype + '类型的文件')

    def _read_wordlist(name):
        # BUGFIX: the original used open(...).readlines() and leaked the
        # file handles; the context manager closes each one.
        with open('./dir_dict/' + name) as fh:
            return fh.readlines()

    # Wordlist selection table; the fallback order matches the original
    # if/elif chain exactly.
    wordlists = {
        'dir': ['DIR.txt'],
        'php': ['PHP.txt'],
        'asp': ['ASP.txt', 'ASP_TWO.txt'],
        'jsp': ['JSP.txt'],
        'mdb': ['MDB.txt'],
    }.get(dirtype, ['PHP.txt', 'ASP.txt', 'JSP.txt', 'MDB.txt',
                    'ASP_TWO.txt', 'DIR.txt'])
    all_url = []
    for name in wordlists:
        all_url.extend(_read_wordlist(name))
    scan_url_list = []
    for line in all_url:
        entry = line.strip()
        scan_url = url + entry if line.startswith('/') else url + '/' + entry
        headers = get_user_agent()
        if cookie is not None:
            headers['Cookie'] = cookie
        scan_url_list.append((scan_url, headers))
    thread_count = int(threads)
    print_info('对' + color.green(url) + color.green('目录进行全面扫描'))
    print_info('启用' + color.green(str(thread_count)) + color.green('个线程'))
    with Pool(thread_count) as p:
        pool_result = list(
            tqdm(p.imap(dir_alive_url, scan_url_list),
                 total=len(scan_url_list)))
    # dir_alive_url yields (alive_flag, url, status); keep only live paths.
    for result in pool_result:
        if result[0] == 1:
            scan_result_table.add_row([result[1], result[2]])
    print_info(color.green('扫描的网址是') + color.yellow(url))
    print(scan_result_table)
def who_is_whois_search(whois_url, write=False, output=None):
    """Scrape whois data for *whois_url* from who.is.

    Prints three PrettyTables (registrar info, important dates, name
    servers) when *write* is False; appends the same data to *output*
    when *write* is True; rejects any other *write* value.
    """
    url = 'http://who.is/whois/' + whois_url
    response = requests.get(url, headers=get_user_agent()).text
    block_title = r'<span class="lead">(.*?)</span>'
    registrar_info = re.findall(block_title, response, re.S)[0]
    registrar_who_is_table = PrettyTable([registrar_info, 'Regist Info'])
    # PERF: scan the page once for keys and once for values -- the original
    # re-ran each of these findall calls seven times over the full page.
    key_re = r'<div class="col-md-4 queryResponseBodyKey">(.*?)</div>'
    keys = re.findall(key_re, response, re.S)
    value_re = r'<div class="col-md-8 queryResponseBodyValue">(.*?)</div>'
    values = re.findall(value_re, response, re.S)
    NAME = keys[0].replace('Name', '姓名')
    WHOIS_SERVER = keys[1].replace('Whois Server', 'whois服务器')
    REFERRAL_URL = keys[2].replace('Referral URL', '转介网站')
    STATUS = keys[3].replace('Status', '状态')
    name = values[0]
    whois_server = values[1]
    referral_url = values[2]
    # NOTE(review): this pattern was reconstructed from a collapsed source
    # line; the exact whitespace inside the original triple-quoted regex is
    # a best guess -- confirm against live who.is markup.
    status_re = r'''<div class="col-md-8 queryResponseBodyValue">
(.*?)
</div>'''
    status = re.findall(status_re, response, re.S)[0].replace('<br>', '\n')
    registrar_who_is_table.add_row([NAME, name])
    registrar_who_is_table.add_row([WHOIS_SERVER, whois_server])
    registrar_who_is_table.add_row([REFERRAL_URL, referral_url])
    registrar_who_is_table.add_row([STATUS, status])
    date_whois_table = PrettyTable(['Important Dates', 'DateTime'])
    EXPIRES_ON = keys[4].replace('Expires On', '到期时间')
    REGISTERED_ON = keys[5].replace('Registered On', '注册时间')
    UPDATED_ON = keys[6].replace('Updated On', '更新时间')
    expires_on = values[4]
    registered_on = values[5]
    updated_on = values[6]
    date_whois_table.add_row([EXPIRES_ON, expires_on])
    date_whois_table.add_row([REGISTERED_ON, registered_on])
    date_whois_table.add_row([UPDATED_ON, updated_on])
    name_server_table = PrettyTable(['Name Server', 'Server IP'])
    name_server = re.findall(r'<a href="/nameserver/(.*?)">(.*?)</a>',
                             response)
    name_server_ip = re.findall(
        r'<a href="/whois-ip/ip-address/(.*?)">(.*?)</a>', response)
    for server, ip in zip(name_server, name_server_ip):
        name_server_table.add_row([server[1], ip[1]])
    if write == False:
        print_info('从who.is获取whois信息成功 输出whois注册信息表')
        time.sleep(0.5)
        print(registrar_who_is_table)
        print_info('从who.is获取whois信息成功 输出whois时间信息表')
        time.sleep(0.5)
        print(date_whois_table)
        print_info('从who.is获取whois信息成功 输出whois Name Servers表')
        time.sleep(0.5)
        print(name_server_table)
    elif write == True:
        # BUGFIX: the original opened *output* here and never closed it.
        with open(output, 'a') as f:
            f.write('\n\nwho.is查询到的信息如下' + '\n\n')
            f.write(NAME + ' ' + name + '\n')
            f.write(WHOIS_SERVER + ' ' + whois_server + '\n')
            f.write(REFERRAL_URL + ' ' + referral_url + '\n')
            f.write(STATUS + ' ' + status + '\n')
            f.write('\n')
            f.write(EXPIRES_ON + ' ' + expires_on + '\n')
            f.write(REGISTERED_ON + ' ' + registered_on + '\n')
            f.write(UPDATED_ON + ' ' + updated_on + '\n')
            f.write('\n')
            for server, ip in zip(name_server, name_server_ip):
                f.write(server[1] + ' ' + ip[1] + '\n')
        print_info('who.is成功获取到whois信息')
        print_info('成功将文件写入到' + color.yellow(output))
    else:
        print_error('参数错误')
def dir_scan(url, threads, dirtype, cookie):
    """Brute-force web paths on *url* and print each live hit directly.

    Same wordlist selection as multi_dir_scan_all but prints results as
    they are collected instead of building a table. *threads* must already
    be an int here (the original passed it through unconverted).
    """
    # Normalize the scheme: https is deliberately downgraded to http,
    # bare hosts get http:// prepended (mirrors the original behavior).
    if url.startswith('http://'):
        _url = url
    elif url.startswith('https://'):
        _url = url.replace('https://', 'http://')
    else:
        _url = 'http://' + url
    print_info('扫描' + color.yellow(dirtype) + color.green('类型的文件'), url)

    def _read_wordlist(name):
        # BUGFIX: the original used open(...).readlines() and leaked the
        # file handles; the context manager closes each one.
        with open('./dir_dict/' + name) as fh:
            return fh.readlines()

    # Wordlist selection table; the fallback order matches the original
    # if/elif chain exactly.
    wordlists = {
        'dir': ['DIR.txt'],
        'php': ['PHP.txt'],
        'asp': ['ASP.txt', 'ASP_TWO.txt'],
        'jsp': ['JSP.txt'],
        'mdb': ['MDB.txt'],
    }.get(dirtype, ['PHP.txt', 'ASP.txt', 'JSP.txt', 'MDB.txt',
                    'ASP_TWO.txt', 'DIR.txt'])
    all_url = []
    for name in wordlists:
        all_url.extend(_read_wordlist(name))
    print_info('对' + color.yellow(_url) + color.green('目录进行全面扫描'), url)
    scan_url_list = []
    for line in all_url:
        entry = line.strip()
        scan_url = _url + entry if line.startswith('/') else _url + '/' + entry
        headers = get_user_agent()
        if cookie is not None:
            headers['Cookie'] = cookie
        scan_url_list.append((scan_url, headers))
    thread_count = threads
    print_info('启用' + str(thread_count) + '个线程', url)
    with Pool(thread_count) as p:
        pool_result = list(
            tqdm(p.imap(dir_alive_url, scan_url_list),
                 total=len(scan_url_list)))
    # dir_alive_url yields (alive_flag, url, status); print only live paths.
    for result in pool_result:
        if result[0] == 1:
            print(color.green(str(result[1])) + ' ' +
                  color.green(str(result[2])))
def icp_search(ICP, write=False, output=None):
    """Look up an ICP licence number on icp.aizhan.com.

    Prints the sponsor info plus the sponsor's other registered sites when
    *write* is False; writes the same data to *output* when *write* is
    True; rejects any other *write* value.
    """
    print_info('从icp.aizhan.com查询ICP备案号')
    url = 'https://icp.aizhan.com/'
    result_url = url + ICP + '/'
    info_table = PrettyTable(['Searching', 'Result'])
    response = requests.get(result_url, headers=get_user_agent()).text
    # Scan the detail cells once; the positional indices below follow the
    # page's fixed table layout.
    cells = re.findall(r'<td>(.*?)</td>', response, re.S)
    sponsor_name = cells[0]
    info_table.add_row(['主办单位名称', sponsor_name])
    sponsor_quality = cells[1]
    info_table.add_row(['主办单位性质', sponsor_quality])
    # BUGFIX: the original reused the name `sponsor_quality` for the licence
    # number, so the write branch recorded the licence number under
    # 主办单位性质 and lost the real sponsor type.
    icp_number = cells[2].replace('<span>', '').replace('</span>', '')
    info_table.add_row(['备案号', icp_number])
    website_name = cells[3]
    info_table.add_row(['网站名称', website_name])
    website_home = cells[4].replace('<br />', '\n')
    info_table.add_row(['网站首页地址', website_home])
    authentication_re = r'<a href="(.*?)" ref="nofollow" target="_blank">'
    authentication = re.findall(authentication_re, cells[5], re.S)[0]
    info_table.add_row(['认证', authentication])
    website_domain_name = cells[6]
    info_table.add_row(['网站域名', website_domain_name])
    audit_time = cells[7].replace('<span>', '').replace('</span>', '')
    info_table.add_row(['审核时间', audit_time])
    icp_table = PrettyTable(['该单位备案网站', '网站名称', '网站首页地址', '审核时间'])
    website_icp_list = re.findall(
        r'<td class="center"><span>(.*?)</span></td>', response)
    # Slice [5:-2:2]: skip header cells, drop trailing chrome, take every
    # other cell (name cells alternate with other columns).
    website_name_two = re.findall(r'<td class="center">(.*?)</td>',
                                  response)[5:-2:2]
    # NOTE(review): the two triple-quoted patterns below were reconstructed
    # from a collapsed source line; their exact internal whitespace is a
    # best guess -- confirm against live icp.aizhan.com markup.
    website_home_url_re = r'''<td class="center"><span class="blue">
(.*?)
</span></td>
<td class="center">'''
    website_home_url = re.findall(website_home_url_re, response)
    website_audit_time_re = r'''<td class="center">
<span>
(.*?)
</span>
</td>'''
    website_audit_time = re.findall(website_audit_time_re, response)
    if write == False:
        print(info_table)
        for i in range(0, len(website_icp_list) - 2):
            icp_table.add_row([
                website_icp_list[i],
                website_name_two[i],
                website_home_url[i].replace('<br />', '\n'),
                website_audit_time[i],
            ])
        print_info('成功获取ICP信息')
        print(icp_table)
    elif write == True:
        # Context manager replaces the original's manual open/close pair.
        with open(output, 'w') as f:
            print_info('成功获取ICP信息')
            print_info('保存路径为' + color.yellow(output))
            f.write('主办单位名称 ' + sponsor_name + '\n')
            f.write('主办单位性质 ' + sponsor_quality + '\n')
            f.write('网站名称 ' + website_name + '\n')
            f.write('网站首页地址 ' + website_home + '\n')
            f.write('认证 ' + authentication + '\n')
            f.write('网站域名 ' + website_domain_name + '\n')
            f.write('备案号 ' + icp_number + '\n')
            f.write('审核时间 ' + audit_time + '\n')
            f.write('该单位备案网站 网站名称 网站首页地址 审核时间\n\n\n')
            for i in range(0, len(website_icp_list) - 2):
                f.write(website_icp_list[i] + ' ' + website_name_two[i] +
                        ' ' + website_home_url[i] + ' ' +
                        website_audit_time[i] + '\n')
    else:
        print_error('参数错误')