    fix_log_file()
    request_issue_creation()

elif opt.spiderWebSite:
    if not URL_REGEX.match(opt.spiderWebSite):
        err_msg = "URL did not match a valid URL{}..."
        if "www" in opt.spiderWebSite:
            err_msg = err_msg.format(
                " issue seems to be that 'www' is in the URL, "
                "replace with http(s)://"
            )
        else:
            err_msg = err_msg.format("")
        raise InvalidInputProvided(err_msg)
    else:
        if URL_QUERY_REGEX.match(opt.spiderWebSite):
            is_sure = prompt(
                "it is recommended not to use a URL that has a GET (query) parameter in it, "
                "would you like to continue", "yN"
            )
            if not is_sure.lower().startswith("y"):
                shutdown()
        blackwidow.blackwidow_main(opt.spiderWebSite, agent=agent_to_use,
                                   proxy=proxy_to_use, verbose=opt.runInVerbose)
        urls_to_use = get_latest_log_file(SPIDER_LOG_PATH)

if opt.runSqliScan or opt.runPortScan or opt.intelCheck or opt.adminPanelFinder or opt.runXssScan:
    with open(urls_to_use) as urls:
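
# A minimal sketch of the validation flow above, assuming URL_REGEX and
# URL_QUERY_REGEX behave roughly like the stand-in patterns below (the real
# patterns live in Zeus's settings); everything prefixed with _demo_ is
# hypothetical and only illustrates which inputs the spider accepts.
import re

_DEMO_URL_REGEX = re.compile(r"^https?://")
_DEMO_URL_QUERY_REGEX = re.compile(r"^https?://\S+\?\S+=\S*")


def _demo_spider_input_check(target):
    """classify a candidate spider target as 'invalid', 'has-query' or 'ok'"""
    if not _DEMO_URL_REGEX.match(target):
        return "invalid"    # e.g. 'www.example.com' (missing http(s):// scheme)
    if _DEMO_URL_QUERY_REGEX.match(target):
        return "has-query"  # e.g. 'http://example.com/?id=1' (prompt before spidering)
    return "ok"             # e.g. 'http://example.com/' (spidered directly)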
def parse_search_results(query, url, verbose=False, dirname="{}/log/url-log",
                         filename="url-log-{}.log", **kwargs):
    """
    parse a webpage from Google for URLs with a GET (query) parameter
    """
    # NOTE: `"google" or "webcache" or "youtube"` evaluates to just "google",
    # so use a tuple and check every entry instead
    exclude = ("google", "webcache", "youtube")
    create_dir(dirname.format(os.getcwd()))
    full_file_path = "{}/{}".format(
        dirname.format(os.getcwd()),
        filename.format(len(os.listdir(dirname.format(os.getcwd()))) + 1)
    )

    def __get_headers():
        # default to None so both values are always defined
        proxy_string, user_agent = None, None
        try:
            proxy_string = kwargs.get("proxy")
        except:
            pass
        try:
            user_agent = kwargs.get("agent")
        except:
            pass
        return proxy_string, user_agent

    if verbose:
        logger.debug(set_color(
            "checking for user-agent and proxy configuration...", level=10
        ))
    proxy_string, user_agent = __get_headers()

    if proxy_string is not None:
        proxy_string = proxy_string_to_dict(proxy_string)
    if user_agent is None:
        user_agent = DEFAULT_USER_AGENT

    user_agent_info = "adjusting user-agent header to {}..."
    if user_agent != DEFAULT_USER_AGENT:
        user_agent_info = user_agent_info.format(user_agent.strip())
    else:
        user_agent_info = user_agent_info.format(
            "default user agent '{}'".format(DEFAULT_USER_AGENT))

    proxy_string_info = "setting proxy to {}..."
    if proxy_string is not None:
        proxy_string_info = proxy_string_info.format(
            ''.join(proxy_string.keys()) + "://" + ''.join(proxy_string.values()))
    else:
        proxy_string_info = "no proxy configuration detected..."

    headers = {"Connection": "close", "user-agent": user_agent}
    logger.info(set_color("attempting to gather query URL..."))
    try:
        query_url = get_urls(query, url, verbose=verbose, user_agent=user_agent, proxy=proxy_string)
    except Exception as e:
        if "WebDriverException" in str(e):
            logger.exception(set_color(
                "it seems that you exited the browser, please allow the browser "
                "to complete its run so that Zeus can bypass captchas and API "
                "calls", level=50
            ))
        else:
            logger.exception(set_color(
                "{} failed to gather the URL from the search engine, caught exception '{}', "
                "exception has been logged to the current log file...".format(
                    os.path.basename(__file__), str(e).strip()), level=50
            ))
        shutdown()

    logger.info(set_color("URL successfully gathered, searching for GET parameters..."))
    logger.info(set_color(proxy_string_info))
    # pass the headers with the request instead of mutating the response object afterwards
    req = requests.get(query_url, proxies=proxy_string, headers=headers)
    logger.info(set_color(user_agent_info))
    found_urls = URL_REGEX.findall(req.text)
    retval = set()
    # avoid shadowing the `url` parameter, it is reused in the error message below
    for match_group in list(found_urls):
        for found_url in list(match_group):
            found_url = urllib.unquote(found_url)
            if URL_QUERY_REGEX.match(found_url) and not any(e in found_url for e in exclude):
                if isinstance(found_url, unicode):
                    found_url = str(found_url).encode("utf-8")
                if verbose:
                    logger.debug(set_color("found '{}'...".format(found_url), level=10))
                retval.add(found_url.split("&")[0])
    logger.info(set_color(
        "found a total of {} URLs with a GET parameter...".format(len(retval))))
    if len(retval) != 0:
        logger.info(set_color(
            "saving found URLs under '{}'...".format(full_file_path)))
        with open(full_file_path, "a+") as log:
            for found in list(retval):
                log.write(found + "\n")
    else:
        logger.critical(set_color(
            "did not find any usable URLs with the given query '{}' "
            "using search engine '{}'...".format(query, url), level=50
        ))
        shutdown()
    return list(retval) if len(retval) != 0 else None
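
# Example usage of parse_search_results(), assuming the surrounding Zeus
# environment (logger, get_urls, proxy helpers) is initialized; the dork,
# search URL and proxy below are illustrative values, not project defaults:
#
#   urls = parse_search_results(
#       "inurl:php?id=", "http://www.google.com/search",
#       verbose=True, agent=DEFAULT_USER_AGENT, proxy="socks5://127.0.0.1:9050"
#   )
#   if urls is not None:
#       print("{} candidate URLs gathered".format(len(urls)))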
def search_multiple_pages(query, link_amount, proxy=None, agent=None, verbose=False):
    def __config_proxy(proxy_string):
        proxy_type_schema = {
            "http": httplib2.socks.PROXY_TYPE_HTTP,
            "socks4": httplib2.socks.PROXY_TYPE_SOCKS4,
            "socks5": httplib2.socks.PROXY_TYPE_SOCKS5
        }
        proxy_type = get_proxy_type(proxy_string)[0]
        proxy_dict = proxy_string_to_dict(proxy_string)
        proxy_config = httplib2.ProxyInfo(
            proxy_type=proxy_type_schema[proxy_type],
            proxy_host="".join(proxy_dict.keys()),
            proxy_port="".join(proxy_dict.values())
        )
        return proxy_config

    if proxy is not None:
        if verbose:
            logger.debug(set_color(
                "configuring to use proxy '{}'...".format(proxy), level=10))
        # build the httplib2 proxy configuration (the return value is not
        # passed any further at the moment)
        __config_proxy(proxy)

    if agent is not None:
        if verbose:
            logger.debug(set_color(
                "setting user-agent to '{}'...".format(agent), level=10))

    logger.warning(set_color(
        "multiple pages will be searched using Google's API client, searches may be blocked after a certain "
        "amount of time...", level=30
    ))
    results, limit, found, index = set(), link_amount, 0, google_api.search(
        query, user_agent=agent, safe="on")
    try:
        while limit > 0:
            results.add(next(index))
            limit -= 1
            found += 1
    except Exception as e:
        if "Error 503" in str(e):
            logger.fatal(set_color(
                "Google is blocking the current IP address, dumping already found URLs...", level=50
            ))

    retval = set()
    for url in results:
        if URL_REGEX.match(url) and URL_QUERY_REGEX.match(url):
            if verbose:
                logger.debug(set_color("found '{}'...".format(url), level=10))
            retval.add(url)

    if len(retval) != 0:
        logger.info(set_color(
            "a total of {} links found out of requested {}...".format(
                len(retval), link_amount)
        ))
        write_to_log_file(list(retval), URL_LOG_PATH, "url-log-{}.log")
    else:
        logger.error(set_color("unable to extract URLs from results...", level=40))
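
# The paging loop above drains a generator up to `link_amount` items and keeps
# whatever was gathered if the search raises; a self-contained sketch of that
# pattern (plain integers stand in for result URLs, no Google API involved):
def _demo_drain_generator(gen, limit):
    """collect at most `limit` items from `gen`, keeping partial results on exhaustion"""
    results = set()
    try:
        while limit > 0:
            results.add(next(gen))
            limit -= 1
    except StopIteration:
        pass  # generator ran dry before the limit was reached
    return results

# _demo_drain_generator(iter(range(100)), 10) -> a set of 10 integers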
def detection_main(
        url, payloads, cursor, request_type="GET", post_data=None,
        user_agent=None, provided_headers=None, proxy=None, verbose=False,
        skip_bypass_check=False, verification_number=None,
        # temporarily disabled, not very useful
        # fingerprint_waf=False,
        formatted=False, tamper_int=5, use_yaml=False, use_json=False,
        use_csv=False, traffic_file=None, throttle=0, request_timeout=15,
        # determine_server should be enabled by default
        # determine_server=False,
        threaded=None, force_file_creation=False, save_file_copy_path=None):
    """
    main detection function

    :param url: url
    :param payloads: payloads
    :param cursor: database cursor
    :param request_type: GET or POST
    :param post_data: POST data to send
    :param user_agent: user agent
    :param provided_headers: custom headers (dict)
    :param proxy: proxy
    :param verbose: verbose mode, defaults to False
    :param skip_bypass_check: skip the payload bypass check
    :param verification_number:
    :param formatted:
    :param tamper_int:
    :param use_yaml:
    :param use_json:
    :param use_csv:
    :param traffic_file:
    :param throttle:
    :param request_timeout:
    :param threaded:
    :param force_file_creation:
    :param save_file_copy_path:
    :return: the total number of requests sent
    """
    # avoid the call-in-signature gotcha: a default like `get_random_agent()`
    # is evaluated once at import time, so pick the random agent per call
    if user_agent is None:
        user_agent = get_random_agent()

    # normalize the URL, just to be safe
    url = normalization_url(url)
    if url[-1] != "/":
        url += "/"
    current_url_netloc = urlparse.urlparse(url).netloc

    # if no query parameter is detected in the URL the detection results may
    # be skewed (POST requests are exempt from this check)
    if URL_QUERY_REGEX.search(str(url)) is None and request_type.lower() == "get":
        warning(
            "URL does not appear to have a query (parameter), this may interfere with the detection results"
        )

    # should the attack payloads be placed wherever a `*` marker appears in the URL?
    if '*' in url:
        choice = prompt(
            "custom placement marker found in URL `*` would you like to use it to place the attacks", "yN"
        )
        use_placement = choice.lower().startswith("y")
    else:
        use_placement = False

    if use_yaml:
        file_path = YAML_FILE_PATH
    elif use_json:
        file_path = JSON_FILE_PATH
    elif use_csv:
        file_path = CSV_FILE_PATH
    else:
        file_path = None

    try:
        file_start = url.split("/")[2].split(".")[1]
        if use_json:
            ext = ".json"
        elif use_yaml:
            ext = ".yaml"
        elif use_csv:
            ext = ".csv"
        else:
            ext = ".txt"
        filename = "{}{}".format(file_start, ext)
    except:
        if use_json:
            file_type = "json"
        elif use_csv:
            file_type = "csv"
        elif use_yaml:
            file_type = "yaml"
        else:
            file_type = "txt"
        filename = random_string(length=10, file_type=file_type)

    info("request type: {}".format(request_type))

    # check for empty POST data
    if request_type.lower() == "post":
        if not post_data:
            warning("no POST string supplied, generating a random one")
            post_data = generate_random_post_string()
            info("random POST string to be sent: '{}'".format(post_data))
        else:
            info("POST string to be sent: '{}'".format(post_data))

    # raise an exception if the URL is not valid
    if validate_url(url) is None:
        raise InvalidURLProvided

    info("gathering HTTP responses")
    # responses_list is a collection of tuples shaped like:
    # ('GET https://example.org', '200 OK', <soup object>, {'User-Agent': '...'})
    if threaded:
        # threading was requested
        responses_list = DetectionQueue(
            url, payloads, proxy=proxy, agent=user_agent, verbose=verbose,
            provided_headers=provided_headers, traffic_file=traffic_file,
            throttle=throttle, timeout=request_timeout,
            request_type=request_type, post_data=post_data,
            threaded=threaded, placement=use_placement
        ).threaded_get_response()
    else:
        responses_list = DetectionQueue(
            url, payloads, request_type=request_type, post_data=post_data,
            provided_headers=provided_headers, agent=user_agent, proxy=proxy,
            verbose=verbose,
            # save_fingerprint=fingerprint_waf,
            # --traffic FILENAME
            # traffic_file=traffic_file,
            throttle=throttle, timeout=request_timeout,
            placement=use_placement
        ).get_response()

    # --traffic was given, save the requests to a file
    if traffic_file is not None:
        with open(traffic_file, "a+") as traffic:
            for i, item in enumerate(responses_list, start=1):
                param, status_code, content, headers = item
                traffic.write(
                    "HTTP Request #{}\n{}\nRequest Status Code: {}\n<!--\n{} HTTP/1.1\n{}\n-->{}\n\n\n".format(
                        i, "-" * 30, status_code, param,
                        "\n".join(["{}: {}".format(h, v) for h, v in headers.items()]),
                        content
                    )
                )

    info("gathering normal response to compare against")
    # the requests above carried the payloads (and the admin-path brute force);
    # here the original URL is requested as-is (what about a `*` in that URL?)
    normal_response = get_page(
        url, proxy=proxy, user_agent=user_agent, provided_headers=provided_headers,
        throttle=throttle, timeout=request_timeout, request_method=request_type,
        post_data=post_data
    )

    # --determine-webserver: check the response headers for a Server field
    # (e.g. Apache); this now runs by default
    # if determine_server:
    found_webserver = None
    # each responses_list item ends with its headers dict
    headers = {}
    for resp in responses_list:
        headers = resp[-1]
        for k in headers.keys():
            if k.lower() == "server":
                found_webserver = headers[k]
                break
    if found_webserver is None:
        warning("unable to determine web server")
    else:
        success("web server determined as: {}".format(found_webserver))

    # load all detection plugins and return the list of imported plugins
    info("loading firewall detection scripts")
    loaded_plugins = ScriptQueue(
        PLUGINS_DIRECTORY, PLUGINS_IMPORT_TEMPLATE, verbose=verbose
    ).load_scripts()
    success("successfully loaded firewall detection scripts")

    info("running firewall detection checks")
    # plus one for the get_page() call
    request_count = len(responses_list) + 1
    amount_of_products = 0
    detected_protections = set()
    # make sure the flag is always defined, not every branch below inserts
    inserted_into_database_results = False
    # temp = []
    for item in responses_list:
        item = item if item is not None else normal_response
        _, status, html, headers = item
        for plugin in loaded_plugins:
            try:
                if plugin.detect(str(html), status=status, headers=headers) is True:
                    # left here for now
                    # temp.append(plugin.__product__)
                    # a plugin's product should never be 'Unknown Firewall'
                    # if plugin.__product__ == UNKNOWN_FIREWALL_NAME and len(temp) == 1 and status != 0:
                    #     warning("unknown firewall detected saving fingerprint to log file")
                    #     path = create_fingerprint(url, html, status, headers)
                    #     return request_firewall_issue_creation(path)
                    # else:
                    #     detected_protections.add(plugin.__product__)
                    detected_protections.add(plugin.__product__)
            except Exception:
                pass

    if len(detected_protections) > 0:
        if UNKNOWN_FIREWALL_NAME not in detected_protections:
            amount_of_products += 1
        if len(detected_protections) > 1:
            for i, _ in enumerate(list(detected_protections)):
                amount_of_products += 1

    if amount_of_products == 1:
        # grab the __product__ of the detected protection (usually only one)
        detected_protections = list(detected_protections)[0]
        success(
            "detected website protection identified as '{}', searching for bypasses".format(detected_protections)
        )
        # --skip was not given
        if not skip_bypass_check:
            # get_working_tampers() returns a set of tuples:
            # (tamper.__type__, tamper.tamper(tamper.__example_payload__), tamper)
            found_working_tampers = get_working_tampers(
                url, normal_response, payloads, proxy=proxy, agent=user_agent,
                verbose=verbose, tamper_int=tamper_int,
                provided_headers=provided_headers, throttle=throttle,
                timeout=request_timeout
            )
            # without --format, just pretty-print the results
            if not formatted:
                # display_found_tampers pretty-prints found_working_tampers
                display_found_tampers(found_working_tampers)
            else:
                # dictify_output returns a dict of the form:
                # {
                #     "url": url,
                #     "identified firewall": detected_protections,
                #     "is protected": True,
                #     "apparent working tampers": <the supplied payloads>
                # }
                dict_data_output = dictify_output(url, detected_protections, found_working_tampers)
                # write to file; note that filename may be None since
                # CSV, JSON and YAML are all optional
                if file_path:
                    written_file_path = write_to_file(
                        filename, file_path, dict_data_output,
                        write_csv=use_csv, write_yaml=use_yaml, write_json=use_json,
                        save_copy_to=save_file_copy_path
                    )
                    if written_file_path is not None:
                        info("data has been written to file: '{}'".format(written_file_path))
            """
            cached_urls table fields ->
                id
                uri
                working_tampers DEFAULT 'N/A',
                identified_protections DEFAULT 'N/A',
                identified_webserver DEFAULT 'N/A'
            """
            # found_webserver comes from the Server field of the response headers;
            # found_working_tampers and detected_protections are joined with ','
            # when there is more than one (argument order kept consistent with
            # the other insert_url() call sites)
            inserted_into_database_results = insert_url(
                current_url_netloc, found_working_tampers, detected_protections,
                cursor, webserver=found_webserver
            )
        else:
            # --skip was given, so the tamper field is never written
            warning("skipping bypass checks")
            # --format
            if formatted:
                dict_data_output = dictify_output(url, detected_protections, [])
                # write to file; filename may be None since CSV/JSON/YAML are optional
                written_file_path = write_to_file(
                    filename, file_path, dict_data_output,
                    write_csv=use_csv, write_yaml=use_yaml, write_json=use_json,
                    save_copy_to=save_file_copy_path
                )
                # i.e. any one of json/csv/yaml was specified
                if written_file_path is not None:
                    info("data has been written to file: '{}'".format(written_file_path))
            if isinstance(detected_protections, str):
                # wrap the single product in a list
                detected_protections = [detected_protections]
            # --skip was chosen, so the tamper stage is skipped
            inserted_into_database_results = insert_url(
                current_url_netloc, [], detected_protections,
                cursor, webserver=found_webserver
            )
    elif amount_of_products == 0:
        # nothing found
        warning("no protection identified on target, verifying")
        if verification_number is None:
            verification_number = 5
        verification_normal_response = get_page(
            url, proxy=proxy, user_agent=user_agent,
            provided_headers=provided_headers, throttle=throttle,
            timeout=request_timeout, request_method=request_type,
            post_data=post_data
        )
        # just take the fourth payload from the default payload list
        payloaded_url = "{}{}".format(url, WAF_REQUEST_DETECTION_PAYLOADS[3])
        verification_payloaded_response = get_page(
            payloaded_url, proxy=proxy, user_agent=user_agent,
            provided_headers=provided_headers, throttle=throttle,
            timeout=request_timeout, request_method=request_type,
            post_data=post_data
        )
        # check_if_matched returns a set of responses, or None when the normal
        # URL and the payloaded URL come back with identical headers
        results = check_if_matched(
            verification_normal_response, verification_payloaded_response,
            verified=verification_number
        )
        if results is not None:
            data_sep = colored("-" * 30, 'white')
            info("target seems to be behind some kind of protection for the following reasons:")
            print(data_sep)
            for i, content in enumerate(results, start=1):
                print("[{}] {}".format(i, content))
            print(data_sep)
            # temporarily disabled: if the WAF fingerprint was not detected but
            # the comparison still found differences, the fingerprint database
            # is not strong enough; an issue would then be filed on the
            # author's GitHub
            # _, status, html, headers = verification_payloaded_response
            # if status != 0:
            #     path = create_fingerprint(url, html, status, headers)
            #     request_firewall_issue_creation(path)
            # else:
            #     warning(
            #         "status code returned as `0` meaning that there is no content in the webpage, "
            #         "issue will not be created"
            #     )
            inserted_into_database_results = insert_url(
                current_url_netloc, [], [], cursor, webserver=found_webserver
            )
        else:
            # no differences were found
            success("no protection identified on target")
            if formatted:
                if not force_file_creation:
                    warning(
                        "no data will be written to files since no protection could be identified, "
                        "to force file creation pass the `--force-file` argument"
                    )
                else:
                    # if the argument `--force-file` is passed we will create the file
                    # anyways, this should give users who are relying on the JSON files
                    # for thirdparty information a chance to get the data out of the directory
                    # then they can easily parse it without problems.
                    warning("forcing file creation without successful identification")
                    dict_data_output = dictify_output(url, None, [])
                    written_file_path = write_to_file(
                        filename, file_path, dict_data_output,
                        write_csv=use_csv, write_yaml=use_yaml, write_json=use_json,
                        save_copy_to=save_file_copy_path
                    )
                    if written_file_path is not None:
                        info("data has been written to file: '{}'".format(written_file_path))
            inserted_into_database_results = insert_url(
                current_url_netloc, [], [], cursor, webserver=found_webserver
            )
    else:
        # more than one protection identified
        success("multiple protections identified on target{}:".format(
            " (unknown firewall will not be displayed)"
            if UNKNOWN_FIREWALL_NAME in detected_protections else ""
        ))
        detected_protections = list(detected_protections)
        for i, protection in enumerate(detected_protections, start=1):
            if not protection == UNKNOWN_FIREWALL_NAME:
                success("#{} '{}'".format(i, protection))

        if not skip_bypass_check:
            info("searching for bypasses")
            found_working_tampers = get_working_tampers(
                url, normal_response, payloads, proxy=proxy, agent=user_agent,
                verbose=verbose, tamper_int=tamper_int, throttle=throttle,
                timeout=request_timeout, provided_headers=provided_headers
            )
            if not formatted:
                # pretty-print the tampers
                produce_results(found_working_tampers)
            else:
                # dictify_output returns a single dict
                dict_data_output = dictify_output(url, detected_protections, found_working_tampers)
                written_file_path = write_to_file(
                    filename, file_path, dict_data_output,
                    write_csv=use_csv, write_yaml=use_yaml, write_json=use_json,
                    save_copy_to=save_file_copy_path
                )
                if written_file_path is not None:
                    info("data has been written to file: '{}'".format(written_file_path))
            # write to the database
            inserted_into_database_results = insert_url(
                current_url_netloc, found_working_tampers, detected_protections,
                cursor, webserver=found_webserver
            )
        else:
            # skip the tamper checks
            warning("skipping bypass tests")
            if formatted:
                dict_data_output = dictify_output(url, detected_protections, [])
                written_file_path = write_to_file(
                    filename, file_path, dict_data_output,
                    write_csv=use_csv, write_yaml=use_yaml, write_json=use_json,
                    save_copy_to=save_file_copy_path
                )
                if written_file_path is not None:
                    info("data has been written to file: '{}'".format(written_file_path))
            inserted_into_database_results = insert_url(
                current_url_netloc, [], detected_protections,
                cursor, webserver=found_webserver
            )

    if inserted_into_database_results:
        info("URL has been cached for future use")
    # return the total number of requests sent
    return request_count
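
# Example invocation of detection_main(), assuming a sqlite3 cursor and the
# default WhatWaf payload list are available; the database filename and the
# option values below are illustrative only:
#
#   import sqlite3
#   conn = sqlite3.connect("whatwaf.sqlite")
#   sent = detection_main(
#       "http://example.com/index.php?id=1",
#       WAF_REQUEST_DETECTION_PAYLOADS,
#       conn.cursor(),
#       threaded=4, use_json=True, verbose=True
#   )
#   info("sent {} requests in total".format(sent))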
def parse_search_results(query, url_to_search, verbose=False, **kwargs):
    """
    parse a webpage from Google for URLs with a GET (query) parameter
    """
    exclude = (
        "www.google.com", "map.google.com", "mail.google.com",
        "drive.google.com", "news.google.com", "accounts.google.com"
    )
    splitter = "&"
    retval = set()
    query_url = None

    def __get_headers():
        proxy_string, user_agent = None, None
        try:
            proxy_string = kwargs.get("proxy")
        except:
            pass
        try:
            user_agent = kwargs.get("agent")
        except:
            pass
        return proxy_string, user_agent

    if verbose:
        logger.debug(set_color(
            "checking for user-agent and proxy configuration...", level=10
        ))
    proxy_string, user_agent = __get_headers()

    if proxy_string is not None:
        proxy_string = proxy_string_to_dict(proxy_string)
    if user_agent is None:
        user_agent = DEFAULT_USER_AGENT

    user_agent_info = "adjusting user-agent header to {}..."
    if user_agent != DEFAULT_USER_AGENT:
        user_agent_info = user_agent_info.format(user_agent.strip())
    else:
        user_agent_info = user_agent_info.format(
            "default user agent '{}'".format(DEFAULT_USER_AGENT))

    proxy_string_info = "setting proxy to {}..."
    if proxy_string is not None:
        proxy_string_info = proxy_string_info.format(
            ''.join(proxy_string.keys()) + "://" + ''.join(proxy_string.values()))
    else:
        proxy_string_info = "no proxy configuration detected..."

    headers = {"Connection": "close", "user-agent": user_agent}
    logger.info(set_color("attempting to gather query URL..."))
    try:
        query_url = get_urls(query, url_to_search, verbose=verbose, user_agent=user_agent, proxy=proxy_string)
    except Exception as e:
        if "WebDriverException" in str(e):
            logger.exception(set_color(
                "it seems that you exited the browser, please allow the browser "
                "to complete its run so that Zeus can bypass captchas and API "
                "calls", level=50
            ))
        elif "'/usr/lib/firefoxdriver/webdriver.xpi'" in str(e):
            logger.fatal(set_color(
                "firefox was not found in the default location on your system, "
                "check your installation and make sure it is in /usr/lib, if you "
                "find it there, restart your system and try again...", level=50
            ))
        else:
            logger.exception(set_color(
                "{} failed to gather the URL from the search engine, caught exception '{}', "
                "exception has been logged to the current log file...".format(
                    os.path.basename(__file__), str(e).strip()), level=50
            ))
            request_issue_creation()
        shutdown()

    logger.info(set_color("URL successfully gathered, searching for GET parameters..."))
    logger.info(set_color(proxy_string_info))
    # send the headers with the request instead of mutating the response afterwards
    req = requests.get(query_url, proxies=proxy_string, headers=headers)
    logger.info(set_color(user_agent_info))
    found_urls = URL_REGEX.findall(req.text)
    for urls in list(found_urls):
        for url in list(urls):
            url = unquote(url)
            if URL_QUERY_REGEX.match(url) and not any(l in url for l in exclude):
                if isinstance(url, unicode):
                    url = str(url).encode("utf-8")
                if "webcache" in url:
                    logger.info(set_color(
                        "received webcache URL, extracting URL from webcache..."
                    ))
                    url = extract_webcache_url(url)
                if verbose:
                    try:
                        logger.debug(set_color(
                            "found '{}'...".format(url.split(splitter)[0]), level=10))
                    except TypeError:
                        logger.debug(set_color(
                            "found '{}'...".format(str(url).split(splitter)[0]), level=10))
                    except AttributeError:
                        logger.debug(set_color(
                            "found '{}'...".format(str(url)), level=10))
                retval.add(url.split(splitter)[0])
    logger.info(set_color(
        "found a total of {} URLs with a GET parameter...".format(len(retval))))
    if len(retval) != 0:
        write_to_log_file(retval, URL_LOG_PATH, "url-log-{}.log")
    else:
        logger.critical(set_color(
            "did not find any usable URLs with the given query '{}' "
            "using search engine '{}'...".format(query, url_to_search), level=50
        ))
        shutdown()
    return list(retval) if len(retval) != 0 else None
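
# The filtering above keeps only query URLs that contain none of the excluded
# Google domains; a standalone sketch of that `any()` exclusion check (the
# default fragments here are a hypothetical subset of the real exclude tuple):
def _demo_is_usable(url, exclude=("www.google.com", "accounts.google.com")):
    """True when no excluded fragment appears anywhere in the URL"""
    return not any(fragment in url for fragment in exclude)

# _demo_is_usable("http://example.com/?id=1")            -> True
# _demo_is_usable("http://www.google.com/search?q=test") -> False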