url.strip(), sqlmap=opt.runSqliScan, nmap=opt.runPortScan, intel=opt.intelCheck, xss=opt.runXssScan, admin=opt.adminPanelFinder, given_path=opt.givenSearchPath, auto=opt.autoStartSqlmap, verbose=opt.runInVerbose, batch=opt.runInBatch ) except Exception as e: logger.exception(set_color( "ran into exception '{}' and cannot continue, saved to current log file...".format(e), level=50 )) fix_log_file() request_issue_creation() elif opt.spiderWebSite: if not URL_REGEX.match(opt.spiderWebSite): err_msg = "URL did not match a true URL{}..." if "www" in opt.spiderWebSite: err_msg = err_msg.format(" issue seems to be that 'www' is in the URL, " "replace with http(s)://") else: err_msg = err_msg.format("") raise InvalidInputProvided( err_msg ) else: if URL_QUERY_REGEX.match(opt.spiderWebSite): is_sure = prompt( "it is recomened to not use a URL that has a GET(query) parameter in it, " "would you like to continue", "yN" )
def search_multiple_pages(query, link_amount, proxy=None, agent=None, verbose=False): def __config_proxy(proxy_string): proxy_type_schema = { "http": httplib2.socks.PROXY_TYPE_HTTP, "socks4": httplib2.socks.PROXY_TYPE_SOCKS4, "socks5": httplib2.socks.PROXY_TYPE_SOCKS5 } proxy_type = get_proxy_type(proxy_string)[0] proxy_dict = proxy_string_to_dict(proxy_string) proxy_config = httplib2.ProxyInfo( proxy_type=proxy_type_schema[proxy_type], proxy_host="".join(proxy_dict.keys()), proxy_port="".join(proxy_dict.values())) return proxy_config if proxy is not None: if verbose: logger.debug( set_color("configuring to use proxy '{}'...".format(proxy), level=10)) __config_proxy(proxy) if agent is not None: if verbose: logger.debug( set_color("settings user-agent to '{}'...".format(agent), level=10)) logger.warning( set_color( "multiple pages will be searched using Google's API client, searches may be blocked after a certain " "amount of time...", level=30)) results, limit, found, index = set(), link_amount, 0, google_api.search( query, user_agent=agent, safe="on") try: while limit > 0: results.add(next(index)) limit -= 1 found += 1 except Exception as e: if "Error 503" in str(e): logger.fatal( set_color( "Google is blocking the current IP address, dumping already found URL's...", level=50)) results = results pass retval = set() for url in results: if URL_REGEX.match(url) and URL_QUERY_REGEX.match(url): if verbose: logger.debug(set_color("found '{}'...".format(url), level=10)) retval.add(url) if len(retval) != 0: logger.info( set_color( "a total of {} links found out of requested {}...".format( len(retval), link_amount))) write_to_log_file(list(retval), URL_LOG_PATH, "url-log-{}.log") else: logger.error( set_color("unable to extract URL's from results...", level=40))
def parse_search_results(query, url, verbose=False, dirname="{}/log/url-log", filename="url-log-{}.log", **kwargs): """ Parse a webpage from Google for URL's with a GET(query) parameter """ exclude = "google" or "webcache" or "youtube" create_dir(dirname.format(os.getcwd())) full_file_path = "{}/{}".format( dirname.format(os.getcwd()), filename.format(len(os.listdir(dirname.format(os.getcwd()))) + 1)) def __get_headers(): try: proxy_string = kwargs.get("proxy") except: pass try: user_agent = kwargs.get("agent") except: pass return proxy_string, user_agent if verbose: logger.debug( set_color("checking for user-agent and proxy configuration...", level=10)) proxy_string, user_agent = __get_headers() if proxy_string is None: proxy_string = None else: proxy_string = proxy_string_to_dict(proxy_string) if user_agent is None: user_agent = DEFAULT_USER_AGENT else: user_agent = user_agent user_agent_info = "adjusting user-agent header to {}..." if user_agent is not DEFAULT_USER_AGENT: user_agent_info = user_agent_info.format(user_agent.strip()) else: user_agent_info = user_agent_info.format( "default user agent '{}'".format(DEFAULT_USER_AGENT)) proxy_string_info = "setting proxy to {}..." if proxy_string is not None: proxy_string_info = proxy_string_info.format( ''.join(proxy_string.keys()) + "://" + ''.join(proxy_string.values())) else: proxy_string_info = "no proxy configuration detected..." headers = {"Connection": "close", "user-agent": user_agent} logger.info(set_color("attempting to gather query URL...")) try: query_url = get_urls(query, url, verbose=verbose, user_agent=user_agent, proxy=proxy_string) except Exception as e: if "WebDriverException" in str(e): logger.exception( set_color( "it seems that you exited the browser, please allow the browser " "to complete it's run so that Zeus can bypass captchas and API " "calls", level=50)) else: logger.exception( set_color( "{} failed to gather the URL from search engine, caught exception '{}' " "exception has been logged to current log file...".format( os.path.basename(__file__), str(e).strip()), level=50)) shutdown() logger.info( set_color( "URL successfully gathered, searching for GET parameters...")) logger.info(set_color(proxy_string_info)) req = requests.get(query_url, proxies=proxy_string) logger.info(set_color(user_agent_info)) req.headers.update(headers) found_urls = URL_REGEX.findall(req.text) retval = set() for urls in list(found_urls): for url in list(urls): url = urllib.unquote(url) if URL_QUERY_REGEX.match(url) and exclude not in url: if type(url) is unicode: url = str(url).encode("utf-8") if verbose: logger.debug( set_color("found '{}'...".format(url), level=10)) retval.add(url.split("&")[0]) logger.info( set_color("found a total of {} URL's with a GET parameter...".format( len(retval)))) if len(retval) != 0: logger.info( set_color( "saving found URL's under '{}'...".format(full_file_path))) with open(full_file_path, "a+") as log: for url in list(retval): log.write(url + "\n") else: logger.critical( set_color( "did not find any usable URL's with the given query '{}' " "using search engine '{}'...".format(query, url), level=50)) shutdown() return list(retval) if len(retval) != 0 else None
def parse_search_results(query, url_to_search, verbose=False, **kwargs): """ Parse a webpage from Google for URL's with a GET(query) parameter """ exclude = ("www.google.com", "map.google.com", "mail.google.com", "drive.google.com", "news.google.com", "accounts.google.com") splitter = "&" retval = set() query_url = None def __get_headers(): proxy_string, user_agent = None, None try: proxy_string = kwargs.get("proxy") except: pass try: user_agent = kwargs.get("agent") except: pass return proxy_string, user_agent if verbose: logger.debug( set_color("checking for user-agent and proxy configuration...", level=10)) proxy_string, user_agent = __get_headers() if proxy_string is None: proxy_string = None else: proxy_string = proxy_string_to_dict(proxy_string) if user_agent is None: user_agent = DEFAULT_USER_AGENT else: user_agent = user_agent user_agent_info = "adjusting user-agent header to {}..." if user_agent is not DEFAULT_USER_AGENT: user_agent_info = user_agent_info.format(user_agent.strip()) else: user_agent_info = user_agent_info.format( "default user agent '{}'".format(DEFAULT_USER_AGENT)) proxy_string_info = "setting proxy to {}..." if proxy_string is not None: proxy_string_info = proxy_string_info.format( ''.join(proxy_string.keys()) + "://" + ''.join(proxy_string.values())) else: proxy_string_info = "no proxy configuration detected..." headers = {"Connection": "close", "user-agent": user_agent} logger.info(set_color("attempting to gather query URL...")) try: query_url = get_urls(query, url_to_search, verbose=verbose, user_agent=user_agent, proxy=proxy_string) except Exception as e: if "WebDriverException" in str(e): logger.exception( set_color( "it seems that you exited the browser, please allow the browser " "to complete it's run so that Zeus can bypass captchas and API " "calls", level=50)) elif "'/usr/lib/firefoxdriver/webdriver.xpi'" in str(e): logger.fatal( set_color( "firefox was not found in the default location on your system, " "check your installation and make sure it is in /usr/lib, if you " "find it there, restart your system and try again...", level=50)) else: logger.exception( set_color( "{} failed to gather the URL from search engine, caught exception '{}' " "exception has been logged to current log file...".format( os.path.basename(__file__), str(e).strip()), level=50)) request_issue_creation() shutdown() logger.info( set_color( "URL successfully gathered, searching for GET parameters...")) logger.info(set_color(proxy_string_info)) req = requests.get(query_url, proxies=proxy_string) logger.info(set_color(user_agent_info)) req.headers.update(headers) found_urls = URL_REGEX.findall(req.text) for urls in list(found_urls): for url in list(urls): url = unquote(url) if URL_QUERY_REGEX.match(url) and not any(l in url for l in exclude): if isinstance(url, unicode): url = str(url).encode("utf-8") if "webcache" in url: logger.info( set_color( "received webcache URL, extracting URL from webcache..." )) url = extract_webcache_url(url) if verbose: try: logger.debug( set_color("found '{}'...".format( url.split(splitter)[0]), level=10)) except TypeError: logger.debug( set_color("found '{}'...".format( str(url).split(splitter)[0]), level=10)) except AttributeError: logger.debug( set_color("found '{}...".format(str(url)), level=10)) retval.add(url.split("&")[0]) logger.info( set_color("found a total of {} URL's with a GET parameter...".format( len(retval)))) if len(retval) != 0: write_to_log_file(retval, URL_LOG_PATH, "url-log-{}.log") else: logger.critical( set_color( "did not find any usable URL's with the given query '{}' " "using search engine '{}'...".format(query, url_to_search), level=50)) shutdown() return list(retval) if len(retval) != 0 else None