Python URL_REGEX Examples, lib.settings.URL_REGEX Python Examples

Example #1

0

Show file

                                url.strip(),
                                sqlmap=opt.runSqliScan, nmap=opt.runPortScan, intel=opt.intelCheck, xss=opt.runXssScan,
                                admin=opt.adminPanelFinder, given_path=opt.givenSearchPath,
                                auto=opt.autoStartSqlmap, verbose=opt.runInVerbose, batch=opt.runInBatch
                            )

            except Exception as e:
                logger.exception(set_color(
                    "ran into exception '{}' and cannot continue, saved to current log file...".format(e),
                    level=50
                ))
                fix_log_file()
                request_issue_creation()

        elif opt.spiderWebSite:
            if not URL_REGEX.match(opt.spiderWebSite):
                err_msg = "URL did not match a true URL{}..."
                if "www" in opt.spiderWebSite:
                    err_msg = err_msg.format(" issue seems to be that 'www' is in the URL, "
                                             "replace with http(s)://")
                else:
                    err_msg = err_msg.format("")
                raise InvalidInputProvided(
                    err_msg
                )
            else:
                if URL_QUERY_REGEX.match(opt.spiderWebSite):
                    is_sure = prompt(
                        "it is recomened to not use a URL that has a GET(query) parameter in it, "
                        "would you like to continue", "yN"
                    )

Example #2

0

Show file

File: search.py Project: Expertasif/Zeus-Scanner

def search_multiple_pages(query,
                          link_amount,
                          proxy=None,
                          agent=None,
                          verbose=False):
    """
      Pull up to `link_amount` results for `query` from `google_api.search`
      and log the ones that match both URL_REGEX and URL_QUERY_REGEX

      :param query: search string handed to `google_api.search`
      :param link_amount: maximum number of results pulled from the search
      :param proxy: optional proxy string (see the review note in the body)
      :param agent: optional user-agent, forwarded as `user_agent`
      :param verbose: emit debug level messages when True
      :return: None, matching URL's are handed to `write_to_log_file`
    """

    def __config_proxy(proxy_string):
        # translate the proxy string into an httplib2 proxy configuration
        proxy_type_schema = {
            "http": httplib2.socks.PROXY_TYPE_HTTP,
            "socks4": httplib2.socks.PROXY_TYPE_SOCKS4,
            "socks5": httplib2.socks.PROXY_TYPE_SOCKS5
        }
        proxy_type = get_proxy_type(proxy_string)[0]
        proxy_dict = proxy_string_to_dict(proxy_string)
        return httplib2.ProxyInfo(
            proxy_type=proxy_type_schema[proxy_type],
            proxy_host="".join(proxy_dict.keys()),
            proxy_port="".join(proxy_dict.values()))

    if proxy is not None:
        if verbose:
            logger.debug(
                set_color("configuring to use proxy '{}'...".format(proxy),
                          level=10))
        # NOTE(review): the resulting ProxyInfo is not handed to the search
        # call below, so the proxy does not appear to be applied -- confirm
        # how google_api.search expects to receive proxy settings
        __config_proxy(proxy)

    if agent is not None:
        if verbose:
            logger.debug(
                set_color("settings user-agent to '{}'...".format(agent),
                          level=10))

    logger.warning(
        set_color(
            "multiple pages will be searched using Google's API client, searches may be blocked after a certain "
            "amount of time...",
            level=30))

    results = set()
    remaining = link_amount
    search_iter = google_api.search(query, user_agent=agent, safe="on")
    try:
        while remaining > 0:
            results.add(next(search_iter))
            remaining -= 1
    except StopIteration:
        # the search ran out of results before the requested amount
        pass
    except Exception as e:
        # whatever was collected before the failure is still processed below
        if "Error 503" in str(e):
            logger.fatal(
                set_color(
                    "Google is blocking the current IP address, dumping already found URL's...",
                    level=50))
        else:
            logger.error(
                set_color(
                    "search ended early due to exception '{}', dumping already found URL's...".format(
                        str(e).strip()),
                    level=40))

    retval = set()
    for url in results:
        if URL_REGEX.match(url) and URL_QUERY_REGEX.match(url):
            if verbose:
                logger.debug(set_color("found '{}'...".format(url), level=10))
            retval.add(url)

    if retval:
        logger.info(
            set_color(
                "a total of {} links found out of requested {}...".format(
                    len(retval), link_amount)))
        write_to_log_file(list(retval), URL_LOG_PATH, "url-log-{}.log")
    else:
        logger.error(
            set_color("unable to extract URL's from results...", level=40))

Example #3

0

Show file

File: search.py Project: olivierh59500/Zeus-Scanner

def parse_search_results(query,
                         url,
                         verbose=False,
                         dirname="{}/log/url-log",
                         filename="url-log-{}.log",
                         **kwargs):
    """
      Parse a webpage from Google for URL's with a GET(query) parameter

      :param query: search query handed to `get_urls`
      :param url: search engine URL handed to `get_urls`
      :param verbose: emit debug level messages when True
      :param dirname: log directory template, formatted with the current
                      working directory
      :param filename: log file template, formatted with the number of
                       entries already in the log directory plus one
      :param kwargs: optional `proxy` (proxy string) and `agent`
                     (user-agent string)
      :return: list of the URL's found, or None when nothing was found
    """
    # a found URL containing any of these is skipped; the former
    # `"google" or "webcache" or "youtube"` evaluated to just "google"
    exclude = ("google", "webcache", "youtube")

    log_dir = dirname.format(os.getcwd())
    create_dir(log_dir)
    full_file_path = "{}/{}".format(
        log_dir, filename.format(len(os.listdir(log_dir)) + 1))

    if verbose:
        logger.debug(
            set_color("checking for user-agent and proxy configuration...",
                      level=10))
    # dict.get never raises for a missing key, no guard needed
    proxy_string = kwargs.get("proxy")
    user_agent = kwargs.get("agent")

    if proxy_string is not None:
        proxy_string = proxy_string_to_dict(proxy_string)
    if user_agent is None:
        user_agent = DEFAULT_USER_AGENT

    user_agent_info = "adjusting user-agent header to {}..."
    # compare by value, identity of two equal strings is not guaranteed
    if user_agent != DEFAULT_USER_AGENT:
        user_agent_info = user_agent_info.format(user_agent.strip())
    else:
        user_agent_info = user_agent_info.format(
            "default user agent '{}'".format(DEFAULT_USER_AGENT))

    if proxy_string is not None:
        proxy_string_info = "setting proxy to {}...".format(
            ''.join(proxy_string.keys()) + "://" +
            ''.join(proxy_string.values()))
    else:
        proxy_string_info = "no proxy configuration detected..."

    headers = {"Connection": "close", "user-agent": user_agent}
    logger.info(set_color("attempting to gather query URL..."))
    # NOTE(review): presumably shutdown() terminates the process; the
    # default keeps the name bound in case it ever returns -- confirm
    query_url = None
    try:
        query_url = get_urls(query,
                             url,
                             verbose=verbose,
                             user_agent=user_agent,
                             proxy=proxy_string)
    except Exception as e:
        if "WebDriverException" in str(e):
            logger.exception(
                set_color(
                    "it seems that you exited the browser, please allow the browser "
                    "to complete it's run so that Zeus can bypass captchas and API "
                    "calls",
                    level=50))
        else:
            logger.exception(
                set_color(
                    "{} failed to gather the URL from search engine, caught exception '{}' "
                    "exception has been logged to current log file...".format(
                        os.path.basename(__file__),
                        str(e).strip()),
                    level=50))
        shutdown()
    logger.info(
        set_color(
            "URL successfully gathered, searching for GET parameters..."))
    logger.info(set_color(proxy_string_info))
    logger.info(set_color(user_agent_info))
    # the headers have to go out with the request; they used to be applied
    # to the response object afterwards and so were never sent
    req = requests.get(query_url, headers=headers, proxies=proxy_string)

    retval = set()
    # loop names are kept distinct from the `url` parameter so the final
    # error message still reports the search engine that was used
    for groups in URL_REGEX.findall(req.text):
        for candidate in groups:
            candidate = urllib.unquote(candidate)
            if not URL_QUERY_REGEX.match(candidate):
                continue
            if any(item in candidate for item in exclude):
                continue
            if isinstance(candidate, unicode):
                # encode the unicode object directly, str() on it fails
                # for non-ASCII characters
                candidate = candidate.encode("utf-8")
            if verbose:
                logger.debug(
                    set_color("found '{}'...".format(candidate), level=10))
            retval.add(candidate.split("&amp;")[0])
    logger.info(
        set_color("found a total of {} URL's with a GET parameter...".format(
            len(retval))))
    if retval:
        logger.info(
            set_color(
                "saving found URL's under '{}'...".format(full_file_path)))
        with open(full_file_path, "a+") as log:
            for found in retval:
                log.write(found + "\n")
    else:
        logger.critical(
            set_color(
                "did not find any usable URL's with the given query '{}' "
                "using search engine '{}'...".format(query, url),
                level=50))
        shutdown()
    return list(retval) if retval else None

Example #4

0

Show file

File: search.py Project: Expertasif/Zeus-Scanner

def parse_search_results(query, url_to_search, verbose=False, **kwargs):
    """
      Parse a webpage from Google for URL's with a GET(query) parameter

      :param query: search query handed to `get_urls`
      :param url_to_search: search engine URL handed to `get_urls`
      :param verbose: emit debug level messages when True
      :param kwargs: optional `proxy` (proxy string) and `agent`
                     (user-agent string)
      :return: list of the URL's found, or None when nothing was found
    """
    exclude = ("www.google.com", "map.google.com", "mail.google.com",
               "drive.google.com", "news.google.com", "accounts.google.com")
    splitter = "&amp;"
    retval = set()
    query_url = None

    if verbose:
        logger.debug(
            set_color("checking for user-agent and proxy configuration...",
                      level=10))
    # dict.get never raises for a missing key, no guard needed
    proxy_string = kwargs.get("proxy")
    user_agent = kwargs.get("agent")

    if proxy_string is not None:
        proxy_string = proxy_string_to_dict(proxy_string)
    if user_agent is None:
        user_agent = DEFAULT_USER_AGENT

    user_agent_info = "adjusting user-agent header to {}..."
    # compare by value, identity of two equal strings is not guaranteed
    if user_agent != DEFAULT_USER_AGENT:
        user_agent_info = user_agent_info.format(user_agent.strip())
    else:
        user_agent_info = user_agent_info.format(
            "default user agent '{}'".format(DEFAULT_USER_AGENT))

    if proxy_string is not None:
        proxy_string_info = "setting proxy to {}...".format(
            ''.join(proxy_string.keys()) + "://" +
            ''.join(proxy_string.values()))
    else:
        proxy_string_info = "no proxy configuration detected..."

    headers = {"Connection": "close", "user-agent": user_agent}
    logger.info(set_color("attempting to gather query URL..."))
    try:
        query_url = get_urls(query,
                             url_to_search,
                             verbose=verbose,
                             user_agent=user_agent,
                             proxy=proxy_string)
    except Exception as e:
        if "WebDriverException" in str(e):
            logger.exception(
                set_color(
                    "it seems that you exited the browser, please allow the browser "
                    "to complete it's run so that Zeus can bypass captchas and API "
                    "calls",
                    level=50))
        elif "'/usr/lib/firefoxdriver/webdriver.xpi'" in str(e):
            logger.fatal(
                set_color(
                    "firefox was not found in the default location on your system, "
                    "check your installation and make sure it is in /usr/lib, if you "
                    "find it there, restart your system and try again...",
                    level=50))
        else:
            logger.exception(
                set_color(
                    "{} failed to gather the URL from search engine, caught exception '{}' "
                    "exception has been logged to current log file...".format(
                        os.path.basename(__file__),
                        str(e).strip()),
                    level=50))
            request_issue_creation()
        shutdown()
    logger.info(
        set_color(
            "URL successfully gathered, searching for GET parameters..."))

    logger.info(set_color(proxy_string_info))
    logger.info(set_color(user_agent_info))
    # the headers have to go out with the request; they used to be applied
    # to the response object afterwards and so were never sent
    req = requests.get(query_url, headers=headers, proxies=proxy_string)

    for groups in URL_REGEX.findall(req.text):
        for url in groups:
            url = unquote(url)
            if not URL_QUERY_REGEX.match(url):
                continue
            if any(domain in url for domain in exclude):
                continue
            if isinstance(url, unicode):
                # encode the unicode object directly, str() on it fails
                # for non-ASCII characters
                url = url.encode("utf-8")
            if "webcache" in url:
                logger.info(
                    set_color(
                        "received webcache URL, extracting URL from webcache..."
                    ))
                url = extract_webcache_url(url)
            if verbose:
                try:
                    logger.debug(
                        set_color("found '{}'...".format(
                            url.split(splitter)[0]),
                                  level=10))
                except TypeError:
                    logger.debug(
                        set_color("found '{}'...".format(
                            str(url).split(splitter)[0]),
                                  level=10))
                except AttributeError:
                    logger.debug(
                        set_color("found '{}...".format(str(url)),
                                  level=10))
            # same separator as the debug output above
            retval.add(url.split(splitter)[0])
    logger.info(
        set_color("found a total of {} URL's with a GET parameter...".format(
            len(retval))))
    if retval:
        write_to_log_file(retval, URL_LOG_PATH, "url-log-{}.log")
    else:
        logger.critical(
            set_color(
                "did not find any usable URL's with the given query '{}' "
                "using search engine '{}'...".format(query, url_to_search),
                level=50))
        shutdown()
    return list(retval) if retval else None