Example #1
    def recv_info(self, info):

        m_parsed_url = info.parsed_url
        m_results = []

        #------------------------------------------------------------------
        # Find suspicious URLs by matching against known substrings.

        # Load wordlists
        m_wordlist_middle     = WordListLoader.get_wordlist(Config.plugin_config['middle'])
        m_wordlist_extensions = WordListLoader.get_wordlist(Config.plugin_config['extensions'])

        # Add keywords that match at any position of the URL.
        m_results.extend([SuspiciousURLPath(info, x)
                          for x in m_wordlist_middle
                          if x in m_parsed_url.directory.split("/") or
                          x == m_parsed_url.filebase or
                          x == m_parsed_url.extension])

        # Add keywords that match the URL file extension.
        m_results.extend([SuspiciousURLPath(info, x)
                          for x in m_wordlist_extensions
                          if m_parsed_url.extension == x])

        #------------------------------------------------------------------
        # Find suspicious URLs by calculating the Shannon entropy of the hostname.
        # Idea from: https://github.com/stricaud/urlweirdos/blob/master/src/urlw/plugins/shannon/__init__.py
        # TODO: test with unicode enabled hostnames!

        # Check the Shannon entropy for the hostname.
        hostname = info.parsed_url.hostname
        entropy = calculate_shannon_entropy(hostname)
        if entropy > 4.0:
            m_results.append( SuspiciousURLPath(info, hostname) )

        # Check the Shannon entropy for the subdomains.
        for subdomain in info.parsed_url.hostname.split('.'):
            if len(subdomain) > 3:
                entropy = calculate_shannon_entropy(subdomain)
                if entropy > 4.0:
                    m_results.append( SuspiciousURLPath(info, subdomain) )

        #------------------------------------------------------------------
        # Get suspicious links to malware sites.
        #------------------------------------------------------------------
        p     = None
        m_url = info.url
        Logger.log_more_verbose("Looking for output links to malware sites")
        try:
            allow_redirects = Config.audit_config.follow_redirects or \
                             (info.depth == 0 and Config.audit_config.follow_first_redirect)
            p = download(m_url, self.check_download, allow_redirects=allow_redirects)
        except NetworkException, e:
            Logger.log_more_verbose("Error while processing %r: %s" % (m_url, str(e)))
Example #2
    def test_all_wordlist_property(self):
        # Set Config plugin
        LocalFile._LocalFile__plugin_path = os.path.abspath(W_DIR)

        # Create plugin wordlists
        _create_plugin_info()

        # Clean and configure new store
        WordListLoader._WordListLoader__store = {}
        WordListLoader._WordListLoader__load_wordlists(W_DIR)

        try:
            assert WordListLoader.all_wordlists == ["test_wordlist.txt"]
        finally:
            _destroy_plugin_info()
Example #3
    def recv_info(self, info):

        m_parsed_url = info.parsed_url
        m_results = []

        #------------------------------------------------------------------
        # Find suspicious URLs by matching against known substrings.

        # Load wordlists
        m_wordlist_middle = WordListLoader.get_wordlist(
            Config.plugin_config['middle'])
        m_wordlist_extensions = WordListLoader.get_wordlist(
            Config.plugin_config['extensions'])

        # Add keywords that match at any position of the URL.
        m_results.extend([
            SuspiciousURL(info, x) for x in m_wordlist_middle
            if x in m_parsed_url.directory.split("/")
            or x == m_parsed_url.filebase or x == m_parsed_url.extension
        ])

        # Add keywords that match the URL file extension.
        m_results.extend([
            SuspiciousURL(info, x) for x in m_wordlist_extensions
            if m_parsed_url.extension == x
        ])

        #------------------------------------------------------------------
        # Find suspicious URLs by calculating the Shannon entropy of the hostname.
        # Idea from: https://github.com/stricaud/urlweirdos/blob/master/src/urlw/plugins/shannon/__init__.py
        # TODO: test with unicode enabled hostnames!

        # Check the Shannon entropy for the hostname.
        hostname = info.parsed_url.hostname
        entropy = calculate_shannon_entropy(hostname)
        if entropy > 4.0:
            m_results.append(SuspiciousURL(info, hostname))

        # Check the Shannon entropy for the subdomains.
        for subdomain in info.parsed_url.hostname.split('.'):
            if len(subdomain) > 3:
                entropy = calculate_shannon_entropy(subdomain)
                if entropy > 4.0:
                    m_results.append(SuspiciousURL(info, subdomain))

        #------------------------------------------------------------------

        return m_results
Example #4
def load_wordlists(wordlists):
    """
    Load the wordlists with the names passed as parameters.

    This function receives a list of wordlist names, defined in the plugin
    configuration file, and returns a dict with wordlist instances.

    :param wordlists: list with wordlist names
    :type wordlists: list

    :returns: A dict with wordlists
    :rtype: dict
    """

    m_tmp_wordlist = {}

    # Get wordlist to load
    for l_w in wordlists:
        for wordlist_family, l_wordlists in Config.plugin_extra_config.iteritems():
            if wordlist_family.lower() in l_w.lower():
                m_tmp_wordlist[l_w] = l_wordlists

    # Load the wordlist
    m_return = {}
    for k, w_paths in m_tmp_wordlist.iteritems():
        m_return[k] = [WordListLoader.get_wordlist_as_list(w) for w in w_paths]

    return m_return
Example #5
def load_wordlists(wordlists):
    """
    Load the wordlists with the names passed as parameters.

    This function receives a list of wordlist names, defined in the plugin
    configuration file, and returns a dict with wordlist instances.

    :param wordlists: list with wordlist names
    :type wordlists: list

    :returns: A dict with wordlists
    :rtype: dict
    """

    m_tmp_wordlist = {}

    # Get wordlist to load
    for l_w in wordlists:
        for wordlist_family, l_wordlists in Config.plugin_extra_config.iteritems():
            if wordlist_family.lower() in l_w.lower():
                m_tmp_wordlist[l_w] = l_wordlists

    # Load the wordlist
    m_return = {}
    for k, w_paths in m_tmp_wordlist.iteritems():
        m_return[k] = [WordListLoader.get_wordlist(w) for w in w_paths]

    return m_return
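
The family-matching step in load_wordlists() is plain dict iteration plus a substring test, so it can be exercised on its own. The stand-in configuration below is hypothetical; real family names and paths come from the plugin configuration file.

# Hypothetical stand-in for Config.plugin_extra_config:
# wordlist family -> list of wordlist paths.
plugin_extra_config = {
    "middle":     ["wordlist/middle_small.txt"],
    "extensions": ["wordlist/extensions_small.txt"],
}

m_tmp_wordlist = {}
for l_w in ["Wordlist_middle"]:
    for wordlist_family, l_wordlists in plugin_extra_config.items():
        if wordlist_family.lower() in l_w.lower():
            m_tmp_wordlist[l_w] = l_wordlists

assert m_tmp_wordlist == {"Wordlist_middle": ["wordlist/middle_small.txt"]}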
Example #6
def get_fingerprinting_wordlist(wordlist):
    """
    Load the wordlist of fingerprints and prepare the info in a dict.

    It uses as keys the name of the server family and, as values, an
    iterable with the keywords related to that web server.

    :return: The loaded web server keyword info and the related web servers.
    :rtype: tuple(WEBSERVER_KEYWORDS, RELATED_SERVERS) <=> (dict(SERVERNAME: set(str(KEYWORDS))), dict(SERVER_NAME: set(str(RELATED_SERVERS))))
    """

    # Load the wordlist
    m_w = WordListLoader.get_advanced_wordlist_as_dict(wordlist,
                                                       separator=";",
                                                       smart_load=True)

    # Load references.
    #
    #   References in the wordlist are specified by # prefix.
    #
    already_parsed = set()
    related = defaultdict(set)
    m_webservers_keys = extend_items(m_w, already_parsed, related)

    return (m_webservers_keys, related)
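
The call above passes separator=";" and the comment says references carry a "#" prefix, which suggests fingerprint wordlist lines shaped like the illustrative (not actual) fragment below:

apache;Apache
apache;mod_ssl
cherokee;#apache
nginx;nginx

Under that reading, each line maps a server family to a detection keyword, and a "#"-prefixed value names another family whose keywords extend_items() folds in while it fills the related dict.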
Example #7
    def analyze_url(self, info):

        m_parsed_url = info.parsed_url
        m_results = []

        Logger.log_more_verbose("Processing URL: %s" % m_parsed_url)


        #----------------------------------------------------------------------
        # Find suspicious URLs by matching against known substrings.

        # Load wordlists
        m_wordlist_middle     = WordListLoader.get_wordlist(Config.plugin_config['middle'])
        m_wordlist_extensions = WordListLoader.get_wordlist(Config.plugin_config['extensions'])

        # Add keywords that match at any position of the URL.
        m_results.extend([SuspiciousURLPath(info, x)
                          for x in m_wordlist_middle
                          if x in m_parsed_url.directory.split("/") or
                          x == m_parsed_url.filebase or
                          x == m_parsed_url.extension])

        # Add keywords that match the URL file extension.
        m_results.extend([SuspiciousURLPath(info, x)
                          for x in m_wordlist_extensions
                          if m_parsed_url.extension == x])


        #----------------------------------------------------------------------
        # Find suspicious URLs by calculating the Shannon entropy of the hostname.
        # Idea from: https://github.com/stricaud/urlweirdos/blob/master/src/urlw/plugins/shannon/__init__.py
        # TODO: test with unicode enabled hostnames!

        # Check the Shannon entropy for the hostname.
        hostname = info.parsed_url.hostname
        entropy = calculate_shannon_entropy(hostname)
        if entropy > 4.0:
            m_results.append( SuspiciousURLPath(info, hostname) )

        # Check the Shannon entropy for the subdomains.
        for subdomain in info.parsed_url.hostname.split('.'):
            if len(subdomain) > 3:
                entropy = calculate_shannon_entropy(subdomain)
                if entropy > 4.0:
                    m_results.append( SuspiciousURLPath(info, subdomain) )

        return m_results
Example #8
    def __detect_wordpress_installation(self, url, wordpress_urls):
        """
        Try to detect a WordPress installation in the current path.

        :param url: URL where to look for the WordPress installation.
        :type url: str

        :param wordpress_urls: name of the wordlist with WordPress URLs.
        :type wordpress_urls: str

        :return: True if a WordPress installation was found, False otherwise.
        :rtype: bool
        """
        Logger.log_more_verbose(
            "Detecting WordPress installation in URI: '%s'." % url)
        total_urls = 0
        urls_found = 0

        error_page = get_error_page(url).raw_data

        for u in WordListLoader.get_wordlist(wordpress_urls):
            total_urls += 1
            tmp_url = urljoin(url, u)

            r = HTTP.get_url(tmp_url, use_cache=False)
            if r.status == "200":

                # Try to detect non-default error pages
                ratio = get_diff_ratio(r.raw_response, error_page)
                if ratio < 0.35:
                    urls_found += 1

            discard_data(r)

        # If fewer than 85% of the known URLs were found (or none could be
        # tested), run one last test.
        if not total_urls or (urls_found / float(total_urls)) < 0.85:

            # If all else fails, probe the admin page directly.
            url_wp_admin = urljoin(url, "wp-admin/")

            p = None
            try:
                p = HTTP.get_url(url_wp_admin,
                                 use_cache=False,
                                 allow_redirects=False)
                if p:
                    discard_data(p)
            except Exception:
                return False

            return (p is not None and p.status == "302" and
                    "wp-login.php?redirect_to=" in p.headers.get(
                        "Location", ""))

        # Enough known URLs responded: looks like a WordPress installation.
        return True
Example #9
    def __detect_wordpress_installation(self, url, wordpress_urls):
        """
        Try to detect a WordPress installation in the current path.

        :param url: URL where to look for the WordPress installation.
        :type url: str

        :param wordpress_urls: name of the wordlist with WordPress URLs.
        :type wordpress_urls: str

        :return: True if a WordPress installation was found, False otherwise.
        :rtype: bool
        """
        Logger.log_more_verbose("Detecting Wordpress instalation in URI: '%s'." % url)
        total_urls = 0
        urls_found = 0

        error_page = get_error_page(url).raw_data

        for u in WordListLoader.get_wordlist(wordpress_urls):
            total_urls += 1
            tmp_url = urljoin(url, u)

            r = HTTP.get_url(tmp_url, use_cache=False)
            if r.status == "200":

                # Try to detect non-default error pages
                ratio = get_diff_ratio(r.raw_response, error_page)
                if ratio < 0.35:
                    urls_found += 1

            discard_data(r)

        # If fewer than 85% of the known URLs were found (or none could be
        # tested), run one last test.
        if not total_urls or (urls_found / float(total_urls)) < 0.85:

            # If all else fails, probe the admin page directly.
            url_wp_admin = urljoin(url, "wp-admin/")

            p = None
            try:
                p = HTTP.get_url(url_wp_admin, use_cache=False, allow_redirects=False)
                if p:
                    discard_data(p)
            except Exception:
                return False

            return (p is not None and p.status == "302" and
                    "wp-login.php?redirect_to=" in p.headers.get("Location", ""))

        # Enough known URLs responded: looks like a WordPress installation.
        return True
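
get_diff_ratio() is not shown in either variant; from its use it returns a similarity score in [0, 1], so the 0.35 threshold reads as "less than 35% similar to the known error page". A minimal sketch under the assumption that it wraps difflib:

import difflib

def get_diff_ratio(text_a, text_b):
    # Ratio of matching content between two strings:
    # 0.0 (nothing in common) to 1.0 (identical).
    return difflib.SequenceMatcher(None, text_a or "", text_b or "").ratio()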
Example #10
    def test__get_wordlist_descriptor_exits_in_plugin_path(self):
        # Config plugin
        LocalFile._LocalFile__plugin_path = os.path.abspath(W_DIR)

        _create_plugin_info()

        try:
            wordlist_file = WordListLoader._WordListLoader__get_wordlist_descriptor(W_PATH)

            # Check that the descriptor reads back the wordlist file contents
            assert wordlist_file.read() == open(W_PATH, "rU").read()

            # Check that a non-file path raises WordlistNotFound
            pytest.raises(WordlistNotFound, WordListLoader._WordListLoader__get_wordlist_descriptor, "plugin_tmp_dir")
        finally:
            _destroy_plugin_info()
Example #11
    def recv_info(self, info):

        # Make sure it's a CNAME record.
        # This is required because a plugin can't ask for a specific DNS
        # record type - all types are received together.
        if info.type != "CNAME":
            return

        # Get the root domain.
        root = info.target
        Logger.log_verbose(
            "Looking for poisoned domains at: *.%s" % root)

        # Load the malware URLs list.
        wordlist_filename = Config.plugin_args["wordlist"]
        try:
            wordlist = WordListLoader.get_advanced_wordlist_as_list(
                wordlist_filename)
        except WordlistNotFound:
            Logger.log_error_verbose(
                "Wordlist not found: " + wordlist_filename)
            return
        except TypeError:
            Logger.log_error_verbose(
                "Wordlist is not a file: " + wordlist_filename)
            return

        results = []
        root_set = set([root])

        for x in root_set.intersection(set(wordlist)):
            v = DNSPoisoning(x)

            v.add_information(info)
            results.append(v)

        # Log how many results we got.
        if results:
            Logger.log_verbose(
                "Discovered %s poisoned domains." % len(results))
        else:
            Logger.log_verbose("No poisoned domains found.")

        # Return the results.
        return results
Example #12
    def test__get_wordlist_descriptor_exits_abs_path(self):
        # Config plugin
        LocalFile._LocalFile__plugin_path = os.getcwd()

        _create_plugin_info()

        try:
            wordlist_file = WordListLoader._WordListLoader__get_wordlist_descriptor(
                W_PATH)

            # Check that the descriptor reads back the wordlist file contents
            assert wordlist_file.read() == open(W_PATH, "rU").read()

            # Check that a non-file path raises WordlistNotFound
            pytest.raises(
                WordlistNotFound,
                WordListLoader._WordListLoader__get_wordlist_descriptor, W_DIR)
        finally:
            _destroy_plugin_info()
Example #13
    def run(self, info):

        # Make sure it's a CNAME record.
        # This is required because a plugin can't ask for a specific DNS
        # record type - all types are received together.
        if info.type != "CNAME":
            return

        # Get the root domain.
        root = info.target
        Logger.log_verbose("Looking for poisoned domains at: *.%s" % root)

        # Load the malware URLs list.
        wordlist_filename = Config.plugin_args["wordlist"]
        try:
            wordlist = WordListLoader.get_advanced_wordlist_as_list(
                wordlist_filename)
        except WordlistNotFound:
            Logger.log_error_verbose("Wordlist not found: " +
                                     wordlist_filename)
            return
        except TypeError:
            Logger.log_error_verbose("Wordlist is not a file: " +
                                     wordlist_filename)
            return

        results = []
        root_set = set([root])

        for x in root_set.intersection(set(wordlist)):
            results.append(DNSPoisoning(info, x))

        # Log how many results we got.
        if results:
            Logger.log_verbose("Discovered %s poisoned domains." %
                               len(results))
        else:
            Logger.log_verbose("No poisoned domains found.")

        # Return the results.
        return results
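
In both DNS-poisoning variants the intersection reduces to a membership test: root_set holds a single element, so the result is either empty or contains the root domain itself. A standalone illustration with made-up names:

root_set = set(["www.example.com"])
wordlist = ["bad.example.org", "www.example.com", "evil.example.net"]

poisoned = root_set.intersection(set(wordlist))
assert poisoned == set(["www.example.com"])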
Example #14
    def ttl_platform_detection(self, main_url):
        """
        This function tries to recognize the remote platform by sending a ping
        and analyzing the TTL of the IP header in the response.

        :param main_url: Base url to test.
        :type main_url: str

        :return: Possible platforms.
        :rtype: list(tuple(OS, version))
        """

        # Do a ping
        try:
            m_ttl = do_ping_and_receive_ttl(ParsedURL(main_url).hostname, 2)

            # Load words for the wordlist
            l_wordlist_instance = WordListLoader.get_advanced_wordlist_as_dict(
                Config.plugin_extra_config["Wordlist_ttl"]["ttl"])
            # Looking for matches
            l_matches = l_wordlist_instance.matches_by_value(m_ttl)

            if l_matches:
                m_ret = {}
                for v in l_matches:
                    sp = v.split("|")
                    k = sp[0].strip()
                    v = sp[1].strip()
                    m_ret[k] = v

                return [(k, v) for k, v in m_ret.iteritems()]
            else:
                return []
        except EnvironmentError:
            Logger.log_error(
                "[!] You can't run the platform detection plugin if you're not root."
            )
            return []
        except Exception, e:
            Logger.log_error("[!] Platform detection failed, reason: %s" % e)
            return []
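
Each match is split on "|" into an OS name and a version, which implies TTL wordlist values shaped like "Linux Kernel|2.4-2.6" (the entries below are illustrative, not from the real wordlist). The parsing loop runs standalone:

l_matches = ["Linux Kernel|2.4-2.6", "Windows|XP/2003"]

m_ret = {}
for v in l_matches:
    sp = v.split("|")
    m_ret[sp[0].strip()] = sp[1].strip()

assert sorted(m_ret.items()) == [("Linux Kernel", "2.4-2.6"),
                                 ("Windows", "XP/2003")]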
Example #15
def get_list_from_wordlist(wordlist):
    """
    Load the content of the wordlist and return a set with the content.

    :param wordlist: wordlist name.
    :type wordlist: str

    :return: a set with the results.
    :rtype: set
    """

    try:
        m_commom_wordlists = set()

        for v in Config.plugin_extra_config[wordlist].itervalues():
            m_commom_wordlists.update(WordListLoader.get_wordlist_as_list(v))

        return m_commom_wordlists
    except KeyError, e:
        Logger.log_error_more_verbose(str(e))
        return set()
Example #16
def get_list_from_wordlist(wordlist):
    """
    Load the content of the wordlist and return a set with the content.

    :param wordlist: wordlist name.
    :type wordlist: str

    :return: a set with the results.
    :rtype: set
    """

    try:
        m_commom_wordlists = set()

        for v in Config.plugin_extra_config[wordlist].itervalues():
            m_commom_wordlists.update(WordListLoader.get_advanced_wordlist_as_list(v))

        return m_commom_wordlists
    except KeyError, e:
        Logger.log_error_more_verbose(str(e))
        return set()
Example #17
    def recv_info(self, info):

        # Make sure it's really a CNAME record.
        # This check should never fail anyway!
        if info.type != "CNAME":
            Logger.log_error_verbose("No CNAME found, skipped.")
            return

        # Get the root domain.
        root = info.target
        Logger.log_verbose("Looking for poisoned domains at: *.%s" % root)

        # Load the malware URLs list.
        wordlist_filename = Config.plugin_args["wordlist"]
        try:
            wordlist = WordListLoader.get_advanced_wordlist_as_list(wordlist_filename)
        except WordlistNotFound:
            Logger.log_error_verbose("Wordlist not found: " + wordlist_filename)
            return
        except TypeError:
            Logger.log_error_verbose("Wordlist is not a file: " + wordlist_filename)
            return

        results = []
        root_set = set([root])

        for x in root_set.intersection(set(wordlist)):
            v = DNSPoisoning(x)

            v.add_information(info)
            results.append(v)

        # Log how many results we got.
        if results:
            Logger.log_verbose("Discovered %s poisoned domains." % len(results))
        else:
            Logger.log_verbose("No poisoned domains found.")

        # Return the results.
        return results
Example #18
    def ttl_platform_detection(self, main_url):
        """
        This function tries to recognize the remote platform by sending a ping
        and analyzing the TTL of the IP header in the response.

        :param main_url: Base url to test.
        :type main_url: str

        :return: Possible platforms.
        :rtype: list(tuple(OS, version))
        """

        # Do a ping
        try:
            m_ttl               = do_ping_and_receive_ttl(ParsedURL(main_url).hostname, 2)

            # Load words for the wordlist
            l_wordlist_instance = WordListLoader.get_advanced_wordlist_as_dict(Config.plugin_extra_config["Wordlist_ttl"]["ttl"])
            # Looking for matches
            l_matches           = l_wordlist_instance.matches_by_value(m_ttl)

            if l_matches:
                m_ret = {}
                for v in l_matches:
                    sp = v.split("|")
                    k = sp[0].strip()
                    v = sp[1].strip()
                    m_ret[k] = v

                return [(k, v) for k, v in m_ret.iteritems()]
            else:
                return []
        except EnvironmentError:
            Logger.log_error("[!] You can't run the platform detection plugin if you're not root.")
            return []
        except Exception, e:
            Logger.log_error("[!] Platform detection failed, reason: %s" % e)
            return []
Example #19
def get_fingerprinting_wordlist(wordlist):
    """
    Load the wordlist of fingerprints and prepare the info in a dict.

    It uses as keys the name of the server family and, as values, an
    iterable with the keywords related to that web server.

    :return: The loaded web server keyword info and the related web servers.
    :rtype: tuple(WEBSERVER_KEYWORDS, RELATED_SERVERS) <=> (dict(SERVERNAME: set(str(KEYWORDS))), dict(SERVER_NAME: set(str(RELATED_SERVERS))))
    """

    # Load the wordlist
    m_w = WordListLoader.get_advanced_wordlist_as_dict(wordlist, separator=";", smart_load=True)

    # Load references.
    #
    #   References in the wordlist are specified by # prefix.
    #
    already_parsed    = set()
    related           = defaultdict(set)
    m_webservers_keys = extend_items(m_w, already_parsed, related)

    return (m_webservers_keys, related)
Example #20
    def recv_info(self, info):

        m_domain = info.root

        # Skips localhost
        if m_domain == "localhost":
            return

        m_return = set()

        # Checks if the hostname has been already processed
        if not self.state.check(m_domain):

            #
            # Look for subdomains using a bruteforce wordlist.
            #
            m_subdomains = WordListLoader.get_advanced_wordlist_as_list("subs_small.txt")

            # Run in parallel
            self.base_domain = m_domain
            self.completed = Counter(0)
            self.total = len(m_subdomains)
            r = pmap(self.get_subdomains_bruteforcer, m_subdomains, pool_size=10)

            #
            # Remove repeated
            #

            # The results
            m_domains                  = set()
            m_domains_add              = m_domains.add
            m_domains_already          = []
            m_domains_already_append   = m_domains_already.append

            m_ips                      = set()
            m_ips_add                  = m_ips.add
            m_ips_already              = []
            m_ips_already_append       = m_ips_already.append

            if r:
                for doms in r:
                    for dom in doms:
                        # Domains
                        if dom.type == "CNAME":
                            if not dom.target in m_domains_already:
                                m_domains_already_append(dom.target)
                                if dom.target in Config.audit_scope:
                                    m_domains_add(dom)
                                else:
                                    discard_data(dom)

                        # IPs
                        if dom.type == "A":
                            if dom.address not in m_ips_already:
                                m_ips_already_append(dom.address)
                                m_ips_add(dom)

                # Unify
                m_domains.update(m_ips)

                m_return = m_domains


                # Add the information to the host
                map(info.add_information, m_return)

            # Set the domain as processed
            self.state.set(m_domain, True)

            Logger.log_verbose("DNS analyzer plugin found %d subdomains" % len(m_return))


            # Write the info in a more user-friendly format.
            if Logger.MORE_VERBOSE:
                m_tmp        = []
                m_tmp_append = m_tmp.append
                for x in m_return:
                    if getattr(x, "address", False):
                        m_tmp_append("%s (%s)" % (getattr(x, "address"), str(x)))
                    elif getattr(x, "target", False):
                        m_tmp_append("%s (%s)" % (getattr(x, "target"), str(x)))
                    else:
                        m_tmp_append(str(x))

                Logger.log_more_verbose("Subdomains found: \n\t+ %s" % "\n\t+ ".join(m_tmp))

        return m_return
Example #21
    def analyze_html(self, info):


        #----------------------------------------------------------------------
        # Get malware suspicious links.

        Logger.log_more_verbose("Processing HTML: %s" % info.identity)

        # Load the malware wordlist.
        wordlist_filename = Config.plugin_config["malware_sites"]
        try:
            wordlist = WordListLoader.get_advanced_wordlist_as_list(
                wordlist_filename)
        except WordlistNotFound:
            Logger.log_error("Wordlist '%s' not found.." % wordlist_filename)
            return
        except TypeError:
            Logger.log_error(
                "Wordlist '%s' is not a file." % wordlist_filename)
            return
        if not wordlist:
            Logger.log_error("Wordlist '%s' is empty." % wordlist_filename)
            return

        Logger.log("1")

        # Get links
        base_urls = set()
        m_links = set()
        for url in info.find_linked_data(Data.TYPE_RESOURCE,
                                         Resource.RESOURCE_URL):
            m_url = url.url
            base_urls.add(m_url)
            if info.information_type == Information.INFORMATION_HTML:
                m_links.update(extract_from_html(info.raw_data, m_url))
                m_links.update(extract_from_text(info.raw_data, m_url))
            elif info.information_type == Information.INFORMATION_PLAIN_TEXT:
                m_links.update(extract_from_text(info.raw_data, m_url))
            else:
                raise Exception("Internal error!")
        m_links.difference_update(base_urls)

        Logger.log("2")

        # If we have no links, abort now
        if not m_links:
            Logger.log_verbose("No output links found.")
            return

        # Do not follow URLs that contain certain keywords
        m_forbidden = WordListLoader.get_wordlist(
            Config.plugin_config["wordlist_no_spider"])
        m_urls_allowed = {
            url for url in m_links
            if url and not any(x in url for x in m_forbidden)
        }

        Logger.log("3")

        # Get only output links
        m_output_links = []
        for url in m_urls_allowed:
            try:
                if url not in Config.audit_scope:
                    m_output_links.append(url)
            except Exception, e:
                Logger.log_error_more_verbose(format_exc())
Example #22
    def check_download(self, url, name, content_length, content_type):

        # Only accept content when the content type header is present.
        if not content_type:
            Logger.log_more_verbose(
                "Skipping URL, missing content type: %s" % url)
            return False

        # Is the content length present?
        if content_length is not None:

            # Check the file doesn't have 0 bytes.
            if content_length <= 0:
                Logger.log_more_verbose(
                    "Skipping URL, empty content: %s" % url)
                return False

            # Check the file is not too big.
            if content_type.strip().lower().startswith("text/"):
                if content_length > 100000:
                    Logger.log_more_verbose(
                        "Skipping URL, content too large (%d bytes): %s"
                        % (content_length, url))
                    return False
            else:
                if content_length > 5000000:
                    Logger.log_more_verbose(
                        "Skipping URL, content too large (%d bytes): %s"
                        % (content_length, url))
                    return False

            # Approved!
            return True

        # Content length absent; if the URL has no filename, it likely
        # points to a directory index.
        parsed_url = parse_url(url)
        if not parsed_url.filename:

            # Approved!
            return True

        # Extension absent.
        if not parsed_url.extension:

            # Approved!
            return True

        # Match against a known list of valid HTML extensions.
        # See: http://en.wikipedia.org/wiki/List_of_file_formats#Webpage
        if parsed_url.extension in (
                ".xml", ".html", ".htm", ".xhtml", ".xht",
                ".mht", ".mhtml", ".maff", ".asp", ".aspx", ".bml",
                ".cfm", ".cgi", ".ihtml", ".jsp", ".las", ".lasso",
                ".lassoapp", ".pl", ".php", ".php3", ".phtml",
                ".rna", ".r", ".rnx", ".shtml", ".stm", ".atom",
                ".eml", ".jsonld", ".metalink", ".met",
                ".rss", ".markdown"):

            # Approved!
            return True

        # Is the URL path in the blacklist?
        m_forbidden = WordListLoader.get_wordlist(Config.plugin_config["wordlist_no_spider"])
        if any(x in url for x in m_forbidden):
            return False

        # Success!
        return True
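
check_download() is used as the filtering callback for download() in the spider snippets; the size gate alone can be illustrated standalone (the thresholds are taken from the code above):

def size_ok(content_type, content_length):
    # Text responses are capped at 100 KB, everything else at 5 MB.
    limit = 100000 if content_type.strip().lower().startswith("text/") else 5000000
    return 0 < content_length <= limit

assert size_ok("text/html", 4096)
assert not size_ok("text/html", 200000)
assert size_ok("application/pdf", 1000000)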
Example #23
class PredictablesDisclosureBruteforcer(TestingPlugin):

    #--------------------------------------------------------------------------
    def get_accepted_types(self):
        return [FolderURL]

    #--------------------------------------------------------------------------
    def run(self, info):

        m_url = info.url

        Logger.log_more_verbose("Start to process URL: %r" % m_url)

        # Server specified by param?
        webserver_finger = Config.plugin_args.get("server_banner", None)
        if webserver_finger:
            server_canonical_name = webserver_finger
            servers_related = []  # Set with related web servers
        else:
            # Use the webserver fingerprint info
            webserver_finger = info.get_associated_informations_by_category(
                WebServerFingerprint.information_type)
            if webserver_finger:
                webserver_finger = webserver_finger.pop()

                server_canonical_name = webserver_finger.canonical_name
                servers_related = webserver_finger.related  # Set with related web servers

        wordlist = set()

        # Common wordlists
        try:
            w = Config.plugin_extra_config["common"]
            wordlist.update([l_w for l_w in w.itervalues()])
        except KeyError:
            Logger.log_error("Can't load common wordlists")

        # There is fingerprinting information?
        if webserver_finger:

            #
            # Load wordlists
            #
            wordlist_update = wordlist.update

            # Wordlist of server name
            try:
                w = Config.plugin_extra_config["%s_predictables" %
                                               server_canonical_name]
                wordlist_update([l_w for l_w in w.itervalues()])
            except KeyError:
                Logger.log_error(
                    "Can't load predictables wordlists for server: '%s'." %
                    server_canonical_name)

            # Wordlist of related with the server found
            try:
                for l_servers_related in servers_related:
                    w = Config.plugin_extra_config["%s_predictables" %
                                                   l_servers_related]
                    wordlist_update([l_w for l_w in w.itervalues()])
            except KeyError, e:
                Logger.log_error(
                    "Can't load predictables wordlists for related webserver: '%s'"
                    % e)

        # Load content of wordlists
        urls = set()
        m_urls_update = urls.add

        for l_w in wordlist:
            # Use a copy of the wordlist to avoid modifying the original source
            l_loaded_wordlist = WordListLoader.get_wordlist_as_list(l_w)

            for l_wo in l_loaded_wordlist:
                try:
                    l_wo = l_wo[1:] if l_wo.startswith("/") else l_wo
                    tmp_u = urljoin(m_url, l_wo)
                except ValueError:
                    Logger.log_error(
                        "Failed to parse wordlist entry: '%s'" % l_wo)
                    continue

                m_urls_update(tmp_u)
Example #24
def http_analyzers(main_url, update_status_func, number_of_entries=4):
    """
    Analyze the HTTP headers to detect the web server. Returns a list with the most likely web servers.

    :param main_url: Base url to test.
    :type main_url: str

    :param update_status_func: function used to update the status of the process
    :type update_status_func: function

    :param number_of_entries: number of results to return for the most probable web servers detected.
    :type number_of_entries: int

    :return: Web server family, web server version, web server complete description, related web servers (as a dict('SERVER_RELATED' : set(RELATED_NAMES))), and other web servers with their probabilities as a dict(CONCRETE_WEB_SERVER, PROBABILITY)
    """

    # Load the wordlists directly related to HTTP header fields.
    # { HTTP_HEADER_FIELD : [wordlists] }
    m_wordlists_HTTP_fields = {
        "Accept-Ranges"              : "accept-range",
        "Server"                     : "banner",
        "Cache-Control"              : "cache-control",
        "Connection"                 : "connection",
        "Content-Type"               : "content-type",
        "WWW-Authenticate"           : "htaccess-realm",
        "Pragma"                     : "pragma",
        "X-Powered-By"               : "x-powered-by"
    }

    m_actions = {
        'GET'        : { 'wordlist' : 'Wordlist_get'            , 'weight' : 1 , 'protocol' : 'HTTP/1.1', 'method' : 'GET'      , 'payload': '/' },
        'LONG_GET'   : { 'wordlist' : 'Wordlist_get_long'       , 'weight' : 1 , 'protocol' : 'HTTP/1.1', 'method' : 'GET'      , 'payload': '/%s' % ('a' * 200) },
        'NOT_FOUND'  : { 'wordlist' : 'Wordlist_get_notfound'   , 'weight' : 2 , 'protocol' : 'HTTP/1.1', 'method' : 'GET'      , 'payload': '/404_NOFOUND__X02KAS' },
        'HEAD'       : { 'wordlist' : 'Wordlist_head'           , 'weight' : 3 , 'protocol' : 'HTTP/1.1', 'method' : 'HEAD'     , 'payload': '/' },
        'OPTIONS'    : { 'wordlist' : 'Wordlist_options'        , 'weight' : 2 , 'protocol' : 'HTTP/1.1', 'method' : 'OPTIONS'  , 'payload': '/' },
        'DELETE'     : { 'wordlist' : 'Wordlist_delete'         , 'weight' : 5 , 'protocol' : 'HTTP/1.1', 'method' : 'DELETE'   , 'payload': '/' },
        'TEST'       : { 'wordlist' : 'Wordlist_attack'         , 'weight' : 5 , 'protocol' : 'HTTP/1.1', 'method' : 'TEST'     , 'payload': '/' },
        'INVALID'    : { 'wordlist' : 'Wordlist_wrong_method'   , 'weight' : 5 , 'protocol' : 'HTTP/9.8', 'method' : 'GET'      , 'payload': '/' },
        'ATTACK'     : { 'wordlist' : 'Wordlist_wrong_version'  , 'weight' : 2 , 'protocol' : 'HTTP/1.1', 'method' : 'GET'      , 'payload': "/etc/passwd?format=%%%%&xss=\x22><script>alert('xss');</script>&traversal=../../&sql='%20OR%201;"}
    }


    # Parsed URL data used by the probes below.
    m_d                   = ParsedURL(main_url)
    m_hostname            = m_d.hostname
    m_port                = m_d.port
    m_debug               = False # Only for development

    # Counter of banners. Used when other methods fail.
    m_banners_counter     = Counter()

    # Score counter
    m_counters = HTTPAnalyzer(debug=m_debug)

    # Var used to update the status
    m_data_len = len(m_actions)
    i          = 1 # element in process


    for l_action, v in m_actions.iteritems():
        if m_debug:
            print "###########"
        l_method      = v["method"]
        l_payload     = v["payload"]
        l_proto       = v["protocol"]
        l_wordlist    = v["wordlist"]

        # Each type of probe has a different weight.
        #
        # Weights go from 0 - 5
        #
        l_weight      = v["weight"]

        # Make the URL
        l_url         = urljoin(main_url, l_payload)

        # Make the raw request
        #l_raw_request = "%(method)s %(payload)s %(protocol)s\r\nHost: %(host)s:%(port)s\r\nConnection: Close\r\n\r\n" % (
        l_raw_request = "%(method)s %(payload)s %(protocol)s\r\nHost: %(host)s\r\n\r\n" % (
            {
                "method"     : l_method,
                "payload"    : l_payload,
                "protocol"   : l_proto,
                "host"       : m_hostname,
                "port"       : m_port
            }
        )
        if m_debug:
            print "REQUEST"
            print l_raw_request

        # Do the connection
        l_response = None
        try:
            m_raw_request = HTTP_Raw_Request(l_raw_request)
            discard_data(m_raw_request)
            l_response = HTTP.make_raw_request(
                host        = m_hostname,
                port        = m_port,
                raw_request = m_raw_request,
                callback    = check_raw_response)
            if l_response:
                discard_data(l_response)
        except NetworkException, e:
            Logger.log_error_more_verbose("Server-Fingerprint plugin: No response for URL (%s) '%s'. Message: %s" % (l_method, l_url, str(e)))
            continue

        if not l_response:
            Logger.log_error_more_verbose("No response for URL '%s'." % l_url)
            continue

        if m_debug:
            print "RESPONSE"
            print l_response.raw_headers


        # Update the status
        update_status_func((float(i) * 100.0) / float(m_data_len))
        Logger.log_more_verbose("Making '%s' test." % (l_wordlist))
        i += 1

        # Analyze for each wordlist
        #
        # Store the server banner
        try:
            m_banners_counter[l_response.headers["Server"]] += l_weight
        except KeyError:
            pass

        #
        # =====================
        # HTTP directly related
        # =====================
        #
        #
        for l_http_header_name, l_header_wordlist in m_wordlists_HTTP_fields.iteritems():

            # Check if HTTP header field is in response
            if l_http_header_name not in l_response.headers:
                continue

            l_curr_header_value = l_response.headers[l_http_header_name]

            # Generate concrete wordlist name
            l_wordlist_path     = Config.plugin_extra_config[l_wordlist][l_header_wordlist]

            # Load words for the wordlist
            l_wordlist_instance = WordListLoader.get_advanced_wordlist_as_dict(l_wordlist_path)
            # Looking for matches
            l_matches           = l_wordlist_instance.matches_by_value(l_curr_header_value)

            m_counters.inc(l_matches, l_action, l_weight, l_http_header_name, message="HTTP field: " + l_curr_header_value)

        #
        # =======================
        # HTTP INdirectly related
        # =======================
        #
        #

        #
        # Status code
        # ===========
        #
        l_wordlist_instance = WordListLoader.get_advanced_wordlist_as_dict(Config.plugin_extra_config[l_wordlist]["statuscode"])
        # Looking for matches
        l_matches           = l_wordlist_instance.matches_by_value(l_response.status)

        m_counters.inc(l_matches, l_action, l_weight, "statuscode", message="Status code: " + l_response.status)


        #
        # Status text
        # ===========
        #
        l_wordlist_instance = WordListLoader.get_advanced_wordlist_as_dict(Config.plugin_extra_config[l_wordlist]["statustext"])
        # Looking for matches
        l_matches           = l_wordlist_instance.matches_by_value(l_response.reason)

        m_counters.inc(l_matches, l_action, l_weight, "statustext", message="Status text: " + l_response.reason)


        #
        # Header space
        # ============
        #
        # Count the number of spaces between the HTTP field name and its value, for example:
        # -> Server: Apache 1
        # The number of spaces is: 1
        #
        # -> Server:Apache 1
        # The number of spaces is: 0
        #
        l_wordlist_instance = WordListLoader.get_advanced_wordlist_as_dict(Config.plugin_extra_config[l_wordlist]["header-space"])
        # Looking for matches
        try:
            l_http_value        = l_response.headers[0] # get the value of first HTTP field
            l_spaces_num        = str(abs(len(l_http_value) - len(l_http_value.lstrip())))
            l_matches           = l_wordlist_instance.matches_by_value(l_spaces_num)

            m_counters.inc(l_matches, l_action, l_weight, "header-space", message="Header space: " + l_spaces_num)

        except IndexError:
            Logger.log_error_more_verbose("Index error while checking the header space.")


        #
        # Header capitalafterdash
        # =======================
        #
        # Look for a non-capitalized first letter in the field name, for example:
        # -> Content-type: ....
        # Instead of:
        # -> Content-Type: ....
        #
        l_wordlist_instance = WordListLoader.get_advanced_wordlist_as_dict(Config.plugin_extra_config[l_wordlist]["header-capitalafterdash"])
        # Looking for matches
        l_valid_fields     = [x for x in l_response.headers.iterkeys() if "-" in x]

        if l_valid_fields:

            l_h = l_valid_fields[0]

            l_value = l_h.split("-")[1] # Get the second value: Content-type => type
            l_dush  = None

            if l_value[0].isupper(): # Check whether the first letter is uppercase
                l_dush = 1
            else:
                l_dush = 0

            l_matches           = l_wordlist_instance.matches_by_value(l_dush)
            m_counters.inc(l_matches, l_action, l_weight, "header-capitalizedafterdush", message="Capital after dash: %s" % str(l_dush))

        #
        # Header order
        # ============
        #
        l_header_order  = ','.join(l_response.headers.iterkeys())

        l_wordlist_instance = WordListLoader.get_advanced_wordlist_as_dict(Config.plugin_extra_config[l_wordlist]["header-order"])
        l_matches           = l_wordlist_instance.matches_by_value(l_header_order)

        m_counters.inc(l_matches, l_action, l_weight, "header-order", message="Header order: " + l_header_order)


        #
        # Protocol name
        # ============
        #
        # For a response like:
        # -> HTTP/1.0 200 OK
        #    ....
        #
        # Get the 'HTTP' value.
        #
        try:
            l_proto             = l_response.protocol # Get the 'HTTP' text from response, if available
            if l_proto:
                l_wordlist_instance = WordListLoader.get_advanced_wordlist_as_dict(Config.plugin_extra_config[l_wordlist]["protocol-name"])
                l_matches           = l_wordlist_instance.matches_by_value(l_proto)

                m_counters.inc(l_matches, l_action, l_weight, "proto-name", message="Proto name: " + l_proto)

        except IndexError:
            Logger.log_error_more_verbose("Index error while checking the protocol name.")


        #
        # Protocol version
        # ================
        #
        # For a response like:
        # -> HTTP/1.0 200 OK
        #    ....
        #
        # Get the '1.0' value.
        #
        try:
            l_version           = l_response.version # Get the '1.0' text from response, if available
            if l_version:
                l_wordlist_instance = WordListLoader.get_advanced_wordlist_as_dict(Config.plugin_extra_config[l_wordlist]["protocol-version"])
                l_matches           = l_wordlist_instance.matches_by_value(l_version)

                m_counters.inc(l_matches, l_action, l_weight, "proto-version", message="Proto version: " + l_version)

        except IndexError:
            Logger.log_error_more_verbose("Index error while checking the protocol version.")



        if "ETag" in l_response.headers:
            l_etag_header       = l_response.headers["ETag"]
            #
            # ETag length
            # ================
            #
            l_etag_len          = len(l_etag_header)
            l_wordlist_instance = WordListLoader.get_advanced_wordlist_as_dict(Config.plugin_extra_config[l_wordlist]["etag-legth"])
            l_matches           = l_wordlist_instance.matches_by_value(l_etag_len)

            m_counters.inc(l_matches, l_action, l_weight, "etag-length", message="ETag length: " + str(l_etag_len))


            #
            # ETag Quotes
            # ================
            #
            l_etag_striped          = l_etag_header.strip()
            if l_etag_striped.startswith("\"") or l_etag_striped.startswith("'"):
                l_wordlist_instance = WordListLoader.get_advanced_wordlist_as_dict(Config.plugin_extra_config[l_wordlist]["etag-quotes"])
                l_matches           = l_wordlist_instance.matches_by_value(l_etag_striped[0])

                m_counters.inc(l_matches, l_action, l_weight, "etag-quotes", message="Etag quotes: " + l_etag_striped[0])

        if "Vary" in l_response.headers:
            l_vary_header       = l_response.headers["Vary"]
            #
            # Vary delimiter
            # ================
            #
            # Checks if Vary header delimiter is something like this:
            # -> Vary: Accept-Encoding,User-Agent
            # Or this:
            # -> Vary: Accept-Encoding, User-Agent
            #
            l_var_delimiter     = ", " if ", " in l_vary_header else ","
            l_wordlist_instance = WordListLoader.get_advanced_wordlist_as_dict(Config.plugin_extra_config[l_wordlist]["vary-delimiter"])
            l_matches           = l_wordlist_instance.matches_by_value(l_var_delimiter)

            m_counters.inc(l_matches, l_action, l_weight, "vary-delimiter", message="Vary delimiter: " + l_var_delimiter)

            #
            # Vary capitalizer
            # ================
            #
            # Checks if Vary header delimiter is something like this:
            # -> Vary: Accept-Encoding,user-Agent
            # Or this:
            # -> Vary: accept-encoding,user-agent
            #
            l_vary_capitalizer  = str(0 if l_vary_header == l_vary_header.lower() else 1)
            l_wordlist_instance = WordListLoader.get_advanced_wordlist_as_dict(Config.plugin_extra_config[l_wordlist]["vary-capitalize"])
            l_matches           = l_wordlist_instance.matches_by_value(l_vary_capitalizer)

            m_counters.inc(l_matches, l_action, l_weight, "vary-capitalize", message="Vary capitalizer: " + l_vary_capitalizer)


            #
            # Vary order
            # ================
            #
            # Checks order between vary values:
            # -> Vary: Accept-Encoding,user-Agent
            # Or this:
            # -> Vary: User-Agent,Accept-Encoding
            #
            l_wordlist_instance = WordListLoader.get_advanced_wordlist_as_dict(Config.plugin_extra_config[l_wordlist]["vary-order"])
            l_matches           = l_wordlist_instance.matches_by_value(l_vary_header)

            m_counters.inc(l_matches, l_action, l_weight, "vary-order", message="Vary order: " + l_vary_header)


        #
        # =====================
        # HTTP specific options
        # =====================
        #
        #
        if l_action == "HEAD":
            #
            # HEAD Options
            # ============
            #
            l_option            = l_response.headers.get("Allow")
            if l_option:
                l_wordlist_instance = WordListLoader.get_advanced_wordlist_as_dict(Config.plugin_extra_config[l_wordlist]["options-public"])
                # Looking for matches
                l_matches           = l_wordlist_instance.matches_by_value(l_option)

                m_counters.inc(l_matches, l_action, l_weight, "options-allow", message="HEAD option: " + l_option)


        if l_action == "OPTIONS" or l_action == "INVALID" or l_action == "DELETE":
            if "Allow" in l_response.headers:
                #
                # Options allow
                # =============
                #
                l_option            = l_response.headers.get("Allow")
                if l_option:
                    l_wordlist_instance = WordListLoader.get_advanced_wordlist_as_dict(Config.plugin_extra_config[l_wordlist]["options-public"])
                    # Looking for matches
                    l_matches           = l_wordlist_instance.matches_by_value(l_option)

                    m_counters.inc(l_matches, l_action, l_weight, "options-allow", message="OPTIONS allow: "  + l_action + " # " + l_option)


                #
                # Allow delimiter
                # ===============
                #
                l_option            = l_response.headers.get("Allow")
                if l_option:
                    l_var_delimiter     = ", " if ", " in l_option else ","
                    l_wordlist_instance = WordListLoader.get_advanced_wordlist_as_dict(Config.plugin_extra_config[l_wordlist]["options-delimited"])
                    # Looking for matches
                    l_matches           = l_wordlist_instance.matches_by_value(l_var_delimiter)

                    m_counters.inc(l_matches, l_action, l_weight, "options-delimiter", message="OPTION allow delimiter " + l_action + " # " + l_option)


            if "Public" in l_response.headers:
                #
                # Public response
                # ===============
                #
                l_option            = l_response.headers.get("Public")
                if l_option:
                    l_wordlist_instance = WordListLoader.get_advanced_wordlist_as_dict(Config.plugin_extra_config[l_wordlist]["options-public"])
                    # Looking for matches
                    l_matches           = l_wordlist_instance.matches_by_value(l_option)

                    m_counters.inc(l_matches, l_action, l_weight, "options-public", message="Public response: " + l_action + " # " + l_option)
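
Several probes above reduce a header quirk to a short string key before matching it against a wordlist. The two least obvious reductions, header-space and capital-after-dash, can be reproduced standalone (the header values are illustrative):

# Header space: count the spaces between the field's colon and its value.
l_http_value = " Apache/2.2.3"          # value of "Server: Apache/2.2.3"
l_spaces_num = str(len(l_http_value) - len(l_http_value.lstrip()))
assert l_spaces_num == "1"

# Capital after dash: 1 if the letter after "-" is uppercase, else 0.
l_h = "Content-Type"
l_dush = 1 if l_h.split("-")[1][0].isupper() else 0
assert l_dush == 1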
Example #25
class Spider(TestingPlugin):
    """
    This plugin is a web spider.
    """

    #----------------------------------------------------------------------
    def get_accepted_info(self):
        return [Url]

    #----------------------------------------------------------------------
    def recv_info(self, info):

        m_return = []

        m_url = info.url

        Logger.log_verbose("Spidering URL: %r" % m_url)

        # Check if we need to follow the first redirect
        p = None
        try:
            allow_redirects = Config.audit_config.follow_redirects or \
                             (info.depth == 0 and Config.audit_config.follow_first_redirect)
            p = download(m_url,
                         self.check_download,
                         allow_redirects=allow_redirects)
        except NetworkException, e:
            Logger.log_more_verbose("Error while processing %r: %s" %
                                    (m_url, str(e)))
        if not p:
            return m_return

        # Send back the data
        m_return.append(p)

        # TODO: If it's a 301 response, get the Location header

        # Get links
        if p.information_type == Information.INFORMATION_HTML:
            m_links = extract_from_html(p.raw_data, m_url)
        else:
            m_links = extract_from_text(p.raw_data, m_url)
        # The spidered URL itself is not a new link.
        m_links.discard(m_url)

        # Do not follow URLs that contain certain keywords
        m_forbidden = WordListLoader.get_wordlist(
            Config.plugin_config["wordlist_no_spider"])
        m_urls_allowed = [
            url for url in m_links if not any(x in url for x in m_forbidden)
        ]
        m_urls_not_allowed = m_links.difference(m_urls_allowed)
        if m_urls_not_allowed:
            Logger.log_more_verbose("Skipped forbidden URLs:\n    %s" %
                                    "\n    ".join(sorted(m_urls_not_allowed)))

        # Do not follow URLs out of scope
        m_urls_in_scope = []
        m_broken = []
        for url in m_urls_allowed:
            try:
                if url in Config.audit_scope:
                    m_urls_in_scope.append(url)
            except Exception:
                m_broken.append(url)
        if m_broken:
            if len(m_broken) == 1:
                Logger.log_more_verbose("Skipped uncrawlable URL: %s" %
                                        m_broken[0])
            else:
                Logger.log_more_verbose("Skipped uncrawlable URLs:\n    %s" %
                                        "\n    ".join(sorted(m_broken)))
        m_out_of_scope_count = len(m_urls_allowed) - len(
            m_urls_in_scope) - len(m_broken)
        if m_out_of_scope_count:
            Logger.log_more_verbose("Skipped %d links out of scope." %
                                    m_out_of_scope_count)

        if m_urls_in_scope:
            Logger.log_verbose("Found %d links in URL: %s" %
                               (len(m_urls_in_scope), m_url))
        else:
            Logger.log_verbose("No links found in URL: %s" % m_url)

        # Convert to Url data type
        for u in m_urls_in_scope:
            try:
                p = parse_url(u)
                if p.scheme == "mailto":
                    m_resource = Email(p.netloc)
                elif p.scheme in ("http", "https"):
                    m_resource = Url(url=u, referer=m_url)
                else:
                    continue
            except Exception:
                warn(format_exc(), RuntimeWarning)
                continue
            m_resource.add_resource(info)
            m_return.append(m_resource)

        # Send the results
        return m_return
Example #26
    def recv_info(self, info):

        # Get the root domain only.
        root = info.root

        # Skip localhost.
        if root == "localhost":
            return

        # Skip root domains we've already processed.
        if self.state.put(root, True):
            return


        # Load the subdomains wordlist.
        try:
            wordlist = WordListLoader.get_advanced_wordlist_as_list(Config.plugin_args["wordlist"])
        except WordlistNotFound:
            Logger.log_error_verbose("Wordlist '%s' not found.." % Config.plugin_args["wordlist"])
            return
        except TypeError:
            Logger.log_error_verbose("Wordlist '%s' is not a file." % Config.plugin_args["wordlist"])
            return

        # Configure the progress notifier.
        self.progress.set_total(len(wordlist))
        self.progress.min_delta = 1  # notify every 1%

        # For each subdomain in the wordlist...
        found = 0
        results = []
        visited = set()
        for prefix in wordlist:

            # Mark as completed before actually trying.
            # We can't put this at the end of the loop where it belongs,
            # because the "continue" statements would skip over this too.
            self.progress.add_completed()

            # Build the domain name.
            name = ".".join((prefix, root))

            # Skip if out of scope.
            if name not in Config.audit_scope:
                continue

            # Resolve the subdomain.
            records = DNS.get_a(name, also_CNAME=True)
            records.extend( DNS.get_aaaa(name, also_CNAME=True) )

            # If no DNS records were found, skip.
            if not records:
                continue

            # We found a subdomain!
            found += 1
            Logger.log_more_verbose(
                "Subdomain found: %s" % name)

            # Create the Domain object for the subdomain.
            domain = Domain(name)
            results.append(domain)

            # For each DNS record, grab the address or name.
            # Skip duplicated records.
            for rec in records:
                if rec.type == "CNAME":
                    location = rec.target
                elif rec.type in ("A", "AAAA"):
                    location = rec.address
                else: # should not happen...
                    results.append(rec)
                    domain.add_information(rec)
                    continue
                if location not in visited:
                    visited.add(location)
                    results.append(rec)
                    domain.add_information(rec)

        # Log the results.
        if found:
            Logger.log(
                "Found %d subdomains for root domain: %s"
                % (found, root))
        else:
            Logger.log_verbose(
                "No subdomains found for root domain: %s" % root)

        # Return the results.
        return results
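
The resolve-and-deduplicate pattern above can be sketched with the standard library alone; socket.gethostbyname stands in for the framework's DNS helpers, and the prefixes are a hypothetical sample:

import socket

def bruteforce_subdomains(root, prefixes):
    # Try each prefix and keep only the names that actually resolve.
    found = {}
    for prefix in prefixes:
        name = "%s.%s" % (prefix, root)
        try:
            found[name] = socket.gethostbyname(name)
        except socket.error:
            continue  # No DNS record for this name: skip it.
    return found

print(bruteforce_subdomains("example.com", ["www", "mail", "doesnotexist123"]))
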
Ejemplo n.º 28
0
def http_analyzers(main_url, update_status_func, number_of_entries=4):
    """
    Analyze HTTP headers for detect the web server. Return a list with most possible web servers.

    :param main_url: Base url to test.
    :type main_url: str

    :param update_status_func: function used to update the status of the process
    :type update_status_func: function

    :param number_of_entries: number of resutls tu return for most probable web servers detected.
    :type number_of_entries: int

    :return: Web server family, Web server version, Web server complete description, related web servers (as a dict('SERVER_RELATED' : set(RELATED_NAMES))), others web server with their probabilities as a dict(CONCRETE_WEB_SERVER, PROBABILITY)
    """

    # Load wordlists directly related to HTTP header fields.
    # { HTTP_HEADER_FIELD : [wordlists] }
    m_wordlists_HTTP_fields = {
        "Accept-Ranges"              : "accept-range",
        "Server"                     : "banner",
        "Cache-Control"              : "cache-control",
        "Connection"                 : "connection",
        "Content-Type"               : "content-type",
        "WWW-Authenticate"           : "htaccess-realm",
        "Pragma"                     : "pragma",
        "X-Powered-By"               : "x-powered-by"
    }

    m_actions = {
        'GET'        : { 'wordlist' : 'Wordlist_get'            , 'weight' : 1 , 'protocol' : 'HTTP/1.1', 'method' : 'GET'      , 'payload': '/' },
        'LONG_GET'   : { 'wordlist' : 'Wordlist_get_long'       , 'weight' : 1 , 'protocol' : 'HTTP/1.1', 'method' : 'GET'      , 'payload': '/%s' % ('a' * 200) },
        'NOT_FOUND'  : { 'wordlist' : 'Wordlist_get_notfound'   , 'weight' : 2 , 'protocol' : 'HTTP/1.1', 'method' : 'GET'      , 'payload': '/404_NOFOUND__X02KAS' },
        'HEAD'       : { 'wordlist' : 'Wordlist_head'           , 'weight' : 3 , 'protocol' : 'HTTP/1.1', 'method' : 'HEAD'     , 'payload': '/' },
        'OPTIONS'    : { 'wordlist' : 'Wordlist_options'        , 'weight' : 2 , 'protocol' : 'HTTP/1.1', 'method' : 'OPTIONS'  , 'payload': '/' },
        'DELETE'     : { 'wordlist' : 'Wordlist_delete'         , 'weight' : 5 , 'protocol' : 'HTTP/1.1', 'method' : 'DELETE'   , 'payload': '/' },
        'TEST'       : { 'wordlist' : 'Wordlist_attack'         , 'weight' : 5 , 'protocol' : 'HTTP/1.1', 'method' : 'TEST'     , 'payload': '/' },
        'INVALID'    : { 'wordlist' : 'Wordlist_wrong_method'   , 'weight' : 5 , 'protocol' : 'HTTP/9.8', 'method' : 'GET'      , 'payload': '/' },
        'ATTACK'     : { 'wordlist' : 'Wordlist_wrong_version'  , 'weight' : 2 , 'protocol' : 'HTTP/1.1', 'method' : 'GET'      , 'payload': "/etc/passwd?format=%%%%&xss=\x22><script>alert('xss');</script>&traversal=../../&sql='%20OR%201;"}
    }


    # Parse the target URL.
    m_d                   = ParsedURL(main_url)
    m_hostname            = m_d.hostname
    m_port                = m_d.port
    m_debug               = False # For development only.

    # Banner counter. Used when other methods fail.
    m_banners_counter     = Counter()

    # Score counter
    m_counters = HTTPAnalyzer(debug=m_debug)

    # Var used to update the status
    m_data_len = len(m_actions)
    i          = 1 # element in process


    for l_action, v in m_actions.iteritems():
        if m_debug:
            print "###########"
        l_method      = v["method"]
        l_payload     = v["payload"]
        l_proto       = v["protocol"]
        l_wordlist    = v["wordlist"]

        # Each type of probe has a different weight.
        #
        # Weights range from 0 to 5.
        #
        l_weight      = v["weight"]

        # Make the URL
        l_url         = urljoin(main_url, l_payload)

        # Make the raw request
        #l_raw_request = "%(method)s %(payload)s %(protocol)s\r\nHost: %(host)s:%(port)s\r\nConnection: Close\r\n\r\n" % (
        l_raw_request = "%(method)s %(payload)s %(protocol)s\r\nHost: %(host)s\r\n\r\n" % (
            {
                "method"     : l_method,
                "payload"    : l_payload,
                "protocol"   : l_proto,
                "host"       : m_hostname,
                "port"       : m_port
            }
        )
        if m_debug:
            print "REQUEST"
            print l_raw_request

        # Open the connection.
        l_response = None
        try:
            m_raw_request = HTTP_Raw_Request(l_raw_request)
            discard_data(m_raw_request)
            l_response = HTTP.make_raw_request(
                host        = m_hostname,
                port        = m_port,
                raw_request = m_raw_request,
                callback    = check_raw_response)
            if l_response:
                discard_data(l_response)
        except NetworkException, e:
            Logger.log_error_more_verbose("Server-Fingerprint plugin: No response for URL (%s) '%s'. Message: %s" % (l_method, l_url, str(e)))
            continue

        if not l_response:
            Logger.log_error_more_verbose("No response for URL '%s'." % l_url)
            continue

        if m_debug:
            print "RESPONSE"
            print l_response.raw_headers


        # Update the status
        update_status_func((float(i) * 100.0) / float(m_data_len))
        Logger.log_more_verbose("Making '%s' test." % (l_wordlist))
        i += 1

        # Analyze for each wordlist
        #
        # Store the server banner
        try:
            m_banners_counter[l_response.headers["Server"]] += l_weight
        except KeyError:
            pass

        #
        # =====================
        # HTTP directly related
        # =====================
        #
        #
        for l_http_header_name, l_header_wordlist in m_wordlists_HTTP_fields.iteritems():

            # Check if HTTP header field is in response
            if l_http_header_name not in l_response.headers:
                continue

            l_curr_header_value = l_response.headers[l_http_header_name]

            # Generate concrete wordlist name
            l_wordlist_path     = Config.plugin_extra_config[l_wordlist][l_header_wordlist]

            # Load words for the wordlist
            l_wordlist_instance = WordListLoader.get_wordlist_as_dict(l_wordlist_path)
            # Looking for matches
            l_matches           = l_wordlist_instance.matches_by_value(l_curr_header_value)

            m_counters.inc(l_matches, l_action, l_weight, l_http_header_name, message="HTTP field: " + l_curr_header_value)

        #
        # =======================
        # HTTP INdirectly related
        # =======================
        #
        #

        #
        # Status code
        # ===========
        #
        l_wordlist_instance = WordListLoader.get_wordlist_as_dict(Config.plugin_extra_config[l_wordlist]["statuscode"])
        # Looking for matches
        l_matches           = l_wordlist_instance.matches_by_value(l_response.status)

        m_counters.inc(l_matches, l_action, l_weight, "statuscode", message="Status code: " + l_response.status)


        #
        # Status text
        # ===========
        #
        l_wordlist_instance = WordListLoader.get_wordlist_as_dict(Config.plugin_extra_config[l_wordlist]["statustext"])
        # Looking for matches
        l_matches           = l_wordlist_instance.matches_by_value(l_response.reason)

        m_counters.inc(l_matches, l_action, l_weight, "statustext", message="Status text: " + l_response.reason)


        #
        # Header space
        # ============
        #
        # Count the number of spaces between the HTTP field name and its value, for example:
        # -> "Server: Apache 1"
        #    The number of spaces is 1.
        #
        # -> "Server:Apache 1"
        #    The number of spaces is 0.
        #
        l_wordlist_instance = WordListLoader.get_wordlist_as_dict(Config.plugin_extra_config[l_wordlist]["header-space"])
        # Looking for matches
        try:
            l_http_value        = l_response.headers[0] # get the value of first HTTP field
            l_spaces_num        = str(abs(len(l_http_value) - len(l_http_value.lstrip())))
            l_matches           = l_wordlist_instance.matches_by_value(l_spaces_num)

            m_counters.inc(l_matches, l_action, l_weight, "header-space", message="Header space: " + l_spaces_num)

        except IndexError:
            # No headers present; nothing to measure.
            pass


        #
        # Header capitalafterdash
        # =======================
        #
        # Look for a non-capitalized first letter after the dash in the
        # header field name, for example:
        # -> Content-type: ....
        # Instead of:
        # -> Content-Type: ....
        #
        l_wordlist_instance = WordListLoader.get_wordlist_as_dict(Config.plugin_extra_config[l_wordlist]["header-capitalafterdash"])
        # Looking for matches
        l_valid_fields     = [x for x in l_response.headers.iterkeys() if "-" in x]

        if l_valid_fields:

            l_h = l_valid_fields[0]

            l_value = l_h.split("-")[1] # Get the part after the dash: Content-type => type

            # 1 if the letter after the dash is capitalized, 0 otherwise.
            l_dash = 1 if l_value[0].isupper() else 0

            l_matches           = l_wordlist_instance.matches_by_value(l_dash)
            m_counters.inc(l_matches, l_action, l_weight, "header-capitalafterdash", message="Capital after dash: %s" % str(l_dash))

        #
        # Header order
        # ============
        #
        l_header_order  = ','.join(l_response.headers.iterkeys())

        l_wordlist_instance = WordListLoader.get_wordlist_as_dict(Config.plugin_extra_config[l_wordlist]["header-order"])
        l_matches           = l_wordlist_instance.matches_by_value(l_header_order)

        m_counters.inc(l_matches, l_action, l_weight, "header-order", message="Header order: " + l_header_order)


        #
        # Protocol name
        # ============
        #
        # For a response like:
        # -> HTTP/1.0 200 OK
        #    ....
        #
        # Get the 'HTTP' value.
        #
        try:
            l_proto             = l_response.protocol # Get the 'HTTP' text from response, if available
            if l_proto:
                l_wordlist_instance = WordListLoader.get_wordlist_as_dict(Config.plugin_extra_config[l_wordlist]["protocol-name"])
                l_matches           = l_wordlist_instance.matches_by_value(l_proto)

                m_counters.inc(l_matches, l_action, l_weight, "proto-name", message="Proto name: " + l_proto)

        except IndexError:
            # Malformed response line; no protocol name available.
            pass


        #
        # Protocol version
        # ================
        #
        # For a response like:
        # -> HTTP/1.0 200 OK
        #    ....
        #
        # Get the '1.0' value.
        #
        try:
            l_version           = l_response.version # Get the '1.0' text from response, if available
            if l_version:
                l_wordlist_instance = WordListLoader.get_wordlist_as_dict(Config.plugin_extra_config[l_wordlist]["protocol-version"])
                l_matches           = l_wordlist_instance.matches_by_value(l_version)

                m_counters.inc(l_matches, l_action, l_weight, "proto-version", message="Proto version: " + l_version)

        except IndexError:
            # Malformed response line; no protocol version available.
            pass



        if "ETag" in l_response.headers:
            l_etag_header       = l_response.headers["ETag"]
            #
            # ETag length
            # ================
            #
            l_etag_len          = len(l_etag_header)
            l_wordlist_instance = WordListLoader.get_wordlist_as_dict(Config.plugin_extra_config[l_wordlist]["etag-legth"])
            l_matches           = l_wordlist_instance.matches_by_value(l_etag_len)

            m_counters.inc(l_matches, l_action, l_weight, "etag-length", message="ETag length: " + str(l_etag_len))


            #
            # ETag Quotes
            # ================
            #
            l_etag_stripped     = l_etag_header.strip()
            if l_etag_stripped.startswith("\"") or l_etag_stripped.startswith("'"):
                l_wordlist_instance = WordListLoader.get_wordlist_as_dict(Config.plugin_extra_config[l_wordlist]["etag-quotes"])
                l_matches           = l_wordlist_instance.matches_by_value(l_etag_stripped[0])

                m_counters.inc(l_matches, l_action, l_weight, "etag-quotes", message="ETag quotes: " + l_etag_stripped[0])

        if "Vary" in l_response.headers:
            l_vary_header       = l_response.headers["Vary"]
            #
            # Vary delimiter
            # ================
            #
            # Checks if Vary header delimiter is something like this:
            # -> Vary: Accept-Encoding,User-Agent
            # Or this:
            # -> Vary: Accept-Encoding, User-Agent
            #
            l_var_delimiter     = ", " if ", " in l_vary_header else ","
            l_wordlist_instance = WordListLoader.get_wordlist_as_dict(Config.plugin_extra_config[l_wordlist]["vary-delimiter"])
            l_matches           = l_wordlist_instance.matches_by_value(l_var_delimiter)

            m_counters.inc(l_matches, l_action, l_weight, "vary-delimiter", message="Vary delimiter: " + l_var_delimiter)

            #
            # Vary capitalizer
            # ================
            #
            # Checks whether the Vary header values are capitalized:
            # -> Vary: Accept-Encoding,User-Agent
            # Or all lowercase:
            # -> Vary: accept-encoding,user-agent
            #
            l_vary_capitalizer  = str(0 if l_vary_header == l_vary_header.lower() else 1)
            l_wordlist_instance = WordListLoader.get_wordlist_as_dict(Config.plugin_extra_config[l_wordlist]["vary-capitalize"])
            l_matches           = l_wordlist_instance.matches_by_value(l_vary_capitalizer)

            m_counters.inc(l_matches, l_action, l_weight, "vary-capitalize", message="Vary capitalizer: " + l_vary_capitalizer)


            #
            # Vary order
            # ================
            #
            # Checks order between vary values:
            # -> Vary: Accept-Encoding,user-Agent
            # Or this:
            # -> Vary: User-Agent,Accept-Encoding
            #
            l_wordlist_instance = WordListLoader.get_wordlist_as_dict(Config.plugin_extra_config[l_wordlist]["vary-order"])
            l_matches           = l_wordlist_instance.matches_by_value(l_vary_header)

            m_counters.inc(l_matches, l_action, l_weight, "vary-order", message="Vary order: " + l_vary_header)


        #
        # =====================
        # HTTP specific options
        # =====================
        #
        #
        if l_action == "HEAD":
            #
            # HEAD Options
            # ============
            #
            l_option            = l_response.headers.get("Allow")
            if l_option:
                l_wordlist_instance = WordListLoader.get_wordlist_as_dict(Config.plugin_extra_config[l_wordlist]["options-public"])
                # Looking for matches
                l_matches           = l_wordlist_instance.matches_by_value(l_option)

                m_counters.inc(l_matches, l_action, l_weight, "options-allow", message="HEAD option: " + l_option)


        if l_action == "OPTIONS" or l_action == "INVALID" or l_action == "DELETE":
            if "Allow" in l_response.headers:
                #
                # Options allow
                # =============
                #
                l_option            = l_response.headers.get("Allow")
                if l_option:
                    l_wordlist_instance = WordListLoader.get_wordlist_as_dict(Config.plugin_extra_config[l_wordlist]["options-public"])
                    # Looking for matches
                    l_matches           = l_wordlist_instance.matches_by_value(l_option)

                    m_counters.inc(l_matches, l_action, l_weight, "options-allow", message="OPTIONS allow: "  + l_action + " # " + l_option)


                #
                # Allow delimiter
                # ===============
                #
                l_option            = l_response.headers.get("Allow")
                if l_option:
                    l_var_delimiter     = ", " if ", " in l_option else ","
                    l_wordlist_instance = WordListLoader.get_wordlist_as_dict(Config.plugin_extra_config[l_wordlist]["options-delimited"])
                    # Looking for matches
                    l_matches           = l_wordlist_instance.matches_by_value(l_var_delimiter)

                    m_counters.inc(l_matches, l_action, l_weight, "options-delimiter", message="OPTION allow delimiter " + l_action + " # " + l_option)


            if "Public" in l_response.headers:
                #
                # Public response
                # ===============
                #
                l_option            = l_response.headers.get("Public")
                if l_option:
                    l_wordlist_instance = WordListLoader.get_wordlist_as_dict(Config.plugin_extra_config[l_wordlist]["options-public"])
                    # Looking for matches
                    l_matches           = l_wordlist_instance.matches_by_value(l_option)

                    m_counters.inc(l_matches, l_action, l_weight, "options-public", message="Public response: " + l_action + " # " + l_option)
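
The scoring idea behind HTTPAnalyzer (whose internals are not shown here) can be approximated with a plain Counter: every wordlist match adds the probe's weight to the matched server's score, and the highest totals win. A simplified sketch under that assumption:

from collections import Counter

def score_matches(probes):
    # probes: list of (matched_servers, weight) pairs, one per test.
    scores = Counter()
    for matched_servers, weight in probes:
        for server in matched_servers:
            scores[server] += weight
    return scores

# Hypothetical match results from three probes.
probes = [(["apache", "nginx"], 1),
          (["apache"], 5),
          (["iis"], 2)]
print(score_matches(probes).most_common(2))
# [('apache', 6), ('iis', 2)]
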
Ejemplo n.º 29
0
    def recv_info(self, info):

        m_url = info.url

        Logger.log_more_verbose("Start to process URL: %r" % m_url)

        #
        # Get the remote web server fingerprint
        #
        m_webserver_finger = info.get_associated_informations_by_category(WebServerFingerprint.information_type)

        m_wordlist = set()
        # Is there any fingerprinting information?
        if m_webserver_finger:

            m_webserver_finger = m_webserver_finger.pop()

            m_server_canonical_name = m_webserver_finger.name_canonical
            m_servers_related       = m_webserver_finger.related # Set with related web servers

            #
            # Load wordlists
            #
            m_wordlist_update  = m_wordlist.update

            # Common wordlist
            try:
                w = Config.plugin_extra_config["common"]
                m_wordlist_update([l_w for l_w in w.itervalues()])
            except KeyError:
                pass


            # Wordlist for the server name
            try:
                w = Config.plugin_extra_config["%s_predictables" % m_server_canonical_name]
                m_wordlist_update([l_w for l_w in w.itervalues()])
            except KeyError:
                pass

            # Wordlists for servers related to the one found
            try:
                for l_server_related in m_servers_related:
                    w = Config.plugin_extra_config["%s_predictables" % l_server_related]
                    m_wordlist_update([l_w for l_w in w.itervalues()])
            except KeyError:
                pass

        else:

            # Common wordlists
            try:
                w = Config.plugin_extra_config["common"]
                m_wordlist.update([l_w for l_w in w.itervalues()])
            except KeyError:
                pass


        # Load content of wordlists
        m_urls           = set()
        m_urls_update    = m_urls.update

        # Normalize the URL to end with a slash.
        m_url_fixed      = m_url if m_url.endswith("/") else "%s/" % m_url

        for l_w in m_wordlist:
            # Use a copy of the wordlist to avoid modifying the original source.
            l_loaded_wordlist = WordListLoader.get_advanced_wordlist_as_list(l_w)

            m_urls_update((urljoin(m_url_fixed, (l_wo[1:] if l_wo.startswith("/") else l_wo)) for l_wo in l_loaded_wordlist))

        # Generates the error page
        m_error_response = get_error_page(m_url)

        # Create the matching analyzer
        try:
            m_store_info = MatchingAnalyzer(m_error_response, min_ratio=0.65)
        except ValueError:
            # There is no information.
            return

        # Create the partial function.
        _f = partial(process_url,
                     severity_vectors['predictables'],
                     get_http_method(m_url),
                     m_store_info,
                     self.update_status,
                     len(m_urls))

        # Process the URLs
        for i, l_url in enumerate(m_urls):
            _f((i, l_url))

        # Generate and return the results.
        return generate_results(m_store_info.unique_texts)
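
The wordlist-to-URL expansion above works because the base URL is forced to end with a slash and leading slashes are stripped from entries, so urljoin appends to the path instead of replacing it. A sketch with hypothetical entries:

from urlparse import urljoin  # Python 2; use urllib.parse in Python 3.

def expand_wordlist(base_url, entries):
    # Normalize the base URL to end with a slash.
    base = base_url if base_url.endswith("/") else base_url + "/"
    return set(urljoin(base, e[1:] if e.startswith("/") else e)
               for e in entries)

print(sorted(expand_wordlist("http://example.com/app",
                             ["admin/", "/backup.zip", ".git/config"])))
# ['http://example.com/app/.git/config',
#  'http://example.com/app/admin/',
#  'http://example.com/app/backup.zip']
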
Ejemplo n.º 30
0
    def test__load_wordlists_input(self):
        # Reload wordlist
        WordListLoader._WordListLoader__load_wordlists("../../wordlist")

        # Check
        assert len(WordListLoader._WordListLoader__store) != 0
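
These tests reach into private members through Python's name mangling: an attribute named __x inside a class is stored as _ClassName__x, which is why the tests write WordListLoader._WordListLoader__store. A tiny demonstration with a hypothetical class:

class WordStore(object):
    def __init__(self):
        self.__store = {"a": 1}  # Stored under the mangled name _WordStore__store.

s = WordStore()
print(s._WordStore__store)
# {'a': 1}
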
Ejemplo n.º 31
0
    def run(self, info):

        # Get the root domain only.
        root = info.root

        # Skip localhost.
        if root == "localhost":
            return

        # Skip root domains we've already processed.
        if self.state.put(root, True):
            return


        # Load the subdomains wordlist.
        try:
            wordlist = WordListLoader.get_wordlist_as_list(Config.plugin_args["wordlist"])
        except WordlistNotFound:
            Logger.log_error_verbose("Wordlist '%s' not found.." % Config.plugin_args["wordlist"])
            return
        except TypeError:
            Logger.log_error_verbose("Wordlist '%s' is not a file." % Config.plugin_args["wordlist"])
            return

        # Load the subdomains whitelist.
        try:
            whitelist = WordListLoader.get_wordlist_as_list(Config.plugin_config["wordlist"])
        except WordlistNotFound:
            Logger.log_error_verbose("Wordlist '%s' not found.." % Config.plugin_config["wordlist"])
            return
        except TypeError:
            Logger.log_error_verbose("Wordlist '%s' is not a file." % Config.plugin_config["wordlist"])
            return


        #
        # Set a baseline for dynamic (wildcard) subdomains
        #
        m_virtual_domains = []
        for v in (generate_random_string(40) for x in xrange(3)):
            l_subdomain = ".".join((v, root))

            records = DNS.get_a(l_subdomain, also_CNAME=True)

            for rec in records:
                if rec.type == "CNAME":
                    m_virtual_domains.append(rec.target)

        # If all 3 random subdomains resolve to the same target, treat it as the wildcard base domain.
        m_base_domain = None
        if len(set(m_virtual_domains)) == 1:
            m_base_domain = m_virtual_domains[0]

        # Configure the progress notifier.
        self.progress.set_total(len(wordlist))
        self.progress.min_delta = 1  # notify every 1%

        # For each subdomain in the wordlist...
        found   = 0
        results = []
        visited = set()
        for prefix in wordlist:

            # Mark as completed before actually trying.
            # We can't put this at the end of the loop where it belongs,
            # because the "continue" statements would skip over this too.
            self.progress.add_completed()

            # Build the domain name.
            name = ".".join((prefix, root))

            # Skip if out of scope.
            if name not in Config.audit_scope:
                continue

            # Resolve the subdomain.
            records = DNS.get_a(name, also_CNAME=True)
            records.extend( DNS.get_aaaa(name, also_CNAME=True) )

            # If no DNS records were found, skip.
            if not records:
                continue

            # If the CNAME target is the wildcard base domain, skip.
            if any(x.type == "CNAME" and x.target == m_base_domain
                   for x in records):
                continue

            # We found a subdomain!
            found += 1
            Logger.log_more_verbose(
                "Subdomain found: %s" % name)

            # Create the Domain object for the subdomain.
            domain = Domain(name)
            results.append(domain)

            #
            # Check for Domain disclosure
            #
            if prefix not in whitelist:
                d = DomainDisclosure(domain,
                                     risk        = 0,
                                     level       = "low",
                                     title       = "Possible subdomain leak",
                                     description = "A subdomain was discovered which may be an unwanted information disclosure."
                                     )
                results.append(d)


            # For each DNS record, grab the address or name.
            # Skip duplicated records.
            for rec in records:
                if rec.type == "CNAME":
                    location = rec.target
                elif rec.type in ("A", "AAAA"):
                    location = rec.address
                else: # should not happen...
                    results.append(rec)
                    domain.add_information(rec)
                    continue
                if location not in visited:
                    visited.add(location)
                    results.append(rec)
                    domain.add_information(rec)

        # Log the results.
        if found:
            Logger.log(
                "Found %d subdomains for root domain: %s"
                % (found, root))
        else:
            Logger.log_verbose(
                "No subdomains found for root domain: %s" % root)

        # Return the results.
        return results
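
A minimal sketch of the wildcard-DNS baseline trick used above: resolve a few random labels that should not exist, and if they all come back with the same answer the zone is a catch-all, so that answer can be used to discard false positives. socket stands in for the framework's DNS helpers:

import random
import socket
import string

def wildcard_baseline(root, tries=3):
    targets = set()
    for _ in range(tries):
        # A 40-character random label is vanishingly unlikely to exist.
        label = "".join(random.choice(string.ascii_lowercase) for _ in range(40))
        try:
            targets.add(socket.gethostbyname("%s.%s" % (label, root)))
        except socket.error:
            return None  # Random names don't resolve: no wildcard zone.
    # All random names resolved to the same address: wildcard zone.
    return targets.pop() if len(targets) == 1 else None
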
Ejemplo n.º 32
0
    def recv_info(self, info):

        # Get the root domain only.
        root = info.root

        # Skip localhost.
        if root == "localhost":
            return

        # Skip root domains we've already processed.
        if self.state.put(root, True):
            return

        # Load the subdomains wordlist.
        try:
            wordlist = WordListLoader.get_advanced_wordlist_as_list(
                Config.plugin_args["wordlist"])
        except WordlistNotFound:
            Logger.log_error_verbose("Wordlist '%s' not found.." %
                                     Config.plugin_args["wordlist"])
            return
        except TypeError:
            Logger.log_error_verbose("Wordlist '%s' is not a file." %
                                     Config.plugin_args["wordlist"])
            return

        # Configure the progress notifier.
        self.progress.set_total(len(wordlist))
        self.progress.min_delta = 1  # notify every 1%

        # For each subdomain in the wordlist...
        found = 0
        results = []
        visited = set()
        for prefix in wordlist:

            # Mark as completed before actually trying.
            # We can't put this at the end of the loop where it belongs,
            # because the "continue" statements would skip over this too.
            self.progress.add_completed()

            # Build the domain name.
            name = ".".join((prefix, root))

            # Skip if out of scope.
            if name not in Config.audit_scope:
                continue

            # Resolve the subdomain.
            records = DNS.get_a(name, also_CNAME=True)
            records.extend(DNS.get_aaaa(name, also_CNAME=True))

            # If no DNS records were found, skip.
            if not records:
                continue

            # We found a subdomain!
            found += 1
            Logger.log_more_verbose("Subdomain found: %s" % name)

            # Create the Domain object for the subdomain.
            domain = Domain(name)
            results.append(domain)

            # For each DNS record, grab the address or name.
            # Skip duplicated records.
            for rec in records:
                if rec.type == "CNAME":
                    location = rec.target
                elif rec.type in ("A", "AAAA"):
                    location = rec.address
                else:  # should not happen...
                    results.append(rec)
                    domain.add_information(rec)
                    continue
                if location not in visited:
                    visited.add(location)
                    results.append(rec)
                    domain.add_information(rec)

        # Log the results.
        if found:
            Logger.log("Found %d subdomains for root domain: %s" %
                       (found, root))
        else:
            Logger.log_verbose("No subdomains found for root domain: %s" %
                               root)

        # Return the results.
        return results
Ejemplo n.º 33
0
    def test__load_wordlists_input(self):
        # Reload wordlist
        WordListLoader._WordListLoader__load_wordlists("../../wordlist")

        # Check
        assert len(WordListLoader._WordListLoader__store) != 0
Ejemplo n.º 34
0
    def recv_info(self, info):

        m_url = info.url

        Logger.log_more_verbose("Start to process URL: %r" % m_url)

        #
        # Get the remote web server fingerprint
        #
        m_webserver_finger = info.get_associated_informations_by_category(WebServerFingerprint.information_type)

        m_wordlist = set()
        # Is there any fingerprinting information?
        if m_webserver_finger:

            m_webserver_finger = m_webserver_finger.pop()

            m_server_canonical_name = m_webserver_finger.name_canonical
            m_servers_related       = m_webserver_finger.related # Set with related web servers

            #
            # Load wordlists
            #
            m_wordlist_update  = m_wordlist.update

            # Common wordlist
            try:
                w = Config.plugin_extra_config["common"]
                m_wordlist_update([l_w for l_w in w.itervalues()])
            except KeyError:
                Logger.log_error("Can't load wordlists")


            # Wordlist for the server name
            try:
                w = Config.plugin_extra_config["%s_predictables" % m_server_canonical_name]
                m_wordlist_update([l_w for l_w in w.itervalues()])
            except KeyError:
                Logger.log_error("Can't load wordlists")

            # Wordlists for servers related to the one found
            try:
                for l_server_related in m_servers_related:
                    w = Config.plugin_extra_config["%s_predictables" % l_server_related]
                    m_wordlist_update([l_w for l_w in w.itervalues()])
            except KeyError:
                Logger.log_error("Can't load wordlists")

        else:

            # Common wordlists
            try:
                w = Config.plugin_extra_config["common"]
                m_wordlist.update([l_w for l_w in w.itervalues()])
            except KeyError:
                Logger.log_error("Can't load wordlists")


        # Load content of wordlists
        m_urls           = set()
        m_urls_update    = m_urls.update

        # Normalize the URL to end with a slash.
        m_url_fixed      = m_url if m_url.endswith("/") else "%s/" % m_url

        for l_w in m_wordlist:
            # Use a copy of the wordlist to avoid modifying the original source.
            l_loaded_wordlist = WordListLoader.get_advanced_wordlist_as_list(l_w)

            m_urls_update((urljoin(m_url_fixed, (l_wo[1:] if l_wo.startswith("/") else l_wo)) for l_wo in l_loaded_wordlist))

        # Generates the error page
        m_error_response = get_error_page(m_url)

        # Create the matching analyzer
        try:
            m_store_info = MatchingAnalyzer(m_error_response, min_ratio=0.65)
        except ValueError:
            # There is no information.
            return

        # Create the partial function.
        _f = partial(process_url,
                     severity_vectors['predictables'],
                     get_http_method(m_url),
                     m_store_info,
                     self.update_status,
                     len(m_urls))

        # Process the URLs
        for i, l_url in enumerate(m_urls):
            _f((i, l_url))

        # Generate and return the results.
        return generate_results(m_store_info.unique_texts)
Ejemplo n.º 35
0
    def check_download(self, url, name, content_length, content_type):

        # Only accept content when the content type header is present.
        if not content_type:
            Logger.log_more_verbose("Skipping URL, missing content type: %s" %
                                    url)
            return False

        # Is the content length present?
        if content_length is not None:

            # Check the file doesn't have 0 bytes.
            if content_length <= 0:
                Logger.log_more_verbose("Skipping URL, empty content: %s" %
                                        url)
                return False

            # Check the file is not too big.
            if content_type.strip().lower().startswith("text/"):
                if content_length > 100000:
                    Logger.log_more_verbose(
                        "Skipping URL, content too large (%d bytes): %s" %
                        (content_length, url))
                    return False
            else:
                if content_length > 5000000:
                    Logger.log_more_verbose(
                        "Skipping URL, content too large (%d bytes): %s" %
                        (content_length, url))
                    return False

            # Approved!
            return True

        # Content length is absent. If the URL has no filename,
        # it likely points to a directory index.
        parsed_url = parse_url(url)
        if not parsed_url.filename:

            # Approved!
            return True

        # Extension absent.
        if not parsed_url.extension:

            # Approved!
            return True

        # Match against a known list of valid HTML extensions.
        # See: http://en.wikipedia.org/wiki/List_of_file_formats#Webpage
        if parsed_url.extension in (".xml", ".html", ".htm", ".xhtml", ".xht",
                                    ".mht", ".mhtml", ".maff", ".asp", ".aspx",
                                    ".bml", ".cfm", ".cgi", ".ihtml", ".jsp",
                                    ".las", ".lasso", ".lassoapp", ".pl",
                                    ".php", ".php3", ".phtml", ".rna", ".r",
                                    ".rnx", ".shtml", ".stm", ".atom", ".eml",
                                    ".jsonld", ".metalink", ".met", ".rss",
                                    ".markdown"):

            # Approved!
            return True

        # Is the URL path in the blacklist?
        m_forbidden = WordListLoader.get_wordlist_as_list(
            Config.plugin_config["wordlist_no_spider"])
        if any(x in url for x in m_forbidden):
            return False

        # Success!
        return True
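
The gating logic of check_download can be exercised on its own. The thresholds mirror the code above (100000 bytes for text content, 5000000 bytes for everything else); the helper name and sample calls are illustrative:

def should_download(content_type, content_length):
    if not content_type:
        return False  # Reject responses without a content type.
    if content_length is not None:
        if content_length <= 0:
            return False  # Reject empty bodies.
        if content_type.strip().lower().startswith("text/"):
            limit = 100000
        else:
            limit = 5000000
        return content_length <= limit
    return True  # No length header: let the URL-based checks decide.

print(should_download("text/html", 4096))       # True
print(should_download("text/html", 2000000))    # False: text over 100000 bytes
print(should_download("application/zip", 0))    # False: empty body
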
Ejemplo n.º 36
0
class Spider(TestingPlugin):
    """
    This plugin is a web spider.
    """

    #--------------------------------------------------------------------------
    def get_accepted_types(self):
        return [URL]

    #--------------------------------------------------------------------------
    def run(self, info):

        m_return = []

        m_url = info.url
        Logger.log_verbose("Spidering URL: %s" % m_url)

        # Check if we need to follow the first redirect, then follow the link.
        p = None
        try:
            allow_redirects = Config.audit_config.follow_redirects or \
                (info.depth == 0 and Config.audit_config.follow_first_redirect)
            p = download(m_url,
                         self.check_download,
                         allow_redirects=allow_redirects)
        except NetworkException, e:
            Logger.log_error_verbose("Error while processing %r: %s" %
                                     (m_url, str(e)))

        if not p:
            return m_return

        # Send back the data
        m_return.append(p)

        # TODO: If it's a 301 response, get the Location header

        # Get links
        m_forms = None
        if p.information_type == HTML.data_subtype:
            m_links = extract_from_html(p.raw_data, m_url)
            m_forms = extract_forms_from_html(p.raw_data, m_url)
            #m_links.update( extract_from_text(p.raw_data, m_url) )
        elif p.information_type == Text.data_subtype:
            m_links = extract_from_text(p.raw_data, m_url)
        else:
            return m_return
        try:
            m_links.remove(m_url)
        except Exception:
            pass

        # Do not follow URLs that contain certain keywords
        m_forbidden = WordListLoader.get_wordlist_as_list(
            Config.plugin_config["wordlist_no_spider"])
        m_urls_allowed = [
            url for url in m_links if not any(x in url for x in m_forbidden)
        ]
        m_urls_not_allowed = m_links.difference(m_urls_allowed)
        if m_urls_not_allowed:
            Logger.log_more_verbose("Skipped forbidden URLs:\n    %s" %
                                    "\n    ".join(sorted(m_urls_not_allowed)))

        # Do not follow URLs out of scope
        m_urls_in_scope = []
        m_broken = []
        for url in m_urls_allowed:
            try:
                if url in Config.audit_scope:
                    m_urls_in_scope.append(url)
            except Exception:
                m_broken.append(url)
        if m_broken:
            if len(m_broken) == 1:
                Logger.log_more_verbose("Skipped uncrawlable URL: %s" %
                                        m_broken[0])
            else:
                Logger.log_more_verbose("Skipped uncrawlable URLs:\n    %s" %
                                        "\n    ".join(sorted(m_broken)))
        m_out_of_scope_count = len(m_urls_allowed) - len(
            m_urls_in_scope) - len(m_broken)
        if m_out_of_scope_count:
            Logger.log_more_verbose("Skipped %d links out of scope." %
                                    m_out_of_scope_count)

        if m_urls_in_scope:
            Logger.log_verbose("Found %d links in URL: %s" %
                               (len(m_urls_allowed), m_url))
        else:
            Logger.log_more_verbose("No links found in URL: %s" % m_url)

        # Convert to URL data type
        for u in m_urls_in_scope:
            try:
                p = parse_url(u)
                if p.scheme == "mailto":
                    m_resource = Email(p.netloc)
                elif p.scheme in ("http", "https"):
                    m_resource = URL(url=u, referer=m_url)
                else:
                    continue  # Skip unsupported schemes.
            except Exception:
                warn(format_exc(), RuntimeWarning)
                continue  # Skip URLs that failed to parse.
            m_resource.add_resource(info)
            m_return.append(m_resource)

        # Get forms info
        if m_forms:
            m_forms_allowed = [
                url for url in m_forms
                if not any(x in url[0] for x in m_forbidden)
            ]
            m_forms_not_allowed = {x[0]
                                   for x in m_forms
                                   }.difference(x[0] for x in m_forms_allowed)
        else:
            m_forms_allowed = []
            m_forms_not_allowed = set()

        if m_forms_not_allowed:
            Logger.log_more_verbose("Skipped forbidden forms:\n    %s" %
                                    "\n    ".join(sorted(m_forms_not_allowed)))

        # Do not follow forms out of scope
        m_forms_in_scope = []
        m_broken = []
        for url in m_forms_allowed:
            try:
                if url[0] in Config.audit_scope:
                    m_forms_in_scope.append(url)
            except Exception:
                m_broken.append(url[0])

        if m_broken:
            if len(m_broken) == 1:
                Logger.log_more_verbose("Skipped uncrawlable forms: %s" %
                                        m_broken[0])
            else:
                Logger.log_more_verbose("Skipped uncrawlable forms:\n    %s" %
                                        "\n    ".join(sorted(m_broken)))
        m_out_of_scope_count = len(m_forms_allowed) - len(
            m_forms_in_scope) - len(m_broken)
        if m_out_of_scope_count:
            Logger.log_more_verbose("Skipped %d forms out of scope." %
                                    m_out_of_scope_count)

        if m_forms_in_scope:
            Logger.log_verbose("Found %d forms in URL: %s" %
                               (len(m_forms_in_scope), m_url))
        else:
            Logger.log_more_verbose("No forms found in URL: %s" % m_url)

        # Convert to URL data type
        for u in m_forms_in_scope:
            try:
                url = u[0]
                method = u[1]
                params = {x["name"]: x["value"] for x in u[2]}

                m_resource = URL(url=url,
                                 referer=m_url,
                                 method=method,
                                 post_params=params)
            except Exception:
                warn(format_exc(), RuntimeWarning)
                continue  # Skip forms that failed to parse.
            m_resource.add_resource(info)
            m_return.append(m_resource)

        # Send the results
        return m_return
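
Both spider variants use the same scope partitioning: membership in Config.audit_scope decides whether a link is followed, and a membership test that raises marks the URL as uncrawlable. A standalone sketch with a hypothetical scope object:

class FakeScope(object):
    # Hypothetical stand-in for Config.audit_scope.
    def __contains__(self, url):
        if "://" not in url:
            raise ValueError("Not an absolute URL")
        return url.startswith("http://example.com")

def partition_by_scope(urls, scope):
    in_scope, broken = [], []
    for url in urls:
        try:
            if url in scope:
                in_scope.append(url)
        except Exception:
            broken.append(url)
    return in_scope, broken

print(partition_by_scope(["http://example.com/a",
                          "http://other.org/b",
                          "garbage"], FakeScope()))
# (['http://example.com/a'], ['garbage'])
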
Ejemplo n.º 37
0
    def analyze_html(self, info):

        #----------------------------------------------------------------------
        # Get malware suspicious links.

        Logger.log_more_verbose("Processing HTML: %s" % info.identity)

        # Load the malware wordlist.
        wordlist_filename = Config.plugin_config["malware_sites"]
        try:
            wordlist = WordListLoader.get_wordlist_as_list(wordlist_filename)
        except WordlistNotFound:
            Logger.log_error("Wordlist '%s' not found.." % wordlist_filename)
            return
        except TypeError:
            Logger.log_error("Wordlist '%s' is not a file." %
                             wordlist_filename)
            return
        if not wordlist:
            Logger.log_error("Wordlist '%s' is empty." % wordlist_filename)
            return

        Logger.log("1")

        # Get links
        base_urls = set()
        m_links = set()
        for url in info.find_linked_data(Data.TYPE_RESOURCE,
                                         Resource.RESOURCE_URL):
            m_url = url.url
            base_urls.add(m_url)
            if info.information_type == Information.INFORMATION_HTML:
                m_links.update(extract_from_html(info.raw_data, m_url))
                m_links.update(extract_from_text(info.raw_data, m_url))
            elif info.information_type == Information.INFORMATION_PLAIN_TEXT:
                m_links.update(extract_from_text(info.raw_data, m_url))
            else:
                raise Exception("Internal error!")
        m_links.difference_update(base_urls)

        Logger.log("2")

        # If we have no links, abort now
        if not m_links:
            Logger.log_verbose("No output links found.")
            return

        # Do not follow URLs that contain certain keywords
        m_forbidden = WordListLoader.get_wordlist_as_raw(
            Config.plugin_config["wordlist_no_spider"])
        m_urls_allowed = {
            url
            for url in m_links
            if url and not any(x in url for x in m_forbidden)
        }

        Logger.log("3")

        # Get only output links
        m_output_links = []
        for url in m_urls_allowed:
            try:
                if url not in Config.audit_scope:
                    m_output_links.append(url)
            except Exception, e:
                Logger.log_error_more_verbose(format_exc())
Ejemplo n.º 38
0
    def recv_info(self, info):

        m_domain = info.root

        # Skip localhost.
        if m_domain == "localhost":
            return

        m_return = None

        # Check if the hostname has already been processed.
        if not self.state.check(m_domain):

            #
            # Look for subdomains.
            #
            m_subdomains = WordListLoader.get_advanced_wordlist_as_list(
                "subs_small.txt")

            # Run in parallel
            self.base_domain = m_domain
            self.completed = Counter(0)
            self.total = len(m_subdomains)
            r = pmap(self.get_subdomains_bruteforcer,
                     m_subdomains,
                     pool_size=10)

            #
            # Remove duplicates.
            #

            # The results
            m_domains = set()
            m_domains_add = m_domains.add
            m_domains_already = []
            m_domains_already_append = m_domains_already.append

            m_ips = set()
            m_ips_add = m_ips.add
            m_ips_already = []
            m_ips_already_append = m_ips_already.append

            if r:
                for doms in r:
                    for dom in doms:
                        # Domains
                        if dom.type == "CNAME":
                            if dom.target not in m_domains_already:
                                m_domains_already_append(dom.target)
                                if dom.target in Config.audit_scope:
                                    m_domains_add(dom)
                                else:
                                    discard_data(dom)

                        # IPs
                        if dom.type == "A":
                            if dom.address not in m_ips_already:
                                m_ips_already_append(dom.address)
                                m_ips_add(dom)

                # Unify
                m_domains.update(m_ips)

                m_return = m_domains

                # Add the information to the host
                map(info.add_information, m_return)

            # Set the domain as processed.
            self.state.set(m_domain, True)

            Logger.log_verbose("DNS analyzer plugin found %d subdomains" %
                               (len(m_return) if m_return else 0))

            # Write the info in a more user-friendly format.
            if Logger.MORE_VERBOSE:
                m_tmp = []
                m_tmp_append = m_tmp.append
                for x in m_return:
                    if getattr(x, "address", False):
                        m_tmp_append("%s (%s)" %
                                     (getattr(x, "address"), str(x)))
                    elif getattr(x, "target", False):
                        m_tmp_append("%s (%s)" %
                                     (getattr(x, "target"), str(x)))
                    else:
                        m_tmp_append(str(x))

                Logger.log_more_verbose("Subdomains found: \n\t+ %s" %
                                        "\n\t+ ".join(m_tmp))

        return m_return
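
The pmap call above is the framework's parallel map helper. An equivalent pattern with the standard library's thread pool, as a sketch (resolve is a placeholder for the per-subdomain step):

from multiprocessing.dummy import Pool  # Thread-based pool.

def resolve(prefix):
    # Placeholder for the real bruteforce step.
    return "%s.example.com" % prefix

pool = Pool(10)  # pool_size=10, mirroring the plugin.
try:
    results = pool.map(resolve, ["www", "mail", "ftp"])
finally:
    pool.close()
    pool.join()
print(results)
# ['www.example.com', 'mail.example.com', 'ftp.example.com']
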
Ejemplo n.º 39
0
    def recv_info(self, info):

        # Get the root domain only.
        root = info.root

        # Skip localhost.
        if root == "localhost":
            return

        # Skip root domains we've already processed.
        if self.state.put(root, True):
            return


        # Load the subdomains wordlist.
        try:
            wordlist = WordListLoader.get_advanced_wordlist_as_list(Config.plugin_args["wordlist"])
        except WordlistNotFound:
            Logger.log_error_verbose("Wordlist '%s' not found.." % Config.plugin_args["wordlist"])
            return
        except TypeError:
            Logger.log_error_verbose("Wordlist '%s' is not a file." % Config.plugin_args["wordlist"])
            return

        # Load the subdomains whitelist.
        try:
            whitelist = WordListLoader.get_advanced_wordlist_as_list(Config.plugin_config["wordlist"])
        except WordlistNotFound:
            Logger.log_error_verbose("Wordlist '%s' not found.." % Config.plugin_config["wordlist"])
            return
        except TypeError:
            Logger.log_error_verbose("Wordlist '%s' is not a file." % Config.plugin_config["wordlist"])
            return


        #
        # Set a baseline for dynamic (wildcard) subdomains
        #
        m_virtual_domains = []
        for v in (generate_random_string(40) for x in xrange(3)):
            l_subdomain = ".".join((v, root))

            records = DNS.get_a(l_subdomain, also_CNAME=True)

            for rec in records:
                if rec.type == "CNAME":
                    m_virtual_domains.append(rec.target)

        # If all 3 random subdomains resolve to the same target, treat it as the wildcard base domain.
        m_base_domain = None
        if len(set(m_virtual_domains)) == 1:
            m_base_domain = m_virtual_domains[0]

        # Configure the progress notifier.
        self.progress.set_total(len(wordlist))
        self.progress.min_delta = 1  # notify every 1%

        # For each subdomain in the wordlist...
        found   = 0
        results = []
        visited = set()
        for prefix in wordlist:

            # Mark as completed before actually trying.
            # We can't put this at the end of the loop where it belongs,
            # because the "continue" statements would skip over this too.
            self.progress.add_completed()

            # Build the domain name.
            name = ".".join((prefix, root))

            # Skip if out of scope.
            if name not in Config.audit_scope:
                continue

            # Resolve the subdomain.
            records = DNS.get_a(name, also_CNAME=True)
            records.extend( DNS.get_aaaa(name, also_CNAME=True) )

            # If no DNS records were found, skip.
            if not records:
                continue

            # If the CNAME target is the wildcard base domain, skip.
            if any(x.type == "CNAME" and x.target == m_base_domain
                   for x in records):
                continue

            # We found a subdomain!
            found += 1
            Logger.log_more_verbose(
                "Subdomain found: %s" % name)

            # Create the Domain object for the subdomain.
            domain = Domain(name)
            results.append(domain)

            #
            # Check for Domain disclosure
            #
            if prefix not in whitelist:
                d = DomainDisclosure(name,
                                     risk        = 0,
                                     level       = "low",
                                     title       = "Possible subdomain leak",
                                     description = "A subdomain was discovered which may be an unwanted information disclosure."
                                     )
                d.add_resource(domain)
                results.append(d)


            # For each DNS record, grab the address or name.
            # Skip duplicated records.
            for rec in records:
                if rec.type == "CNAME":
                    location = rec.target
                elif rec.type in ("A", "AAAA"):
                    location = rec.address
                else: # should not happen...
                    results.append(rec)
                    domain.add_information(rec)
                    continue
                if location not in visited:
                    visited.add(location)
                    results.append(rec)
                    domain.add_information(rec)

        # Log the results.
        if found:
            Logger.log(
                "Found %d subdomains for root domain: %s"
                % (found, root))
        else:
            Logger.log_verbose(
                "No subdomains found for root domain: %s" % root)

        # Return the results.
        return results
Ejemplo n.º 40
0
        urls = set()

        for l_w in new_file:
            try:
                l_w = l_w[1:] if l_w.startswith("/") else l_w
                tmp_u = urljoin(m_url, l_w)
            except ValueError:
                Logger.log_error("Failed to build a URL from wordlist entry '%s'" % l_w)
                continue

            urls.add(tmp_u)

        for l_w in wordlist:
            # Use a copy of the wordlist to avoid modifying the original source.
            l_loaded_wordlist = WordListLoader.get_wordlist_as_list(l_w)

            for l_wo in l_loaded_wordlist:
                try:
                    l_wo = l_wo[1:] if l_wo.startswith("/") else l_wo
                    tmp_u = urljoin(m_url, l_wo)
                except ValueError:
                    Logger.log_error("Failed to build a URL from wordlist entry '%s'" % l_wo)
                    continue

                urls.add(tmp_u)

        Logger.log_verbose("Loaded %s URLs to test." % len(urls))

        # Generates the error page
        error_response = get_error_page(m_url)
Ejemplo n.º 41
0
class Spider(TestingPlugin):
    """
    This plugin is a web spider.
    """

    #----------------------------------------------------------------------
    def get_accepted_info(self):
        return [Url]

    #----------------------------------------------------------------------
    def recv_info(self, info):

        m_return = []

        m_url = info.url
        m_depth = info.depth

        # Check depth
        if Config.audit_config.depth is not None and m_depth > Config.audit_config.depth:
            Logger.log_more_verbose("Spider depth level exceeded for URL: %s" %
                                    m_url)
            return m_return

        Logger.log_verbose("Spidering URL: %r" % m_url)

        # Check if we need to follow the first redirect.
        p = None
        try:
            allow_redirects = Config.audit_config.follow_redirects or \
                             (m_depth == 0 and Config.audit_config.follow_first_redirect)
            p = download(m_url,
                         self.check_download,
                         allow_redirects=allow_redirects)
        except NetworkException, e:
            Logger.log_more_verbose("Error while processing %r: %s" %
                                    (m_url, str(e)))
        if not p:
            return m_return

        # Send back the data
        m_return.append(p)

        # TODO: If it's a 301 response, get the Location header

        # Get links
        if p.information_type == Information.INFORMATION_HTML:
            m_links = extract_from_html(p.raw_data, m_url)
        else:
            m_links = extract_from_text(p.raw_data, m_url)
        try:
            m_links.remove(m_url)
        except Exception:
            pass

        # Do not follow URLs that contain certain keywords
        m_forbidden = WordListLoader.get_wordlist(
            Config.plugin_config["wordlist_no_spider"])
        m_urls_allowed = [
            url for url in m_links if not any(x in url for x in m_forbidden)
        ]
        m_urls_not_allowed = m_links.difference(m_urls_allowed)
        if m_urls_not_allowed:
            Logger.log_more_verbose("Skipped forbidden URLs:\n    %s" %
                                    "\n    ".join(sorted(m_urls_not_allowed)))

        # Do not follow URLs out of scope
        m_out_of_scope_count = len(m_urls_allowed)
        m_urls_allowed = [
            url for url in m_urls_allowed if url in Config.audit_scope
        ]
        m_out_of_scope_count -= len(m_urls_allowed)
        if m_out_of_scope_count:
            Logger.log_more_verbose("Skipped %d links out of scope." %
                                    m_out_of_scope_count)

        if m_urls_allowed:
            Logger.log_verbose("Found %d links in URL: %s" %
                               (len(m_urls_allowed), m_url))
        else:
            Logger.log_verbose("No links found in URL: %s" % m_url)

        # Convert to Url data type
        for u in m_urls_allowed:
            m_resource = Url(url=u, depth=m_depth + 1, referer=m_url)
            m_resource.add_resource(info)
            m_return.append(m_resource)

        # Send the results
        return m_return
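
The depth handling in this last variant reduces to a small rule: every child link is emitted at depth + 1, and a page is dropped before download once its depth exceeds the configured limit, with None meaning unlimited. A sketch with illustrative names:

def should_spider(depth, max_depth):
    # max_depth of None means unlimited depth.
    return max_depth is None or depth <= max_depth

for child_depth, limit in [(3, 3), (3, 2), (5, None)]:
    print("depth=%s limit=%s -> %s" %
          (child_depth, limit, should_spider(child_depth, limit)))
# depth=3 limit=3 -> True
# depth=3 limit=2 -> False
# depth=5 limit=None -> True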