Example #1
def load_wordlists(wordlists):
    """
    Load the wordlists with the names passed as parameter.

    This function receives a list of wordlist names, defined in the plugin
    configuration file, and returns a dict with the loaded wordlists.

    :param wordlists: list with wordlists names
    :type wordlists: list

    :returns: A dict with wordlists
    :rtype: dict
    """

    m_tmp_wordlist = {}

    # Get wordlist to load
    for l_w in wordlists:
        for wordlist_family, l_wordlists in Config.plugin_extra_config.iteritems(
        ):
            if wordlist_family.lower() in l_w.lower():
                m_tmp_wordlist[l_w] = l_wordlists

    # Load the wordlist
    m_return = {}
    for k, w_paths in m_tmp_wordlist.iteritems():
        m_return[k] = [WordListLoader.get_wordlist_as_list(w) for w in w_paths]

    return m_return
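A minimal usage sketch, assuming "web" and "fuzzing" are wordlist family names defined in the plugin's configuration file (hypothetical names; Logger is GoLismero's logger, as in the other examples):

# Hypothetical call; "web" and "fuzzing" are assumed family names
# defined under Config.plugin_extra_config.
loaded = load_wordlists(["web", "fuzzing"])
for name, word_sets in loaded.iteritems():
    Logger.log_verbose("%r -> %d wordlists loaded" % (name, len(word_sets)))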
Example #2
    def __detect_wordpress_installation(self, url, wordpress_urls):
        """
        Try to detect a WordPress installation in the current path.

        :param url: URL where to look for the WordPress installation.
        :type url: str

        :param wordpress_urls: name of the wordlist with WordPress URLs.
        :type wordpress_urls: str

        :return: True if a WordPress installation was found, False otherwise.
        :rtype: bool
        """
        Logger.log_more_verbose(
            "Detecting WordPress installation in URI: '%s'." % url)
        total_urls = 0
        urls_found = 0

        error_page = get_error_page(url).raw_data

        for u in WordListLoader.get_wordlist_as_list(wordpress_urls):
            total_urls += 1
            tmp_url = urljoin(url, u)

            r = HTTP.get_url(tmp_url, use_cache=False)
            if r.status == "200":

                # Try to detect non-default error pages
                ratio = get_diff_ratio(r.raw_response, error_page)
                if ratio < 0.35:
                    urls_found += 1

            discard_data(r)

        # If fewer than 85% of the known URLs responded (or the wordlist
        # was empty), run one last test against wp-admin.
        if total_urls == 0 or (urls_found / float(total_urls)) < 0.85:

            # If all fails, make another last test
            url_wp_admin = urljoin(url, "wp-admin/")

            try:
                p = HTTP.get_url(url_wp_admin,
                                 use_cache=False,
                                 allow_redirects=False)
                if p:
                    discard_data(p)
            except Exception:
                return False

            if p.status == "302" and "wp-login.php?redirect_to=" in p.headers.get(
                    "Location", ""):
                return True
            else:
                return False
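A hedged usage sketch; "wordpress_detection" is a hypothetical wordlist name (in the real plugin it would come from the plugin configuration):

# Hypothetical call from inside the plugin's run() method.
if self.__detect_wordpress_installation(info.url, "wordpress_detection"):
    Logger.log_verbose("WordPress detected at: %s" % info.url)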
Example #3
    def run(self, info):

        # Make sure it's a CNAME record.
        # This is required because a plugin can't ask for a specific DNS
        # register type - all types are received together.
        if info.type != "CNAME":
            return

        # Get the root domain.
        root = info.target
        Logger.log_verbose(
            "Looking for poisoned domains at: *.%s" % root)

        # Load the malware URLs list.
        wordlist_filename = Config.plugin_args["wordlist"]
        try:
            wordlist = WordListLoader.get_wordlist_as_list(
                wordlist_filename)
        except WordlistNotFound:
            Logger.log_error_verbose(
                "Wordlist not found: " + wordlist_filename)
            return
        except TypeError:
            Logger.log_error_verbose(
                "Wordlist is not a file: " + wordlist_filename)
            return

        results = []
        root_set = set([root])

        for x in root_set.intersection(set(wordlist)):
            results.append(DNSPoisoning(info, x))

        # Log how many results we got.
        if results:
            Logger.log_verbose(
                "Discovered %s poisoned domains." % len(results))
        else:
            Logger.log_verbose("No poisoned domains found.")

        # Return the results.
        return results
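Since root_set holds a single element, the set intersection above reduces to a plain membership test. A behaviorally equivalent sketch:

# Equivalent to the set-intersection loop above for one root domain.
results = []
if root in set(wordlist):
    results.append(DNSPoisoning(info, root))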
Example #4
def get_list_from_wordlist(wordlist):
    """
    Load the content of the wordlist and return a set with the content.

    :param wordlist: wordlist name.
    :type wordlist: str

    :return: a set with the results.
    :rtype: set
    """

    try:
        m_common_wordlists = set()

        for v in Config.plugin_extra_config[wordlist].itervalues():
            m_common_wordlists.update(WordListLoader.get_wordlist_as_list(v))

        return m_common_wordlists
    except KeyError as e:
        Logger.log_error_more_verbose(str(e))
        return set()
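A short usage sketch; "common" is a plugin_extra_config section name that does appear in Example #5 below:

# Load and merge every wordlist in the "common" config section.
common_words = get_list_from_wordlist("common")
Logger.log_verbose("Loaded %d unique words." % len(common_words))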
Example #5
class PredictablesDisclosureBruteforcer(TestingPlugin):

    #--------------------------------------------------------------------------
    def get_accepted_types(self):
        return [FolderURL]

    #--------------------------------------------------------------------------
    def run(self, info):

        m_url = info.url

        Logger.log_more_verbose("Start to process URL: %r" % m_url)

        # Server specified by param?
        webserver_finger = Config.plugin_args.get("server_banner", None)
        if webserver_finger:
            server_canonical_name = webserver_finger
            servers_related = []  # No related web servers known
        else:
            # Use fingerprint info
            webserver_finger = info.get_associated_informations_by_category(
                WebServerFingerprint.information_type)
            if webserver_finger:
                webserver_finger = webserver_finger.pop()

                server_canonical_name = webserver_finger.canonical_name
                servers_related = webserver_finger.related  # Set with related web servers

        wordlist = set()

        # Common wordlists
        try:
            w = Config.plugin_extra_config["common"]
            wordlist.update(w.itervalues())
        except KeyError:
            Logger.log_error("Can't load common wordlists")

        # Is there fingerprinting information?
        if webserver_finger:

            #
            # Load wordlists
            #
            wordlist_update = wordlist.update

            # Wordlist for the server name
            try:
                w = Config.plugin_extra_config["%s_predictables" %
                                               server_canonical_name]
                wordlist_update(w.itervalues())
            except KeyError:
                Logger.log_error(
                    "Can't load predictable wordlists for server: '%s'." %
                    server_canonical_name)

            # Wordlists related to the server found
            try:
                for l_servers_related in servers_related:
                    w = Config.plugin_extra_config["%s_predictables" %
                                                   l_servers_related]
                    wordlist_update(w.itervalues())
            except KeyError as e:
                Logger.log_error(
                    "Can't load predictable wordlists for related web "
                    "server: '%s'" % e)

        # Load content of wordlists
        urls = set()
        m_urls_update = urls.add

        for l_w in wordlist:
            # Use a copy of the wordlist to avoid modifying the original source
            l_loaded_wordlist = WordListLoader.get_wordlist_as_list(l_w)

            for l_wo in l_loaded_wordlist:
                try:
                    l_wo = l_wo[1:] if l_wo.startswith("/") else l_wo
                    tmp_u = urljoin(m_url, l_wo)
                except ValueError:
                    Logger.log_error(
                        "Failed to parse key from wordlist: %r" % l_wo)
                    continue

                m_urls_update(tmp_u)
Example #6
class Spider(TestingPlugin):
    """
    This plugin is a web spider.
    """

    #--------------------------------------------------------------------------
    def get_accepted_types(self):
        return [URL]

    #--------------------------------------------------------------------------
    def run(self, info):

        m_return = []

        m_url = info.url
        Logger.log_verbose("Spidering URL: %s" % m_url)

        # Check whether we need to follow the first redirect, then follow the link.
        p = None
        try:
            allow_redirects = Config.audit_config.follow_redirects or \
                (info.depth == 0 and Config.audit_config.follow_first_redirect)
            p = download(m_url,
                         self.check_download,
                         allow_redirects=allow_redirects)
        except NetworkException as e:
            Logger.log_error_verbose("Error while processing %r: %s" %
                                     (m_url, str(e)))

        if not p:
            return m_return

        # Send back the data
        m_return.append(p)

        # TODO: If it's a 301 response, get the Location header

        # Get links
        m_forms = None
        if p.information_type == HTML.data_subtype:
            m_links = extract_from_html(p.raw_data, m_url)
            m_forms = extract_forms_from_html(p.raw_data, m_url)
            #m_links.update( extract_from_text(p.raw_data, m_url) )
        elif p.information_type == Text.data_subtype:
            m_links = extract_from_text(p.raw_data, m_url)
        else:
            return m_return
        # Don't spider the page back into itself.
        m_links.discard(m_url)

        # Do not follow URLs that contain certain keywords
        m_forbidden = WordListLoader.get_wordlist_as_list(
            Config.plugin_config["wordlist_no_spider"])
        m_urls_allowed = [
            url for url in m_links if not any(x in url for x in m_forbidden)
        ]
        m_urls_not_allowed = m_links.difference(m_urls_allowed)
        if m_urls_not_allowed:
            Logger.log_more_verbose("Skipped forbidden URLs:\n    %s" %
                                    "\n    ".join(sorted(m_urls_not_allowed)))

        # Do not follow URLs out of scope
        m_urls_in_scope = []
        m_broken = []
        for url in m_urls_allowed:
            try:
                if url in Config.audit_scope:
                    m_urls_in_scope.append(url)
            except Exception:
                m_broken.append(url)
        if m_broken:
            if len(m_broken) == 1:
                Logger.log_more_verbose("Skipped uncrawlable URL: %s" %
                                        m_broken[0])
            else:
                Logger.log_more_verbose("Skipped uncrawlable URLs:\n    %s" %
                                        "\n    ".join(sorted(m_broken)))
        m_out_of_scope_count = len(m_urls_allowed) - len(
            m_urls_in_scope) - len(m_broken)
        if m_out_of_scope_count:
            Logger.log_more_verbose("Skipped %d links out of scope." %
                                    m_out_of_scope_count)

        if m_urls_in_scope:
            Logger.log_verbose("Found %d links in URL: %s" %
                               (len(m_urls_in_scope), m_url))
        else:
            Logger.log_more_verbose("No links found in URL: %s" % m_url)

        # Convert to URL data type
        for u in m_urls_in_scope:
            try:
                p = parse_url(u)
                if p.scheme == "mailto":
                    m_resource = Email(p.netloc)
                elif p.scheme in ("http", "https"):
                    m_resource = URL(url=u, referer=m_url)
                else:
                    continue  # skip unsupported schemes
            except Exception:
                warn(format_exc(), RuntimeWarning)
                continue  # m_resource would be stale or undefined here
            m_resource.add_resource(info)
            m_return.append(m_resource)

        # Get forms info
        if m_forms:
            m_forms_allowed = [
                url for url in m_forms
                if not any(x in url[0] for x in m_forbidden)
            ]
            m_forms_not_allowed = {x[0]
                                   for x in m_forms
                                   }.difference(x[0] for x in m_forms_allowed)
        else:
            m_forms_allowed = []
            m_forms_not_allowed = set()

        if m_forms_not_allowed:
            Logger.log_more_verbose("Skipped forbidden forms:\n    %s" %
                                    "\n    ".join(sorted(m_forms_not_allowed)))

        # Do not follow forms out of scope
        m_forms_in_scope = []
        m_broken = []
        for url in m_forms_allowed:
            try:
                if url[0] in Config.audit_scope:
                    m_forms_in_scope.append(url)
            except Exception:
                m_broken.append(url[0])

        if m_broken:
            if len(m_broken) == 1:
                Logger.log_more_verbose("Skipped uncrawlable forms: %s" %
                                        m_broken[0])
            else:
                Logger.log_more_verbose("Skipped uncrawlable forms:\n    %s" %
                                        "\n    ".join(sorted(m_broken)))
        m_out_of_scope_count = len(m_forms_allowed) - len(
            m_forms_in_scope) - len(m_broken)
        if m_out_of_scope_count:
            Logger.log_more_verbose("Skipped %d forms out of scope." %
                                    m_out_of_scope_count)

        if m_forms_in_scope:
            Logger.log_verbose("Found %d forms in URL: %s" %
                               (len(m_forms_in_scope), m_url))
        else:
            Logger.log_more_verbose("No forms found in URL: %s" % m_url)

        # Convert to URL data type
        for u in m_forms_in_scope:
            try:
                url = u[0]
                method = u[1]
                params = {x["name"]: x["value"] for x in u[2]}

                m_resource = URL(url=url,
                                 referer=m_url,
                                 method=method,
                                 post_params=params)
            except Exception:
                warn(format_exc(), RuntimeWarning)
                continue  # m_resource would be stale or undefined here
            m_resource.add_resource(info)
            m_return.append(m_resource)

        # Send the results
        return m_return
Example #7
    def check_download(self, url, name, content_length, content_type):

        # Only accept content when the content type header is present.
        if not content_type:
            Logger.log_more_verbose("Skipping URL, missing content type: %s" %
                                    url)
            return False

        # Is the content length present?
        if content_length is not None:

            # Check the file doesn't have 0 bytes.
            if content_length <= 0:
                Logger.log_more_verbose("Skipping URL, empty content: %s" %
                                        url)
                return False

            # Check the file is not too big.
            if content_type.strip().lower().startswith("text/"):
                if content_length > 100000:
                    Logger.log_more_verbose(
                        "Skipping URL, content too large (%d bytes): %s" %
                        (content_length, url))
                    return False
            else:
                if content_length > 5000000:
                    Logger.log_more_verbose(
                        "Skipping URL, content too large (%d bytes): %s" %
                        (content_length, url))
                    return False

            # Approved!
            return True

        # Content length absent but likely points to a directory index.
        parsed_url = parse_url(url)
        if not parsed_url.filename:

            # Approved!
            return True

        # Extension absent.
        if not parsed_url.extension:

            # Approved!
            return True

        # Match against a known list of valid HTML extensions.
        # See: http://en.wikipedia.org/wiki/List_of_file_formats#Webpage
        if parsed_url.extension in (".xml", ".html", ".htm", ".xhtml", ".xht",
                                    ".mht", ".mhtml", ".maff", ".asp", ".aspx",
                                    ".bml", ".cfm", ".cgi", ".ihtml", ".jsp",
                                    ".las", ".lasso", ".lassoapp", ".pl",
                                    ".php", ".php3", ".phtml", ".rna", ".r",
                                    ".rnx", ".shtml", ".stm", ".atom", ".xml",
                                    ".eml", ".jsonld", ".metalink", ".met",
                                    ".rss", ".xml", ".markdown"):

            # Approved!
            return True

        # Is the URL path in the blacklist?
        m_forbidden = WordListLoader.get_wordlist_as_list(
            Config.plugin_config["wordlist_no_spider"])
        if any(x in url for x in m_forbidden):
            return False

        # Success!
        return True
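check_download is meant to be passed as the pre-download approval callback, exactly as the Spider plugin in Example #6 does. A minimal sketch of that pattern:

# download() consults check_download (url, name, length, type)
# before fetching each response body; names as in Example #6.
p = download(m_url, self.check_download, allow_redirects=True)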
Example #8
    def run(self, info):

        # Get the root domain only.
        root = info.root

        # Skip localhost.
        if root == "localhost":
            return

        # Skip root domains we've already processed.
        if self.state.put(root, True):
            return


        # Load the subdomains wordlist.
        try:
            wordlist = WordListLoader.get_wordlist_as_list(Config.plugin_args["wordlist"])
        except WordlistNotFound:
            Logger.log_error_verbose("Wordlist '%s' not found.." % Config.plugin_args["wordlist"])
            return
        except TypeError:
            Logger.log_error_verbose("Wordlist '%s' is not a file." % Config.plugin_args["wordlist"])
            return

        # Load the subdomains whitelist.
        try:
            whitelist = WordListLoader.get_wordlist_as_list(Config.plugin_config["wordlist"])
        except WordlistNotFound:
            Logger.log_error_verbose("Wordlist '%s' not found.." % Config.plugin_config["wordlist"])
            return
        except TypeError:
            Logger.log_error_verbose("Wordlist '%s' is not a file." % Config.plugin_config["wordlist"])
            return


        #
        # Set a baseline for dynamic sub-domains (wildcard DNS)
        #
        m_virtual_domains = []
        for v in (generate_random_string(40) for x in xrange(3)):
            l_subdomain = ".".join((v, root))

            records = DNS.get_a(l_subdomain, also_CNAME=True)

            for rec in records:
                if rec.type == "CNAME":
                    m_virtual_domains.append(rec.target)

        # If all 3 random subdomains share the same CNAME target, the zone
        # uses wildcard DNS; record the base domain so we can skip it below.
        m_base_domain = None
        if len(set(m_virtual_domains)) == 1:
            m_base_domain = m_virtual_domains[0]

        # Configure the progress notifier.
        self.progress.set_total(len(wordlist))
        self.progress.min_delta = 1  # notify every 1%

        # For each subdomain in the wordlist...
        found   = 0
        results = []
        visited = set()
        for prefix in wordlist:

            # Mark as completed before actually trying.
            # We can't put this at the end of the loop where it belongs,
            # because the "continue" statements would skip over this too.
            self.progress.add_completed()

            # Build the domain name.
            name = ".".join((prefix, root))

            # Skip if out of scope.
            if name not in Config.audit_scope:
                continue

            # Resolve the subdomain.
            records = DNS.get_a(name, also_CNAME=True)
            records.extend( DNS.get_aaaa(name, also_CNAME=True) )

            # If no DNS records were found, skip.
            if not records:
                continue

            # If the CNAME points at the wildcard base domain, skip.
            if any(x.type == "CNAME" and x.target == m_base_domain
                   for x in records):
                continue

            # We found a subdomain!
            found += 1
            Logger.log_more_verbose(
                "Subdomain found: %s" % name)

            # Create the Domain object for the subdomain.
            domain = Domain(name)
            results.append(domain)

            #
            # Check for Domain disclosure
            #
            if prefix not in whitelist:
                d = DomainDisclosure(domain,
                                     risk        = 0,
                                     level       = "low",
                                     title       = "Possible subdomain leak",
                                     description = "A subdomain was discovered which may be an unwanted information disclosure."
                                     )
                results.append(d)


            # For each DNS record, grab the address or target name.
            # Skip duplicated records.
            for rec in records:
                if rec.type == "CNAME":
                    location = rec.target
                elif rec.type in ("A", "AAAA"):
                    location = rec.address
                else: # should not happen...
                    results.append(rec)
                    domain.add_information(rec)
                    continue
                if location not in visited:
                    visited.add(location)
                    results.append(rec)
                    domain.add_information(rec)

        # Log the results.
        if found:
            Logger.log(
                "Found %d subdomains for root domain: %s"
                % (found, root))
        else:
            Logger.log_verbose(
                "No subdomains found for root domain: %s" % root)

        # Return the results.
        return results
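The random-prefix baseline above is the standard trick for detecting wildcard (catch-all) DNS before bruteforcing subdomains. A standalone sketch of the same idea using dnspython instead of GoLismero's DNS wrapper (an assumed substitution; dnspython >= 2.0 API):

import random
import string

import dns.resolver  # assumption: the dnspython package is installed


def wildcard_dns_target(root):
    """Return the shared CNAME target if *root* has wildcard DNS, else None."""
    targets = []
    for _ in range(3):
        # Random labels should not resolve unless a wildcard record answers.
        label = "".join(random.choice(string.ascii_lowercase)
                        for _ in range(40))
        try:
            answer = dns.resolver.resolve("%s.%s" % (label, root), "CNAME")
        except (dns.resolver.NXDOMAIN, dns.resolver.NoAnswer):
            return None
        targets.append(str(answer[0].target))
    # All three probes must agree, mirroring the plugin's baseline check.
    return targets[0] if len(set(targets)) == 1 else None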
Example #9
        urls = set()
        #Logger.log(urls)

        for l_w in new_file:
            try:
                l_w = l_w[1:] if l_w.startswith("/") else l_w
                tmp_u = urljoin(m_url, l_w)
            except ValueError:
                Logger.log_error("Failed to parse key from wordlist: %r" % l_w)
                continue

            urls.add(tmp_u)

        for l_w in wordlist:
            # Use a copy of the wordlist to avoid modifying the original source
            l_loaded_wordlist = WordListLoader.get_wordlist_as_list(l_w)

            for l_wo in l_loaded_wordlist:
                try:
                    l_wo = l_wo[1:] if l_wo.startswith("/") else l_wo
                    tmp_u = urljoin(m_url, l_wo)
                except ValueError:
                    Logger.log_error("Failed to parse key from wordlist: %r" % l_wo)
                    continue

                urls.add(tmp_u)

        Logger.log_verbose("Loaded %s URLs to test." % len(urls))

        # Generate the reference error page
        error_response = get_error_page(m_url)
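This fragment ends right after generating the reference error page; the usual next step (see the WordPress detector in Example #2) is to request each candidate URL and compare the response against that page with get_diff_ratio. A sketch reusing the names above:

# Probe each candidate and keep only responses that do not look like
# the error page (0.35 threshold, as in Example #2).
for u in urls:
    r = HTTP.get_url(u, use_cache=False)
    if r.status == "200":
        if get_diff_ratio(r.raw_response, error_response.raw_data) < 0.35:
            Logger.log_verbose("Found: %s" % u)
    discard_data(r)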
Example #10
    def analyze_html(self, info):

        #----------------------------------------------------------------------
        # Get malware suspicious links.

        Logger.log_more_verbose("Processing HTML: %s" % info.identity)

        # Load the malware wordlist.
        wordlist_filename = Config.plugin_config["malware_sites"]
        try:
            wordlist = WordListLoader.get_wordlist_as_list(wordlist_filename)
        except WordlistNotFound:
            Logger.log_error("Wordlist '%s' not found.." % wordlist_filename)
            return
        except TypeError:
            Logger.log_error("Wordlist '%s' is not a file." %
                             wordlist_filename)
            return
        if not wordlist:
            Logger.log_error("Wordlist '%s' is empty." % wordlist_filename)
            return

        Logger.log("1")

        # Get links
        base_urls = set()
        m_links = set()
        for url in info.find_linked_data(Data.TYPE_RESOURCE,
                                         Resource.RESOURCE_URL):
            m_url = url.url
            base_urls.add(m_url)
            if info.information_type == Information.INFORMATION_HTML:
                m_links.update(extract_from_html(info.raw_data, m_url))
                m_links.update(extract_from_text(info.raw_data, m_url))
            elif info.information_type == Information.INFORMATION_PLAIN_TEXT:
                m_links.update(extract_from_text(info.raw_data, m_url))
            else:
                raise Exception("Internal error!")
        m_links.difference_update(base_urls)

        Logger.log("2")

        # If we have no links, abort now
        if not m_links:
            Logger.log_verbose("No output links found.")
            return

        # Do not follow URLs that contain certain keywords
        m_forbidden = WordListLoader.get_wordlist_as_raw(
            Config.plugin_config["wordlist_no_spider"])
        m_urls_allowed = {
            url
            for url in m_links
            if url and not any(x in url for x in m_forbidden)
        }

        Logger.log("3")

        # Get only output links
        m_output_links = []
        for url in m_urls_allowed:
            try:
                if url not in Config.audit_scope:
                    m_output_links.append(url)
            except Exception:
                Logger.log_error_more_verbose(format_exc())