def load_wordlists(wordlists):
    """
    Load the wordlists whose names are passed as a parameter.

    This function receives a list of wordlist names, as defined in the
    plugin configuration file, and returns a dict with the loaded
    wordlists.

    :param wordlists: list of wordlist names
    :type wordlists: list

    :returns: A dict mapping each wordlist name to its loaded contents.
    :rtype: dict
    """
    m_tmp_wordlist = {}

    # Find the wordlists to load.
    for l_w in wordlists:
        for wordlist_family, l_wordlists in Config.plugin_extra_config.iteritems():
            if wordlist_family.lower() in l_w.lower():
                m_tmp_wordlist[l_w] = l_wordlists

    # Load the wordlists.
    m_return = {}
    for k, w_paths in m_tmp_wordlist.iteritems():
        m_return[k] = [WordListLoader.get_wordlist_as_list(w) for w in w_paths]

    return m_return
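# A minimal usage sketch for load_wordlists(), assuming it runs inside a
# GoLismero plugin where Config.plugin_extra_config is already populated.
# The "wordpress_common" section name below is hypothetical:
#
#   [wordpress_common]
#   wordlist_1 = golismero/wordlist/wordpress/common.txt
#
loaded = load_wordlists(["wordpress_common"])
for name, lists in loaded.iteritems():
    # Each value is a list of loaded wordlists (one per configured path).
    print "%s: %d wordlist(s)" % (name, len(lists))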
def __detect_wordpress_installation(self, url, wordpress_urls):
    """
    Try to detect a WordPress installation in the current path.

    :param url: URL where to look for the WordPress installation.
    :type url: str

    :param wordpress_urls: name of the wordlist with WordPress URLs.
    :type wordpress_urls: str

    :return: True if a WordPress installation was found, False otherwise.
    :rtype: bool
    """
    Logger.log_more_verbose(
        "Detecting WordPress installation in URI: '%s'." % url)

    total_urls = 0
    urls_found = 0

    error_page = get_error_page(url).raw_data

    for u in WordListLoader.get_wordlist_as_list(wordpress_urls):
        total_urls += 1
        tmp_url = urljoin(url, u)

        r = HTTP.get_url(tmp_url, use_cache=False)
        if r.status == "200":

            # Try to detect non-default error pages.
            ratio = get_diff_ratio(r.raw_response, error_page)
            if ratio < 0.35:
                urls_found += 1

        discard_data(r)

    # An empty wordlist means nothing was tested.
    if not total_urls:
        return False

    # If fewer than 85% of the probes were found, make one last test
    # against the admin page.
    if (urls_found / float(total_urls)) < 0.85:

        url_wp_admin = urljoin(url, "wp-admin/")
        try:
            p = HTTP.get_url(url_wp_admin, use_cache=False,
                             allow_redirects=False)
            if p:
                discard_data(p)
        except Exception, e:
            return False

        if p.status == "302" and \
                "wp-login.php?redirect_to=" in p.headers.get("Location", ""):
            return True
        else:
            return False

    # More than 85% of the probed URLs exist: assume WordPress.
    return True
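# The 0.35 threshold above depends on a fuzzy similarity ratio between the
# probed page and the site's error page. get_diff_ratio() is part of the
# framework; the sketch below only illustrates the usual difflib-based
# approach and is not the framework's actual implementation.
import difflib

def diff_ratio_sketch(text_a, text_b):
    # Returns a similarity ratio in [0.0, 1.0]; quick_ratio() is an
    # upper-bound approximation that is cheap enough for page comparison.
    return difflib.SequenceMatcher(None, text_a, text_b).quick_ratio()

# A probe whose body closely matches the error page (ratio >= 0.35)
# is treated as "not found" by the detector above.
print diff_ratio_sketch("<html>404 Not Found</html>",
                        "<html>404 Not Found!</html>")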
def run(self, info):

    # Make sure it's a CNAME record.
    # This is required because a plugin can't ask for a specific DNS
    # register type - all types are received together.
    if info.type != "CNAME":
        return

    # Get the root domain.
    root = info.target
    Logger.log_verbose(
        "Looking for poisoned domains at: *.%s" % root)

    # Load the malware URLs list.
    wordlist_filename = Config.plugin_args["wordlist"]
    try:
        wordlist = WordListLoader.get_wordlist_as_list(
            wordlist_filename)
    except WordlistNotFound:
        Logger.log_error_verbose(
            "Wordlist not found: " + wordlist_filename)
        return
    except TypeError:
        Logger.log_error_verbose(
            "Wordlist is not a file: " + wordlist_filename)
        return

    # Look for the root domain in the wordlist.
    results = []
    root_set = set([root])
    for x in root_set.intersection(set(wordlist)):
        results.append(DNSPoisoning(info, x))

    # Log how many results we got.
    if results:
        Logger.log_verbose(
            "Discovered %s poisoned domains." % len(results))
    else:
        Logger.log_verbose("No poisoned domains found.")

    # Return the results.
    return results
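# The poisoning check itself is only a set intersection: the CNAME target
# is flagged if it appears verbatim in the malware wordlist. A
# self-contained sketch of that check (the domain names are made up):
malware_wordlist = ["evil-tracker.example", "bad-cdn.example"]
root = "evil-tracker.example"

poisoned = set([root]).intersection(malware_wordlist)
if poisoned:
    print "Discovered %d poisoned domains." % len(poisoned)
else:
    print "No poisoned domains found."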
def get_list_from_wordlist(wordlist):
    """
    Load the contents of the wordlist and return them as a set.

    :param wordlist: wordlist name.
    :type wordlist: str

    :return: a set with the results.
    :rtype: set
    """
    try:
        m_common_wordlists = set()

        for v in Config.plugin_extra_config[wordlist].itervalues():
            m_common_wordlists.update(WordListLoader.get_wordlist_as_list(v))

        return m_common_wordlists
    except KeyError, e:
        Logger.log_error_more_verbose(str(e))
        return set()
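# Each plugin_extra_config section is a dict of name-to-wordlist entries,
# so merging its values into one set flattens every configured file and
# collapses duplicates. The same merge pattern with plain dicts (the
# contents below are made up):
section = {
    "wordlist_1": ["admin", "backup"],
    "wordlist_2": ["backup", "test"],
}

merged = set()
for entries in section.itervalues():
    merged.update(entries)
print sorted(merged)  # ['admin', 'backup', 'test']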
class PredictablesDisclosureBruteforcer(TestingPlugin):

    #--------------------------------------------------------------------------
    def get_accepted_types(self):
        return [FolderURL]

    #--------------------------------------------------------------------------
    def run(self, info):
        m_url = info.url

        Logger.log_more_verbose("Start to process URL: %r" % m_url)

        # Server specified by parameter?
        webserver_finger = Config.plugin_args.get("server_banner", None)
        if webserver_finger:
            server_canonical_name = webserver_finger
            servers_related = []  # Set with related web servers
        else:
            # Use fingerprint info.
            webserver_finger = info.get_associated_informations_by_category(
                WebServerFingerprint.information_type)
            if webserver_finger:
                webserver_finger = webserver_finger.pop()

                server_canonical_name = webserver_finger.canonical_name
                servers_related = webserver_finger.related  # Set with related web servers

        wordlist = set()

        # Common wordlists.
        try:
            w = Config.plugin_extra_config["common"]
            wordlist.update([l_w for l_w in w.itervalues()])
        except KeyError:
            Logger.log_error("Can't load common wordlists")

        # Is there fingerprinting information?
        if webserver_finger:

            #
            # Load wordlists
            #
            wordlist_update = wordlist.update

            # Wordlist for the server name.
            try:
                w = Config.plugin_extra_config[
                    "%s_predictables" % server_canonical_name]
                wordlist_update([l_w for l_w in w.itervalues()])
            except KeyError:
                Logger.log_error(
                    "Can't load predictables wordlists for server: '%s'."
                    % server_canonical_name)

            # Wordlists for servers related to the one found.
            try:
                for l_servers_related in servers_related:
                    w = Config.plugin_extra_config[
                        "%s_predictables" % l_servers_related]
                    wordlist_update([l_w for l_w in w.itervalues()])
            except KeyError, e:
                Logger.log_error(
                    "Can't load predictables wordlists for related web server: '%s'"
                    % e)

        # Load the contents of the wordlists.
        urls = set()
        m_urls_update = urls.add

        for l_w in wordlist:
            # Use a copy of the wordlist to avoid modifying the original source.
            l_loaded_wordlist = WordListLoader.get_wordlist_as_list(l_w)

            for l_wo in l_loaded_wordlist:
                try:
                    l_wo = l_wo[1:] if l_wo.startswith("/") else l_wo
                    tmp_u = urljoin(m_url, l_wo)
                except ValueError, e:
                    # Report the offending entry, not tmp_u, which may be
                    # unbound when urljoin() fails.
                    Logger.log_error(
                        "Failed to parse key from wordlist: '%s'" % l_wo)
                    continue

                m_urls_update(tmp_u)
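# Stripping the leading slash before calling urljoin() matters: a
# slash-prefixed entry resolves against the host root instead of the
# folder being brute-forced. A standalone illustration with the Python 2
# standard library (the URLs are made up):
from urlparse import urljoin

base = "http://www.example.com/app/"

# With the leading slash, the folder context is lost:
print urljoin(base, "/admin/")  # http://www.example.com/admin/

# Without it, the probe stays inside the target folder:
print urljoin(base, "admin/")   # http://www.example.com/app/admin/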
class Spider(TestingPlugin):
    """
    This plugin is a web spider.
    """

    #--------------------------------------------------------------------------
    def get_accepted_types(self):
        return [URL]

    #--------------------------------------------------------------------------
    def run(self, info):

        m_return = []

        m_url = info.url

        Logger.log_verbose("Spidering URL: %s" % m_url)

        # Check if we need to follow the first redirect, then follow the link.
        p = None
        try:
            allow_redirects = Config.audit_config.follow_redirects or \
                (info.depth == 0 and Config.audit_config.follow_first_redirect)
            p = download(m_url, self.check_download,
                         allow_redirects=allow_redirects)
        except NetworkException, e:
            Logger.log_error_verbose(
                "Error while processing %r: %s" % (m_url, str(e)))

        if not p:
            return m_return

        # Send back the data.
        m_return.append(p)

        # TODO: If it's a 301 response, get the Location header

        # Get links.
        m_forms = None
        if p.information_type == HTML.data_subtype:
            m_links = extract_from_html(p.raw_data, m_url)
            m_forms = extract_forms_from_html(p.raw_data, m_url)
            #m_links.update( extract_from_text(p.raw_data, m_url) )
        elif p.information_type == Text.data_subtype:
            m_links = extract_from_text(p.raw_data, m_url)
        else:
            return m_return
        m_links.discard(m_url)

        # Do not follow URLs that contain certain keywords.
        m_forbidden = WordListLoader.get_wordlist_as_list(
            Config.plugin_config["wordlist_no_spider"])
        m_urls_allowed = [
            url for url in m_links
            if not any(x in url for x in m_forbidden)
        ]
        m_urls_not_allowed = m_links.difference(m_urls_allowed)
        if m_urls_not_allowed:
            Logger.log_more_verbose(
                "Skipped forbidden URLs:\n    %s"
                % "\n    ".join(sorted(m_urls_not_allowed)))

        # Do not follow URLs out of scope.
        m_urls_in_scope = []
        m_broken = []
        for url in m_urls_allowed:
            try:
                if url in Config.audit_scope:
                    m_urls_in_scope.append(url)
            except Exception:
                m_broken.append(url)
        if m_broken:
            if len(m_broken) == 1:
                Logger.log_more_verbose(
                    "Skipped uncrawlable URL: %s" % m_broken[0])
            else:
                Logger.log_more_verbose(
                    "Skipped uncrawlable URLs:\n    %s"
                    % "\n    ".join(sorted(m_broken)))
        m_out_of_scope_count = \
            len(m_urls_allowed) - len(m_urls_in_scope) - len(m_broken)
        if m_out_of_scope_count:
            Logger.log_more_verbose(
                "Skipped %d links out of scope." % m_out_of_scope_count)

        if m_urls_in_scope:
            Logger.log_verbose(
                "Found %d links in URL: %s" % (len(m_urls_in_scope), m_url))
        else:
            Logger.log_more_verbose("No links found in URL: %s" % m_url)

        # Convert to the URL data type.
        for u in m_urls_in_scope:
            try:
                p = parse_url(u)
                if p.scheme == "mailto":
                    m_resource = Email(p.netloc)
                elif p.scheme in ("http", "https"):
                    m_resource = URL(url=u, referer=m_url)
                else:
                    continue
            except Exception:
                warn(format_exc(), RuntimeWarning)
                continue
            m_resource.add_resource(info)
            m_return.append(m_resource)

        # Get form info.
        if m_forms:
            m_forms_allowed = [
                url for url in m_forms
                if not any(x in url[0] for x in m_forbidden)
            ]
            m_forms_not_allowed = {x[0] for x in m_forms}.difference(
                x[0] for x in m_forms_allowed)
        else:
            m_forms_allowed = []
            m_forms_not_allowed = set()

        if m_forms_not_allowed:
            Logger.log_more_verbose(
                "Skipped forbidden forms:\n    %s"
                % "\n    ".join(sorted(m_forms_not_allowed)))

        # Do not follow forms out of scope.
        m_forms_in_scope = []
        m_broken = []
        for url in m_forms_allowed:
            try:
                if url[0] in Config.audit_scope:
                    m_forms_in_scope.append(url)
            except Exception:
                m_broken.append(url[0])
        if m_broken:
            if len(m_broken) == 1:
                Logger.log_more_verbose(
                    "Skipped uncrawlable form: %s" % m_broken[0])
            else:
                Logger.log_more_verbose(
                    "Skipped uncrawlable forms:\n    %s"
                    % "\n    ".join(sorted(m_broken)))
        m_out_of_scope_count = \
            len(m_forms_allowed) - len(m_forms_in_scope) - len(m_broken)
        if m_out_of_scope_count:
            Logger.log_more_verbose(
                "Skipped %d forms out of scope." % m_out_of_scope_count)

        if m_forms_in_scope:
            Logger.log_verbose(
                "Found %d forms in URL: %s" % (len(m_forms_in_scope), m_url))
        else:
            Logger.log_more_verbose("No forms found in URL: %s" % m_url)

        # Convert to the URL data type.
        for u in m_forms_in_scope:
            try:
                url = u[0]
                method = u[1]
                params = {x["name"]: x["value"] for x in u[2]}
                m_resource = URL(url=url, referer=m_url, method=method,
                                 post_params=params)
            except Exception:
                warn(format_exc(), RuntimeWarning)
                continue
            m_resource.add_resource(info)
            m_return.append(m_resource)

        # Send the results.
        return m_return
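# The "wordlist_no_spider" filter above is a plain substring test of every
# extracted link against every forbidden keyword. A self-contained sketch
# of the same pattern (the keywords and URLs are made up):
forbidden = ["logout", "delete", "signoff"]
links = set([
    "http://example.com/index.php",
    "http://example.com/user/logout",
])

allowed = [u for u in links if not any(k in u for k in forbidden)]
skipped = links.difference(allowed)

print "Allowed:", allowed          # the index page
print "Skipped:", sorted(skipped)  # the logout link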
def check_download(self, url, name, content_length, content_type):

    # Only accept content when the content type header is present.
    if not content_type:
        Logger.log_more_verbose(
            "Skipping URL, missing content type: %s" % url)
        return False

    # Is the content length present?
    if content_length is not None:

        # Check that the file doesn't have 0 bytes.
        if content_length <= 0:
            Logger.log_more_verbose(
                "Skipping URL, empty content: %s" % url)
            return False

        # Check that the file is not too big.
        if content_type.strip().lower().startswith("text/"):
            if content_length > 100000:
                Logger.log_more_verbose(
                    "Skipping URL, content too large (%d bytes): %s"
                    % (content_length, url))
                return False
        else:
            if content_length > 5000000:
                Logger.log_more_verbose(
                    "Skipping URL, content too large (%d bytes): %s"
                    % (content_length, url))
                return False

        # Approved!
        return True

    # Content length is absent, but the URL likely points to a
    # directory index.
    parsed_url = parse_url(url)
    if not parsed_url.filename:
        # Approved!
        return True

    # Extension absent.
    if not parsed_url.extension:
        # Approved!
        return True

    # Match against a known list of valid HTML extensions.
    # See: http://en.wikipedia.org/wiki/List_of_file_formats#Webpage
    if parsed_url.extension in (
            ".xml", ".html", ".htm", ".xhtml", ".xht", ".mht", ".mhtml",
            ".maff", ".asp", ".aspx", ".bml", ".cfm", ".cgi", ".ihtml",
            ".jsp", ".las", ".lasso", ".lassoapp", ".pl", ".php", ".php3",
            ".phtml", ".rna", ".r", ".rnx", ".shtml", ".stm", ".atom",
            ".eml", ".jsonld", ".metalink", ".met", ".rss", ".markdown"):
        # Approved!
        return True

    # Is the URL path in the blacklist?
    m_forbidden = WordListLoader.get_wordlist_as_list(
        Config.plugin_config["wordlist_no_spider"])
    if any(x in url for x in m_forbidden):
        return False

    # Success!
    return True
def run(self, info):

    # Get the root domain only.
    root = info.root

    # Skip localhost.
    if root == "localhost":
        return

    # Skip root domains we've already processed.
    if self.state.put(root, True):
        return

    # Load the subdomains wordlist.
    try:
        wordlist = WordListLoader.get_wordlist_as_list(
            Config.plugin_args["wordlist"])
    except WordlistNotFound:
        Logger.log_error_verbose(
            "Wordlist '%s' not found." % Config.plugin_args["wordlist"])
        return
    except TypeError:
        Logger.log_error_verbose(
            "Wordlist '%s' is not a file." % Config.plugin_args["wordlist"])
        return

    # Load the subdomains whitelist.
    try:
        whitelist = WordListLoader.get_wordlist_as_list(
            Config.plugin_config["wordlist"])
    except WordlistNotFound:
        Logger.log_error_verbose(
            "Wordlist '%s' not found." % Config.plugin_config["wordlist"])
        return
    except TypeError:
        Logger.log_error_verbose(
            "Wordlist '%s' is not a file." % Config.plugin_config["wordlist"])
        return

    #
    # Set a baseline for dynamic subdomains.
    #
    m_virtual_domains = []
    for v in (generate_random_string(40) for x in xrange(3)):
        l_subdomain = ".".join((v, root))

        records = DNS.get_a(l_subdomain, also_CNAME=True)

        for rec in records:
            if rec.type == "CNAME":
                m_virtual_domains.append(rec.target)

    # If all 3 random subdomains resolve to the same CNAME target,
    # treat it as the wildcard base domain.
    m_base_domain = None
    if len(set(m_virtual_domains)) == 1:
        m_base_domain = m_virtual_domains[0]

    # Configure the progress notifier.
    self.progress.set_total(len(wordlist))
    self.progress.min_delta = 1  # notify every 1%

    # For each subdomain in the wordlist...
    found = 0
    results = []
    visited = set()
    for prefix in wordlist:

        # Mark as completed before actually trying.
        # We can't put this at the end of the loop where it belongs,
        # because the "continue" statements would skip over this too.
        self.progress.add_completed()

        # Build the domain name.
        name = ".".join((prefix, root))

        # Skip if out of scope.
        if name not in Config.audit_scope:
            continue

        # Resolve the subdomain.
        records = DNS.get_a(name, also_CNAME=True)
        records.extend(DNS.get_aaaa(name, also_CNAME=True))

        # If no DNS records were found, skip.
        if not records:
            continue

        # If the CNAME target is the wildcard base domain, skip.
        chk = [True for x in records
               if x.type == "CNAME" and x.target == m_base_domain]
        if len(chk) > 0 and all(chk):
            continue

        # We found a subdomain!
        found += 1
        Logger.log_more_verbose(
            "Subdomain found: %s" % name)

        # Create the Domain object for the subdomain.
        domain = Domain(name)
        results.append(domain)

        #
        # Check for domain disclosure.
        #
        if prefix not in whitelist:
            d = DomainDisclosure(
                domain,
                risk=0,
                level="low",
                title="Possible subdomain leak",
                description="A subdomain was discovered which may be"
                            " an unwanted information disclosure."
            )
            results.append(d)

        # For each DNS record, grab the address or name.
        # Skip duplicated records.
        for rec in records:
            if rec.type == "CNAME":
                location = rec.target
            elif rec.type in ("A", "AAAA"):
                location = rec.address
            else:  # should not happen...
                results.append(rec)
                domain.add_information(rec)
                continue
            if location not in visited:
                visited.add(location)
                results.append(rec)
                domain.add_information(rec)

    # Log the results.
    if found:
        Logger.log(
            "Found %d subdomains for root domain: %s" % (found, root))
    else:
        Logger.log_verbose(
            "No subdomains found for root domain: %s" % root)

    # Return the results.
    return results
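# The random-prefix baseline is what separates real subdomains from a
# wildcard DNS entry: if three long random labels all resolve to the same
# CNAME target, later probes hitting that target can be discarded. A
# standalone sketch of the idea; resolve_cname() is a hypothetical helper
# standing in for the framework's DNS API.
import random
import string

def random_label(length=40):
    # A 40-character random label is astronomically unlikely to collide
    # with a real subdomain.
    return "".join(random.choice(string.ascii_lowercase)
                   for _ in xrange(length))

def wildcard_baseline(root, resolve_cname):
    # resolve_cname(name) -> CNAME target string, or None (hypothetical).
    targets = set()
    for _ in xrange(3):
        target = resolve_cname("%s.%s" % (random_label(), root))
        if target:
            targets.add(target)
    # One unique target across all three random names means wildcard DNS.
    return targets.pop() if len(targets) == 1 else None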
urls = set()

for l_w in new_file:
    try:
        l_w = l_w[1:] if l_w.startswith("/") else l_w
        tmp_u = urljoin(m_url, l_w)
    except ValueError, e:
        # Report the offending entry; tmp_u may be unbound here.
        Logger.log_error("Failed to parse key from wordlist: '%s'" % l_w)
        continue
    urls.add(tmp_u)

for l_w in wordlist:
    # Use a copy of the wordlist to avoid modifying the original source.
    l_loaded_wordlist = WordListLoader.get_wordlist_as_list(l_w)

    for l_wo in l_loaded_wordlist:
        try:
            l_wo = l_wo[1:] if l_wo.startswith("/") else l_wo
            tmp_u = urljoin(m_url, l_wo)
        except ValueError, e:
            # Report the offending entry; tmp_u may be unbound here.
            Logger.log_error("Failed to parse key from wordlist: '%s'" % l_wo)
            continue
        urls.add(tmp_u)

Logger.log_verbose("Loaded %s URLs to test." % len(urls))

# Generate the error page.
error_response = get_error_page(m_url)
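# A self-contained sketch of the same size-and-extension gate using only
# the standard library; posixpath plays the role of the framework's
# parse_url() here, and the thresholds are copied from the callback above.
from urlparse import urlparse
import posixpath

def looks_crawlable(url, content_length, content_type):
    if content_length is not None:
        if content_length <= 0:
            return False
        # Text responses over ~100 KB, or anything over ~5 MB, is skipped.
        limit = 100000 if content_type.startswith("text/") else 5000000
        return content_length <= limit
    # No length header: accept directory indexes and extension-less paths,
    # plus a (shortened) list of known dynamic/HTML extensions.
    filename = posixpath.basename(urlparse(url).path)
    if not filename:
        return True
    extension = posixpath.splitext(filename)[1]
    return extension in ("", ".html", ".htm", ".php", ".asp", ".jsp")

print looks_crawlable("http://example.com/docs/", None, "text/html")  # True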
def analyze_html(self, info):

    #----------------------------------------------------------------------
    # Find suspicious malware links.

    Logger.log_more_verbose("Processing HTML: %s" % info.identity)

    # Load the malware wordlist.
    wordlist_filename = Config.plugin_config["malware_sites"]
    try:
        wordlist = WordListLoader.get_wordlist_as_list(
            wordlist_filename)
    except WordlistNotFound:
        Logger.log_error("Wordlist '%s' not found." % wordlist_filename)
        return
    except TypeError:
        Logger.log_error(
            "Wordlist '%s' is not a file." % wordlist_filename)
        return
    if not wordlist:
        Logger.log_error("Wordlist '%s' is empty." % wordlist_filename)
        return

    # Get the links. Accumulate across all linked resources so m_links
    # is always bound, even when no resources are linked.
    m_links = set()
    base_urls = set()
    for url in info.find_linked_data(Data.TYPE_RESOURCE,
                                     Resource.RESOURCE_URL):
        m_url = url.url
        base_urls.add(m_url)
        if info.information_type == Information.INFORMATION_HTML:
            m_links.update(extract_from_html(info.raw_data, m_url))
            m_links.update(extract_from_text(info.raw_data, m_url))
        elif info.information_type == Information.INFORMATION_PLAIN_TEXT:
            m_links.update(extract_from_text(info.raw_data, m_url))
        else:
            raise Exception("Internal error!")
    m_links.difference_update(base_urls)

    # If we have no links, abort now.
    if not m_links:
        Logger.log_verbose("No output links found.")
        return

    # Do not follow URLs that contain certain keywords.
    m_forbidden = WordListLoader.get_wordlist_as_raw(
        Config.plugin_config["wordlist_no_spider"])
    m_urls_allowed = {
        url for url in m_links
        if url and not any(x in url for x in m_forbidden)
    }

    # Keep only the outgoing links.
    m_output_links = []
    for url in m_urls_allowed:
        try:
            if url not in Config.audit_scope:
                m_output_links.append(url)
        except Exception, e:
            Logger.log_error_more_verbose(format_exc())
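# Note the inverted scope test relative to the spider: only links pointing
# *outside* the audit scope are kept, since those are the candidates to
# match against the malware-sites wordlist. A standalone sketch of that
# selection (the scope and URLs are made up):
scope_hosts = ("example.com", "www.example.com")
links = [
    "http://www.example.com/about",
    "http://evil-tracker.example/beacon.js",
]

# Crude substring containment stands in for Config.audit_scope membership.
output_links = [u for u in links
                if not any(h in u for h in scope_hosts)]
print output_links  # only the external (potentially malicious) link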