def recv_info(self, info):
    # Analyze a URL: flag suspicious path components / extensions, flag
    # high-entropy hostnames, then download the content looking for
    # output links to malware sites.
    # NOTE(review): re-indented from a whitespace-mangled source; the
    # structure was reconstructed from a parallel variant of this plugin
    # elsewhere in the project — confirm against the original file.
    m_parsed_url = info.parsed_url
    m_results = []

    #------------------------------------------------------------------
    # Find suspicious URLs by matching against known substrings.

    # Load wordlists
    m_wordlist_middle = WordListLoader.get_wordlist(Config.plugin_config['middle'])
    m_wordlist_extensions = WordListLoader.get_wordlist(Config.plugin_config['extensions'])

    # Add matching keywords at any positions of URL.
    m_results.extend([SuspiciousURLPath(info, x)
                      for x in m_wordlist_middle
                      if x in m_parsed_url.directory.split("/")
                      or x == m_parsed_url.filebase
                      or x == m_parsed_url.extension])

    # Add matching keywords at any positions of URL.
    m_results.extend([SuspiciousURLPath(info, x)
                      for x in m_wordlist_extensions
                      if m_parsed_url.extension == x])

    #------------------------------------------------------------------
    # Find suspicious URLs by calculating the Shannon entropy of the hostname.
    # Idea from: https://github.com/stricaud/urlweirdos/blob/master/src/urlw/plugins/shannon/__init__.py
    # TODO: test with unicode enabled hostnames!

    # Check the Shannon entropy for the hostname.
    hostname = info.parsed_url.hostname
    entropy = calculate_shannon_entropy(hostname)
    if entropy > 4.0:
        m_results.append( SuspiciousURLPath(info, hostname) )

    # Check the Shannon entropy for the subdomains.
    # Labels of 3 characters or fewer are skipped (too short for a
    # meaningful entropy estimate).
    for subdomain in info.parsed_url.hostname.split('.'):
        if len(subdomain) > 3:
            entropy = calculate_shannon_entropy(subdomain)
            if entropy > 4.0:
                m_results.append( SuspiciousURLPath(info, subdomain) )

    #------------------------------------------------------------------
    #
    # Get malware suspicious links
    #
    #------------------------------------------------------------------
    p = None
    m_url = info.url

    Logger.log_more_verbose("Looking for output links to malware sites")

    try:
        # Follow redirects either globally, or only for the first hop of
        # depth-0 resources, per audit configuration.
        allow_redirects = Config.audit_config.follow_redirects or \
            (info.depth == 0 and Config.audit_config.follow_first_redirect)
        p = download(m_url, self.check_download, allow_redirects=allow_redirects)
    except NetworkException,e:
        Logger.log_more_verbose("Error while processing %r: %s" % (m_url, str(e)))
    # NOTE(review): the visible portion of this function ends here;
    # m_results is built but never returned within view — the function
    # most likely continues past this chunk. Verify before editing.
def test_all_wordlist_property(self):
    """Verify that all_wordlists lists exactly the test wordlist."""
    # Point the LocalFile plugin at the temporary wordlist directory.
    plugin_dir = os.path.abspath(W_DIR)
    LocalFile._LocalFile__plugin_path = plugin_dir

    # Create the plugin wordlist fixtures on disk.
    _create_plugin_info()

    # Reset the loader cache, then reload from the test directory.
    WordListLoader._WordListLoader__store = {}
    WordListLoader._WordListLoader__load_wordlists(W_DIR)

    try:
        expected = ["test_wordlist.txt"]
        assert WordListLoader.all_wordlists == expected
    finally:
        # Always remove the fixtures, even if the assertion fails.
        _destroy_plugin_info()
def recv_info(self, info):
    """
    Flag suspicious URLs by keyword matching and hostname entropy.

    :param info: URL resource to analyze.
    :return: List of SuspiciousURL findings.
    :rtype: list(SuspiciousURL)
    """
    parsed = info.parsed_url
    findings = []

    #------------------------------------------------------------------
    # Find suspicious URLs by matching against known substrings.
    middle_words = WordListLoader.get_wordlist(
        Config.plugin_config['middle'])
    extension_words = WordListLoader.get_wordlist(
        Config.plugin_config['extensions'])

    # A keyword matches if it appears as a path segment, as the file
    # base name, or as the extension.
    path_segments = parsed.directory.split("/")
    for word in middle_words:
        if (word in path_segments or
                word == parsed.filebase or
                word == parsed.extension):
            findings.append(SuspiciousURL(info, word))

    # Extension keywords must match the extension exactly.
    for word in extension_words:
        if parsed.extension == word:
            findings.append(SuspiciousURL(info, word))

    #------------------------------------------------------------------
    # Find suspicious URLs by calculating the Shannon entropy of the hostname.
    # Idea from: https://github.com/stricaud/urlweirdos/blob/master/src/urlw/plugins/shannon/__init__.py
    # TODO: test with unicode enabled hostnames!
    hostname = info.parsed_url.hostname
    if calculate_shannon_entropy(hostname) > 4.0:
        findings.append(SuspiciousURL(info, hostname))

    # Same entropy check per subdomain label; labels of 3 characters or
    # fewer are skipped.
    for label in info.parsed_url.hostname.split('.'):
        if len(label) > 3 and calculate_shannon_entropy(label) > 4.0:
            findings.append(SuspiciousURL(info, label))

    #------------------------------------------------------------------
    return findings
def load_wordlists(wordlists):
    """
    Load the wordlists whose names are passed as parameter.

    This function receives a list of wordlist names, defined in the
    plugin configuration file, and returns a dict with instances of
    the wordlists.

    :param wordlists: list with wordlists names
    :type wordlists: list

    :returns: A dict with wordlists
    :rtype: dict
    """
    # Match each requested name against the wordlist families declared
    # in the plugin's extra configuration. Names that match no family
    # are silently skipped (same as the original behavior).
    selected = {}
    for requested in wordlists:
        for family, family_lists in Config.plugin_extra_config.iteritems():
            if family.lower() in requested.lower():
                selected[requested] = family_lists

    # Load the content of every selected wordlist.
    loaded = {}
    for name, paths in selected.iteritems():
        loaded[name] = [WordListLoader.get_wordlist_as_list(p)
                        for p in paths]
    return loaded
def load_wordlists(wordlists):
    """
    Load the wordlists whose names are passed as parameter.

    This function receives a list of wordlist names, defined in the
    plugin configuration file, and returns a dict with instances of
    the wordlists.

    :param wordlists: list with wordlists names
    :type wordlists: list

    :returns: A dict with wordlists
    :rtype: dict
    """
    # First pass: map each requested name to the wordlist paths of any
    # matching family from the plugin's extra configuration.
    chosen = {}
    for requested in wordlists:
        for family, family_lists in Config.plugin_extra_config.iteritems():
            if family.lower() in requested.lower():
                chosen[requested] = family_lists

    # Second pass: resolve every path into a wordlist instance.
    return dict(
        (name, [WordListLoader.get_wordlist(p) for p in paths])
        for name, paths in chosen.iteritems()
    )
def get_fingerprinting_wordlist(wordlist):
    """
    Load the wordlist of fingerprints and prepare the info in a dict.

    The returned dict uses the web server family name as key and an
    iterable of keywords related to that server as value.

    :return: The results of load of webservers keywords info and their
        related webservers.
    :rtype: tuple(WEBSERVER_KEYWORDS, RELATED_SERVES) <=>
        (dict(SERVERNAME: set(str(KEYWORDS))),
         dict(SERVER_NAME, set(str(RELATED_SERVERS)))
    """
    # Load the raw fingerprint wordlist as a keyed dictionary.
    raw_entries = WordListLoader.get_advanced_wordlist_as_dict(
        wordlist, separator=";", smart_load=True)

    # Resolve cross references between servers; references in the
    # wordlist are specified by a "#" prefix.
    already_parsed = set()
    related = defaultdict(set)
    keywords_by_server = extend_items(raw_entries, already_parsed, related)

    return (keywords_by_server, related)
def analyze_url(self, info):
    """
    Flag suspicious URLs by keyword matching and hostname entropy.

    :param info: URL resource to analyze.
    :return: List of SuspiciousURLPath findings.
    :rtype: list(SuspiciousURLPath)
    """
    parsed = info.parsed_url
    findings = []

    Logger.log_more_verbose("Processing URL: %s" % parsed)

    #----------------------------------------------------------------------
    # Find suspicious URLs by matching against known substrings.
    middle_words = WordListLoader.get_wordlist(
        Config.plugin_config['middle'])
    extension_words = WordListLoader.get_wordlist(
        Config.plugin_config['extensions'])

    # A keyword matches if it appears as a path segment, as the file
    # base name, or as the extension.
    path_segments = parsed.directory.split("/")
    for word in middle_words:
        if (word in path_segments or
                word == parsed.filebase or
                word == parsed.extension):
            findings.append(SuspiciousURLPath(info, word))

    # Extension keywords must match the extension exactly.
    for word in extension_words:
        if parsed.extension == word:
            findings.append(SuspiciousURLPath(info, word))

    #----------------------------------------------------------------------
    # Find suspicious URLs by calculating the Shannon entropy of the hostname.
    # Idea from: https://github.com/stricaud/urlweirdos/blob/master/src/urlw/plugins/shannon/__init__.py
    # TODO: test with unicode enabled hostnames!
    hostname = info.parsed_url.hostname
    if calculate_shannon_entropy(hostname) > 4.0:
        findings.append(SuspiciousURLPath(info, hostname))

    # Same entropy check per subdomain label; labels of 3 characters or
    # fewer are skipped.
    for label in info.parsed_url.hostname.split('.'):
        if len(label) > 3 and calculate_shannon_entropy(label) > 4.0:
            findings.append(SuspiciousURLPath(info, label))

    return findings
def __detect_wordpress_installation(self, url, wordpress_urls): """ Try to detect a wordpress instalation in the current path. :param url: URL where try to find the WordPress installation. :type url: str :param wordpress_urls: string with wordlist name with WordPress URLs. :type wordpress_urls: str :return: True if wordpress installation found. False otherwise. :rtype: bool """ Logger.log_more_verbose( "Detecting Wordpress instalation in URI: '%s'." % url) total_urls = 0 urls_found = 0 error_page = get_error_page(url).raw_data for u in WordListLoader.get_wordlist(wordpress_urls): total_urls += 1 tmp_url = urljoin(url, u) r = HTTP.get_url(tmp_url, use_cache=False) if r.status == "200": # Try to detect non-default error pages ratio = get_diff_ratio(r.raw_response, error_page) if ratio < 0.35: urls_found += 1 discard_data(r) # If Oks > 85% continue if (urls_found / float(total_urls)) < 0.85: # If all fails, make another last test url_wp_admin = urljoin(url, "wp-admin/") try: p = HTTP.get_url(url_wp_admin, use_cache=False, allow_redirects=False) if p: discard_data(p) except Exception, e: return False if p.status == "302" and "wp-login.php?redirect_to=" in p.headers.get( "Location", ""): return True else: return False
def __detect_wordpress_installation(self, url, wordpress_urls): """ Try to detect a wordpress instalation in the current path. :param url: URL where try to find the WordPress installation. :type url: str :param wordpress_urls: string with wordlist name with WordPress URLs. :type wordpress_urls: str :return: True if wordpress installation found. False otherwise. :rtype: bool """ Logger.log_more_verbose("Detecting Wordpress instalation in URI: '%s'." % url) total_urls = 0 urls_found = 0 error_page = get_error_page(url).raw_data for u in WordListLoader.get_wordlist(wordpress_urls): total_urls += 1 tmp_url = urljoin(url, u) r = HTTP.get_url(tmp_url, use_cache=False) if r.status == "200": # Try to detect non-default error pages ratio = get_diff_ratio(r.raw_response, error_page) if ratio < 0.35: urls_found += 1 discard_data(r) # If Oks > 85% continue if (urls_found / float(total_urls)) < 0.85: # If all fails, make another last test url_wp_admin = urljoin(url, "wp-admin/") try: p = HTTP.get_url(url_wp_admin, use_cache=False, allow_redirects=False) if p: discard_data(p) except Exception, e: return False if p.status == "302" and "wp-login.php?redirect_to=" in p.headers.get("Location", ""): return True else: return False
def test__get_wordlist_descriptor_exits_in_plugin_path(self):
    """Descriptor lookup resolves wordlists living in the plugin path."""
    # Config plugin
    LocalFile._LocalFile__plugin_path = os.path.abspath(W_DIR)

    # Create plugin wordlists
    _create_plugin_info()
    try:
        wordlist_file = WordListLoader._WordListLoader__get_wordlist_descriptor(W_PATH)

        # FIX: the original line was a bare comparison
        # (`wordlist_file == wordlist_file == open(...)`) whose result
        # was discarded, so the check never ran and the file handle
        # leaked. Assert on content instead and close the handle.
        with open(W_PATH, "rU") as expected:
            assert wordlist_file.read() == expected.read()

        # Checks if wordlist is non file
        pytest.raises(WordlistNotFound,
                      WordListLoader._WordListLoader__get_wordlist_descriptor,
                      "plugin_tmp_dir")
    finally:
        _destroy_plugin_info()
def recv_info(self, info):
    """
    Check a CNAME record against a wordlist of known-malicious domains.

    :param info: DNS register to check.
    :return: List of DNSPoisoning findings, or None on early exit.
    """
    # Make sure it's a CNAME record.
    # This is required because a plugin can't ask for a specific DNS
    # register type - all types are received together.
    if info.type != "CNAME":
        return

    # Get the root domain.
    root = info.target
    Logger.log_verbose(
        "Looking for poisoned domains at: *.%s" % root)

    # Load the malware URLs list.
    wordlist_filename = Config.plugin_args["wordlist"]
    try:
        wordlist = WordListLoader.get_advanced_wordlist_as_list(
            wordlist_filename)
    except WordlistNotFound:
        Logger.log_error_verbose(
            "Wordlist not found: " + wordlist_filename)
        return
    except TypeError:
        Logger.log_error_verbose(
            "Wordlist is not a file: " + wordlist_filename)
        return

    # A finding is produced only when the root domain itself appears
    # in the wordlist.
    results = []
    for domain in set([root]).intersection(set(wordlist)):
        poisoned = DNSPoisoning(domain)
        poisoned.add_information(info)
        results.append(poisoned)

    # Log how many results we got.
    if results:
        Logger.log_verbose(
            "Discovered %s poisoned domains." % len(results))
    else:
        Logger.log_verbose("No poisoned domains found.")

    # Return the results.
    return results
def test__get_wordlist_descriptor_exits_abs_path(self):
    """Descriptor lookup resolves wordlists given by absolute path."""
    # Config plugin
    LocalFile._LocalFile__plugin_path = os.getcwd()

    _create_plugin_info()
    try:
        wordlist_file = WordListLoader._WordListLoader__get_wordlist_descriptor(
            W_PATH)

        # FIX: the original line was a bare comparison
        # (`wordlist_file == open(...)`) whose result was discarded, so
        # the check never ran and the file handle leaked. Assert on
        # content instead and close the handle.
        with open(W_PATH, "rU") as expected:
            assert wordlist_file.read() == expected.read()

        # Checks if wordlist is non file
        pytest.raises(
            WordlistNotFound,
            WordListLoader._WordListLoader__get_wordlist_descriptor, W_DIR)
    finally:
        _destroy_plugin_info()
def run(self, info):
    """
    Check a CNAME record against a wordlist of known-malicious domains.

    :param info: DNS register to check.
    :return: List of DNSPoisoning findings, or None on early exit.
    """
    # Make sure it's a CNAME record.
    # This is required because a plugin can't ask for a specific DNS
    # register type - all types are received together.
    if info.type != "CNAME":
        return

    # Get the root domain.
    root = info.target
    Logger.log_verbose("Looking for poisoned domains at: *.%s" % root)

    # Load the malware URLs list.
    wordlist_filename = Config.plugin_args["wordlist"]
    try:
        wordlist = WordListLoader.get_advanced_wordlist_as_list(
            wordlist_filename)
    except WordlistNotFound:
        Logger.log_error_verbose("Wordlist not found: " + wordlist_filename)
        return
    except TypeError:
        Logger.log_error_verbose("Wordlist is not a file: " + wordlist_filename)
        return

    # A finding is produced only when the root domain itself appears
    # in the wordlist.
    matches = set([root]) & set(wordlist)
    results = [DNSPoisoning(info, domain) for domain in matches]

    # Log how many results we got.
    if results:
        Logger.log_verbose("Discovered %s poisoned domains." % len(results))
    else:
        Logger.log_verbose("No poisoned domains found.")

    # Return the results.
    return results
def ttl_platform_detection(self, main_url): """ This function tries to recognize the remote platform doing a ping and analyzing the TTL of IP header response. :param main_url: Base url to test. :type main_url: str :return: Possible platforms. :rtype: list(tuple(OS, version)) """ # Do a ping try: m_ttl = do_ping_and_receive_ttl(ParsedURL(main_url).hostname, 2) # Load words for the wordlist l_wordlist_instance = WordListLoader.get_advanced_wordlist_as_dict( Config.plugin_extra_config["Wordlist_ttl"]["ttl"]) # Looking for matches l_matches = l_wordlist_instance.matches_by_value(m_ttl) if l_matches: m_ret = {} for v in l_matches: sp = v.split("|") k = sp[0].strip() v = sp[1].strip() m_ret[k] = v return [(k, v) for k, v in m_ret.iteritems()] else: return {} except EnvironmentError: Logger.log_error( "[!] You can't run the platform detection plugin if you're not root." ) return {} except Exception, e: Logger.log_error("[!] Platform detection failed, reason: %s" % e) return {}
def get_list_from_wordlist(wordlist): """ Load the content of the wordlist and return a set with the content. :param wordlist: wordlist name. :type wordlist: str :return: a set with the results. :rtype result_output: set """ try: m_commom_wordlists = set() for v in Config.plugin_extra_config[wordlist].itervalues(): m_commom_wordlists.update(WordListLoader.get_wordlist_as_list(v)) return m_commom_wordlists except KeyError, e: Logger.log_error_more_verbose(str(e)) return set()
def get_list_from_wordlist(wordlist): """ Load the content of the wordlist and return a set with the content. :param wordlist: wordlist name. :type wordlist: str :return: a set with the results. :rtype result_output: set """ try: m_commom_wordlists = set() for v in Config.plugin_extra_config[wordlist].itervalues(): m_commom_wordlists.update(WordListLoader.get_advanced_wordlist_as_list(v)) return m_commom_wordlists except KeyError,e: Logger.log_error_more_verbose(str(e)) return set()
def recv_info(self, info):
    """
    Check a CNAME record against a wordlist of known-malicious domains.

    :param info: DNS register to check.
    :return: List of DNSPoisoning findings, or None on early exit.
    """
    # Make sure it's really a CNAME record.
    # This check should never fail anyway!
    if info.type != "CNAME":
        Logger.log_error_verbose("No CNAME found, skipped.")
        return

    # Get the root domain.
    root = info.target
    Logger.log_verbose("Looking for poisoned domains at: *.%s" % root)

    # Load the malware URLs list.
    wordlist_filename = Config.plugin_args["wordlist"]
    try:
        wordlist = WordListLoader.get_advanced_wordlist_as_list(wordlist_filename)
    except WordlistNotFound:
        Logger.log_error_verbose("Wordlist not found: " + wordlist_filename)
        return
    except TypeError:
        Logger.log_error_verbose("Wordlist is not a file: " + wordlist_filename)
        return

    # A finding is produced only when the root domain itself appears
    # in the wordlist.
    results = []
    for domain in set([root]).intersection(set(wordlist)):
        poisoned = DNSPoisoning(domain)
        poisoned.add_information(info)
        results.append(poisoned)

    # Log how many results we got.
    if results:
        Logger.log_verbose("Discovered %s poisoned domains." % len(results))
    else:
        Logger.log_verbose("No poisoned domains found.")

    # Return the results.
    return results
def ttl_platform_detection(self, main_url): """ This function tries to recognize the remote platform doing a ping and analyzing the TTL of IP header response. :param main_url: Base url to test. :type main_url: str :return: Possible platforms. :rtype: list(tuple(OS, version)) """ # Do a ping try: m_ttl = do_ping_and_receive_ttl(ParsedURL(main_url).hostname, 2) # Load words for the wordlist l_wordlist_instance = WordListLoader.get_advanced_wordlist_as_dict(Config.plugin_extra_config["Wordlist_ttl"]["ttl"]) # Looking for matches l_matches = l_wordlist_instance.matches_by_value(m_ttl) if l_matches: m_ret = {} for v in l_matches: sp = v.split("|") k = sp[0].strip() v = sp[1].strip() m_ret[k] = v return [(k,v) for k,v in m_ret.iteritems()] else: return {} except EnvironmentError: Logger.log_error("[!] You can't run the platform detection plugin if you're not root.") return {} except Exception, e: Logger.log_error("[!] Platform detection failed, reason: %s" % e) return {}
def recv_info(self, info): m_domain = info.root # Skips localhost if m_domain == "localhost": return m_return = None # Checks if the hostname has been already processed if not self.state.check(m_domain): # # Looking for # m_subdomains = WordListLoader.get_advanced_wordlist_as_list("subs_small.txt") # Run in parallel self.base_domain = m_domain self.completed = Counter(0) self.total = len(m_subdomains) r = pmap(self.get_subdomains_bruteforcer, m_subdomains, pool_size=10) # # Remove repeated # # The results m_domains = set() m_domains_add = m_domains.add m_domains_already = [] m_domains_already_append = m_domains_already.append m_ips = set() m_ips_add = m_ips.add m_ips_already = [] m_ips_already_append = m_ips_already.append if r: for doms in r: for dom in doms: # Domains if dom.type == "CNAME": if not dom.target in m_domains_already: m_domains_already_append(dom.target) if dom.target in Config.audit_scope: m_domains_add(dom) else: discard_data(dom) # IPs if dom.type == "A": if dom.address not in m_ips_already: m_ips_already_append(dom.address) m_ips_add(dom) # Unify m_domains.update(m_ips) m_return = m_domains # Add the information to the host map(info.add_information, m_return) # Set the domain as processed self.state.set(m_domain, True) Logger.log_verbose("DNS analyzer plugin found %d subdomains" % len(m_return)) # Write the info as more user friendly if Logger.MORE_VERBOSE: m_tmp = [] m_tmp_append = m_tmp.append for x in m_return: if getattr(x, "address", False): m_tmp_append("%s (%s)" % (getattr(x, "address"), str(x))) elif getattr(x, "target", False): m_tmp_append("%s (%s)" % (getattr(x, "target"), str(x))) else: m_tmp_append(str(x)) Logger.log_more_verbose("Subdomains found: \n\t+ %s" % "\n\t+ ".join(m_tmp)) return m_return
def analyze_html(self, info):
    # Extract output links from an HTML / plain-text resource and keep
    # only those outside the audit scope, to be checked against malware
    # site lists.
    # NOTE(review): re-indented from a whitespace-mangled source, and
    # the visible portion ends right after the scope-filter loop — the
    # function most likely continues past this chunk. Confirm.

    #----------------------------------------------------------------------
    # Get malware suspicious links.

    Logger.log_more_verbose("Processing HTML: %s" % info.identity)

    # Load the malware wordlist.
    wordlist_filename = Config.plugin_config["malware_sites"]
    try:
        wordlist = WordListLoader.get_advanced_wordlist_as_list(
            wordlist_filename)
    except WordlistNotFound:
        Logger.log_error("Wordlist '%s' not found.." % wordlist_filename)
        return
    except TypeError:
        Logger.log_error(
            "Wordlist '%s' is not a file." % wordlist_filename)
        return
    if not wordlist:
        # NOTE(review): empty wordlist is only logged, not an early
        # return — presumably intentional best-effort; confirm.
        Logger.log_error("Wordlist '%s' is empty." % wordlist_filename)

    Logger.log("1")

    # Get links
    base_urls = set()
    for url in info.find_linked_data(Data.TYPE_RESOURCE,
                                     Resource.RESOURCE_URL):
        m_url = url.url
        base_urls.add(m_url)
        if info.information_type == Information.INFORMATION_HTML:
            m_links = extract_from_html(info.raw_data, m_url)
            m_links.update( extract_from_text(info.raw_data, m_url) )
        elif info.information_type == Information.INFORMATION_PLAIN_TEXT:
            m_links = extract_from_text(info.raw_data, m_url)
        else:
            raise Exception("Internal error!")
    # Drop links that point back to the resource's own base URLs.
    m_links.difference_update(base_urls)

    Logger.log("2")

    # If we have no links, abort now
    if not m_links:
        Logger.log_verbose("No output links found.")
        return

    # Do not follow URLs that contain certain keywords
    m_forbidden = WordListLoader.get_wordlist(
        Config.plugin_config["wordlist_no_spider"])
    m_urls_allowed = {
        url for url in m_links
        if url and not any(x in url for x in m_forbidden)
    }

    Logger.log("3")

    # Get only output links (URLs outside the audit scope).
    m_output_links = []
    for url in m_urls_allowed:
        try:
            if url not in Config.audit_scope:
                m_output_links.append(url)
        except Exception, e:
            Logger.log_error_more_verbose(format_exc())
def check_download(self, url, name, content_length, content_type):
    """
    Decide whether a URL's content should be downloaded.

    :param url: URL being considered.
    :type url: str

    :param name: File name, if known. Unused here; kept for the
        download-callback interface.

    :param content_length: Value of the Content-Length header, or None
        when absent.
    :type content_length: int | None

    :param content_type: Value of the Content-Type header.
    :type content_type: str

    :return: True to download the content, False to skip it.
    :rtype: bool
    """

    # Only accept content when the content type header is present.
    if not content_type:
        Logger.log_more_verbose(
            "Skipping URL, missing content type: %s" % url)
        return False

    # Is the content length present?
    if content_length is not None:

        # Check the file doesn't have 0 bytes.
        if content_length <= 0:
            Logger.log_more_verbose(
                "Skipping URL, empty content: %s" % url)
            return False

        # Check the file is not too big: 100 KB for text content,
        # 5 MB for anything else.
        if content_type.strip().lower().startswith("text/"):
            if content_length > 100000:
                Logger.log_more_verbose(
                    "Skipping URL, content too large (%d bytes): %s"
                    % (content_length, url))
                return False
        else:
            if content_length > 5000000:
                Logger.log_more_verbose(
                    "Skipping URL, content too large (%d bytes): %s"
                    % (content_length, url))
                return False

        # Approved!
        return True

    # Content length absent but likely points to a directory index.
    parsed_url = parse_url(url)
    if not parsed_url.filename:
        # Approved!
        return True

    # Extension absent.
    if not parsed_url.extension:
        # Approved!
        return True

    # Match against a known list of valid HTML extensions.
    # See: http://en.wikipedia.org/wiki/List_of_file_formats#Webpage
    # FIX: removed the ".xml" entries duplicated three times in the
    # original tuple (membership semantics are unchanged).
    if parsed_url.extension in (
            ".xml", ".html", ".htm", ".xhtml", ".xht",
            ".mht", ".mhtml", ".maff", ".asp", ".aspx", ".bml",
            ".cfm", ".cgi", ".ihtml", ".jsp", ".las", ".lasso",
            ".lassoapp", ".pl", ".php", ".php3", ".phtml",
            ".rna", ".r", ".rnx", ".shtml", ".stm", ".atom",
            ".eml", ".jsonld", ".metalink", ".met", ".rss",
            ".markdown"):
        # Approved!
        return True

    # Is the URL path in the blacklist?
    # FIX: iterate the wordlist directly instead of materializing it
    # into a throwaway list first.
    m_forbidden = WordListLoader.get_wordlist(
        Config.plugin_config["wordlist_no_spider"])
    if any(x in url for x in m_forbidden):
        return False

    # Success!
    return True
class PredictablesDisclosureBruteforcer(TestingPlugin):
    # Bruteforces predictable file/folder names under a discovered
    # folder URL, using per-server wordlists selected by the web server
    # fingerprint.
    # NOTE(review): re-indented from a whitespace-mangled source; the
    # run() method appears to continue past the visible chunk (the
    # generated `urls` set is built but not yet used here). Confirm.

    #--------------------------------------------------------------------------
    def get_accepted_types(self):
        # Only folder URLs are bruteforced.
        return [FolderURL]

    #--------------------------------------------------------------------------
    def run(self, info):
        m_url = info.url

        Logger.log_more_verbose("Start to process URL: %r" % m_url)

        # Server specified by param?
        webserver_finger = Config.plugin_args.get("server_banner", None)

        if webserver_finger:
            server_canonical_name = webserver_finger
            servers_related = []  # Set with related web servers
        else:
            # User fingerprint info
            webserver_finger = info.get_associated_informations_by_category(
                WebServerFingerprint.information_type)
            if webserver_finger:
                webserver_finger = webserver_finger.pop()
                server_canonical_name = webserver_finger.canonical_name
                servers_related = webserver_finger.related  # Set with related web servers
            # NOTE(review): if no fingerprint is associated,
            # server_canonical_name / servers_related stay unbound; the
            # later `if webserver_finger:` guard avoids using them, but
            # confirm this invariant holds.

        wordlist = set()

        # Common wordlists
        try:
            w = Config.plugin_extra_config["common"]
            wordlist.update([l_w for l_w in w.itervalues()])
        except KeyError:
            Logger.log_error("Can't load common wordlists")

        # There is fingerprinting information?
        if webserver_finger:

            #
            # Load wordlists
            #
            wordlist_update = wordlist.update

            # Wordlist of server name
            try:
                w = Config.plugin_extra_config["%s_predictables" % server_canonical_name]
                wordlist_update([l_w for l_w in w.itervalues()])
            except KeyError:
                Logger.log_error(
                    "Can't load predictables wordlists for server: '%s'."
                    % server_canonical_name)

            # Wordlist of related with the server found
            try:
                for l_servers_related in servers_related:
                    w = Config.plugin_extra_config["%s_predictables" % l_servers_related]
                    wordlist_update([l_w for l_w in w.itervalues()])
            except KeyError, e:
                Logger.log_error(
                    "Can't load wordlists predictables wordlists for related webserver: '%s'"
                    % e)

        # Load content of wordlists
        urls = set()
        m_urls_update = urls.add

        for l_w in wordlist:
            # Use a copy of wordlist to avoid modify the original source
            l_loaded_wordlist = WordListLoader.get_wordlist_as_list(l_w)

            for l_wo in l_loaded_wordlist:
                try:
                    # Normalize leading "/" so urljoin keeps the folder.
                    l_wo = l_wo[1:] if l_wo.startswith("/") else l_wo
                    tmp_u = urljoin(m_url, l_wo)
                except ValueError, e:
                    # NOTE(review): tmp_u may be unbound/stale here if
                    # urljoin raised on the first entry — confirm.
                    Logger.log_error(
                        "Failed to parse key, from wordlist, '%s'" % tmp_u)
                    continue

                m_urls_update(tmp_u)
def http_analyzers(main_url, update_status_func, number_of_entries=4): """ Analyze HTTP headers for detect the web server. Return a list with most possible web servers. :param main_url: Base url to test. :type main_url: str :param update_status_func: function used to update the status of the process :type update_status_func: function :param number_of_entries: number of resutls tu return for most probable web servers detected. :type number_of_entries: int :return: Web server family, Web server version, Web server complete description, related web servers (as a dict('SERVER_RELATED' : set(RELATED_NAMES))), others web server with their probabilities as a dict(CONCRETE_WEB_SERVER, PROBABILITY) """ # Load wordlist directly related with a HTTP fields. # { HTTP_HEADER_FIELD : [wordlists] } m_wordlists_HTTP_fields = { "Accept-Ranges" : "accept-range", "Server" : "banner", "Cache-Control" : "cache-control", "Connection" : "connection", "Content-Type" : "content-type", "WWW-Authenticate" : "htaccess-realm", "Pragma" : "pragma", "X-Powered-By" : "x-powered-by" } m_actions = { 'GET' : { 'wordlist' : 'Wordlist_get' , 'weight' : 1 , 'protocol' : 'HTTP/1.1', 'method' : 'GET' , 'payload': '/' }, 'LONG_GET' : { 'wordlist' : 'Wordlist_get_long' , 'weight' : 1 , 'protocol' : 'HTTP/1.1', 'method' : 'GET' , 'payload': '/%s' % ('a' * 200) }, 'NOT_FOUND' : { 'wordlist' : 'Wordlist_get_notfound' , 'weight' : 2 , 'protocol' : 'HTTP/1.1', 'method' : 'GET' , 'payload': '/404_NOFOUND__X02KAS' }, 'HEAD' : { 'wordlist' : 'Wordlist_head' , 'weight' : 3 , 'protocol' : 'HTTP/1.1', 'method' : 'HEAD' , 'payload': '/' }, 'OPTIONS' : { 'wordlist' : 'Wordlist_options' , 'weight' : 2 , 'protocol' : 'HTTP/1.1', 'method' : 'OPTIONS' , 'payload': '/' }, 'DELETE' : { 'wordlist' : 'Wordlist_delete' , 'weight' : 5 , 'protocol' : 'HTTP/1.1', 'method' : 'DELETE' , 'payload': '/' }, 'TEST' : { 'wordlist' : 'Wordlist_attack' , 'weight' : 5 , 'protocol' : 'HTTP/1.1', 'method' : 'TEST' , 'payload': '/' }, 'INVALID' 
: { 'wordlist' : 'Wordlist_wrong_method' , 'weight' : 5 , 'protocol' : 'HTTP/9.8', 'method' : 'GET' , 'payload': '/' }, 'ATTACK' : { 'wordlist' : 'Wordlist_wrong_version' , 'weight' : 2 , 'protocol' : 'HTTP/1.1', 'method' : 'GET' , 'payload': "/etc/passwd?format=%%%%&xss=\x22><script>alert('xss');</script>&traversal=../../&sql='%20OR%201;"} } # Store results for others HTTP params m_d = ParsedURL(main_url) m_hostname = m_d.hostname m_port = m_d.port m_debug = False # Only for develop # Counter of banners. Used when others methods fails. m_banners_counter = Counter() # Score counter m_counters = HTTPAnalyzer(debug=m_debug) # Var used to update the status m_data_len = len(m_actions) i = 1 # element in process for l_action, v in m_actions.iteritems(): if m_debug: print "###########" l_method = v["method"] l_payload = v["payload"] l_proto = v["protocol"] l_wordlist = v["wordlist"] # Each type of probe hast different weight. # # Weights go from 0 - 5 # l_weight = v["weight"] # Make the URL l_url = urljoin(main_url, l_payload) # Make the raw request #l_raw_request = "%(method)s %(payload)s %(protocol)s\r\nHost: %(host)s:%(port)s\r\nConnection: Close\r\n\r\n" % ( l_raw_request = "%(method)s %(payload)s %(protocol)s\r\nHost: %(host)s\r\n\r\n" % ( { "method" : l_method, "payload" : l_payload, "protocol" : l_proto, "host" : m_hostname, "port" : m_port } ) if m_debug: print "REQUEST" print l_raw_request # Do the connection l_response = None try: m_raw_request = HTTP_Raw_Request(l_raw_request) discard_data(m_raw_request) l_response = HTTP.make_raw_request( host = m_hostname, port = m_port, raw_request = m_raw_request, callback = check_raw_response) if l_response: discard_data(l_response) except NetworkException,e: Logger.log_error_more_verbose("Server-Fingerprint plugin: No response for URL (%s) '%s'. Message: %s" % (l_method, l_url, str(e))) continue if not l_response: Logger.log_error_more_verbose("No response for URL '%s'." 
% l_url) continue if m_debug: print "RESPONSE" print l_response.raw_headers # Update the status update_status_func((float(i) * 100.0) / float(m_data_len)) Logger.log_more_verbose("Making '%s' test." % (l_wordlist)) i += 1 # Analyze for each wordlist # # Store the server banner try: m_banners_counter[l_response.headers["Server"]] += l_weight except KeyError: pass # # ===================== # HTTP directly related # ===================== # # for l_http_header_name, l_header_wordlist in m_wordlists_HTTP_fields.iteritems(): # Check if HTTP header field is in response if l_http_header_name not in l_response.headers: continue l_curr_header_value = l_response.headers[l_http_header_name] # Generate concrete wordlist name l_wordlist_path = Config.plugin_extra_config[l_wordlist][l_header_wordlist] # Load words for the wordlist l_wordlist_instance = WordListLoader.get_advanced_wordlist_as_dict(l_wordlist_path) # Looking for matches l_matches = l_wordlist_instance.matches_by_value(l_curr_header_value) m_counters.inc(l_matches, l_action, l_weight, l_http_header_name, message="HTTP field: " + l_curr_header_value) # # ======================= # HTTP INdirectly related # ======================= # # # # Status code # =========== # l_wordlist_instance = WordListLoader.get_advanced_wordlist_as_dict(Config.plugin_extra_config[l_wordlist]["statuscode"]) # Looking for matches l_matches = l_wordlist_instance.matches_by_value(l_response.status) m_counters.inc(l_matches, l_action, l_weight, "statuscode", message="Status code: " + l_response.status) # # Status text # =========== # l_wordlist_instance = WordListLoader.get_advanced_wordlist_as_dict(Config.plugin_extra_config[l_wordlist]["statustext"]) # Looking for matches l_matches = l_wordlist_instance.matches_by_value(l_response.reason) m_counters.inc(l_matches, l_action, l_weight, "statustext", message="Status text: " + l_response.reason) # # Header space # ============ # # Count the number of spaces between HTTP field name and their value, 
for example: # -> Server: Apache 1 # The number of spaces are: 1 # # -> Server:Apache 1 # The number of spaces are: 0 # l_wordlist_instance = WordListLoader.get_advanced_wordlist_as_dict(Config.plugin_extra_config[l_wordlist]["header-space"]) # Looking for matches try: l_http_value = l_response.headers[0] # get the value of first HTTP field l_spaces_num = str(abs(len(l_http_value) - len(l_http_value.lstrip()))) l_matches = l_wordlist_instance.matches_by_value(l_spaces_num) m_counters.inc(l_matches, l_action, l_weight, "header-space", message="Header space: " + l_spaces_num) except IndexError: print "index error header space" pass # # Header capitalafterdash # ======================= # # Look for non capitalized first letter of field name, for example: # -> Content-type: .... # Instead of: # -> Content-Type: .... # l_wordlist_instance = WordListLoader.get_advanced_wordlist_as_dict(Config.plugin_extra_config[l_wordlist]["header-capitalafterdash"]) # Looking for matches l_valid_fields = [x for x in l_response.headers.iterkeys() if "-" in x] if l_valid_fields: l_h = l_valid_fields[0] l_value = l_h.split("-")[1] # Get the second value: Content-type => type l_dush = None if l_value[0].isupper(): # Check first letter is lower l_dush = 1 else: l_dush = 0 l_matches = l_wordlist_instance.matches_by_value(l_dush) m_counters.inc(l_matches, l_action, l_weight, "header-capitalizedafterdush", message="Capital after dash: %s" % str(l_dush)) # # Header order # ============ # l_header_order = ','.join(l_response.headers.iterkeys()) l_wordlist_instance = WordListLoader.get_advanced_wordlist_as_dict(Config.plugin_extra_config[l_wordlist]["header-order"]) l_matches = l_wordlist_instance.matches_by_value(l_header_order) m_counters.inc(l_matches, l_action, l_weight, "header-order", message="Header order: " + l_header_order) # # Protocol name # ============ # # For a response like: # -> HTTP/1.0 200 OK # .... # # Get the 'HTTP' value. 
# try: l_proto = l_response.protocol # Get the 'HTTP' text from response, if available if l_proto: l_wordlist_instance = WordListLoader.get_advanced_wordlist_as_dict(Config.plugin_extra_config[l_wordlist]["protocol-name"]) l_matches = l_wordlist_instance.matches_by_value(l_proto) m_counters.inc(l_matches, l_action, l_weight, "proto-name", message="Proto name: " + l_proto) except IndexError: print "index error protocol name" pass # # Protocol version # ================ # # For a response like: # -> HTTP/1.0 200 OK # .... # # Get the '1.0' value. # try: l_version = l_response.version # Get the '1.0' text from response, if available if l_version: l_wordlist_instance = WordListLoader.get_advanced_wordlist_as_dict(Config.plugin_extra_config[l_wordlist]["protocol-version"]) l_matches = l_wordlist_instance.matches_by_value(l_version) m_counters.inc(l_matches, l_action, l_weight, "proto-version", message="Proto version: " + l_version) except IndexError: print "index error protocol version" pass if "ETag" in l_response.headers: l_etag_header = l_response.headers["ETag"] # # ETag length # ================ # l_etag_len = len(l_etag_header) l_wordlist_instance = WordListLoader.get_advanced_wordlist_as_dict(Config.plugin_extra_config[l_wordlist]["etag-legth"]) l_matches = l_wordlist_instance.matches_by_value(l_etag_len) m_counters.inc(l_matches, l_action, l_weight, "etag-length", message="ETag length: " + str(l_etag_len)) # # ETag Quotes # ================ # l_etag_striped = l_etag_header.strip() if l_etag_striped.startswith("\"") or l_etag_striped.startswith("'"): l_wordlist_instance = WordListLoader.get_advanced_wordlist_as_dict(Config.plugin_extra_config[l_wordlist]["etag-quotes"]) l_matches = l_wordlist_instance.matches_by_value(l_etag_striped[0]) m_counters.inc(l_matches, l_action, l_weight, "etag-quotes", message="Etag quotes: " + l_etag_striped[0]) if "Vary" in l_response.headers: l_vary_header = l_response.headers["Vary"] # # Vary delimiter # ================ # # Checks 
if Vary header delimiter is something like this: # -> Vary: Accept-Encoding,User-Agent # Or this: # -> Vary: Accept-Encoding, User-Agent # l_var_delimiter = ", " if l_vary_header.find(", ") else "," l_wordlist_instance = WordListLoader.get_advanced_wordlist_as_dict(Config.plugin_extra_config[l_wordlist]["vary-delimiter"]) l_matches = l_wordlist_instance.matches_by_value(l_var_delimiter) m_counters.inc(l_matches, l_action, l_weight, "vary-delimiter", message="Vary delimiter: " + l_var_delimiter) # # Vary capitalizer # ================ # # Checks if Vary header delimiter is something like this: # -> Vary: Accept-Encoding,user-Agent # Or this: # -> Vary: accept-encoding,user-agent # l_vary_capitalizer = str(0 if l_vary_header == l_vary_header.lower() else 1) l_wordlist_instance = WordListLoader.get_advanced_wordlist_as_dict(Config.plugin_extra_config[l_wordlist]["vary-capitalize"]) l_matches = l_wordlist_instance.matches_by_value(l_vary_capitalizer) m_counters.inc(l_matches, l_action, l_weight, "vary-capitalize", message="Vary capitalizer: " + l_vary_capitalizer) # # Vary order # ================ # # Checks order between vary values: # -> Vary: Accept-Encoding,user-Agent # Or this: # -> Vary: User-Agent,Accept-Encoding # l_wordlist_instance = WordListLoader.get_advanced_wordlist_as_dict(Config.plugin_extra_config[l_wordlist]["vary-order"]) l_matches = l_wordlist_instance.matches_by_value(l_vary_header) m_counters.inc(l_matches, l_action, l_weight, "vary-order", message="Vary order: " + l_vary_header) # # ===================== # HTTP specific options # ===================== # # if l_action == "HEAD": # # HEAD Options # ============ # l_option = l_response.headers.get("Allow") if l_option: l_wordlist_instance = WordListLoader.get_advanced_wordlist_as_dict(Config.plugin_extra_config[l_wordlist]["options-public"]) # Looking for matches l_matches = l_wordlist_instance.matches_by_value(l_option) m_counters.inc(l_matches, l_action, l_weight, "options-allow", message="HEAD 
option: " + l_option) if l_action == "OPTIONS" or l_action == "INVALID" or l_action == "DELETE": if "Allow" in l_response.headers: # # Options allow # ============= # l_option = l_response.headers.get("Allow") if l_option: l_wordlist_instance = WordListLoader.get_advanced_wordlist_as_dict(Config.plugin_extra_config[l_wordlist]["options-public"]) # Looking for matches l_matches = l_wordlist_instance.matches_by_value(l_option) m_counters.inc(l_matches, l_action, l_weight, "options-allow", message="OPTIONS allow: " + l_action + " # " + l_option) # # Allow delimiter # =============== # l_option = l_response.headers.get("Allow") if l_option: l_var_delimiter = ", " if l_option.find(", ") else "," l_wordlist_instance = WordListLoader.get_advanced_wordlist_as_dict(Config.plugin_extra_config[l_wordlist]["options-delimited"]) # Looking for matches l_matches = l_wordlist_instance.matches_by_value(l_var_delimiter) m_counters.inc(l_matches, l_action, l_weight, "options-delimiter", message="OPTION allow delimiter " + l_action + " # " + l_option) if "Public" in l_response.headers: # # Public response # =============== # l_option = l_response.headers.get("Public") if l_option: l_wordlist_instance = WordListLoader.get_advanced_wordlist_as_dict(Config.plugin_extra_config[l_wordlist]["options-public"]) # Looking for matches l_matches = l_wordlist_instance.matches_by_value(l_option) m_counters.inc(l_matches, l_action, l_weight, "options-public", message="Public response: " + l_action + " # " + l_option)
class Spider(TestingPlugin):
    """
    This plugin is a web spider.
    """


    #----------------------------------------------------------------------
    def get_accepted_info(self):
        # This plugin only processes Url resources.
        return [Url]


    #----------------------------------------------------------------------
    def recv_info(self, info):
        """
        Download the given URL, extract its links and return the new
        resources found (the downloaded data plus every in-scope link).

        :param info: URL to spider.
        :return: List of results (downloaded data, Url and Email resources).
        """
        m_return = []

        m_url = info.url

        Logger.log_verbose("Spidering URL: %r" % m_url)

        # Check if need follow first redirect
        p = None
        try:
            allow_redirects = Config.audit_config.follow_redirects or \
                (info.depth == 0 and Config.audit_config.follow_first_redirect)
            p = download(m_url, self.check_download, allow_redirects=allow_redirects)
        except NetworkException as e:
            Logger.log_more_verbose("Error while processing %r: %s" % (m_url, str(e)))

        if not p:
            return m_return

        # Send back the data
        m_return.append(p)

        # TODO: If it's a 301 response, get the Location header

        # Get links
        if p.information_type == Information.INFORMATION_HTML:
            m_links = extract_from_html(p.raw_data, m_url)
        else:
            m_links = extract_from_text(p.raw_data, m_url)

        # Do not spider the URL we just downloaded.
        try:
            m_links.remove(m_url)
        except Exception:
            pass

        # Do not follow URLs that contain certain keywords
        m_forbidden = WordListLoader.get_wordlist(
            Config.plugin_config["wordlist_no_spider"])
        m_urls_allowed = [
            url for url in m_links
            if not any(x in url for x in m_forbidden)
        ]
        m_urls_not_allowed = m_links.difference(m_urls_allowed)
        if m_urls_not_allowed:
            Logger.log_more_verbose("Skipped forbidden URLs:\n %s" % "\n ".join(sorted(m_urls_not_allowed)))

        # Do not follow URLs out of scope
        m_urls_in_scope = []
        m_broken = []
        for url in m_urls_allowed:
            try:
                if url in Config.audit_scope:
                    m_urls_in_scope.append(url)
            except Exception:
                # Malformed URL: scope check itself failed.
                m_broken.append(url)
        if m_broken:
            if len(m_broken) == 1:
                Logger.log_more_verbose("Skipped uncrawlable URL: %s" % m_broken[0])
            else:
                Logger.log_more_verbose("Skipped uncrawlable URLs:\n %s" % "\n ".join(sorted(m_broken)))
        m_out_of_scope_count = len(m_urls_allowed) - len(m_urls_in_scope) - len(m_broken)
        if m_out_of_scope_count:
            Logger.log_more_verbose("Skipped %d links out of scope." % m_out_of_scope_count)

        if m_urls_in_scope:
            Logger.log_verbose("Found %d links in URL: %s" % (len(m_urls_in_scope), m_url))
        else:
            Logger.log_verbose("No links found in URL: %s" % m_url)

        # Convert to Url data type
        for u in m_urls_in_scope:
            # FIX: the original left m_resource unbound (NameError) on the
            # very first parse failure, and re-appended the previous
            # resource when the scheme was neither mailto nor http(s).
            # Reset it each iteration and only report it when it was built.
            # Also: don't reuse "p", which still holds the download result.
            m_resource = None
            try:
                l_parsed = parse_url(u)
                if l_parsed.scheme == "mailto":
                    m_resource = Email(l_parsed.netloc)
                elif l_parsed.scheme in ("http", "https"):
                    m_resource = Url(url=u, referer=m_url)
            except Exception:
                warn(format_exc(), RuntimeWarning)
            if m_resource is not None:
                m_resource.add_resource(info)
                m_return.append(m_resource)

        # Send the results
        return m_return
def recv_info(self, info):
    """
    Brute force subdomains of the root domain of *info* using the
    wordlist configured in Config.plugin_args["wordlist"].

    :param info: Domain resource; only its root domain is used.
    :return: List of Domain objects plus their DNS registers, or
             None when the domain was skipped or the wordlist failed
             to load.
    """
    # Get the root domain only.
    root = info.root

    # Skip localhost.
    if root == "localhost":
        return

    # Skip root domains we've already processed.
    # (self.state.put returns a truthy value when the key already existed.
    #  NOTE(review): inferred from usage — confirm against the state API.)
    if self.state.put(root, True):
        return

    # Load the subdomains wordlist.
    try:
        wordlist = WordListLoader.get_advanced_wordlist_as_list(Config.plugin_args["wordlist"])
    except WordlistNotFound:
        Logger.log_error_verbose("Wordlist '%s' not found.." % Config.plugin_args["wordlist"])
        return
    except TypeError:
        Logger.log_error_verbose("Wordlist '%s' is not a file." % Config.plugin_args["wordlist"])
        return

    # Configure the progress notifier.
    self.progress.set_total(len(wordlist))
    self.progress.min_delta = 1  # notify every 1%

    # For each subdomain in the wordlist...
    found = 0        # number of live subdomains discovered
    results = []     # Domain objects and DNS registers to return
    visited = set()  # addresses/targets already reported (dedup)
    for prefix in wordlist:

        # Mark as completed before actually trying.
        # We can't put this at the end of the loop where it belongs,
        # because the "continue" statements would skip over this too.
        self.progress.add_completed()

        # Build the domain name.
        name = ".".join((prefix, root))

        # Skip if out of scope.
        if name not in Config.audit_scope:
            continue

        # Resolve the subdomain (A and AAAA, following CNAMEs).
        records = DNS.get_a(name, also_CNAME=True)
        records.extend( DNS.get_aaaa(name, also_CNAME=True) )

        # If no DNS records were found, skip.
        if not records:
            continue

        # We found a subdomain!
        found += 1
        Logger.log_more_verbose(
            "Subdomain found: %s" % name)

        # Create the Domain object for the subdomain.
        domain = Domain(name)
        results.append(domain)

        # For each DNs record, grab the address or name.
        # Skip duplicated records.
        for rec in records:
            if rec.type == "CNAME":
                location = rec.target
            elif rec.type in ("A", "AAAA"):
                location = rec.address
            else: # should not happen...
                # Unknown record type: report it unconditionally.
                results.append(rec)
                domain.add_information(rec)
                continue
            if location not in visited:
                visited.add(location)
                results.append(rec)
                domain.add_information(rec)

    # Log the results.
    if found:
        Logger.log(
            "Found %d subdomains for root domain: %s"
            % (found, root))
    else:
        Logger.log_verbose(
            "No subdomains found for root domain: %s" % root)

    # Return the results.
    return results
def http_analyzers(main_url, update_status_func, number_of_entries=4):
    """
    Analyze HTTP headers to detect the web server. Return a list with most possible web servers.

    :param main_url: Base url to test.
    :type main_url: str

    :param update_status_func: function used to update the status of the process
    :type update_status_func: function

    :param number_of_entries: number of results to return for most probable web servers detected.
    :type number_of_entries: int

    :return: Web server family, Web server version, Web server complete description, related web servers (as a dict('SERVER_RELATED' : set(RELATED_NAMES))), others web server with their probabilities as a dict(CONCRETE_WEB_SERVER, PROBABILITY)
    """

    # Wordlists directly related with a HTTP header field.
    # { HTTP_HEADER_FIELD : wordlist-config-key }
    m_wordlists_HTTP_fields = {
        "Accept-Ranges"    : "accept-range",
        "Server"           : "banner",
        "Cache-Control"    : "cache-control",
        "Connection"       : "connection",
        "Content-Type"     : "content-type",
        "WWW-Authenticate" : "htaccess-realm",
        "Pragma"           : "pragma",
        "X-Powered-By"     : "x-powered-by"
    }

    # Probes to send. Each probe has its own wordlist set and weight.
    m_actions = {
        'GET'       : { 'wordlist' : 'Wordlist_get',           'weight' : 1, 'protocol' : 'HTTP/1.1', 'method' : 'GET',     'payload' : '/' },
        'LONG_GET'  : { 'wordlist' : 'Wordlist_get_long',      'weight' : 1, 'protocol' : 'HTTP/1.1', 'method' : 'GET',     'payload' : '/%s' % ('a' * 200) },
        'NOT_FOUND' : { 'wordlist' : 'Wordlist_get_notfound',  'weight' : 2, 'protocol' : 'HTTP/1.1', 'method' : 'GET',     'payload' : '/404_NOFOUND__X02KAS' },
        'HEAD'      : { 'wordlist' : 'Wordlist_head',          'weight' : 3, 'protocol' : 'HTTP/1.1', 'method' : 'HEAD',    'payload' : '/' },
        'OPTIONS'   : { 'wordlist' : 'Wordlist_options',       'weight' : 2, 'protocol' : 'HTTP/1.1', 'method' : 'OPTIONS', 'payload' : '/' },
        'DELETE'    : { 'wordlist' : 'Wordlist_delete',        'weight' : 5, 'protocol' : 'HTTP/1.1', 'method' : 'DELETE',  'payload' : '/' },
        'TEST'      : { 'wordlist' : 'Wordlist_attack',        'weight' : 5, 'protocol' : 'HTTP/1.1', 'method' : 'TEST',    'payload' : '/' },
        'INVALID'   : { 'wordlist' : 'Wordlist_wrong_method',  'weight' : 5, 'protocol' : 'HTTP/9.8', 'method' : 'GET',     'payload' : '/' },
        'ATTACK'    : { 'wordlist' : 'Wordlist_wrong_version', 'weight' : 2, 'protocol' : 'HTTP/1.1', 'method' : 'GET',     'payload' : "/etc/passwd?format=%%%%&xss=\x22><script>alert('xss');</script>&traversal=../../&sql='%20OR%201;" }
    }

    # Store results for others HTTP params
    m_d        = ParsedURL(main_url)
    m_hostname = m_d.hostname
    m_port     = m_d.port
    m_debug    = False  # Only for develop

    # Counter of banners. Used when others methods fails.
    m_banners_counter = Counter()

    # Score counter
    m_counters = HTTPAnalyzer(debug=m_debug)

    # Vars used to update the status
    m_data_len = len(m_actions)
    i          = 1  # element in process

    for l_action, v in m_actions.iteritems():
        if m_debug:
            print("###########")
        l_method   = v["method"]
        l_payload  = v["payload"]
        l_proto    = v["protocol"]
        l_wordlist = v["wordlist"]

        # Each type of probe has a different weight.
        #
        # Weights go from 0 - 5
        #
        l_weight = v["weight"]

        # Make the URL
        l_url = urljoin(main_url, l_payload)

        # Make the raw request
        l_raw_request = "%(method)s %(payload)s %(protocol)s\r\nHost: %(host)s\r\n\r\n" % (
            {
                "method"   : l_method,
                "payload"  : l_payload,
                "protocol" : l_proto,
                "host"     : m_hostname
            }
        )
        if m_debug:
            print("REQUEST")
            print(l_raw_request)

        # Do the connection
        l_response = None
        try:
            m_raw_request = HTTP_Raw_Request(l_raw_request)
            discard_data(m_raw_request)
            l_response = HTTP.make_raw_request(
                host        = m_hostname,
                port        = m_port,
                raw_request = m_raw_request,
                callback    = check_raw_response)
            if l_response:
                discard_data(l_response)
        except NetworkException as e:
            Logger.log_error_more_verbose("Server-Fingerprint plugin: No response for URL (%s) '%s'. Message: %s" % (l_method, l_url, str(e)))
            continue

        if not l_response:
            Logger.log_error_more_verbose("No response for URL '%s'." % l_url)
            continue

        if m_debug:
            print("RESPONSE")
            print(l_response.raw_headers)

        # Update the status
        update_status_func((float(i) * 100.0) / float(m_data_len))
        Logger.log_more_verbose("Making '%s' test." % (l_wordlist))
        i += 1

        # Analyze for each wordlist
        #
        # Store the server banner
        try:
            m_banners_counter[l_response.headers["Server"]] += l_weight
        except KeyError:
            pass

        #
        # =====================
        # HTTP directly related
        # =====================
        #
        for l_http_header_name, l_header_wordlist in m_wordlists_HTTP_fields.iteritems():

            # Check if HTTP header field is in response
            if l_http_header_name not in l_response.headers:
                continue

            l_curr_header_value = l_response.headers[l_http_header_name]

            # Generate concrete wordlist name
            l_wordlist_path = Config.plugin_extra_config[l_wordlist][l_header_wordlist]

            # Load words for the wordlist
            l_wordlist_instance = WordListLoader.get_wordlist_as_dict(l_wordlist_path)
            # Looking for matches
            l_matches = l_wordlist_instance.matches_by_value(l_curr_header_value)

            m_counters.inc(l_matches, l_action, l_weight, l_http_header_name, message="HTTP field: " + l_curr_header_value)

        #
        # =======================
        # HTTP INdirectly related
        # =======================
        #

        #
        # Status code
        # ===========
        #
        l_wordlist_instance = WordListLoader.get_wordlist_as_dict(Config.plugin_extra_config[l_wordlist]["statuscode"])
        # Looking for matches
        l_matches = l_wordlist_instance.matches_by_value(l_response.status)

        m_counters.inc(l_matches, l_action, l_weight, "statuscode", message="Status code: " + l_response.status)

        #
        # Status text
        # ===========
        #
        l_wordlist_instance = WordListLoader.get_wordlist_as_dict(Config.plugin_extra_config[l_wordlist]["statustext"])
        # Looking for matches
        l_matches = l_wordlist_instance.matches_by_value(l_response.reason)

        m_counters.inc(l_matches, l_action, l_weight, "statustext", message="Status text: " + l_response.reason)

        #
        # Header space
        # ============
        #
        # Count the number of spaces between HTTP field name and their value, for example:
        # -> Server: Apache 1
        # The number of spaces are: 1
        #
        # -> Server:Apache 1
        # The number of spaces are: 0
        #
        l_wordlist_instance = WordListLoader.get_wordlist_as_dict(Config.plugin_extra_config[l_wordlist]["header-space"])
        # Looking for matches
        try:
            l_http_value = l_response.headers[0]  # get the value of first HTTP field
            l_spaces_num = str(abs(len(l_http_value) - len(l_http_value.lstrip())))
            l_matches    = l_wordlist_instance.matches_by_value(l_spaces_num)

            m_counters.inc(l_matches, l_action, l_weight, "header-space", message="Header space: " + l_spaces_num)
        except IndexError:
            print("index error header space")

        #
        # Header capitalafterdash
        # =======================
        #
        # Look for non capitalized first letter of field name, for example:
        # -> Content-type: ....
        # Instead of:
        # -> Content-Type: ....
        #
        l_wordlist_instance = WordListLoader.get_wordlist_as_dict(Config.plugin_extra_config[l_wordlist]["header-capitalafterdash"])
        # Looking for matches
        l_valid_fields = [x for x in l_response.headers.iterkeys() if "-" in x]
        if l_valid_fields:
            l_h = l_valid_fields[0]

            l_value = l_h.split("-")[1]  # Get the second value: Content-type => type
            # 1 => first letter after the dash is capitalized, 0 => lowercase.
            l_dush = 1 if l_value[0].isupper() else 0

            l_matches = l_wordlist_instance.matches_by_value(l_dush)

            m_counters.inc(l_matches, l_action, l_weight, "header-capitalizedafterdush", message="Capital after dash: %s" % str(l_dush))

        #
        # Header order
        # ============
        #
        l_header_order = ','.join(l_response.headers.iterkeys())

        l_wordlist_instance = WordListLoader.get_wordlist_as_dict(Config.plugin_extra_config[l_wordlist]["header-order"])
        l_matches = l_wordlist_instance.matches_by_value(l_header_order)

        m_counters.inc(l_matches, l_action, l_weight, "header-order", message="Header order: " + l_header_order)

        #
        # Protocol name
        # ============
        #
        # For a response like:
        # -> HTTP/1.0 200 OK
        # Get the 'HTTP' value.
        #
        try:
            l_proto = l_response.protocol  # Get the 'HTTP' text from response, if available
            if l_proto:
                l_wordlist_instance = WordListLoader.get_wordlist_as_dict(Config.plugin_extra_config[l_wordlist]["protocol-name"])
                l_matches = l_wordlist_instance.matches_by_value(l_proto)

                m_counters.inc(l_matches, l_action, l_weight, "proto-name", message="Proto name: " + l_proto)
        except IndexError:
            print("index error protocol name")

        #
        # Protocol version
        # ================
        #
        # For a response like:
        # -> HTTP/1.0 200 OK
        # Get the '1.0' value.
        #
        try:
            l_version = l_response.version  # Get the '1.0' text from response, if available
            if l_version:
                l_wordlist_instance = WordListLoader.get_wordlist_as_dict(Config.plugin_extra_config[l_wordlist]["protocol-version"])
                l_matches = l_wordlist_instance.matches_by_value(l_version)

                m_counters.inc(l_matches, l_action, l_weight, "proto-version", message="Proto version: " + l_version)
        except IndexError:
            print("index error protocol version")

        if "ETag" in l_response.headers:
            l_etag_header = l_response.headers["ETag"]
            #
            # ETag length
            # ================
            #
            l_etag_len = len(l_etag_header)
            # NOTE: "etag-legth" is the key actually present in the plugin
            # config files; do not "fix" the spelling here.
            l_wordlist_instance = WordListLoader.get_wordlist_as_dict(Config.plugin_extra_config[l_wordlist]["etag-legth"])
            l_matches = l_wordlist_instance.matches_by_value(l_etag_len)

            m_counters.inc(l_matches, l_action, l_weight, "etag-length", message="ETag length: " + str(l_etag_len))

            #
            # ETag Quotes
            # ================
            #
            l_etag_striped = l_etag_header.strip()
            if l_etag_striped.startswith("\"") or l_etag_striped.startswith("'"):
                l_wordlist_instance = WordListLoader.get_wordlist_as_dict(Config.plugin_extra_config[l_wordlist]["etag-quotes"])
                l_matches = l_wordlist_instance.matches_by_value(l_etag_striped[0])

                m_counters.inc(l_matches, l_action, l_weight, "etag-quotes", message="Etag quotes: " + l_etag_striped[0])

        if "Vary" in l_response.headers:
            l_vary_header = l_response.headers["Vary"]
            #
            # Vary delimiter
            # ================
            #
            # Checks if Vary header delimiter is something like this:
            # -> Vary: Accept-Encoding,User-Agent
            # Or this:
            # -> Vary: Accept-Encoding, User-Agent
            #
            # FIX: the original used "l_vary_header.find(', ')" as a boolean,
            # but str.find returns -1 (truthy) when NOT found and 0 (falsy)
            # when found at position 0 — exactly backwards. Use "in" instead.
            l_var_delimiter = ", " if ", " in l_vary_header else ","
            l_wordlist_instance = WordListLoader.get_wordlist_as_dict(Config.plugin_extra_config[l_wordlist]["vary-delimiter"])
            l_matches = l_wordlist_instance.matches_by_value(l_var_delimiter)

            m_counters.inc(l_matches, l_action, l_weight, "vary-delimiter", message="Vary delimiter: " + l_var_delimiter)

            #
            # Vary capitalizer
            # ================
            #
            # Checks if Vary header values are lowercased:
            # -> Vary: Accept-Encoding,user-Agent
            # Or this:
            # -> Vary: accept-encoding,user-agent
            #
            l_vary_capitalizer = str(0 if l_vary_header == l_vary_header.lower() else 1)
            l_wordlist_instance = WordListLoader.get_wordlist_as_dict(Config.plugin_extra_config[l_wordlist]["vary-capitalize"])
            l_matches = l_wordlist_instance.matches_by_value(l_vary_capitalizer)

            m_counters.inc(l_matches, l_action, l_weight, "vary-capitalize", message="Vary capitalizer: " + l_vary_capitalizer)

            #
            # Vary order
            # ================
            #
            # Checks order between vary values:
            # -> Vary: Accept-Encoding,user-Agent
            # Or this:
            # -> Vary: User-Agent,Accept-Encoding
            #
            l_wordlist_instance = WordListLoader.get_wordlist_as_dict(Config.plugin_extra_config[l_wordlist]["vary-order"])
            l_matches = l_wordlist_instance.matches_by_value(l_vary_header)

            m_counters.inc(l_matches, l_action, l_weight, "vary-order", message="Vary order: " + l_vary_header)

        #
        # =====================
        # HTTP specific options
        # =====================
        #
        if l_action == "HEAD":
            #
            # HEAD Options
            # ============
            #
            l_option = l_response.headers.get("Allow")
            if l_option:
                l_wordlist_instance = WordListLoader.get_wordlist_as_dict(Config.plugin_extra_config[l_wordlist]["options-public"])
                # Looking for matches
                l_matches = l_wordlist_instance.matches_by_value(l_option)

                m_counters.inc(l_matches, l_action, l_weight, "options-allow", message="HEAD option: " + l_option)

        if l_action == "OPTIONS" or l_action == "INVALID" or l_action == "DELETE":
            if "Allow" in l_response.headers:
                #
                # Options allow
                # =============
                #
                l_option = l_response.headers.get("Allow")
                if l_option:
                    l_wordlist_instance = WordListLoader.get_wordlist_as_dict(Config.plugin_extra_config[l_wordlist]["options-public"])
                    # Looking for matches
                    l_matches = l_wordlist_instance.matches_by_value(l_option)

                    m_counters.inc(l_matches, l_action, l_weight, "options-allow", message="OPTIONS allow: " + l_action + " # " + l_option)

                #
                # Allow delimiter
                # ===============
                #
                l_option = l_response.headers.get("Allow")
                if l_option:
                    # FIX: same str.find truthiness bug as the Vary delimiter.
                    l_var_delimiter = ", " if ", " in l_option else ","
                    l_wordlist_instance = WordListLoader.get_wordlist_as_dict(Config.plugin_extra_config[l_wordlist]["options-delimited"])
                    # Looking for matches
                    l_matches = l_wordlist_instance.matches_by_value(l_var_delimiter)

                    m_counters.inc(l_matches, l_action, l_weight, "options-delimiter", message="OPTION allow delimiter " + l_action + " # " + l_option)

            if "Public" in l_response.headers:
                #
                # Public response
                # ===============
                #
                l_option = l_response.headers.get("Public")
                if l_option:
                    l_wordlist_instance = WordListLoader.get_wordlist_as_dict(Config.plugin_extra_config[l_wordlist]["options-public"])
                    # Looking for matches
                    l_matches = l_wordlist_instance.matches_by_value(l_option)

                    m_counters.inc(l_matches, l_action, l_weight, "options-public", message="Public response: " + l_action + " # " + l_option)
def recv_info(self, info):
    """
    Look for predictable resources on the target URL, choosing the
    wordlists according to the remote web server fingerprint.

    :param info: Url resource to process.
    :return: Results produced by generate_results(), or None when there
             is not enough information to build the matching analyzer.
    """
    m_url = info.url

    Logger.log_more_verbose("Start to process URL: %r" % m_url)

    #
    # Get the remote web server fingerprint
    #
    m_webserver_finger = info.get_associated_informations_by_category(WebServerFingerprint.information_type)

    m_wordlist = set()

    # There is fingerprinting information?
    if m_webserver_finger:

        m_webserver_finger = m_webserver_finger.pop()

        m_server_canonical_name = m_webserver_finger.name_canonical
        m_servers_related       = m_webserver_finger.related  # Set with related web servers

        #
        # Load wordlists
        #
        m_wordlist_update = m_wordlist.update

        # Common wordlist
        try:
            w = Config.plugin_extra_config["common"]
            m_wordlist_update(w.itervalues())
        except KeyError:
            pass

        # Wordlist of server name
        try:
            w = Config.plugin_extra_config["%s_predictables" % m_server_canonical_name]
            m_wordlist_update(w.itervalues())
        except KeyError:
            pass

        # Wordlists of the servers related with the one found.
        # FIX: the original looked up m_server_canonical_name inside this
        # loop, so the related servers' wordlists were never loaded.
        try:
            for l_server_related in m_servers_related:
                w = Config.plugin_extra_config["%s_predictables" % l_server_related]
                m_wordlist_update(w.itervalues())
        except KeyError:
            pass
    else:
        # Common wordlists
        try:
            w = Config.plugin_extra_config["common"]
            m_wordlist.update(w.itervalues())
        except KeyError:
            pass

    # Load content of wordlists
    m_urls        = set()
    m_urls_update = m_urls.update

    # Fixed Url: make sure it ends with "/" so urljoin appends.
    m_url_fixed = m_url if m_url.endswith("/") else "%s/" % m_url

    for l_w in m_wordlist:
        # Use a copy of wordlist to avoid modify the original source
        l_loaded_wordlist = WordListLoader.get_advanced_wordlist_as_list(l_w)

        m_urls_update(
            urljoin(m_url_fixed, (l_wo[1:] if l_wo.startswith("/") else l_wo))
            for l_wo in l_loaded_wordlist)

    # Generates the error page
    m_error_response = get_error_page(m_url)

    # Create the matching analyzer
    try:
        m_store_info = MatchingAnalyzer(m_error_response, min_ratio=0.65)
    except ValueError:
        # There is no information
        return

    # Create the partial funs
    _f = partial(process_url,
                 severity_vectors['predictables'],
                 get_http_method(m_url),
                 m_store_info,
                 self.update_status,
                 len(m_urls))

    # Process the URLs
    for i, l_url in enumerate(m_urls):
        _f((i, l_url))

    # Generate and return the results.
    return generate_results(m_store_info.unique_texts)
def test__load_wordlists_input(self):
    """After reloading the wordlist directory, the store must not be empty."""
    # Force a reload of the bundled wordlist directory
    # (name-mangled access to the private class method).
    WordListLoader._WordListLoader__load_wordlists("../../wordlist")

    # The private store must have picked up at least one wordlist.
    store = WordListLoader._WordListLoader__store
    assert len(store) != 0
def run(self, info):
    """
    Brute force subdomains of the root domain of *info*, filtering out
    wildcard-DNS artifacts, and report possible subdomain disclosures
    for names not present in the whitelist.

    :param info: Domain resource; only its root domain is used.
    :return: List of Domain, DomainDisclosure and DNS register objects,
             or None when the domain was skipped or a wordlist failed
             to load.
    """
    # Get the root domain only.
    root = info.root

    # Skip localhost.
    if root == "localhost":
        return

    # Skip root domains we've already processed.
    if self.state.put(root, True):
        return

    # Load the subdomains wordlist.
    try:
        wordlist = WordListLoader.get_wordlist_as_list(Config.plugin_args["wordlist"])
    except WordlistNotFound:
        Logger.log_error_verbose("Wordlist '%s' not found.." % Config.plugin_args["wordlist"])
        return
    except TypeError:
        Logger.log_error_verbose("Wordlist '%s' is not a file." % Config.plugin_args["wordlist"])
        return

    # Load the subdomains whitelist.
    try:
        whitelist = WordListLoader.get_wordlist_as_list(Config.plugin_config["wordlist"])
    except WordlistNotFound:
        Logger.log_error_verbose("Wordlist '%s' not found.." % Config.plugin_config["wordlist"])
        return
    except TypeError:
        Logger.log_error_verbose("Wordlist '%s' is not a file." % Config.plugin_config["wordlist"])
        return

    #
    # Set a base line for dinamyc sub-domains
    #
    # Resolve a few random (surely nonexistent) subdomains; if a wildcard
    # DNS is configured they will all CNAME to the same target.
    m_virtual_domains = []
    for v in (generate_random_string(40) for x in xrange(3)):
        l_subdomain = ".".join((v, root))

        records = DNS.get_a(l_subdomain, also_CNAME=True)

        for rec in records:
            if rec.type == "CNAME":
                m_virtual_domains.append(rec.target)

    # If 3 subdomains are the same, set the base domain
    # NOTE(review): this also triggers when only one or two of the three
    # probes returned a CNAME, as long as all collected targets agree.
    m_base_domain = None
    if len(set(m_virtual_domains)) == 1:
        m_base_domain = m_virtual_domains[0]

    # Configure the progress notifier.
    self.progress.set_total(len(wordlist))
    self.progress.min_delta = 1  # notify every 1%

    # For each subdomain in the wordlist...
    found   = 0      # number of live subdomains discovered
    results = []     # objects to return
    visited = set()  # addresses/targets already reported (dedup)
    for prefix in wordlist:

        # Mark as completed before actually trying.
        # We can't put this at the end of the loop where it belongs,
        # because the "continue" statements would skip over this too.
        self.progress.add_completed()

        # Build the domain name.
        name = ".".join((prefix, root))

        # Skip if out of scope.
        if name not in Config.audit_scope:
            continue

        # Resolve the subdomain.
        records = DNS.get_a(name, also_CNAME=True)
        records.extend( DNS.get_aaaa(name, also_CNAME=True) )

        # If no DNS records were found, skip.
        if not records:
            continue

        # If CNAME is the base domain, skip
        # NOTE(review): chk only ever contains True, so "all(chk)" is
        # redundant — this skips when ANY record is a CNAME to the
        # wildcard base domain.
        chk = [True for x in records if x.type == "CNAME" and x.target == m_base_domain]

        if len(chk) > 0 and all(chk):
            continue

        # We found a subdomain!
        found += 1
        Logger.log_more_verbose(
            "Subdomain found: %s" % name)

        # Create the Domain object for the subdomain.
        domain = Domain(name)
        results.append(domain)

        #
        # Check for Domain disclosure
        #
        if prefix not in whitelist:
            d = DomainDisclosure(domain,
                                 risk        = 0,
                                 level       = "low",
                                 title       = "Possible subdomain leak",
                                 description = "A subdomain was discovered which may be an unwanted information disclosure."
                                 )
            results.append(d)

        # For each DNs record, grab the address or name.
        # Skip duplicated records.
        for rec in records:
            if rec.type == "CNAME":
                location = rec.target
            elif rec.type in ("A", "AAAA"):
                location = rec.address
            else: # should not happen...
                # Unknown record type: report it unconditionally.
                results.append(rec)
                domain.add_information(rec)
                continue
            if location not in visited:
                visited.add(location)
                results.append(rec)
                domain.add_information(rec)

    # Log the results.
    if found:
        Logger.log(
            "Found %d subdomains for root domain: %s"
            % (found, root))
    else:
        Logger.log_verbose(
            "No subdomains found for root domain: %s" % root)

    # Return the results.
    return results
def recv_info(self, info):
    """
    Brute force subdomains of the root domain of *info* using the
    wordlist configured in Config.plugin_args["wordlist"].

    :param info: Domain resource; only its root domain is used.
    :return: List of Domain objects plus their DNS registers, or None
             when the domain was skipped or the wordlist failed to load.
    """
    # Work on the root domain only.
    target_root = info.root

    # Localhost can't have public subdomains.
    if target_root == "localhost":
        return

    # Process each root domain only once across the audit.
    if self.state.put(target_root, True):
        return

    # Load the subdomains wordlist.
    try:
        prefixes = WordListLoader.get_advanced_wordlist_as_list(
            Config.plugin_args["wordlist"])
    except WordlistNotFound:
        Logger.log_error_verbose("Wordlist '%s' not found.." % Config.plugin_args["wordlist"])
        return
    except TypeError:
        Logger.log_error_verbose("Wordlist '%s' is not a file." % Config.plugin_args["wordlist"])
        return

    # Progress notifications: one tick per wordlist entry, every 1%.
    self.progress.set_total(len(prefixes))
    self.progress.min_delta = 1

    n_found        = 0      # live subdomains discovered
    output         = []     # Domain objects and DNS registers to return
    seen_locations = set()  # addresses/targets already reported (dedup)

    for word in prefixes:

        # Progress must advance up front: the "continue" statements
        # below would otherwise skip the bookkeeping.
        self.progress.add_completed()

        # Candidate fully qualified domain name.
        fqdn = "%s.%s" % (word, target_root)

        # Respect the audit scope.
        if fqdn not in Config.audit_scope:
            continue

        # Resolve it (A and AAAA, following CNAMEs).
        dns_records = DNS.get_a(fqdn, also_CNAME=True)
        dns_records += DNS.get_aaaa(fqdn, also_CNAME=True)

        # Nothing resolved: not a live subdomain.
        if not dns_records:
            continue

        n_found += 1
        Logger.log_more_verbose("Subdomain found: %s" % fqdn)

        # Report the subdomain itself.
        dom = Domain(fqdn)
        output.append(dom)

        # Report each record once, keyed by its address or target name.
        for record in dns_records:
            if record.type == "CNAME":
                where = record.target
            elif record.type in ("A", "AAAA"):
                where = record.address
            else:
                # Unexpected record type: keep it unconditionally.
                output.append(record)
                dom.add_information(record)
                continue
            if where in seen_locations:
                continue
            seen_locations.add(where)
            output.append(record)
            dom.add_information(record)

    # Log a summary.
    if n_found:
        Logger.log(
            "Found %d subdomains for root domain: %s"
            % (n_found, target_root))
    else:
        Logger.log_verbose(
            "No subdomains found for root domain: %s" % target_root)

    return output
def recv_info(self, info):
    """
    Look for hidden but predictable resources on the remote web server.

    Wordlists are selected from the plugin configuration: the common
    wordlists plus, when a web server fingerprint is available, the
    wordlists for that server and each related server.

    :param info: URL resource to test.
    :returns: Results generated by the matching analyzer, or None when
        no error-page baseline could be computed.
    """
    m_url = info.url

    Logger.log_more_verbose("Start to process URL: %r" % m_url)

    #
    # Get the remote web server fingerprint
    #
    m_webserver_finger = info.get_associated_informations_by_category(
        WebServerFingerprint.information_type)

    m_wordlist = set()

    # There is fingerprinting information?
    if m_webserver_finger:

        m_webserver_finger = m_webserver_finger.pop()

        m_server_canonical_name = m_webserver_finger.name_canonical
        m_servers_related = m_webserver_finger.related  # Set with related web servers

        #
        # Load wordlists
        #
        m_wordlist_update = m_wordlist.update

        # Common wordlist
        try:
            w = Config.plugin_extra_config["common"]
            m_wordlist_update(w.itervalues())
        except KeyError:
            Logger.log_error("Can't load wordlists")

        # Wordlist of server name
        try:
            w = Config.plugin_extra_config[
                "%s_predictables" % m_server_canonical_name]
            m_wordlist_update(w.itervalues())
        except KeyError:
            Logger.log_error("Can't load wordlists")

        # Wordlists of the servers related to the one found.
        try:
            for l_server_related in m_servers_related:
                # BUG FIX: the key was previously built from the canonical
                # server name, so the related servers' wordlists were
                # never actually loaded.
                w = Config.plugin_extra_config[
                    "%s_predictables" % l_server_related]
                m_wordlist_update(w.itervalues())
        except KeyError:
            Logger.log_error("Can't load wordlists")

    else:

        # Common wordlists
        try:
            w = Config.plugin_extra_config["common"]
            m_wordlist.update(w.itervalues())
        except KeyError:
            Logger.log_error("Can't load wordlists")

    # Load content of wordlists
    m_urls = set()
    m_urls_update = m_urls.update

    # Fixed URL: make sure it ends with a slash so urljoin appends to it.
    m_url_fixed = m_url if m_url.endswith("/") else "%s/" % m_url

    for l_w in m_wordlist:
        # Use a copy of the wordlist to avoid modifying the original source.
        l_loaded_wordlist = WordListLoader.get_advanced_wordlist_as_list(l_w)

        m_urls_update(
            urljoin(m_url_fixed, (l_wo[1:] if l_wo.startswith("/") else l_wo))
            for l_wo in l_loaded_wordlist)

    # Generate the error page baseline.
    m_error_response = get_error_page(m_url)

    # Create the matching analyzer
    try:
        m_store_info = MatchingAnalyzer(m_error_response, min_ratio=0.65)
    except ValueError:
        # There is no information to compare against.
        return

    # Create the partial function that tests and stores each URL.
    _f = partial(process_url,
                 severity_vectors['predictables'],
                 get_http_method(m_url),
                 m_store_info,
                 self.update_status,
                 len(m_urls))

    # Process the URLs
    for i, l_url in enumerate(m_urls):
        _f((i, l_url))

    # Generate and return the results.
    return generate_results(m_store_info.unique_texts)
def check_download(self, url, name, content_length, content_type):
    """
    Decide whether a URL's content should be downloaded.

    :param url: URL being considered.
    :param name: Suggested file name (unused here).
    :param content_length: Value of the Content-Length header, or None.
    :param content_type: Value of the Content-Type header, or None/empty.
    :returns: True to download the content, False to skip it.
    """

    # Only accept content when the content type header is present.
    if not content_type:
        Logger.log_more_verbose(
            "Skipping URL, missing content type: %s" % url)
        return False

    # Is the content length present?
    if content_length is not None:

        # Check the file doesn't have 0 bytes.
        if content_length <= 0:
            Logger.log_more_verbose(
                "Skipping URL, empty content: %s" % url)
            return False

        # Check the file is not too big.
        # Text content gets a much smaller budget than binary content.
        if content_type.strip().lower().startswith("text/"):
            max_length = 100000
        else:
            max_length = 5000000
        if content_length > max_length:
            Logger.log_more_verbose(
                "Skipping URL, content too large (%d bytes): %s"
                % (content_length, url))
            return False

        # Approved!
        return True

    # Content length absent but likely points to a directory index.
    parsed_url = parse_url(url)
    if not parsed_url.filename:
        # Approved!
        return True

    # Extension absent.
    if not parsed_url.extension:
        # Approved!
        return True

    # Match against a known list of valid HTML extensions.
    # See: http://en.wikipedia.org/wiki/List_of_file_formats#Webpage
    # (duplicated ".xml" entries removed from the original tuple;
    # a frozenset gives O(1) membership tests)
    if parsed_url.extension in frozenset((
            ".xml", ".html", ".htm", ".xhtml", ".xht",
            ".mht", ".mhtml", ".maff", ".asp", ".aspx", ".bml",
            ".cfm", ".cgi", ".ihtml", ".jsp", ".las", ".lasso",
            ".lassoapp", ".pl", ".php", ".php3", ".phtml",
            ".rna", ".r", ".rnx", ".shtml", ".stm", ".atom",
            ".eml", ".jsonld", ".metalink", ".met", ".rss",
            ".markdown")):
        # Approved!
        return True

    # Is the URL path in the blacklist?
    # (removed a redundant identity list comprehension copy)
    m_forbidden = WordListLoader.get_wordlist_as_list(
        Config.plugin_config["wordlist_no_spider"])
    if any(x in url for x in m_forbidden):
        return False

    # Success!
    return True
class Spider(TestingPlugin):
    """
    This plugin is a web spider.
    """

    #--------------------------------------------------------------------------
    def get_accepted_types(self):
        # Only URL resources are spidered.
        return [URL]

    #--------------------------------------------------------------------------
    def run(self, info):
        """
        Download the given URL, extract its links and forms, and return
        the new in-scope resources found (plus the downloaded data).

        :param info: URL resource to spider.
        :returns: List of new resources (downloaded data, URL, Email).
        """
        m_return = []

        m_url = info.url

        Logger.log_verbose("Spidering URL: %s" % m_url)

        # Check if we need to follow the first redirect, then follow the link.
        p = None
        try:
            allow_redirects = Config.audit_config.follow_redirects or \
                (info.depth == 0 and Config.audit_config.follow_first_redirect)
            p = download(m_url, self.check_download,
                         allow_redirects=allow_redirects)
        except NetworkException as e:
            Logger.log_error_verbose("Error while processing %r: %s"
                                     % (m_url, str(e)))
        if not p:
            return m_return

        # Send back the data
        m_return.append(p)

        # TODO: If it's a 301 response, get the Location header

        # Get links
        m_forms = None
        if p.information_type == HTML.data_subtype:
            m_links = extract_from_html(p.raw_data, m_url)
            m_forms = extract_forms_from_html(p.raw_data, m_url)
            #m_links.update( extract_from_text(p.raw_data, m_url) )
        elif p.information_type == Text.data_subtype:
            m_links = extract_from_text(p.raw_data, m_url)
        else:
            return m_return

        # Never re-crawl the page we just downloaded.
        try:
            m_links.remove(m_url)
        except Exception:
            pass

        # Do not follow URLs that contain certain keywords
        # (removed a redundant identity list comprehension copy)
        m_forbidden = WordListLoader.get_wordlist_as_list(
            Config.plugin_config["wordlist_no_spider"])
        m_urls_allowed = [
            url for url in m_links
            if not any(x in url for x in m_forbidden)
        ]
        m_urls_not_allowed = m_links.difference(m_urls_allowed)
        if m_urls_not_allowed:
            Logger.log_more_verbose("Skipped forbidden URLs:\n %s"
                                    % "\n ".join(sorted(m_urls_not_allowed)))

        # Do not follow URLs out of scope
        m_urls_in_scope = []
        m_broken = []
        for url in m_urls_allowed:
            try:
                if url in Config.audit_scope:
                    m_urls_in_scope.append(url)
            except Exception:
                m_broken.append(url)
        if m_broken:
            if len(m_broken) == 1:
                Logger.log_more_verbose("Skipped uncrawlable URL: %s"
                                        % m_broken[0])
            else:
                Logger.log_more_verbose("Skipped uncrawlable URLs:\n %s"
                                        % "\n ".join(sorted(m_broken)))
        m_out_of_scope_count = len(m_urls_allowed) - len(m_urls_in_scope) \
            - len(m_broken)
        if m_out_of_scope_count:
            Logger.log_more_verbose("Skipped %d links out of scope."
                                    % m_out_of_scope_count)

        if m_urls_in_scope:
            Logger.log_verbose("Found %d links in URL: %s"
                               % (len(m_urls_allowed), m_url))
        else:
            Logger.log_more_verbose("No links found in URL: %s" % m_url)

        # Convert to URL data type
        for u in m_urls_in_scope:
            try:
                p = parse_url(u)
                if p.scheme == "mailto":
                    m_resource = Email(p.netloc)
                elif p.scheme in ("http", "https"):
                    m_resource = URL(url=u, referer=m_url)
                else:
                    # BUG FIX: skip unsupported schemes instead of falling
                    # through and re-adding the resource built on the
                    # previous iteration.
                    continue
            except Exception:
                warn(format_exc(), RuntimeWarning)
                # BUG FIX: on failure, skip this link instead of using a
                # stale (or unbound) m_resource from a prior iteration.
                continue
            # BUG FIX: removed leftover debug "print m_resource" statement.
            m_resource.add_resource(info)
            m_return.append(m_resource)

        # Get forms info
        if m_forms:
            m_forms_allowed = [
                url for url in m_forms
                if not any(x in url[0] for x in m_forbidden)
            ]
            m_forms_not_allowed = {x[0] for x in m_forms
                                   }.difference(x[0] for x in m_forms_allowed)
        else:
            m_forms_allowed = []
            m_forms_not_allowed = set()

        if m_forms_not_allowed:
            Logger.log_more_verbose("Skipped forbidden forms:\n %s"
                                    % "\n ".join(sorted(m_forms_not_allowed)))

        # Do not follow forms out of scope
        m_forms_in_scope = []
        m_broken = []
        for url in m_forms_allowed:
            try:
                if url[0] in Config.audit_scope:
                    m_forms_in_scope.append(url)
            except Exception:
                m_broken.append(url[0])

        if m_broken:
            if len(m_broken) == 1:
                Logger.log_more_verbose("Skipped uncrawlable forms: %s"
                                        % m_broken[0])
            else:
                Logger.log_more_verbose("Skipped uncrawlable forms:\n %s"
                                        % "\n ".join(sorted(m_broken)))
        m_out_of_scope_count = len(m_forms_allowed) - len(m_forms_in_scope) \
            - len(m_broken)
        if m_out_of_scope_count:
            Logger.log_more_verbose("Skipped %d forms out of scope."
                                    % m_out_of_scope_count)

        if m_forms_in_scope:
            Logger.log_verbose("Found %d forms in URL: %s"
                               % (len(m_forms_in_scope), m_url))
        else:
            Logger.log_more_verbose("No forms found in URL: %s" % m_url)

        # Convert to URL data type
        for u in m_forms_in_scope:
            try:
                url = u[0]
                method = u[1]
                params = {x["name"]: x["value"] for x in u[2]}
                m_resource = URL(url=url, referer=m_url, method=method,
                                 post_params=params)
            except Exception:
                warn(format_exc(), RuntimeWarning)
                # BUG FIX: skip this form instead of re-adding a stale
                # (or unbound) m_resource from a prior iteration.
                continue
            m_resource.add_resource(info)
            m_return.append(m_resource)

        # Send the results
        return m_return
def analyze_html(self, info):
    """
    Extract outbound links from HTML or plain text content and collect
    those pointing outside the audit scope (candidate malware links).

    :param info: HTML or plain text information object to analyze.
    """

    #----------------------------------------------------------------------
    # Get malware suspicious links.

    Logger.log_more_verbose("Processing HTML: %s" % info.identity)

    # Load the malware wordlist.
    wordlist_filename = Config.plugin_config["malware_sites"]
    try:
        wordlist = WordListLoader.get_wordlist_as_list(wordlist_filename)
    except WordlistNotFound:
        Logger.log_error("Wordlist '%s' not found.." % wordlist_filename)
        return
    except TypeError:
        Logger.log_error("Wordlist '%s' is not a file." % wordlist_filename)
        return
    if not wordlist:
        # Best effort: warn but keep going, matching the original behavior.
        Logger.log_error("Wordlist '%s' is empty." % wordlist_filename)

    # BUG FIX: removed leftover numeric debug calls
    # (Logger.log("1") / ("2") / ("3")).

    # Get the base URLs already known for this data.
    base_urls = set()
    for url in info.find_linked_data(Data.TYPE_RESOURCE,
                                     Resource.RESOURCE_URL):
        m_url = url.url
        base_urls.add(m_url)

    # Extract the links from the content.
    # NOTE(review): m_url here is the last linked URL from the loop above;
    # if no linked URL exists this raises NameError — confirm upstream
    # guarantees at least one linked URL resource.
    if info.information_type == Information.INFORMATION_HTML:
        m_links = extract_from_html(info.raw_data, m_url)
        m_links.update(extract_from_text(info.raw_data, m_url))
    elif info.information_type == Information.INFORMATION_PLAIN_TEXT:
        m_links = extract_from_text(info.raw_data, m_url)
    else:
        raise Exception("Internal error!")
    m_links.difference_update(base_urls)

    # If we have no links, abort now
    if not m_links:
        Logger.log_verbose("No output links found.")
        return

    # Do not follow URLs that contain certain keywords
    m_forbidden = WordListLoader.get_wordlist_as_raw(
        Config.plugin_config["wordlist_no_spider"])
    m_urls_allowed = {
        url for url in m_links
        if url and not any(x in url for x in m_forbidden)
    }

    # Get only output links (those pointing outside the audit scope).
    m_output_links = []
    for url in m_urls_allowed:
        try:
            if url not in Config.audit_scope:
                m_output_links.append(url)
        except Exception:
            Logger.log_error_more_verbose(format_exc())
def recv_info(self, info): m_domain = info.root # Skips localhost if m_domain == "localhost": return m_return = None # Checks if the hostname has been already processed if not self.state.check(m_domain): # # Looking for # m_subdomains = WordListLoader.get_advanced_wordlist_as_list( "subs_small.txt") # Run in parallel self.base_domain = m_domain self.completed = Counter(0) self.total = len(m_subdomains) r = pmap(self.get_subdomains_bruteforcer, m_subdomains, pool_size=10) # # Remove repeated # # The results m_domains = set() m_domains_add = m_domains.add m_domains_already = [] m_domains_already_append = m_domains_already.append m_ips = set() m_ips_add = m_ips.add m_ips_already = [] m_ips_already_append = m_ips_already.append if r: for doms in r: for dom in doms: # Domains if dom.type == "CNAME": if not dom.target in m_domains_already: m_domains_already_append(dom.target) if dom.target in Config.audit_scope: m_domains_add(dom) else: discard_data(dom) # IPs if dom.type == "A": if dom.address not in m_ips_already: m_ips_already_append(dom.address) m_ips_add(dom) # Unify m_domains.update(m_ips) m_return = m_domains # Add the information to the host map(info.add_information, m_return) # Set the domain as processed self.state.set(m_domain, True) Logger.log_verbose("DNS analyzer plugin found %d subdomains" % len(m_return)) # Write the info as more user friendly if Logger.MORE_VERBOSE: m_tmp = [] m_tmp_append = m_tmp.append for x in m_return: if getattr(x, "address", False): m_tmp_append("%s (%s)" % (getattr(x, "address"), str(x))) elif getattr(x, "target", False): m_tmp_append("%s (%s)" % (getattr(x, "target"), str(x))) else: m_tmp_append(str(x)) Logger.log_more_verbose("Subdomains found: \n\t+ %s" % "\n\t+ ".join(m_tmp)) return m_return
def recv_info(self, info):
    """
    Brute force subdomains of the root domain, filter out wildcard DNS
    answers, and report subdomains missing from the whitelist as
    possible information disclosures.

    :param info: Domain resource whose root domain will be expanded.
    :returns: List of Domain objects, DNS records and DomainDisclosure
        vulnerabilities, or None when the root domain is skipped.
    """

    # Get the root domain only.
    root = info.root

    # Skip localhost.
    if root == "localhost":
        return

    # Skip root domains we've already processed.
    if self.state.put(root, True):
        return

    # Load the subdomains wordlist.
    try:
        wordlist = WordListLoader.get_advanced_wordlist_as_list(
            Config.plugin_args["wordlist"])
    except WordlistNotFound:
        Logger.log_error_verbose("Wordlist '%s' not found.."
                                 % Config.plugin_args["wordlist"])
        return
    except TypeError:
        Logger.log_error_verbose("Wordlist '%s' is not a file."
                                 % Config.plugin_args["wordlist"])
        return

    # Load the subdomains whitelist.
    try:
        whitelist = WordListLoader.get_advanced_wordlist_as_list(
            Config.plugin_config["wordlist"])
    except WordlistNotFound:
        Logger.log_error_verbose("Wordlist '%s' not found.."
                                 % Config.plugin_config["wordlist"])
        return
    except TypeError:
        Logger.log_error_verbose("Wordlist '%s' is not a file."
                                 % Config.plugin_config["wordlist"])
        return

    #
    # Set a baseline for dynamic (wildcard) sub-domains by resolving
    # random names that should not exist.
    #
    m_virtual_domains = []
    for v in (generate_random_string(40) for x in xrange(3)):
        l_subdomain = ".".join((v, root))

        records = DNS.get_a(l_subdomain, also_CNAME=True)

        for rec in records:
            if rec.type == "CNAME":
                m_virtual_domains.append(rec.target)

    # If all 3 random subdomains resolve to the same target, treat that
    # target as the wildcard base domain.
    m_base_domain = None
    if len(set(m_virtual_domains)) == 1:
        m_base_domain = m_virtual_domains[0]

    # Configure the progress notifier.
    self.progress.set_total(len(wordlist))
    self.progress.min_delta = 1  # notify every 1%

    # For each subdomain in the wordlist...
    found = 0
    results = []
    visited = set()
    for prefix in wordlist:

        # Mark as completed before actually trying.
        # We can't put this at the end of the loop where it belongs,
        # because the "continue" statements would skip over this too.
        self.progress.add_completed()

        # Build the domain name.
        name = ".".join((prefix, root))

        # Skip if out of scope.
        if name not in Config.audit_scope:
            continue

        # Resolve the subdomain.
        records = DNS.get_a(name, also_CNAME=True)
        records.extend(DNS.get_aaaa(name, also_CNAME=True))

        # If no DNS records were found, skip.
        if not records:
            continue

        # If a CNAME points to the wildcard base domain, skip.
        # FIX: the old check built a list of True values and tested
        # "len(chk) > 0 and all(chk)" — all() was vacuously true, so the
        # condition reduces to any().
        if any(x.type == "CNAME" and x.target == m_base_domain
               for x in records):
            continue

        # We found a subdomain!
        found += 1
        Logger.log_more_verbose("Subdomain found: %s" % name)

        # Create the Domain object for the subdomain.
        domain = Domain(name)
        results.append(domain)

        #
        # Check for domain disclosure.
        #
        if prefix not in whitelist:
            d = DomainDisclosure(name,
                                 risk=0,
                                 level="low",
                                 title="Possible subdomain leak",
                                 description="A subdomain was discovered which may be an unwanted information disclosure."
                                 )
            d.add_resource(domain)
            results.append(d)

        # For each DNS record, grab the address or name.
        # Skip duplicated records.
        for rec in records:
            if rec.type == "CNAME":
                location = rec.target
            elif rec.type in ("A", "AAAA"):
                location = rec.address
            else:  # should not happen...
                results.append(rec)
                domain.add_information(rec)
                continue
            if location not in visited:
                visited.add(location)
                results.append(rec)
                domain.add_information(rec)

    # Log the results.
    if found:
        Logger.log("Found %d subdomains for root domain: %s"
                   % (found, root))
    else:
        Logger.log_verbose("No subdomains found for root domain: %s"
                           % root)

    # Return the results.
    return results
# Build the full set of candidate URLs to test, from both the extra file
# and the configured wordlists.
urls = set()

for l_w in new_file:
    try:
        # Strip a leading slash so urljoin appends instead of replacing.
        l_w = l_w[1:] if l_w.startswith("/") else l_w
        tmp_u = urljoin(m_url, l_w)
    except ValueError:
        # BUG FIX: log the word that failed to parse, not tmp_u — which
        # held the value from a previous iteration (and is unbound if the
        # very first iteration fails, raising NameError).
        Logger.log_error("Failed to parse key, from wordlist, '%s'" % l_w)
        continue
    urls.add(tmp_u)

for l_w in wordlist:
    # Use a copy of the wordlist to avoid modifying the original source.
    l_loaded_wordlist = WordListLoader.get_wordlist_as_list(l_w)
    for l_wo in l_loaded_wordlist:
        try:
            # Strip a leading slash so urljoin appends instead of replacing.
            l_wo = l_wo[1:] if l_wo.startswith("/") else l_wo
            tmp_u = urljoin(m_url, l_wo)
        except ValueError:
            # BUG FIX: same stale-variable bug as above.
            Logger.log_error("Failed to parse key, from wordlist, '%s'" % l_wo)
            continue
        urls.add(tmp_u)

Logger.log_verbose("Loaded %s URLs to test." % len(urls))

# Generate the error page baseline.
error_response = get_error_page(m_url)
class Spider(TestingPlugin):
    """
    This plugin is a web spider.
    """

    #----------------------------------------------------------------------
    def get_accepted_info(self):
        # Only URL resources are spidered.
        return [Url]

    #----------------------------------------------------------------------
    def recv_info(self, info):
        """
        Download a URL, extract its links, and return the in-scope ones
        as new Url resources (plus the downloaded data itself).

        :param info: Url resource to spider.
        :returns: List with the downloaded data and the new Url resources.
        """
        spidered = []
        target_url = info.url
        depth = info.depth

        # Respect the configured maximum crawling depth.
        max_depth = Config.audit_config.depth
        if max_depth is not None and depth > max_depth:
            Logger.log_more_verbose(
                "Spider depth level exceeded for URL: %s" % target_url)
            return spidered

        Logger.log_verbose("Spidering URL: %r" % target_url)

        # Download the target, following redirects when configured to.
        response = None
        try:
            follow = Config.audit_config.follow_redirects or \
                (depth == 0 and Config.audit_config.follow_first_redirect)
            response = download(target_url, self.check_download,
                                allow_redirects=follow)
        except NetworkException as e:
            Logger.log_more_verbose("Error while processing %r: %s"
                                    % (target_url, str(e)))
        if not response:
            return spidered

        # The downloaded data is itself a result.
        spidered.append(response)

        # TODO: If it's a 301 response, get the Location header

        # Pull links out of the body, HTML-aware when possible.
        if response.information_type == Information.INFORMATION_HTML:
            links = extract_from_html(response.raw_data, target_url)
        else:
            links = extract_from_text(response.raw_data, target_url)

        # Never re-crawl the page we just fetched.
        try:
            links.remove(target_url)
        except Exception:
            pass

        # Drop links containing any of the forbidden keywords.
        banned_words = WordListLoader.get_wordlist(
            Config.plugin_config["wordlist_no_spider"])
        crawlable = [u for u in links
                     if not any(w in u for w in banned_words)]
        skipped = links.difference(crawlable)
        if skipped:
            Logger.log_more_verbose("Skipped forbidden URLs:\n %s"
                                    % "\n ".join(sorted(skipped)))

        # Drop links that fall outside the audit scope.
        out_of_scope = len(crawlable)
        crawlable = [u for u in crawlable if u in Config.audit_scope]
        out_of_scope -= len(crawlable)
        if out_of_scope:
            Logger.log_more_verbose("Skipped %d links out of scope."
                                    % out_of_scope)

        if crawlable:
            Logger.log_verbose("Found %d links in URL: %s"
                               % (len(crawlable), target_url))
        else:
            Logger.log_verbose("No links found in URL: %s" % target_url)

        # Wrap each surviving link as a Url resource, one level deeper.
        for link in crawlable:
            child = Url(url=link, depth=depth + 1, referer=target_url)
            child.add_resource(info)
            spidered.append(child)

        # Send the results
        return spidered