def analyze_html(self, info):

    #----------------------------------------------------------------------
    # Get malware suspicious links.

    Logger.log_more_verbose("Processing HTML: %s" % info.identity)

    # Load the malware wordlist.
    wordlist_filename = Config.plugin_config["malware_sites"]
    try:
        wordlist = WordListLoader.get_advanced_wordlist_as_list(
            wordlist_filename)
    except WordlistNotFound:
        Logger.log_error("Wordlist '%s' not found." % wordlist_filename)
        return
    except TypeError:
        Logger.log_error("Wordlist '%s' is not a file." % wordlist_filename)
        return
    if not wordlist:
        Logger.log_error("Wordlist '%s' is empty." % wordlist_filename)
        return

    # Get the base URLs and extract the links from the document.
    base_urls = set()
    m_links = set()
    for url in info.find_linked_data(Data.TYPE_RESOURCE,
                                     Resource.RESOURCE_URL):
        m_url = url.url
        base_urls.add(m_url)
        if info.information_type == Information.INFORMATION_HTML:
            m_links.update(extract_from_html(info.raw_data, m_url))
            m_links.update(extract_from_text(info.raw_data, m_url))
        elif info.information_type == Information.INFORMATION_PLAIN_TEXT:
            m_links.update(extract_from_text(info.raw_data, m_url))
        else:
            raise Exception("Internal error!")
    m_links.difference_update(base_urls)

    # If we have no links, abort now.
    if not m_links:
        Logger.log_verbose("No output links found.")
        return

    # Do not follow URLs that contain certain keywords.
    m_forbidden = WordListLoader.get_wordlist(
        Config.plugin_config["wordlist_no_spider"])
    m_urls_allowed = {
        url for url in m_links
        if url and not any(x in url for x in m_forbidden)
    }

    # Keep only the output links (those out of the audit scope).
    m_output_links = []
    for url in m_urls_allowed:
        try:
            if url not in Config.audit_scope:
                m_output_links.append(url)
        except Exception:
            Logger.log_error_more_verbose(format_exc())
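#----------------------------------------------------------------------
# The out-of-scope links collected above are meant to be checked
# against the malware wordlist. Below is a minimal sketch of that
# matching step, assuming 'wordlist' is a plain list of known-bad
# hostname or URL substrings; 'find_suspicious_links' is a
# hypothetical helper, not part of the plugin.

def find_suspicious_links(output_links, wordlist):
    """Return the links that match any known malware site entry."""
    suspicious = []
    for url in output_links:
        for bad_site in wordlist:
            if bad_site and bad_site in url:
                suspicious.append((url, bad_site))
                break  # One match per URL is enough to flag it.
    return suspicious

# Example:
#   find_suspicious_links(["http://evil.example/x"], ["evil.example"])
#   -> [("http://evil.example/x", "evil.example")]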
def is_URL_in_windows(self, main_url):
    """
    Detect if the remote platform is Windows or \*NIX. To do this, take
    the first in-scope link and make two requests: one with the original
    URL and one with the URL converted to uppercase. If both responses
    are the same, the filesystem is case insensitive and the platform is
    most likely Windows; otherwise it is most likely \*NIX.

    :returns: True if the remote host is a Windows system, False if it
        is \*NIX, or None if unknown.
    :rtype: bool | None
    """
    m_forbidden = (
        "logout",
        "logoff",
        "exit",
        "sigout",
        "signout",
    )

    # Get the main web page.
    m_r = download(main_url, callback=self.check_download)
    if not m_r or not m_r.raw_data:
        return None
    discard_data(m_r)

    # Extract the links from the page.
    try:
        if m_r.information_type == Information.INFORMATION_HTML:
            m_links = extract_from_html(m_r.raw_data, main_url)
        else:
            m_links = extract_from_text(m_r.raw_data, main_url)
    except TypeError:
        Logger.log_error_more_verbose("Plugin error: %s" % format_exc())
        return None
    if not m_links:
        return None

    # Get the first link of the page that's in the scope of the audit.
    m_first_link = None
    for u in m_links:
        if u in Config.audit_scope and not any(x in u for x in m_forbidden):
            m_first_link = u
            break
    if not m_first_link:
        return None

    # Make two requests to that link: one with the original URL and
    # one with the uppercased URL.
    m_response_orig = HTTP.get_url(
        m_first_link, callback=self.check_response)  # FIXME handle exceptions!
    discard_data(m_response_orig)
    m_response_upper = HTTP.get_url(
        m_first_link.upper(), callback=self.check_response)  # FIXME handle exceptions!
    discard_data(m_response_upper)

    # Compare the responses.
    m_orig_data = m_response_orig.raw_response if m_response_orig else ""
    m_upper_data = m_response_upper.raw_response if m_response_upper else ""
    m_match_level = get_diff_ratio(m_orig_data, m_upper_data)

    # If the responses match by more than 95%, both URLs point to the
    # same resource => Windows; otherwise => *NIX.
    return m_match_level > 0.95
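#----------------------------------------------------------------------
# The comparison above relies on get_diff_ratio(). As a rough
# illustration, a similar helper could be built on difflib; this is a
# minimal sketch assuming the function returns a similarity ratio
# between 0.0 and 1.0, not the actual GoLismero implementation.

from difflib import SequenceMatcher

def diff_ratio_sketch(text_a, text_b):
    """Return a similarity ratio between 0.0 and 1.0 for two strings."""
    if not text_a and not text_b:
        return 1.0  # Two empty responses are trivially identical.
    return SequenceMatcher(None, text_a, text_b).ratio()

# Example: near-identical responses score close to 1.0, so they would
# be classified as Windows by the 0.95 threshold used above.
#   diff_ratio_sketch("<html>Hello</html>", "<html>Hello!</html>")  # ~0.97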
class Spider(TestingPlugin):
    """
    This plugin is a web spider.
    """

    #--------------------------------------------------------------------------
    def get_accepted_types(self):
        return [URL]

    #--------------------------------------------------------------------------
    def run(self, info):

        m_return = []

        m_url = info.url
        Logger.log_verbose("Spidering URL: %s" % m_url)

        # Check if we need to follow the first redirect, then download.
        p = None
        try:
            allow_redirects = Config.audit_config.follow_redirects or \
                (info.depth == 0 and Config.audit_config.follow_first_redirect)
            p = download(m_url, self.check_download,
                         allow_redirects=allow_redirects)
        except NetworkException, e:
            Logger.log_error_verbose("Error while processing %r: %s" %
                                     (m_url, str(e)))
        if not p:
            return m_return

        # Send back the data.
        m_return.append(p)

        # TODO: If it's a 301 response, get the Location header.

        # Get the links.
        m_forms = None
        if p.information_type == HTML.data_subtype:
            m_links = extract_from_html(p.raw_data, m_url)
            m_forms = extract_forms_from_html(p.raw_data, m_url)
            #m_links.update( extract_from_text(p.raw_data, m_url) )
        elif p.information_type == Text.data_subtype:
            m_links = extract_from_text(p.raw_data, m_url)
        else:
            return m_return
        try:
            m_links.remove(m_url)
        except Exception:
            pass

        # Do not follow URLs that contain certain keywords.
        m_forbidden = [
            x for x in WordListLoader.get_wordlist_as_list(
                Config.plugin_config["wordlist_no_spider"])
        ]
        m_urls_allowed = [
            url for url in m_links
            if not any(x in url for x in m_forbidden)
        ]
        m_urls_not_allowed = m_links.difference(m_urls_allowed)
        if m_urls_not_allowed:
            Logger.log_more_verbose("Skipped forbidden URLs:\n    %s" %
                                    "\n    ".join(sorted(m_urls_not_allowed)))

        # Do not follow URLs out of scope.
        m_urls_in_scope = []
        m_broken = []
        for url in m_urls_allowed:
            try:
                if url in Config.audit_scope:
                    m_urls_in_scope.append(url)
            except Exception:
                m_broken.append(url)
        if m_broken:
            if len(m_broken) == 1:
                Logger.log_more_verbose("Skipped uncrawlable URL: %s" %
                                        m_broken[0])
            else:
                Logger.log_more_verbose("Skipped uncrawlable URLs:\n    %s" %
                                        "\n    ".join(sorted(m_broken)))
        m_out_of_scope_count = len(m_urls_allowed) - len(m_urls_in_scope) \
                             - len(m_broken)
        if m_out_of_scope_count:
            Logger.log_more_verbose("Skipped %d links out of scope." %
                                    m_out_of_scope_count)

        if m_urls_in_scope:
            Logger.log_verbose("Found %d links in URL: %s" %
                               (len(m_urls_in_scope), m_url))
        else:
            Logger.log_more_verbose("No links found in URL: %s" % m_url)

        # Convert to the URL data type. Links with a "mailto" scheme
        # become Email data instead; other schemes are skipped.
        for u in m_urls_in_scope:
            try:
                parsed = parse_url(u)
                if parsed.scheme == "mailto":
                    m_resource = Email(parsed.netloc)
                elif parsed.scheme in ("http", "https"):
                    m_resource = URL(url=u, referer=m_url)
                else:
                    continue
            except Exception:
                warn(format_exc(), RuntimeWarning)
                continue
            m_resource.add_resource(info)
            m_return.append(m_resource)

        # Get the forms.
        if m_forms:
            m_forms_allowed = [
                form for form in m_forms
                if not any(x in form[0] for x in m_forbidden)
            ]
            m_forms_not_allowed = {x[0] for x in m_forms}.difference(
                x[0] for x in m_forms_allowed)
        else:
            m_forms_allowed = []
            m_forms_not_allowed = set()
        if m_forms_not_allowed:
            Logger.log_more_verbose("Skipped forbidden forms:\n    %s" %
                                    "\n    ".join(sorted(m_forms_not_allowed)))

        # Do not follow forms out of scope.
        m_forms_in_scope = []
        m_broken = []
        for form in m_forms_allowed:
            try:
                if form[0] in Config.audit_scope:
                    m_forms_in_scope.append(form)
            except Exception:
                m_broken.append(form[0])
        if m_broken:
            if len(m_broken) == 1:
                Logger.log_more_verbose("Skipped uncrawlable form: %s" %
                                        m_broken[0])
            else:
                Logger.log_more_verbose("Skipped uncrawlable forms:\n    %s" %
                                        "\n    ".join(sorted(m_broken)))
        m_out_of_scope_count = len(m_forms_allowed) - len(m_forms_in_scope) \
                             - len(m_broken)
        if m_out_of_scope_count:
            Logger.log_more_verbose("Skipped %d forms out of scope." %
                                    m_out_of_scope_count)

        if m_forms_in_scope:
            Logger.log_verbose("Found %d forms in URL: %s" %
                               (len(m_forms_in_scope), m_url))
        else:
            Logger.log_more_verbose("No forms found in URL: %s" % m_url)

        # Convert each form to the URL data type.
        for form in m_forms_in_scope:
            try:
                url = form[0]
                method = form[1]
                params = {x["name"]: x["value"] for x in form[2]}
                m_resource = URL(url=url, referer=m_url, method=method,
                                 post_params=params)
            except Exception:
                warn(format_exc(), RuntimeWarning)
                continue
            m_resource.add_resource(info)
            m_return.append(m_resource)

        # Send the results.
        return m_return
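#----------------------------------------------------------------------
# The form handling above implies that each item returned by
# extract_forms_from_html() is a (url, method, params) triple, where
# params is a list of {"name": ..., "value": ...} dicts. That shape is
# inferred from the code above, not from a documented API; the sketch
# below just shows the conversion step in isolation.

def form_to_post_params(form):
    """Flatten an extracted form triple into (url, method, params dict)."""
    url, method, fields = form
    params = {field["name"]: field["value"] for field in fields}
    return url, method, params

# Example:
#   form = ("http://www.example.com/login", "POST",
#           [{"name": "user", "value": ""}, {"name": "pass", "value": ""}])
#   form_to_post_params(form)
#   -> ("http://www.example.com/login", "POST", {"user": "", "pass": ""})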
class Spider(TestingPlugin):
    """
    This plugin is a web spider.
    """

    #----------------------------------------------------------------------
    def get_accepted_info(self):
        return [Url]

    #----------------------------------------------------------------------
    def recv_info(self, info):

        m_return = []

        m_url = info.url
        m_depth = info.depth

        # Check the depth.
        if Config.audit_config.depth is not None and \
                m_depth > Config.audit_config.depth:
            Logger.log_more_verbose(
                "Spider depth level exceeded for URL: %s" % m_url)
            return m_return

        Logger.log_verbose("Spidering URL: %r" % m_url)

        # Check if we need to follow the first redirect, then download.
        p = None
        try:
            allow_redirects = Config.audit_config.follow_redirects or \
                (m_depth == 0 and Config.audit_config.follow_first_redirect)
            p = download(m_url, self.check_download,
                         allow_redirects=allow_redirects)
        except NetworkException, e:
            Logger.log_more_verbose("Error while processing %r: %s" %
                                    (m_url, str(e)))
        if not p:
            return m_return

        # Send back the data.
        m_return.append(p)

        # TODO: If it's a 301 response, get the Location header.

        # Get the links.
        if p.information_type == Information.INFORMATION_HTML:
            m_links = extract_from_html(p.raw_data, m_url)
        else:
            m_links = extract_from_text(p.raw_data, m_url)
        try:
            m_links.remove(m_url)
        except Exception:
            pass

        # Do not follow URLs that contain certain keywords.
        m_forbidden = WordListLoader.get_wordlist(
            Config.plugin_config["wordlist_no_spider"])
        m_urls_allowed = [
            url for url in m_links
            if not any(x in url for x in m_forbidden)
        ]
        m_urls_not_allowed = m_links.difference(m_urls_allowed)
        if m_urls_not_allowed:
            Logger.log_more_verbose("Skipped forbidden URLs:\n    %s" %
                                    "\n    ".join(sorted(m_urls_not_allowed)))

        # Do not follow URLs out of scope.
        m_out_of_scope_count = len(m_urls_allowed)
        m_urls_allowed = [
            url for url in m_urls_allowed
            if url in Config.audit_scope
        ]
        m_out_of_scope_count -= len(m_urls_allowed)
        if m_out_of_scope_count:
            Logger.log_more_verbose("Skipped %d links out of scope." %
                                    m_out_of_scope_count)

        if m_urls_allowed:
            Logger.log_verbose("Found %d links in URL: %s" %
                               (len(m_urls_allowed), m_url))
        else:
            Logger.log_verbose("No links found in URL: %s" % m_url)

        # Convert to the Url data type, bumping the depth by one.
        for u in m_urls_allowed:
            m_resource = Url(url=u, depth=m_depth + 1, referer=m_url)
            m_resource.add_resource(info)
            m_return.append(m_resource)

        # Send the results.
        return m_return
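#----------------------------------------------------------------------
# The depth check above bounds the crawl: each new Url is created with
# depth = m_depth + 1, so spidering stops once the configured limit is
# exceeded. Below is a self-contained sketch of the same technique,
# with a hypothetical fetch_links() standing in for the download and
# link-extraction steps.

def crawl(url, fetch_links, max_depth, depth=0, seen=None):
    """Depth-limited crawl: follow links at most max_depth levels deep."""
    if seen is None:
        seen = set()
    if max_depth is not None and depth > max_depth:
        return seen  # Depth level exceeded; do not spider further.
    if url in seen:
        return seen  # Already visited, avoid loops.
    seen.add(url)
    for link in fetch_links(url):
        crawl(link, fetch_links, max_depth, depth + 1, seen)
    return seen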
class Spider(TestingPlugin):
    """
    This plugin is a web spider.
    """

    #----------------------------------------------------------------------
    def get_accepted_info(self):
        return [Url]

    #----------------------------------------------------------------------
    def recv_info(self, info):

        m_return = []

        m_url = info.url
        Logger.log_verbose("Spidering URL: %r" % m_url)

        # Check if we need to follow the first redirect, then download.
        p = None
        try:
            allow_redirects = Config.audit_config.follow_redirects or \
                (info.depth == 0 and Config.audit_config.follow_first_redirect)
            p = download(m_url, self.check_download,
                         allow_redirects=allow_redirects)
        except NetworkException, e:
            Logger.log_more_verbose("Error while processing %r: %s" %
                                    (m_url, str(e)))
        if not p:
            return m_return

        # Send back the data.
        m_return.append(p)

        # TODO: If it's a 301 response, get the Location header.

        # Get the links.
        if p.information_type == Information.INFORMATION_HTML:
            m_links = extract_from_html(p.raw_data, m_url)
        else:
            m_links = extract_from_text(p.raw_data, m_url)
        try:
            m_links.remove(m_url)
        except Exception:
            pass

        # Do not follow URLs that contain certain keywords.
        m_forbidden = WordListLoader.get_wordlist(
            Config.plugin_config["wordlist_no_spider"])
        m_urls_allowed = [
            url for url in m_links
            if not any(x in url for x in m_forbidden)
        ]
        m_urls_not_allowed = m_links.difference(m_urls_allowed)
        if m_urls_not_allowed:
            Logger.log_more_verbose("Skipped forbidden URLs:\n    %s" %
                                    "\n    ".join(sorted(m_urls_not_allowed)))

        # Do not follow URLs out of scope.
        m_urls_in_scope = []
        m_broken = []
        for url in m_urls_allowed:
            try:
                if url in Config.audit_scope:
                    m_urls_in_scope.append(url)
            except Exception:
                m_broken.append(url)
        if m_broken:
            if len(m_broken) == 1:
                Logger.log_more_verbose("Skipped uncrawlable URL: %s" %
                                        m_broken[0])
            else:
                Logger.log_more_verbose("Skipped uncrawlable URLs:\n    %s" %
                                        "\n    ".join(sorted(m_broken)))
        m_out_of_scope_count = len(m_urls_allowed) - len(m_urls_in_scope) \
                             - len(m_broken)
        if m_out_of_scope_count:
            Logger.log_more_verbose("Skipped %d links out of scope." %
                                    m_out_of_scope_count)

        if m_urls_in_scope:
            Logger.log_verbose("Found %d links in URL: %s" %
                               (len(m_urls_in_scope), m_url))
        else:
            Logger.log_verbose("No links found in URL: %s" % m_url)

        # Convert to the Url data type. Links with a "mailto" scheme
        # become Email data instead; other schemes are skipped.
        for u in m_urls_in_scope:
            try:
                parsed = parse_url(u)
                if parsed.scheme == "mailto":
                    m_resource = Email(parsed.netloc)
                elif parsed.scheme in ("http", "https"):
                    m_resource = Url(url=u, referer=m_url)
                else:
                    continue
            except Exception:
                warn(format_exc(), RuntimeWarning)
                continue
            m_resource.add_resource(info)
            m_return.append(m_resource)

        # Send the results.
        return m_return
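#----------------------------------------------------------------------
# The loop above dispatches each link by scheme: "mailto" links become
# Email data, "http"/"https" links become Url data, and anything else
# is dropped. The same dispatch can be shown with the standard
# library's urlparse (Python 2), using plain tuples instead of
# GoLismero data types.

from urlparse import urlparse

def classify_link(link):
    """Return ("email", address), ("url", link) or None for other schemes."""
    parts = urlparse(link)
    if parts.scheme == "mailto":
        # With the stdlib urlparse, a mailto: address lands in the path
        # component, unlike the netloc used by GoLismero's parse_url.
        return ("email", parts.path or parts.netloc)
    if parts.scheme in ("http", "https"):
        return ("url", link)
    return None  # Unsupported scheme, skip it.

# Example:
#   classify_link("mailto:admin@example.com") -> ("email", "admin@example.com")
#   classify_link("ftp://example.com/file")   -> None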