def is_URL_in_windows(self, main_url):
    """
    Detect whether the remote platform is Windows or \*NIX. To do this,
    get the first in-scope link and make two requests: one for the
    original URL and one for the uppercased URL. If both responses are
    the same, the platform is Windows; otherwise it's \*NIX.

    :returns: True if the remote host is a Windows system, False if
        it's \*NIX, or None if unknown.
    :rtype: bool | None
    """
    m_forbidden = (
        "logout",
        "logoff",
        "exit",
        "sigout",
        "signout",
    )

    # Get the main web page.
    m_r = download(main_url, callback=self.check_download)
    if not m_r or not m_r.raw_data:
        return None
    discard_data(m_r)

    # Get the first link.
    m_links = None
    try:
        if m_r.information_type == Information.INFORMATION_HTML:
            m_links = extract_from_html(m_r.raw_data, main_url)
        else:
            m_links = extract_from_text(m_r.raw_data, main_url)
    except TypeError:
        Logger.log_error_more_verbose("Plugin error: %s" % format_exc())
        return None
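# A minimal sketch of what a link extractor like extract_from_html() could
# look like, using only the Python 2 standard library. This is an
# illustration, not GoLismero's actual helper: the real extract_from_html()
# is more thorough, while this version only collects href/src attributes
# and resolves them against the page URL.
from HTMLParser import HTMLParser
from urlparse import urljoin

class _LinkExtractor(HTMLParser):
    def __init__(self, base_url):
        HTMLParser.__init__(self)
        self.base_url = base_url
        self.links = set()

    def handle_starttag(self, tag, attrs):
        for name, value in attrs:
            if name in ("href", "src") and value:
                # Resolve relative links against the page URL.
                self.links.add(urljoin(self.base_url, value))

def extract_links_sketch(html, base_url):
    parser = _LinkExtractor(base_url)
    parser.feed(html)
    return parser.links

# Example: extract_links_sketch('<a href="/login">x</a>', "http://host/")
# returns set(["http://host/login"]).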
def parse_nikto_results(info, output_filename):
    """
    Convert the output of a Nikto scan to the GoLismero data model.

    :param info: Data object to link all results to (optional).
    :type info: BaseURL

    :param output_filename: Path to the output file.
        The format should always be CSV.
    :type output_filename: str

    :returns: Results from the Nikto scan, and the vulnerability count.
    :rtype: list(Data), int
    """

    # Parse the scan results.
    # On error, log the exception and continue.
    results = []
    vuln_count = 0
    hosts_seen = set()
    urls_seen = {}
    try:
        with open(output_filename, "rU") as f:
            csv_reader = reader(f)
            for row in csv_reader:
                try:

                    # Each row (except for the first) always has the
                    # same 7 columns, but some may be empty.
                    if len(row) < 7:
                        continue
                    host, ip, port, vuln_tag, method, path, text = row[:7]

                    # Report domain names and IP addresses.
                    if ((info is None or host != info.hostname) and
                            host not in hosts_seen):
                        hosts_seen.add(host)
                        if host in Config.audit_scope:
                            results.append(Domain(host))
                    if ip not in hosts_seen:
                        hosts_seen.add(ip)
                        if ip in Config.audit_scope:
                            results.append(IP(ip))

                    # Skip rows not informing of vulnerabilities.
                    if not vuln_tag:
                        continue

                    # Calculate the vulnerable URL. Note that the CSV
                    # reader gives us the port as a string.
                    if info is not None:
                        target = urljoin(info.url, path)
                    elif port == "443":
                        target = urljoin("https://%s/" % host, path)
                    else:
                        target = urljoin("http://%s/" % host, path)

                    # Skip if out of scope.
                    if target not in Config.audit_scope:
                        continue

                    # Report the URLs.
                    if (target, method) not in urls_seen:
                        url = URL(target, method)
                        urls_seen[(target, method)] = url
                        results.append(url)
                    else:
                        url = urls_seen[(target, method)]

                    # Get the reference URLs.
                    refs = extract_from_text(text)
                    refs.difference_update(urls_seen.itervalues())

                    # Report the vulnerabilities. "OSVDB-0" is Nikto's
                    # placeholder tag for uncategorized findings.
                    if vuln_tag == "OSVDB-0":
                        kwargs = {"level": "informational"}
                    else:
                        kwargs = extract_vuln_ids(
                            "%s: %s" % (vuln_tag, text))
                    kwargs["description"] = text if text else None
                    kwargs["references"] = refs
                    if "osvdb" in kwargs and "OSVDB-0" in kwargs["osvdb"]:
                        tmp = list(kwargs["osvdb"])
                        tmp.remove("OSVDB-0")
                        if tmp:
                            kwargs["osvdb"] = tuple(tmp)
                        else:
                            del kwargs["osvdb"]
                    if vuln_tag == "OSVDB-0":
                        vuln = UncategorizedVulnerability(url, **kwargs)
                    else:
                        vuln = VulnerableWebApp(url, **kwargs)
                    results.append(vuln)
                    vuln_count += 1

                # On error, log the exception and continue.
                except Exception, e:
                    Logger.log_error_verbose(str(e))
                    Logger.log_error_more_verbose(format_exc())

    # On error, log the exception.
    except Exception, e:
        Logger.log_error_verbose(str(e))
        Logger.log_error_more_verbose(format_exc())

    return results, vuln_count
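# Hedged example: iterating over Nikto's 7-column CSV output with the
# standard csv module, mirroring the row handling in parse_nikto_results()
# above. The column layout (host, ip, port, vuln_tag, method, path, text)
# is the one unpacked by the function; the filename is hypothetical.
from csv import reader

def iter_nikto_rows(output_filename):
    with open(output_filename, "rU") as f:
        for row in reader(f):
            if len(row) < 7:
                continue  # skip the banner row and malformed lines
            # host, ip, port, vuln_tag, method, path, text
            yield row[:7]

# Example usage:
# for host, ip, port, tag, method, path, text in iter_nikto_rows("scan.csv"):
#     ...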
def analyze_html(self, info):

    #----------------------------------------------------------------------
    # Get malware suspicious links.

    Logger.log_more_verbose("Processing HTML: %s" % info.identity)

    # Load the malware wordlist.
    wordlist_filename = Config.plugin_config["malware_sites"]
    try:
        wordlist = WordListLoader.get_advanced_wordlist_as_list(
            wordlist_filename)
    except WordlistNotFound:
        Logger.log_error("Wordlist '%s' not found." % wordlist_filename)
        return
    except TypeError:
        Logger.log_error(
            "Wordlist '%s' is not a file." % wordlist_filename)
        return
    if not wordlist:
        Logger.log_error("Wordlist '%s' is empty." % wordlist_filename)
        return

    # Get the links.
    base_urls = set()
    for url in info.find_linked_data(Data.TYPE_RESOURCE,
                                     Resource.RESOURCE_URL):
        m_url = url.url
        base_urls.add(m_url)
        if info.information_type == Information.INFORMATION_HTML:
            m_links = extract_from_html(info.raw_data, m_url)
            m_links.update(extract_from_text(info.raw_data, m_url))
        elif info.information_type == Information.INFORMATION_PLAIN_TEXT:
            m_links = extract_from_text(info.raw_data, m_url)
        else:
            raise Exception("Internal error!")
    m_links.difference_update(base_urls)

    # If we have no links, abort now.
    if not m_links:
        Logger.log_verbose("No output links found.")
        return

    # Do not follow URLs that contain certain keywords.
    m_forbidden = WordListLoader.get_wordlist(
        Config.plugin_config["wordlist_no_spider"])
    m_urls_allowed = {
        url for url in m_links
        if url and not any(x in url for x in m_forbidden)
    }

    # Get only the output links, i.e. links out of the audit scope.
    m_output_links = []
    for url in m_urls_allowed:
        try:
            if url not in Config.audit_scope:
                m_output_links.append(url)
        except Exception:
            Logger.log_error_more_verbose(format_exc())
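# Standalone sketch of the keyword filter used above: drop any link whose
# URL contains a word from the "no spider" wordlist. The sample wordlist
# and URLs are made up for illustration.
forbidden_words = ("logout", "logoff", "exit", "signout")
sample_links = set([
    "http://example.com/index.html",
    "http://example.com/account/logout",
    "http://example.com/docs/",
])
allowed_links = set(
    url for url in sample_links
    if url and not any(word in url for word in forbidden_words)
)
# allowed_links == set(["http://example.com/index.html",
#                       "http://example.com/docs/"])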
def is_URL_in_windows(self, main_url):
    """
    Detect whether the remote platform is Windows or \*NIX. To do this,
    get the first in-scope link and make two requests: one for the
    original URL and one for the uppercased URL. If both responses are
    the same, the platform is Windows; otherwise it's \*NIX.

    :returns: True if the remote host is a Windows system, False if
        it's \*NIX, or None if unknown.
    :rtype: bool | None
    """
    m_forbidden = (
        "logout",
        "logoff",
        "exit",
        "sigout",
        "signout",
    )

    # Get the main web page.
    m_r = download(main_url, callback=self.check_download)
    if not m_r:
        return None
    discard_data(m_r)

    # Get the first link.
    if m_r.information_type == Information.INFORMATION_HTML:
        m_links = extract_from_html(m_r.raw_data, main_url)
    else:
        m_links = extract_from_text(m_r.raw_data, main_url)
    if not m_links:
        return None

    # Get the first link of the page that's in the scope of the audit.
    m_first_link = None
    for u in m_links:
        if u in Config.audit_scope and not any(x in u for x in m_forbidden):
            m_first_link = u
            break
    if not m_first_link:
        return None

    # Now make two requests: one for the original URL and another for the
    # uppercased URL.

    # Original.
    m_response_orig = HTTP.get_url(
        m_first_link, callback=self.check_response)  # FIXME handle exceptions!
    discard_data(m_response_orig)

    # Uppercase.
    m_response_upper = HTTP.get_url(
        m_first_link.upper(), callback=self.check_response)  # FIXME handle exceptions!
    discard_data(m_response_upper)

    # Compare them.
    m_orig_data = m_response_orig.raw_response if m_response_orig else ""
    m_upper_data = m_response_upper.raw_response if m_response_upper else ""
    m_match_level = get_diff_ratio(m_orig_data, m_upper_data)

    # If the responses match by more than 95%, the two URLs point to the
    # same resource => Windows; otherwise => *NIX.
    return m_match_level > 0.95
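# Sketch of the comparison behind is_URL_in_windows(), with
# difflib.SequenceMatcher standing in for GoLismero's get_diff_ratio().
# The idea: on a case-insensitive filesystem (Windows/IIS) the original
# URL and its uppercased variant should serve near-identical bodies.
# The 0.95 threshold mirrors the one used above.
from difflib import SequenceMatcher

def diff_ratio_sketch(text_a, text_b):
    return SequenceMatcher(None, text_a, text_b).ratio()

def looks_like_windows(body_original, body_upper, threshold=0.95):
    return diff_ratio_sketch(body_original, body_upper) > threshold

# looks_like_windows("<html>same</html>", "<html>same</html>") -> True
# looks_like_windows("<html>page</html>", "<html>404 Not Found</html>") -> False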
class Spider(TestingPlugin):
    """
    This plugin is a web spider.
    """

    #--------------------------------------------------------------------------
    def get_accepted_types(self):
        return [URL]

    #--------------------------------------------------------------------------
    def run(self, info):

        m_return = []

        m_url = info.url
        Logger.log_verbose("Spidering URL: %s" % m_url)

        # Check if we need to follow the first redirect, then follow the link.
        p = None
        try:
            allow_redirects = Config.audit_config.follow_redirects or \
                (info.depth == 0 and Config.audit_config.follow_first_redirect)
            p = download(m_url, self.check_download,
                         allow_redirects=allow_redirects)
        except NetworkException, e:
            Logger.log_error_verbose("Error while processing %r: %s" %
                                     (m_url, str(e)))

        if not p:
            return m_return

        # Send back the data.
        m_return.append(p)

        # TODO: If it's a 301 response, get the Location header.

        # Get links.
        m_forms = None
        if p.information_type == HTML.data_subtype:
            m_links = extract_from_html(p.raw_data, m_url)
            m_forms = extract_forms_from_html(p.raw_data, m_url)
            #m_links.update( extract_from_text(p.raw_data, m_url) )
        elif p.information_type == Text.data_subtype:
            m_links = extract_from_text(p.raw_data, m_url)
        else:
            return m_return
        try:
            m_links.remove(m_url)
        except Exception:
            pass

        # Do not follow URLs that contain certain keywords.
        m_forbidden = WordListLoader.get_wordlist_as_list(
            Config.plugin_config["wordlist_no_spider"])
        m_urls_allowed = [
            url for url in m_links
            if not any(x in url for x in m_forbidden)
        ]
        m_urls_not_allowed = m_links.difference(m_urls_allowed)
        if m_urls_not_allowed:
            Logger.log_more_verbose("Skipped forbidden URLs:\n %s" %
                                    "\n ".join(sorted(m_urls_not_allowed)))

        # Do not follow URLs out of scope.
        m_urls_in_scope = []
        m_broken = []
        for url in m_urls_allowed:
            try:
                if url in Config.audit_scope:
                    m_urls_in_scope.append(url)
            except Exception:
                m_broken.append(url)
        if m_broken:
            if len(m_broken) == 1:
                Logger.log_more_verbose("Skipped uncrawlable URL: %s" %
                                        m_broken[0])
            else:
                Logger.log_more_verbose("Skipped uncrawlable URLs:\n %s" %
                                        "\n ".join(sorted(m_broken)))
        m_out_of_scope_count = len(m_urls_allowed) - len(
            m_urls_in_scope) - len(m_broken)
        if m_out_of_scope_count:
            Logger.log_more_verbose("Skipped %d links out of scope." %
                                    m_out_of_scope_count)

        if m_urls_in_scope:
            Logger.log_verbose("Found %d links in URL: %s" %
                               (len(m_urls_in_scope), m_url))
        else:
            Logger.log_more_verbose("No links found in URL: %s" % m_url)

        # Convert to the URL data type.
        for u in m_urls_in_scope:
            try:
                p = parse_url(u)
                if p.scheme == "mailto":
                    m_resource = Email(p.netloc)
                elif p.scheme in ("http", "https"):
                    m_resource = URL(url=u, referer=m_url)
                else:
                    continue
            except Exception:
                warn(format_exc(), RuntimeWarning)
                continue
            m_resource.add_resource(info)
            m_return.append(m_resource)

        # Get the forms info.
        if m_forms:
            m_forms_allowed = [
                url for url in m_forms
                if not any(x in url[0] for x in m_forbidden)
            ]
            m_forms_not_allowed = {x[0] for x in m_forms}.difference(
                x[0] for x in m_forms_allowed)
        else:
            m_forms_allowed = []
            m_forms_not_allowed = set()
        if m_forms_not_allowed:
            Logger.log_more_verbose("Skipped forbidden forms:\n %s" %
                                    "\n ".join(sorted(m_forms_not_allowed)))

        # Do not follow forms out of scope.
        m_forms_in_scope = []
        m_broken = []
        for url in m_forms_allowed:
            try:
                if url[0] in Config.audit_scope:
                    m_forms_in_scope.append(url)
            except Exception:
                m_broken.append(url[0])
        if m_broken:
            if len(m_broken) == 1:
                Logger.log_more_verbose("Skipped uncrawlable form: %s" %
                                        m_broken[0])
            else:
                Logger.log_more_verbose("Skipped uncrawlable forms:\n %s" %
                                        "\n ".join(sorted(m_broken)))
        m_out_of_scope_count = len(m_forms_allowed) - len(
            m_forms_in_scope) - len(m_broken)
        if m_out_of_scope_count:
            Logger.log_more_verbose("Skipped %d forms out of scope." %
                                    m_out_of_scope_count)

        if m_forms_in_scope:
            Logger.log_verbose("Found %d forms in URL: %s" %
                               (len(m_forms_in_scope), m_url))
        else:
            Logger.log_more_verbose("No forms found in URL: %s" % m_url)

        # Convert to the URL data type.
        for u in m_forms_in_scope:
            try:
                url = u[0]
                method = u[1]
                params = {x["name"]: x["value"] for x in u[2]}
                m_resource = URL(url=url, referer=m_url, method=method,
                                 post_params=params)
            except Exception:
                warn(format_exc(), RuntimeWarning)
                continue
            m_resource.add_resource(info)
            m_return.append(m_resource)

        # Send the results.
        return m_return
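# Sketch of the scheme dispatch in the loop above, using the standard
# urlparse module in place of GoLismero's parse_url(). The ("email", ...)
# and ("url", ...) tuples stand in for the framework's Email and URL data
# types. Note that for mailto: links the address usually ends up in .path,
# not .netloc, which may differ from parse_url()'s behavior.
from urlparse import urlparse

def classify_link(link):
    p = urlparse(link)
    if p.scheme == "mailto":
        return ("email", p.path or p.netloc)
    if p.scheme in ("http", "https"):
        return ("url", link)
    return None  # unsupported scheme, skip it

# classify_link("mailto:admin@example.com") -> ("email", "admin@example.com")
# classify_link("http://example.com/") -> ("url", "http://example.com/")
# classify_link("ftp://example.com/") -> None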
def parse_nikto_results(info, output_filename):
    """
    Convert the output of a Nikto scan to the GoLismero data model.

    :param info: Data object to link all results to (optional).
    :type info: BaseUrl

    :param output_filename: Path to the output file.
        The format should always be CSV.
    :type output_filename: str

    :returns: Results from the Nikto scan, and the vulnerability count.
    :rtype: list(Data), int
    """

    # Parse the scan results.
    # On error, log the exception and continue.
    results = []
    vuln_count = 0
    hosts_seen = set()
    urls_seen = {}
    try:
        with open(output_filename, "rU") as f:
            csv_reader = reader(f)
            for row in csv_reader:
                try:

                    # Each row (except for the first) always has the
                    # same 7 columns, but some may be empty.
                    if len(row) < 7:
                        continue
                    host, ip, port, vuln_tag, method, path, text = row[:7]

                    # Report domain names and IP addresses.
                    if ((info is None or host != info.hostname) and
                            host not in hosts_seen):
                        hosts_seen.add(host)
                        if host in Config.audit_scope:
                            results.append(Domain(host))
                    if ip not in hosts_seen:
                        hosts_seen.add(ip)
                        if ip in Config.audit_scope:
                            results.append(IP(ip))

                    # Skip rows not informing of vulnerabilities.
                    if not vuln_tag:
                        continue

                    # Calculate the vulnerable URL. Note that the CSV
                    # reader gives us the port as a string.
                    if info is not None:
                        target = urljoin(info.url, path)
                    elif port == "443":
                        target = urljoin("https://%s/" % host, path)
                    else:
                        target = urljoin("http://%s/" % host, path)

                    # Skip if out of scope.
                    if target not in Config.audit_scope:
                        continue

                    # Report the URLs.
                    if (target, method) not in urls_seen:
                        url = Url(target, method)
                        urls_seen[(target, method)] = url
                        results.append(url)
                    else:
                        url = urls_seen[(target, method)]

                    # Get the reference URLs.
                    refs = extract_from_text(text)
                    refs.difference_update(urls_seen.itervalues())

                    # Report the vulnerabilities. "OSVDB-0" is Nikto's
                    # placeholder tag for uncategorized findings.
                    if vuln_tag == "OSVDB-0":
                        kwargs = {"level": "informational"}
                    else:
                        kwargs = extract_vuln_ids(
                            "%s: %s" % (vuln_tag, text))
                    kwargs["description"] = text if text else None
                    kwargs["references"] = refs
                    if "osvdb" in kwargs and "OSVDB-0" in kwargs["osvdb"]:
                        tmp = list(kwargs["osvdb"])
                        tmp.remove("OSVDB-0")
                        if tmp:
                            kwargs["osvdb"] = tuple(tmp)
                        else:
                            del kwargs["osvdb"]
                    if vuln_tag == "OSVDB-0":
                        vuln = UncategorizedVulnerability(**kwargs)
                        vuln.add_resource(url)
                    else:
                        vuln = VulnerableWebApp(url, **kwargs)
                    results.append(vuln)
                    vuln_count += 1

                # On error, log the exception and continue.
                except Exception, e:
                    Logger.log_error_verbose(str(e))
                    Logger.log_error_more_verbose(format_exc())

    # On error, log the exception.
    except Exception, e:
        Logger.log_error_verbose(str(e))
        Logger.log_error_more_verbose(format_exc())

    return results, vuln_count
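# Grounded sketch of the OSVDB-0 cleanup above: "OSVDB-0" is Nikto's
# placeholder tag, so it is stripped from the extracted OSVDB references,
# and the key is dropped entirely when nothing else remains.
def strip_osvdb_placeholder(kwargs):
    if "osvdb" in kwargs and "OSVDB-0" in kwargs["osvdb"]:
        tmp = [x for x in kwargs["osvdb"] if x != "OSVDB-0"]
        if tmp:
            kwargs["osvdb"] = tuple(tmp)
        else:
            del kwargs["osvdb"]
    return kwargs

# strip_osvdb_placeholder({"osvdb": ("OSVDB-0", "OSVDB-877")})
#     -> {"osvdb": ("OSVDB-877",)}
# strip_osvdb_placeholder({"osvdb": ("OSVDB-0",)}) -> {}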
# Get the vulnerability description.
description = opv.description
if not description:
    description = nvt.description
if not description:
    description = nvt.summary
if not description:
    description = None
if opv.notes:
    description += "\n" + "\n".join(
        " - " + note.text for note in opv.notes
    )

# Get the reference URLs.
references = extract_from_text(description)

# Prepare the vulnerability properties.
kwargs = {
    "level"       : OPV_LEVELS_TO_GLM_LEVELS[level.lower()],
    "description" : description,
    "references"  : references,
    "cvss_base"   : cvss,
    "cve"         : cve,
}
if name:
    kwargs["title"] = name

# If we have the OpenVAS plugin database, look up the plugin ID
# that reported this vulnerability and create the vulnerability
# using a specific class. Otherwise use the vulnerability class
# for uncategorized vulnerabilities.
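# Minimal sketch of the description fallback chain above; opv and nvt are
# hypothetical stand-ins for the OpenVAS result and NVT objects. Unlike
# the fragment above, this version only appends the notes when a
# description was actually found, avoiding a TypeError on None.
def build_description(opv, nvt):
    description = opv.description or nvt.description or nvt.summary or None
    if description and opv.notes:
        description += "\n" + "\n".join(
            " - " + note.text for note in opv.notes)
    return description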
def parse_results(openvas_results, ip=None):
    """
    Convert the OpenVAS scan results to the GoLismero data model.

    :param openvas_results: OpenVAS scan results.
    :type openvas_results: list(OpenVASResult)

    :param ip: (Optional) IP address to link the vulnerabilities to.
    :type ip: IP | None

    :returns: Scan results converted to the GoLismero data model.
    :rtype: list(Data)
    """

    # This is where we'll store the results.
    results = []

    # Remember the hosts we've seen so we don't create them twice.
    hosts_seen = {}

    # Map of OpenVAS levels to GoLismero levels.
    openvas_level_2_golismero = {
        "debug":  "informational",
        "log":    "informational",
        "low":    "low",
        "medium": "middle",
        "high":   "high",
    }
    RISKS = {
        "none": 0, "debug": 0, "log": 0,
        "low": 1, "medium": 2, "high": 3, "critical": 4,
    }

    # Do we have the OpenVAS plugin database?
    if not os.path.exists(openvas_db):
        Logger.log_error(
            "OpenVAS plugin not initialized, please run setup.py")
        return results

    # Load the database.
    use_openvas_db = Pickler.load(open(openvas_db, "rb"))

    # For each OpenVAS result...
    for opv in openvas_results:
        try:

            # Get the host.
            host = opv.host
            if host is None:
                continue

            #
            # Get or create the vulnerable resource.
            #
            target = ip
            if host in hosts_seen:
                target = hosts_seen[host]
            elif not ip or ip.address != host:
                try:
                    target = IP(host)
                except ValueError:
                    target = Domain(host)
                hosts_seen[host] = target
                results.append(target)

            #
            # Common data. The NVT must be fetched before it is used in
            # the description fallbacks below.
            #
            nvt = opv.nvt
            oid = int(nvt.oid.split(".")[-1])
            cve = nvt.cve.split(", ") if nvt.cve else []
            risk = RISKS.get(nvt.risk_factor.lower(), 0)
            name = getattr(nvt, "name", "")
            level = getattr(opv, "threat", "informational").lower()
            cvss_base = getattr(nvt, "cvss_base", 0.0)

            # Get the vulnerability description.
            description = opv.description
            if not description:
                description = nvt.description
            if not description:
                description = nvt.summary
            if not description:
                description = None

            # Get the reference URLs.
            references = extract_from_text(description)

            # Notes in the vuln?
            if opv.notes:
                description += "\n" + "\n".join(
                    " - " + note.text for note in opv.notes)

            #
            # Prepare the vulnerability properties.
            #
            kwargs = {
                "level": openvas_level_2_golismero[level],
                "description": description,
                "references": references,
                "cve": cve,
                "risk": risk,
                "severity": risk,
                "impact": risk,
                "cvss_base": cvss_base,
                "title": name,
                "tool_id": "openvas_plugin_%s" % str(oid),
            }

            # If we have the OpenVAS plugin database, look up the plugin ID
            # that reported this vulnerability and create the vulnerability
            # using a specific class. Otherwise use the vulnerability class
            # for uncategorized vulnerabilities.
            candidate_classes = ["UncategorizedVulnerability"]

            # Look for the plugin ID in the database.
            if oid in use_openvas_db:
                candidate_classes = use_openvas_db[oid][0]

            # Make the vulnerabilities.
            for c in candidate_classes:
                clazz = globals()[c]

                # Create the vuln.
                vuln = clazz(target, **kwargs)

                # Add the vulnerability.
                results.append(vuln)

        # Skip on error.
        except Exception, e:
            t = format_exc()
            Logger.log_error_verbose(
                "Error parsing OpenVAS results: %s" % str(e))
            Logger.log_error_more_verbose(t)

    return results
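# Standalone illustration of the threat-level mapping in parse_results().
# The table mirrors openvas_level_2_golismero above; unknown levels fall
# back to "informational" instead of raising a KeyError.
OPENVAS_TO_GOLISMERO = {
    "debug":  "informational",
    "log":    "informational",
    "low":    "low",
    "medium": "middle",
    "high":   "high",
}

def map_level(openvas_level):
    return OPENVAS_TO_GOLISMERO.get(openvas_level.lower(), "informational")

# map_level("Log") -> "informational"; map_level("medium") -> "middle"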
class Spider(TestingPlugin):
    """
    This plugin is a web spider.
    """

    #----------------------------------------------------------------------
    def get_accepted_info(self):
        return [Url]

    #----------------------------------------------------------------------
    def recv_info(self, info):

        m_return = []

        m_url = info.url
        m_depth = info.depth

        # Check the depth.
        if Config.audit_config.depth is not None and \
                m_depth > Config.audit_config.depth:
            Logger.log_more_verbose(
                "Spider depth level exceeded for URL: %s" % m_url)
            return m_return

        Logger.log_verbose("Spidering URL: %r" % m_url)

        # Check if we need to follow the first redirect.
        p = None
        try:
            allow_redirects = Config.audit_config.follow_redirects or \
                (m_depth == 0 and Config.audit_config.follow_first_redirect)
            p = download(m_url, self.check_download,
                         allow_redirects=allow_redirects)
        except NetworkException, e:
            Logger.log_more_verbose("Error while processing %r: %s" %
                                    (m_url, str(e)))

        if not p:
            return m_return

        # Send back the data.
        m_return.append(p)

        # TODO: If it's a 301 response, get the Location header.

        # Get links.
        if p.information_type == Information.INFORMATION_HTML:
            m_links = extract_from_html(p.raw_data, m_url)
        else:
            m_links = extract_from_text(p.raw_data, m_url)
        try:
            m_links.remove(m_url)
        except Exception:
            pass

        # Do not follow URLs that contain certain keywords.
        m_forbidden = WordListLoader.get_wordlist(
            Config.plugin_config["wordlist_no_spider"])
        m_urls_allowed = [
            url for url in m_links
            if not any(x in url for x in m_forbidden)
        ]
        m_urls_not_allowed = m_links.difference(m_urls_allowed)
        if m_urls_not_allowed:
            Logger.log_more_verbose("Skipped forbidden URLs:\n %s" %
                                    "\n ".join(sorted(m_urls_not_allowed)))

        # Do not follow URLs out of scope.
        m_out_of_scope_count = len(m_urls_allowed)
        m_urls_allowed = [
            url for url in m_urls_allowed if url in Config.audit_scope
        ]
        m_out_of_scope_count -= len(m_urls_allowed)
        if m_out_of_scope_count:
            Logger.log_more_verbose("Skipped %d links out of scope." %
                                    m_out_of_scope_count)

        if m_urls_allowed:
            Logger.log_verbose("Found %d links in URL: %s" %
                               (len(m_urls_allowed), m_url))
        else:
            Logger.log_verbose("No links found in URL: %s" % m_url)

        # Convert to the Url data type.
        for u in m_urls_allowed:
            m_resource = Url(url=u, depth=m_depth + 1, referer=m_url)
            m_resource.add_resource(info)
            m_return.append(m_resource)

        # Send the results.
        return m_return
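# Sketch of the depth guard at the top of recv_info(): stop spidering when
# the link depth exceeds the configured maximum. A max_depth of None means
# unlimited, matching the Config.audit_config.depth check above.
def depth_exceeded(depth, max_depth):
    return max_depth is not None and depth > max_depth

# depth_exceeded(3, 2) -> True
# depth_exceeded(3, None) -> False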
def analyze_html(self, info):

    #----------------------------------------------------------------------
    # Get malware suspicious links.

    Logger.log_more_verbose("Processing HTML: %s" % info.identity)

    # Load the malware wordlist.
    wordlist_filename = Config.plugin_config["malware_sites"]
    try:
        wordlist = WordListLoader.get_wordlist_as_list(wordlist_filename)
    except WordlistNotFound:
        Logger.log_error("Wordlist '%s' not found." % wordlist_filename)
        return
    except TypeError:
        Logger.log_error(
            "Wordlist '%s' is not a file." % wordlist_filename)
        return
    if not wordlist:
        Logger.log_error("Wordlist '%s' is empty." % wordlist_filename)
        return

    # Get the links.
    base_urls = set()
    for url in info.find_linked_data(Data.TYPE_RESOURCE,
                                     Resource.RESOURCE_URL):
        m_url = url.url
        base_urls.add(m_url)
        if info.information_type == Information.INFORMATION_HTML:
            m_links = extract_from_html(info.raw_data, m_url)
            m_links.update(extract_from_text(info.raw_data, m_url))
        elif info.information_type == Information.INFORMATION_PLAIN_TEXT:
            m_links = extract_from_text(info.raw_data, m_url)
        else:
            raise Exception("Internal error!")
    m_links.difference_update(base_urls)

    # If we have no links, abort now.
    if not m_links:
        Logger.log_verbose("No output links found.")
        return

    # Do not follow URLs that contain certain keywords.
    m_forbidden = WordListLoader.get_wordlist_as_raw(
        Config.plugin_config["wordlist_no_spider"])
    m_urls_allowed = {
        url for url in m_links
        if url and not any(x in url for x in m_forbidden)
    }

    # Get only the output links, i.e. links out of the audit scope.
    m_output_links = []
    for url in m_urls_allowed:
        try:
            if url not in Config.audit_scope:
                m_output_links.append(url)
        except Exception:
            Logger.log_error_more_verbose(format_exc())
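# Sketch of the final filtering step in analyze_html(): after discarding
# the pages' own URLs, keep only links pointing *outside* the audit scope,
# since those are the candidate malware sites. The scope argument is a
# hypothetical stand-in for Config.audit_scope membership testing.
def find_output_links(links, base_urls, scope):
    candidates = set(links)
    candidates.difference_update(base_urls)  # drop the pages themselves
    return [url for url in candidates if url not in scope]

# find_output_links(["http://a/", "http://evil/"], ["http://a/"],
#                   ["http://a/"]) -> ["http://evil/"]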
class Spider(TestingPlugin):
    """
    This plugin is a web spider.
    """

    #----------------------------------------------------------------------
    def get_accepted_info(self):
        return [Url]

    #----------------------------------------------------------------------
    def recv_info(self, info):

        m_return = []

        m_url = info.url
        Logger.log_verbose("Spidering URL: %r" % m_url)

        # Check if we need to follow the first redirect.
        p = None
        try:
            allow_redirects = Config.audit_config.follow_redirects or \
                (info.depth == 0 and
                 Config.audit_config.follow_first_redirect)
            p = download(m_url, self.check_download,
                         allow_redirects=allow_redirects)
        except NetworkException, e:
            Logger.log_more_verbose("Error while processing %r: %s" %
                                    (m_url, str(e)))

        if not p:
            return m_return

        # Send back the data.
        m_return.append(p)

        # TODO: If it's a 301 response, get the Location header.

        # Get links.
        if p.information_type == Information.INFORMATION_HTML:
            m_links = extract_from_html(p.raw_data, m_url)
        else:
            m_links = extract_from_text(p.raw_data, m_url)
        try:
            m_links.remove(m_url)
        except Exception:
            pass

        # Do not follow URLs that contain certain keywords.
        m_forbidden = WordListLoader.get_wordlist(
            Config.plugin_config["wordlist_no_spider"])
        m_urls_allowed = [
            url for url in m_links
            if not any(x in url for x in m_forbidden)
        ]
        m_urls_not_allowed = m_links.difference(m_urls_allowed)
        if m_urls_not_allowed:
            Logger.log_more_verbose("Skipped forbidden URLs:\n %s" %
                                    "\n ".join(sorted(m_urls_not_allowed)))

        # Do not follow URLs out of scope.
        m_urls_in_scope = []
        m_broken = []
        for url in m_urls_allowed:
            try:
                if url in Config.audit_scope:
                    m_urls_in_scope.append(url)
            except Exception:
                m_broken.append(url)
        if m_broken:
            if len(m_broken) == 1:
                Logger.log_more_verbose("Skipped uncrawlable URL: %s" %
                                        m_broken[0])
            else:
                Logger.log_more_verbose("Skipped uncrawlable URLs:\n %s" %
                                        "\n ".join(sorted(m_broken)))
        m_out_of_scope_count = len(m_urls_allowed) - len(
            m_urls_in_scope) - len(m_broken)
        if m_out_of_scope_count:
            Logger.log_more_verbose("Skipped %d links out of scope." %
                                    m_out_of_scope_count)

        if m_urls_in_scope:
            Logger.log_verbose("Found %d links in URL: %s" %
                               (len(m_urls_in_scope), m_url))
        else:
            Logger.log_verbose("No links found in URL: %s" % m_url)

        # Convert to the Url data type.
        for u in m_urls_in_scope:
            try:
                p = parse_url(u)
                if p.scheme == "mailto":
                    m_resource = Email(p.netloc)
                elif p.scheme in ("http", "https"):
                    m_resource = Url(url=u, referer=m_url)
                else:
                    continue
            except Exception:
                warn(format_exc(), RuntimeWarning)
                continue
            m_resource.add_resource(info)
            m_return.append(m_resource)

        # Send the results.
        return m_return
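# Tiny sketch of the self-link removal done in the spiders above: the
# page's own URL is dropped from the extracted link set before crawling.
# Using set.discard() avoids the bare try/except around set.remove().
links = set(["http://example.com/", "http://example.com/about"])
links.discard("http://example.com/")
# links == set(["http://example.com/about"])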