def test_url_errors():
    """Check that every known-bad URL makes parse_url() raise ValueError."""
    # FIX: Python 2 only "print" statement replaced with the portable
    # single-argument call form (works on 2.x and 3.x).
    print("Testing URL parsing errors...")
    for url in errors:
        try:
            parse_url(url).url
            # Parsing succeeded where it should have failed: report the URL.
            raise AssertionError(url)
        except ValueError:
            pass  # expected failure
def test_relative_urls():
    """Check that relative URLs resolve correctly against a base URL."""
    # FIX: Python 2 only "print" statement replaced with the portable call
    # form. Stale commented-out debug code (which referenced the wrong
    # variable, "relative" instead of "rel") removed.
    print("Testing relative URL parsing...")
    base = 'http://example.com/path/'
    for rel, ab in relative:
        assert parse_url(rel, base).url == ab
def sf_SSL_CERTIFICATE_MISMATCH(self, sf_module, source, raw_data):
    """Map a SpiderFoot certificate mismatch event to GoLismero data."""
    host = parse_url(source).host
    affected = Domain(host)
    # XXX or is it InvalidCommonName?
    vuln = InvalidCertificate(
        domain  = affected,
        tool_id = sf_module,
    )
    return affected, vuln
def sf_SSL_CERTIFICATE_EXPIRED(self, sf_module, source, raw_data):
    """Map a SpiderFoot expired certificate event to GoLismero data."""
    affected = Domain(parse_url(source).host)
    vuln = OutdatedCertificate(
        domain  = affected,
        tool_id = sf_module,
    )
    return affected, vuln
def check_params(self):
    """
    Validate the plugin arguments.

    :raises ValueError: If the URL is missing or malformed.
    """
    try:
        raw_url = Config.plugin_args["url"]
        assert raw_url, "Missing URL"
        url = parse_url(raw_url)
        assert url.scheme, "Invalid URL"
        assert url.host, "Invalid URL"
    # FIX: Python 2 only "except Exception, e" syntax replaced with
    # "as e" (valid on 2.6+ and 3.x). Any validation failure is surfaced
    # to the caller as a single ValueError.
    # NOTE(review): asserts are stripped under -O; consider explicit
    # raises if that matters for this deployment.
    except Exception as e:
        raise ValueError(str(e))
def test_equivalent_urls():
    """Check that equivalent URL spellings normalize to one canonical form."""
    # FIX: Python 2 only "print" statement replaced with the portable call
    # form; manual set-building loop replaced by a set comprehension.
    print("Testing URL sanitization...")
    for url_list in equivalent:
        normalized = {parse_url(url).url for url in url_list}
        # All spellings must collapse to exactly one canonical URL...
        assert len(normalized) == 1
        normal = normalized.pop()
        # ...and the canonical form must be one of the original spellings.
        assert normal in url_list
def check_params(self):
    """
    Validate the SpiderFoot plugin arguments.

    :raises ValueError: If the server URL is missing or malformed.
    """
    try:
        raw_url = Config.plugin_args["url"]
        assert raw_url, "SpiderFoot plugin not configured!" \
                        " Please specify the URL to connect to" \
                        " the SpiderFoot server."
        url = parse_url(raw_url)
        assert url.scheme, "Invalid URL"
        assert url.host, "Invalid URL"
    # FIX: Python 2 only "except Exception, e" syntax replaced with
    # "as e" (valid on 2.6+ and 3.x).
    except Exception as e:
        raise ValueError(str(e))
def check_download(self, url, name, content_length, content_type):
    """
    Decide whether a URL is worth downloading.

    :returns: True to allow the download, False to skip it.
    """
    # Reject responses with no Content-Type header at all.
    if not content_type:
        Logger.log_more_verbose("Skipping URL, missing content type: %s" % url)
        return False

    # Is the content length present?
    if content_length is not None:

        # Check the file doesn't have 0 bytes.
        if content_length <= 0:
            Logger.log_more_verbose("Skipping URL, empty content: %s" % url)
            return False

        # Check the file is not too big: 100 KB for text content,
        # 5 MB for everything else.
        if content_type.strip().lower().startswith("text/"):
            if content_length > 100000:
                Logger.log_more_verbose(
                    "Skipping URL, content too large (%d bytes): %s"
                    % (content_length, url))
                return False
        else:
            if content_length > 5000000:
                Logger.log_more_verbose(
                    "Skipping URL, content too large (%d bytes): %s"
                    % (content_length, url))
                return False

        # Approved!
        return True

    # Content length absent but likely points to a directory index.
    parsed_url = parse_url(url)
    if not parsed_url.filename:
        return True

    # No extension: nothing to filter on, accept it.
    if not parsed_url.extension:
        return True

    # Only accept extensions known to serve web page content.
    # List from wikipedia: http://en.wikipedia.org/wiki/List_of_file_formats#Webpage
    # FIX: the original tuple listed ".xml" three times; deduplicated into
    # a frozenset (same membership semantics, O(1) lookup).
    webpage_extensions = frozenset((
        ".xml", ".html", ".htm", ".xhtml", ".xht",
        ".mht", ".mhtml", ".maff", ".asp", ".aspx", ".bml",
        ".cfm", ".cgi", ".ihtml", ".jsp", ".las", ".lasso",
        ".lassoapp", ".pl", ".php", ".php3", ".phtml",
        ".rna", ".r", ".rnx", ".shtml", ".stm", ".atom",
        ".eml", ".jsonld", ".metalink", ".met",
        ".rss", ".markdown",
    ))
    return parsed_url.extension in webpage_extensions
def check_download(self, url, name, content_length, content_type):
    """Decide whether a URL should be downloaded (text content only)."""

    # Anything not explicitly declared as text is rejected outright.
    is_text = bool(content_type) and \
              content_type.strip().lower().startswith("text/")
    if not is_text:
        Logger.log_more_verbose("Skipping URL, binary content: %s" % url)
        return False

    # When the length is known, enforce the size limits.
    if content_length is not None:
        if content_length <= 0:
            Logger.log_more_verbose("Skipping URL, empty content: %s" % url)
            return False
        if content_length > 100000:
            Logger.log_more_verbose(
                "Skipping URL, content too large (%d bytes): %s"
                % (content_length, url))
            return False
        # Approved!
        return True

    # Unknown length: a missing filename suggests a directory index.
    if not parse_url(url).filename:
        return True

    # Heuristic: "download" in the URL or a non-HTML file suffix means
    # the content is probably not a web page.
    suffix = name[name.rfind(".") + 1:].lower()
    if "download" in url or suffix not in (
            "htm", "html", "php", "asp", "aspx", "jsp"):
        Logger.log_more_verbose(
            "Skipping URL, content is likely not text: %s" % url)
        return False

    # Approved!
    return True
def __escape_rst(self, s):
    """Escape a value for safe inclusion in reStructuredText output."""
    if not isinstance(s, basestring):
        s = str(s)

    # Normalize whitespace: expand tabs, unify line endings, unindent.
    for old, new in (("\t", " " * 8), ("\r\n", "\n"), ("\r", "\n")):
        s = s.replace(old, new)
    s = self.__re_unindent.sub("", s)

    # If the whole value parses as a URL with a linkable scheme,
    # render it as an RST hyperlink; otherwise just escape it.
    try:
        parsed = parse_url(s)
    except Exception:
        parsed = None
    escaped = self.__re_escape_rst.sub(r"\\\1", s)
    if parsed is not None and parsed.scheme in ("http", "https", "ftp",
                                                "mailto"):
        return "`%s <%s>`_" % (escaped, parsed.url)
    return escaped
def check_download(self, url, name, content_length, content_type):
    """Decide whether a URL should be downloaded (text content only)."""

    # Guard clause: reject missing or non-text content types.
    declared = (content_type or "").strip().lower()
    if not declared.startswith("text/"):
        Logger.log_more_verbose("Skipping URL, binary content: %s" % url)
        return False

    # Known length: apply the emptiness and size checks.
    if content_length is not None:
        if content_length <= 0:
            Logger.log_more_verbose("Skipping URL, empty content: %s" % url)
            return False
        if content_length > 100000:
            Logger.log_more_verbose("Skipping URL, content too large (%d bytes): %s" % (content_length, url))
            return False
        # Approved!
        return True

    # Unknown length: a URL without a filename looks like a directory
    # index, which we want.
    if not parse_url(url).filename:
        return True

    # Heuristic for web pages: reject explicit downloads and non-HTML
    # file suffixes.
    suffix = name[name.rfind(".") + 1:].lower()
    webpage_suffixes = ("htm", "html", "php", "asp", "aspx", "jsp")
    if "download" in url or suffix not in webpage_suffixes:
        Logger.log_more_verbose("Skipping URL, content is likely not text: %s" % url)
        return False

    # Approved!
    return True
def check_download(self, url, name, content_length, content_type):
    """
    Decide whether a URL is worth downloading.

    :returns: True to allow the download, False to skip it.
    """
    # Check the file type is text.
    if not content_type:
        Logger.log_more_verbose("Skipping URL, missing content type: %s" % url)
        return False
    # Is the content length present?
    if content_length is not None:
        # Check the file doesn't have 0 bytes.
        if content_length <= 0:
            Logger.log_more_verbose("Skipping URL, empty content: %s" % url)
            return False
        # Check the file is not too big.
        # Text content is capped at 100 KB; everything else at 5 MB.
        if content_type.strip().lower().startswith("text/"):
            if content_length > 100000:
                Logger.log_more_verbose("Skipping URL, content too large (%d bytes): %s" % (content_length, url))
                return False
        else:
            if content_length > 5000000:
                Logger.log_more_verbose("Skipping URL, content too large (%d bytes): %s" % (content_length, url))
                return False
        # Approved!
        return True
    # Content length absent but likely points to a directory index.
    # NOTE(review): the branch below is effectively dead — both it and the
    # fall-through return True, so the filename test only matters when
    # parse_url() raises. Presumably a truncated variant of the fuller
    # extension-based check found elsewhere in the project; verify intent.
    if not parse_url(url).filename:
        # Approved!
        return True
    # Approved!
    return True
def sf_WEBSERVER_BANNER(self, sf_module, source, raw_data):
    """Map a SpiderFoot web server banner event to GoLismero data."""
    url = parse_url(source)
    affected = Domain(url.host)
    # The raw event data is the banner string itself.
    return affected, Banner(affected, raw_data, url.port)
def run(self, info): # Get the base URL to the SpiderFoot API. base_url = Config.plugin_args["url"] # Find out if we should delete the scan when we're done. must_delete = Config.audit_config.boolean( Config.plugin_args.get("delete", "y")) # We need to catch SystemExit in order to stop and delete the scan. scan_id = None try: # Create a new scan. resp = post(urljoin(base_url, "startscan"), { "scanname": Config.audit_name, "scantarget": info.hostname, "modulelist": self.get_list("modulelist", "module_"), "typelist": self.get_list("typelist", "type_"), }) if resp.status_code != 200: r = resp.content p = r.find("<div class=\"alert alert-error\">") if p >= 0: p = r.find("<h4>", p) + 4 q = r.find("</h4>", p) m = r[p:q].strip() raise RuntimeError("Could not start scan, reason: " + m) # Wait until the scan is finished. try: interval = float(Config.plugin_args.get("interval", "5.0")) except Exception: interval = 5.0 url_scanlist = urljoin(base_url, "scanlist") last_msg = "" is_created = False scan_id = None while True: resp = get(url_scanlist) if resp.status_code != 200: status = "ERROR-FAILED" break scanlist = resp.json() found = False for scan in scanlist: scan_id, scan_name = scan[:2] status, count = scan[-2:] if scan_name == Config.audit_name: found = True break if found: is_created = True is_finished = status in ("FINISHED", "ABORTED", "ERROR-FAILED") msg = "Status: %s (%s elements%s)" % ( status, count, " so far" if not is_finished else "" ) if msg != last_msg: last_msg = msg Logger.log_verbose(msg) if is_finished: break else: if not is_created: Logger.log_verbose("Status: CREATING") else: Logger.log_verbose("Status: DELETED") Logger.log_error( "Scan deleted from the SpiderFoot UI, aborting!") return sleep(interval) # Tell the user if the scan didn't finish correctly. results = None try: has_partial = is_created and int(count) > 0 except Exception: has_partial = is_created try: # Get the scan results. 
if has_partial: Logger.log_error("Scan didn't finish correctly!") Logger.log("Attempting to load partial results...") parser = SpiderFootParser() url = parse_url("scaneventresultexport", base_url) url.query_params = {"id": scan_id, "type": "ALL"} resp = get(url.url) if resp.status_code != 200: Logger.log_error( "Could not get scan results, error code: %s" % resp.status_code) else: results = parser.parse(StringIO(resp.content)) if results: if len(results) == 1: Logger.log("Loaded 1 result.") else: Logger.log("Loaded %d results." % len(results)) else: Logger.log("No results loaded.") else: Logger.log_error("Scan didn't finish correctly, aborting!") finally: # Delete the scan. try: if is_created and must_delete: url = parse_url("scandelete", base_url) url.query_params = {"id": scan_id, "confirm": "1"} get(url.url) ##if resp.status_code != 200: ## Logger.log_error_more_verbose( ## "Could not delete scan, error code: %s" ## % resp.status_code) except Exception, e: tb = format_exc() Logger.log_error_verbose(str(e)) Logger.log_error_more_verbose(tb) # Return the results. return results
## % resp.status_code) except Exception, e: tb = format_exc() Logger.log_error_verbose(str(e)) Logger.log_error_more_verbose(tb) # Return the results. return results # If we caught SystemExit, that means GoLismero is shutting down. # Just stop and delete the scan in SpiderFoot without logging # anything nor calling the GoLismero API (it won't work anymore). except SystemExit: if scan_id is not None: try: url = parse_url("stopscan", base_url) url.query_params = {"id": scan_id} get(url.url) finally: if must_delete: url = parse_url("scandelete", base_url) url.query_params = {"id": scan_id, "confirm": "1"} get(url.url) raise #-------------------------------------------------------------------------- @staticmethod def get_list(name, prefix): return ",".join( prefix + token.strip()
class Spider(TestingPlugin):
    """
    This plugin is a web spider.
    """

    #--------------------------------------------------------------------------
    def get_accepted_types(self):
        return [URL]

    #--------------------------------------------------------------------------
    def run(self, info):
        """
        Download one URL, extract its links and forms, and return the
        in-scope resources found.

        :param info: URL resource to spider.
        :returns: List of new Data objects (downloaded page + found URLs).
        """
        m_return = []

        m_url = info.url
        Logger.log_verbose("Spidering URL: %s" % m_url)

        # Check if need follow first redirect, then follow the link.
        p = None
        try:
            allow_redirects = Config.audit_config.follow_redirects or \
                (info.depth == 0 and
                 Config.audit_config.follow_first_redirect)
            p = download(m_url, self.check_download,
                         allow_redirects=allow_redirects)
        # FIX: Python 2 only "except X, e" syntax replaced with "as e".
        except NetworkException as e:
            Logger.log_error_verbose("Error while processing %r: %s"
                                     % (m_url, str(e)))

        if not p:
            return m_return

        # Send back the data
        m_return.append(p)

        # TODO: If it's a 301 response, get the Location header

        # Get links
        m_forms = None
        if p.information_type == HTML.data_subtype:
            m_links = extract_from_html(p.raw_data, m_url)
            m_forms = extract_forms_from_html(p.raw_data, m_url)
            #m_links.update( extract_from_text(p.raw_data, m_url) )
        elif p.information_type == Text.data_subtype:
            m_links = extract_from_text(p.raw_data, m_url)
        else:
            return m_return
        try:
            m_links.remove(m_url)
        except Exception:
            pass

        # Do not follow URLs that contain certain keywords
        m_forbidden = [
            x for x in WordListLoader.get_wordlist_as_list(
                Config.plugin_config["wordlist_no_spider"])
        ]
        m_urls_allowed = [
            url for url in m_links
            if not any(x in url for x in m_forbidden)
        ]
        m_urls_not_allowed = m_links.difference(m_urls_allowed)
        if m_urls_not_allowed:
            Logger.log_more_verbose("Skipped forbidden URLs:\n %s"
                                    % "\n ".join(sorted(m_urls_not_allowed)))

        # Do not follow URLs out of scope
        m_urls_in_scope = []
        m_broken = []
        for url in m_urls_allowed:
            try:
                if url in Config.audit_scope:
                    m_urls_in_scope.append(url)
            except Exception:
                m_broken.append(url)
        if m_broken:
            if len(m_broken) == 1:
                Logger.log_more_verbose("Skipped uncrawlable URL: %s"
                                        % m_broken[0])
            else:
                Logger.log_more_verbose("Skipped uncrawlable URLs:\n %s"
                                        % "\n ".join(sorted(m_broken)))
        m_out_of_scope_count = len(m_urls_allowed) - \
            len(m_urls_in_scope) - len(m_broken)
        if m_out_of_scope_count:
            Logger.log_more_verbose("Skipped %d links out of scope."
                                    % m_out_of_scope_count)

        if m_urls_in_scope:
            # FIX: the original logged len(m_urls_allowed) here, overstating
            # the number of links actually followed.
            Logger.log_verbose("Found %d links in URL: %s"
                               % (len(m_urls_in_scope), m_url))
        else:
            Logger.log_more_verbose("No links found in URL: %s" % m_url)

        # Convert to URL data type
        for u in m_urls_in_scope:
            try:
                p = parse_url(u)
                if p.scheme == "mailto":
                    m_resource = Email(p.netloc)
                elif p.scheme in ("http", "https"):
                    m_resource = URL(url=u, referer=m_url)
                else:
                    # FIX: other schemes left m_resource bound to the
                    # previous iteration's value, duplicating resources.
                    continue
            except Exception:
                warn(format_exc(), RuntimeWarning)
                # FIX: on error the original fell through and re-appended a
                # stale (or unbound) m_resource.
                continue
            # FIX: removed leftover debug "print m_resource".
            m_resource.add_resource(info)
            m_return.append(m_resource)

        # Get forms info
        if m_forms:
            m_forms_allowed = [
                url for url in m_forms
                if not any(x in url[0] for x in m_forbidden)
            ]
            m_forms_not_allowed = {x[0] for x in m_forms}.difference(
                x[0] for x in m_forms_allowed)
        else:
            m_forms_allowed = []
            m_forms_not_allowed = set()

        if m_forms_not_allowed:
            Logger.log_more_verbose("Skipped forbidden forms:\n %s"
                                    % "\n ".join(sorted(m_forms_not_allowed)))

        # Do not follow forms out of scope
        m_forms_in_scope = []
        m_broken = []
        for url in m_forms_allowed:
            try:
                if url[0] in Config.audit_scope:
                    m_forms_in_scope.append(url)
            except Exception:
                m_broken.append(url[0])
        if m_broken:
            if len(m_broken) == 1:
                Logger.log_more_verbose("Skipped uncrawlable forms: %s"
                                        % m_broken[0])
            else:
                Logger.log_more_verbose("Skipped uncrawlable forms:\n %s"
                                        % "\n ".join(sorted(m_broken)))
        m_out_of_scope_count = len(m_forms_allowed) - \
            len(m_forms_in_scope) - len(m_broken)
        if m_out_of_scope_count:
            Logger.log_more_verbose("Skipped %d forms out of scope."
                                    % m_out_of_scope_count)

        if m_forms_in_scope:
            Logger.log_verbose("Found %d forms in URL: %s"
                               % (len(m_forms_in_scope), m_url))
        else:
            Logger.log_more_verbose("No forms found in URL: %s" % m_url)

        # Convert to URL data type
        for u in m_forms_in_scope:
            try:
                url = u[0]
                method = u[1]
                params = {x["name"]: x["value"] for x in u[2]}
                m_resource = URL(url=url, referer=m_url, method=method,
                                 post_params=params)
            except Exception:
                warn(format_exc(), RuntimeWarning)
                # FIX: same stale-resource bug as above.
                continue
            m_resource.add_resource(info)
            m_return.append(m_resource)

        # Send the results
        return m_return
def payload_muntants(url_info, payload = {}, bmethod = 'GET',
                     exclude_cgi_suffix = ['css', 'js', 'jpeg', 'jpg',
                                           'png', 'gif', 'svg', 'txt'],
                     use_cache = None, timeout = 10.0,
                     bcheck_use_orig_body = True, req_header = {},
                     resp_code = '200', resp_header = {}, **kwargs):
    '''
    Send one request with a mutated GET/POST parameter and return the
    response.

    :param url_info: URL object to mutate.
    :param payload: {'k':'id', 'pos': 1, 'payload':str, 'type': 0}
        (pos:0 key, pos:1 value) (type:0 append, type:1 replace)
    :param exclude_cgi_suffix: file extensions to skip (static resources).
    :param bcheck_use_orig_body:
    :param req_header:
    :param resp_code:
    :param resp_header:
    :param kwargs:
    :return: the HTTP response, or None if skipped / nothing to mutate /
        all retries failed.

    NOTE(review): the mutable default arguments are kept for interface
    compatibility; none of them is mutated here.
    '''
    if not isinstance(url_info, URL):
        raise TypeError("Expected url object, type:%s" % type(url_info))
    if not isinstance(payload, dict):
        raise TypeError("Excepted payload object, type:%s" % type(payload))

    # Skip static resources (images, scripts, style sheets...).
    if url_info.parsed_url.extension[1:] in exclude_cgi_suffix:
        Logger.log_verbose("Skipping URL: %s" % url_info.url)
        # FIX: the original logged the skip but then kept processing.
        return None

    m_url_info = copy(url_info)
    if bmethod == "GET":
        param_dict = copy(m_url_info.url_params)
    elif bmethod == "POST":
        param_dict = copy(m_url_info.post_params)

    # FIX: the original guard "len(param_dict) == None and
    # len(param_dict) == 0" could never be true (len() never returns
    # None); the intent is to bail out when there is nothing to mutate.
    if not param_dict:
        return None

    parsed = parse_url(m_url_info.url)

    k = payload['k']
    if payload['pos'] == 1:             # mutate the value
        if payload['type'] == 0:        # append
            param_dict[k] = param_dict[k] + payload['payload']
        elif payload['type'] == 1:      # replace
            param_dict[k] = payload['payload']
    else:                               # mutate the key (not supported yet)
        if payload['type'] == 0:
            param_dict.update(k = param_dict.pop(k))
        # TODO GET/POST param key need deal
        raise ValueError("GET/POST param key payload is not support!")

    # Retry the request up to 3 times on network errors.
    retry_cnt = 0
    while retry_cnt < 3:
        if bmethod == "GET":
            m_resource_url_payload = URL(url = parsed.request_cgi,
                                         method = m_url_info.method,
                                         referer = m_url_info.referer,
                                         url_params = param_dict)
        elif bmethod == "POST":
            m_resource_url_payload = URL(url = parsed.request_cgi,
                                         method = m_url_info.method,
                                         referer = m_url_info.referer,
                                         post_params = param_dict)
        try:
            p = get_request(url = m_resource_url_payload,
                            allow_redirects = False,
                            use_cache = use_cache, timeout = timeout)
            return p
        # FIX: Python 2 only "except X, e" syntax replaced with "as e".
        except NetworkException as e:
            retry_cnt += 1
            time.sleep(0.5)
            Logger.log_error_verbose(
                "Error while processing %r: %s"
                % (m_resource_url_payload.url, str(e)))

    # All retries failed (made explicit; the original fell off the end).
    return None
def run(self, info):
    """
    Run the Nikto scanner against the target and parse its CSV output.

    :param info: Target web resource (hostname, port, SSL flag).
    :returns: Whatever self.run_nikto() produces from the CSV report.
    :raises ValueError: If proxy/cookies are configured but no Nikto
        configuration file is available to extend.
    """

    # Get the path to the Nikto scanner and the configuration file.
    nikto_script, config = self.get_nikto()

    # Build the command line arguments.
    # The -output argument will be filled by run_nikto.
    args = [
        "-host", info.hostname,
        "-ssl" if info.is_https else "-nossl",
        "-port", str(info.parsed_url.port),
        "-Format", "csv",
        "-ask", "no",
        "-nointeractive",
        ##"-useproxy",
    ]
    # Forward selected tuning options from the plugin arguments,
    # stripping all whitespace so they form a single CLI token.
    for option in ("Pause", "timeout", "Tuning", "Plugins"):
        value = Config.plugin_args.get(option.lower(), "")
        value = value.replace("\r", "")
        value = value.replace("\n", "")
        value = value.replace("\t", "")
        value = value.replace(" ", "")
        if value:
            args.extend(["-" + option, value])

    # Create a temporary output file.
    with tempfile(suffix=".csv") as output:

        # Append the output file name to the arguments.
        args.append("-output")
        args.append(output)

        # If we need to set the proxy or the cookies, we'll have to create
        # a temporary config file with the modified settings, since there's
        # no way of passing these options through the command line.
        if Config.audit_config.proxy_addr or Config.audit_config.cookie:

            # Make sure we have a config file.
            if not config:
                raise ValueError("Missing configuration file!")

            # Create a temporary config file.
            with tempfile(suffix=".conf") as tmp_config:

                # Open the original config file.
                with open(config, "rU") as src:

                    # Open the new config file.
                    with open(tmp_config, "w") as dst:

                        # Copy the contents of the original config file.
                        dst.write(src.read())

                        # Append the new settings.
                        proxy_addr = Config.audit_config.proxy_addr
                        if proxy_addr:
                            parsed = parse_url(proxy_addr)
                            dst.write("PROXYHOST=%s\n" % parsed.host)
                            dst.write("PROXYPORT=%s\n" % parsed.port)
                            if Config.audit_config.proxy_user:
                                dst.write("PROXYUSER=%s\n" % Config.audit_config.proxy_user)
                            if Config.audit_config.proxy_pass:
                                dst.write("PROXYPASS=%s\n" % Config.audit_config.proxy_pass)
                        cookie_dict = Config.audit_config.cookie
                        if cookie_dict:
                            cookie = ";".join(
                                '"%s=%s"' % x
                                for x in cookie_dict.iteritems())
                            dst.write("STATIC-COOKIE=%s\n" % cookie)

                # Set the new config file.
                args = ["-config", tmp_config] + args

                # Run Nikto and parse the output.
                # (Must happen inside the "with" blocks so the temporary
                # files still exist when Nikto reads/writes them.)
                return self.run_nikto(info, output, nikto_script, args)

        # Otherwise, just use the supplied config file.
        else:
            if config:
                args = ["-config", config] + args

            # Run Nikto and parse the output.
            return self.run_nikto(info, output, nikto_script, args)
def payload_muntants(url_info,
                     payload={},
                     bmethod='GET',
                     exclude_cgi_suffix=[
                         'css', 'js', 'jpeg', 'jpg', 'png', 'gif', 'svg',
                         'txt'
                     ],
                     use_cache=None,
                     timeout=10.0,
                     bcheck_use_orig_body=True,
                     req_header={},
                     resp_code='200',
                     resp_header={},
                     **kwargs):
    '''
    Send one request with a mutated GET/POST parameter and return the
    response.

    :param url_info: URL object to mutate.
    :param payload: {'k':'id', 'pos': 1, 'payload':str, 'type': 0}
        (pos:0 key, pos:1 value) (type:0 append, type:1 replace)
    :param exclude_cgi_suffix: file extensions to skip (static resources).
    :param bcheck_use_orig_body:
    :param req_header:
    :param resp_code:
    :param resp_header:
    :param kwargs:
    :return: the HTTP response, or None if skipped / nothing to mutate /
        all retries failed.

    NOTE(review): the mutable default arguments are kept for interface
    compatibility; none of them is mutated here.
    '''
    if not isinstance(url_info, URL):
        raise TypeError("Expected url object, type:%s" % type(url_info))
    if not isinstance(payload, dict):
        raise TypeError("Excepted payload object, type:%s" % type(payload))

    # Skip static resources (images, scripts, style sheets...).
    if url_info.parsed_url.extension[1:] in exclude_cgi_suffix:
        Logger.log_verbose("Skipping URL: %s" % url_info.url)
        # FIX: the original logged the skip but then kept processing.
        return None

    m_url_info = copy(url_info)
    if bmethod == "GET":
        param_dict = copy(m_url_info.url_params)
    elif bmethod == "POST":
        param_dict = copy(m_url_info.post_params)

    # FIX: the original guard "len(param_dict) == None and
    # len(param_dict) == 0" could never be true (len() never returns
    # None); the intent is to bail out when there is nothing to mutate.
    if not param_dict:
        return None

    parsed = parse_url(m_url_info.url)

    k = payload['k']
    if payload['pos'] == 1:             # mutate the value
        if payload['type'] == 0:        # append
            param_dict[k] = param_dict[k] + payload['payload']
        elif payload['type'] == 1:      # replace
            param_dict[k] = payload['payload']
    else:                               # mutate the key (not supported yet)
        if payload['type'] == 0:
            param_dict.update(k=param_dict.pop(k))
        # TODO GET/POST param key need deal
        raise ValueError("GET/POST param key payload is not support!")

    # Retry the request up to 3 times on network errors.
    retry_cnt = 0
    while retry_cnt < 3:
        if bmethod == "GET":
            m_resource_url_payload = URL(url=parsed.request_cgi,
                                         method=m_url_info.method,
                                         referer=m_url_info.referer,
                                         url_params=param_dict)
        elif bmethod == "POST":
            m_resource_url_payload = URL(url=parsed.request_cgi,
                                         method=m_url_info.method,
                                         referer=m_url_info.referer,
                                         post_params=param_dict)
        try:
            p = get_request(url=m_resource_url_payload,
                            allow_redirects=False,
                            use_cache=use_cache,
                            timeout=timeout)
            return p
        # FIX: Python 2 only "except X, e" syntax replaced with "as e".
        except NetworkException as e:
            retry_cnt += 1
            time.sleep(0.5)
            Logger.log_error_verbose(
                "Error while processing %r: %s"
                % (m_resource_url_payload.url, str(e)))

    # All retries failed (made explicit; the original fell off the end).
    return None
class Spider(TestingPlugin):
    """
    This plugin is a web spider.
    """

    #----------------------------------------------------------------------
    def get_accepted_info(self):
        return [Url]

    #----------------------------------------------------------------------
    def recv_info(self, info):
        """
        Download one URL, extract its links, and return the in-scope
        resources found.

        :param info: Url resource to spider.
        :returns: List of new Data objects (downloaded page + found URLs).
        """
        m_return = []

        m_url = info.url
        Logger.log_verbose("Spidering URL: %r" % m_url)

        # Check if need follow first redirect
        p = None
        try:
            allow_redirects = Config.audit_config.follow_redirects or \
                (info.depth == 0 and
                 Config.audit_config.follow_first_redirect)
            p = download(m_url, self.check_download,
                         allow_redirects=allow_redirects)
        # FIX: Python 2 only "except X, e" syntax replaced with "as e".
        except NetworkException as e:
            Logger.log_more_verbose("Error while processing %r: %s"
                                    % (m_url, str(e)))

        if not p:
            return m_return

        # Send back the data
        m_return.append(p)

        # TODO: If it's a 301 response, get the Location header

        # Get links
        if p.information_type == Information.INFORMATION_HTML:
            m_links = extract_from_html(p.raw_data, m_url)
        else:
            m_links = extract_from_text(p.raw_data, m_url)
        try:
            m_links.remove(m_url)
        except Exception:
            pass

        # Do not follow URLs that contain certain keywords
        m_forbidden = WordListLoader.get_wordlist(
            Config.plugin_config["wordlist_no_spider"])
        m_urls_allowed = [
            url for url in m_links
            if not any(x in url for x in m_forbidden)
        ]
        m_urls_not_allowed = m_links.difference(m_urls_allowed)
        if m_urls_not_allowed:
            Logger.log_more_verbose("Skipped forbidden URLs:\n %s"
                                    % "\n ".join(sorted(m_urls_not_allowed)))

        # Do not follow URLs out of scope
        m_urls_in_scope = []
        m_broken = []
        for url in m_urls_allowed:
            try:
                if url in Config.audit_scope:
                    m_urls_in_scope.append(url)
            except Exception:
                m_broken.append(url)
        if m_broken:
            if len(m_broken) == 1:
                Logger.log_more_verbose("Skipped uncrawlable URL: %s"
                                        % m_broken[0])
            else:
                Logger.log_more_verbose("Skipped uncrawlable URLs:\n %s"
                                        % "\n ".join(sorted(m_broken)))
        m_out_of_scope_count = len(m_urls_allowed) - \
            len(m_urls_in_scope) - len(m_broken)
        if m_out_of_scope_count:
            # FIX: the message literal was split mid-string by a bad paste
            # ("Skipped %d links / out of scope."); restored here.
            Logger.log_more_verbose("Skipped %d links out of scope."
                                    % m_out_of_scope_count)

        if m_urls_in_scope:
            Logger.log_verbose("Found %d links in URL: %s"
                               % (len(m_urls_in_scope), m_url))
        else:
            Logger.log_verbose("No links found in URL: %s" % m_url)

        # Convert to Url data type
        for u in m_urls_in_scope:
            try:
                p = parse_url(u)
                if p.scheme == "mailto":
                    m_resource = Email(p.netloc)
                elif p.scheme in ("http", "https"):
                    m_resource = Url(url=u, referer=m_url)
                else:
                    # FIX: other schemes left m_resource bound to the
                    # previous iteration's value, duplicating resources.
                    continue
            except Exception:
                warn(format_exc(), RuntimeWarning)
                # FIX: on error the original fell through and re-appended a
                # stale (or unbound) m_resource.
                continue
            m_resource.add_resource(info)
            m_return.append(m_resource)

        # Send the results
        return m_return
def run(self, info):
    """
    Run the Nikto scanner against the target and parse its CSV output.

    :param info: Target web resource (hostname, port, SSL flag).
    :returns: Whatever self.run_nikto() produces from the CSV report.
    :raises ValueError: If proxy/cookies are configured but no Nikto
        configuration file is available to extend.
    """

    # Get the path to the Nikto scanner and the configuration file.
    nikto_script, config = self.get_nikto()

    # Build the command line arguments.
    # The -output argument will be filled by run_nikto.
    args = [
        "-host", info.hostname,
        "-ssl" if info.is_https else "-nossl",
        "-port", str(info.parsed_url.port),
        "-Format", "csv",
        "-ask", "no",
        "-nointeractive",
        ##"-useproxy",
    ]
    # Forward selected tuning options from the plugin arguments,
    # stripping all whitespace so they form a single CLI token.
    for option in ("Pause", "timeout", "Tuning", "Plugins"):
        value = Config.plugin_args.get(option.lower(), "")
        value = value.replace("\r", "")
        value = value.replace("\n", "")
        value = value.replace("\t", "")
        value = value.replace(" ", "")
        if value:
            args.extend(["-" + option, value])

    # Create a temporary output file.
    with tempfile(suffix = ".csv") as output:

        # Append the output file name to the arguments.
        args.append("-output")
        args.append(output)

        # If we need to set the proxy or the cookies, we'll have to create
        # a temporary config file with the modified settings, since there's
        # no way of passing these options through the command line.
        if Config.audit_config.proxy_addr or Config.audit_config.cookie:

            # Make sure we have a config file.
            if not config:
                raise ValueError("Missing configuration file!")

            # Create a temporary config file.
            with tempfile(suffix = ".conf") as tmp_config:

                # Open the original config file.
                with open(config, "rU") as src:

                    # Open the new config file.
                    with open(tmp_config, "w") as dst:

                        # Copy the contents of the original config file.
                        dst.write( src.read() )

                        # Append the new settings.
                        proxy_addr = Config.audit_config.proxy_addr
                        if proxy_addr:
                            parsed = parse_url(proxy_addr)
                            dst.write("PROXYHOST=%s\n" % parsed.host)
                            dst.write("PROXYPORT=%s\n" % parsed.port)
                            if Config.audit_config.proxy_user:
                                dst.write("PROXYUSER=%s\n" % Config.audit_config.proxy_user)
                            if Config.audit_config.proxy_pass:
                                dst.write("PROXYPASS=%s\n" % Config.audit_config.proxy_pass)
                        cookie_dict = Config.audit_config.cookie
                        if cookie_dict:
                            cookie = ";".join(
                                '"%s=%s"' % x
                                for x in cookie_dict.iteritems() )
                            dst.write("STATIC-COOKIE=%s\n" % cookie)

                # Set the new config file.
                args = ["-config", tmp_config] + args

                # Run Nikto and parse the output.
                # (Must happen inside the "with" blocks so the temporary
                # files still exist when Nikto reads/writes them.)
                return self.run_nikto(info, output, nikto_script, args)

        # Otherwise, just use the supplied config file.
        else:
            if config:
                args = ["-config", config] + args

            # Run Nikto and parse the output.
            return self.run_nikto(info, output, nikto_script, args)
def run(self, info):
    """
    Import PunkSPIDER vulnerability data for the target host.

    :param info: Target domain resource.
    :returns: List of URL and Vulnerability objects, or None when the
        service has no data for the host.
    """
    # Query PunkSPIDER. The service indexes hosts by reversed domain
    # (com.example.www).
    host_id = info.hostname
    host_id = parse_url(host_id).hostname
    host_id = ".".join(reversed(host_id.split(".")))
    d = self.query_punkspider(host_id)

    # Stop if we have no results.
    if not d:
        Logger.log("No results found for host: %s" % info.hostname)
        return

    # This is where we'll collect the data we'll return.
    results = []

    # For each vulnerability...
    for v in d["data"]:
        try:

            # Future-proof checks.
            if v["protocol"] not in ("http", "https"):
                Logger.log_more_verbose(
                    "Skipped non-web vulnerability: %s"
                    % to_utf8(v["id"]))
                continue
            if v["bugType"] not in ("xss", "sqli", "bsqli"):
                Logger.log_more_verbose(
                    "Skipped unknown vulnerability type: %s"
                    % to_utf8(v["bugType"]))
                continue

            # Get the vulnerable URL, parameter and payload.
            url = to_utf8(v["vulnerabilityUrl"])
            param = to_utf8(v["parameter"])
            parsed = parse_url(url)
            payload = parsed.query_params[param]

            # Get the level.
            level = to_utf8(v["level"])

            # Create the URL object.
            url_o = URL(url)
            results.append(url_o)

            # Get the vulnerability class.
            if v["bugType"] == "xss":
                clazz = XSS
            else:
                clazz = SQLInjection

            # Create the Vulnerability object.
            vuln = clazz(
                url_o,
                vulnerable_params={param: payload},
                injection_point=clazz.INJECTION_POINT_URL,
                injection_type=to_utf8(v["bugType"]),  # FIXME
                level=level,
                tool_id=to_utf8(v["id"]),
            )
            # FIX: removed leftover debug "print" statements that dumped
            # each vulnerability to stdout.
            results.append(vuln)

        # Log errors.
        # FIX: Python 2 only "except X, e" syntax replaced with "as e".
        except Exception as e:
            tb = traceback.format_exc()
            Logger.log_error_verbose(str(e))
            Logger.log_error_more_verbose(tb)

    # FIX: the collected results were built but never returned.
    return results
def sf_SSL_CERTIFICATE_MISMATCH(self, sf_module, source, raw_data):
    """Map a SpiderFoot certificate mismatch event to GoLismero data."""
    affected = Domain(parse_url(source).host)
    # XXX or is it InvalidCommonName?
    vuln = InvalidCertificate(affected, tool_id = sf_module)
    return affected, vuln
def sf_SSL_CERTIFICATE_EXPIRED(self, sf_module, source, raw_data):
    """Map a SpiderFoot expired certificate event to GoLismero data."""
    affected = Domain(parse_url(source).host)
    vuln = OutdatedCertificate(affected, tool_id = sf_module)
    return affected, vuln
def run(self, info):
    """
    Import PunkSPIDER vulnerability data for the target host.

    :param info: Target domain resource.
    :returns: List of URL and Vulnerability objects, or None when the
        service has no data for the host.
    """
    # Query PunkSPIDER. The service indexes hosts by reversed domain
    # (com.example.www).
    host_id = info.hostname
    host_id = parse_url(host_id).hostname
    host_id = ".".join(reversed(host_id.split(".")))
    d = self.query_punkspider(host_id)

    # Stop if we have no results.
    if not d:
        Logger.log("No results found for host: %s" % info.hostname)
        return

    # This is where we'll collect the data we'll return.
    results = []

    # For each vulnerability...
    for v in d["data"]:
        try:

            # Future-proof checks.
            if v["protocol"] not in ("http", "https"):
                Logger.log_more_verbose(
                    "Skipped non-web vulnerability: %s"
                    % to_utf8(v["id"]))
                continue
            if v["bugType"] not in ("xss", "sqli", "bsqli"):
                Logger.log_more_verbose(
                    "Skipped unknown vulnerability type: %s"
                    % to_utf8(v["bugType"]))
                continue

            # Get the vulnerable URL, parameter and payload.
            url = to_utf8(v["vulnerabilityUrl"])
            param = to_utf8(v["parameter"])
            parsed = parse_url(url)
            payload = parsed.query_params[param]

            # Get the level.
            level = to_utf8(v["level"])

            # Create the URL object.
            url_o = URL(url)
            results.append(url_o)

            # Get the vulnerability class.
            if v["bugType"] == "xss":
                clazz = XSS
            else:
                clazz = SQLInjection

            # Create the Vulnerability object.
            vuln = clazz(
                url_o,
                vulnerable_params = { param: payload },
                injection_point = clazz.INJECTION_POINT_URL,
                injection_type = to_utf8(v["bugType"]),  # FIXME
                level = level,
                tool_id = to_utf8(v["id"]),
            )
            results.append(vuln)

        # Log errors.
        # FIX: Python 2 only "except X, e" syntax replaced with "as e".
        except Exception as e:
            tb = traceback.format_exc()
            Logger.log_error_verbose(str(e))
            Logger.log_error_more_verbose(tb)

    # FIX: the collected results were built but never returned.
    return results
def check_download(self, url, name, content_length, content_type):
    """
    Decide whether a URL is worth downloading.

    :returns: True to allow the download, False to skip it.
    """
    # Only accept content when the content type header is present.
    if not content_type:
        Logger.log_more_verbose(
            "Skipping URL, missing content type: %s" % url)
        return False

    # Is the content length present?
    if content_length is not None:

        # Check the file doesn't have 0 bytes.
        if content_length <= 0:
            Logger.log_more_verbose(
                "Skipping URL, empty content: %s" % url)
            return False

        # Check the file is not too big: 100 KB for text content,
        # 5 MB for everything else.
        if content_type.strip().lower().startswith("text/"):
            if content_length > 100000:
                Logger.log_more_verbose(
                    "Skipping URL, content too large (%d bytes): %s"
                    % (content_length, url))
                return False
        else:
            if content_length > 5000000:
                Logger.log_more_verbose(
                    "Skipping URL, content too large (%d bytes): %s"
                    % (content_length, url))
                return False

        # Approved!
        return True

    # Content length absent but likely points to a directory index.
    parsed_url = parse_url(url)
    if not parsed_url.filename:
        # Approved!
        return True

    # Extension absent.
    if not parsed_url.extension:
        # Approved!
        return True

    # Match against a known list of valid HTML extensions.
    # See: http://en.wikipedia.org/wiki/List_of_file_formats#Webpage
    # FIX: the original tuple listed ".xml" three times; deduplicated into
    # a frozenset (same membership semantics, O(1) lookup).
    if parsed_url.extension in frozenset((
            ".xml", ".html", ".htm", ".xhtml", ".xht",
            ".mht", ".mhtml", ".maff", ".asp", ".aspx", ".bml",
            ".cfm", ".cgi", ".ihtml", ".jsp", ".las", ".lasso",
            ".lassoapp", ".pl", ".php", ".php3", ".phtml",
            ".rna", ".r", ".rnx", ".shtml", ".stm", ".atom",
            ".eml", ".jsonld", ".metalink", ".met",
            ".rss", ".markdown")):
        # Approved!
        return True

    # If URL path in blacklist?
    m_forbidden = [x for x in WordListLoader.get_wordlist(
        Config.plugin_config["wordlist_no_spider"])]
    if any(x in url for x in m_forbidden):
        return False

    # Success!
    return True
def check_download(self, url, name, content_length, content_type):
    """
    Decide whether a URL is worth downloading.

    :returns: True to allow the download, False to skip it.
    """
    # Only accept content when the content type header is present.
    if not content_type:
        Logger.log_more_verbose("Skipping URL, missing content type: %s"
                                % url)
        return False

    # Is the content length present?
    if content_length is not None:

        # Check the file doesn't have 0 bytes.
        if content_length <= 0:
            Logger.log_more_verbose("Skipping URL, empty content: %s"
                                    % url)
            return False

        # Check the file is not too big: 100 KB for text content,
        # 5 MB for everything else.
        if content_type.strip().lower().startswith("text/"):
            if content_length > 100000:
                Logger.log_more_verbose(
                    "Skipping URL, content too large (%d bytes): %s"
                    % (content_length, url))
                return False
        else:
            if content_length > 5000000:
                Logger.log_more_verbose(
                    "Skipping URL, content too large (%d bytes): %s"
                    % (content_length, url))
                return False

        # Approved!
        return True

    # Content length absent but likely points to a directory index.
    parsed_url = parse_url(url)
    if not parsed_url.filename:
        # Approved!
        return True

    # Extension absent.
    if not parsed_url.extension:
        # Approved!
        return True

    # Match against a known list of valid HTML extensions.
    # See: http://en.wikipedia.org/wiki/List_of_file_formats#Webpage
    # FIX: the original tuple listed ".xml" three times; deduplicated into
    # a frozenset (same membership semantics, O(1) lookup).
    if parsed_url.extension in frozenset((
            ".xml", ".html", ".htm", ".xhtml", ".xht",
            ".mht", ".mhtml", ".maff", ".asp", ".aspx", ".bml",
            ".cfm", ".cgi", ".ihtml", ".jsp", ".las", ".lasso",
            ".lassoapp", ".pl", ".php", ".php3", ".phtml",
            ".rna", ".r", ".rnx", ".shtml", ".stm", ".atom",
            ".eml", ".jsonld", ".metalink", ".met",
            ".rss", ".markdown")):
        # Approved!
        return True

    # If URL path in blacklist?
    m_forbidden = [
        x for x in WordListLoader.get_wordlist_as_list(
            Config.plugin_config["wordlist_no_spider"])
    ]
    if any(x in url for x in m_forbidden):
        return False

    # Success!
    return True
def test_basic_urls():
    """
    Check that parsing an already-canonical URL is idempotent:
    the normalized form must equal the input.
    """
    # Parenthesized print works identically on Python 2 and 3.
    print("Testing basic URL parsing...")
    for url in basic:
        # NOTE(review): 'basic' is a module-level fixture list defined
        # elsewhere in this file.
        assert parse_url(url).url == url
def _benchmark():
    """
    Parse a representative URL exercising the path, a query string
    (including a value-less "orphan" parameter) and a fragment.

    :returns: The normalized URL.
    :rtype: str
    """
    # BUGFIX: the original literal contained a mojibake pilcrow
    # character followed by "m" - the byte sequence of "&param"
    # decoded with the wrong charset. Restored to "&param=value".
    return parse_url(
        "http://example.com/path"
        "?query=string&param=value&orphan#fragment_id"
    ).url
def parse_results(openvas_results, ip=None):
    """
    Convert the OpenVAS scan results to the GoLismero data model.

    :param openvas_results: OpenVAS scan results.
    :type openvas_results: list(OpenVASResult)

    :param ip: (Optional) IP address to link the vulnerabilities to.
    :type ip: IP | None

    :returns: Scan results converted to the GoLismero data model,
        or None if the OpenVAS plugin database is missing.
    :rtype: list(Data) | None
    """

    # This is where we'll store the results.
    results = []

    # Remember the hosts we've seen so we don't create them twice.
    hosts_seen = {}

    # Maps of OpenVAS levels to GoLismero levels.
    LEVELS = {
        'debug': 'informational',
        'log': 'informational',
        'low': "low",
        'medium': 'middle',
        'high': "high",
    }
    RISKS = {
        'none': 0,
        'debug': 0,
        'log': 0,
        'low': 1,
        'medium': 2,
        'high': 3,
        'critical': 4,
    }

    # Do we have the OpenVAS plugin database?
    if not os.path.exists(openvas_db):
        Logger.log_error(
            "OpenVAS plugin not initialized, please run setup.py")
        return

    # Load the database.
    with open(openvas_db, "rb") as f:
        use_openvas_db = Pickler.load(f)

    # Get the configuration.
    import_log = Config.audit_config.boolean(
        Config.plugin_args.get("import_log", "no"))
    import_debug = Config.audit_config.boolean(
        Config.plugin_args.get("import_debug", "no"))

    # For each OpenVAS result...
    for opv in openvas_results:
        try:

            # Get the host.
            host = opv.host

            # Skip if we don't have a target host.
            if host is None:
                continue

            # Get the threat level.
            threat = getattr(opv, "threat", "log").lower()

            # Discard log and debug entries, keep only the vulnerabilities.
            if threat == "log" and not import_log:
                continue
            if threat == "debug" and not import_debug:
                continue

            # Get or create the vulnerable resource.
            target = ip
            if host in hosts_seen:
                target = hosts_seen[host]
            elif not ip or ip.address != host:
                try:
                    target = IP(host)
                except ValueError:
                    target = Domain(host)
                hosts_seen[host] = target
                results.append(target)

            # Extract the relevant information from the results.
            # BUGFIX: this must run *before* building the description,
            # because the description fallbacks read from 'nvt'; the
            # original referenced 'nvt' before assigning it, raising
            # NameError whenever opv.description was empty.
            nvt = opv.nvt
            vid = opv.id
            oid = int(nvt.oid.split(".")[-1])
            name = getattr(nvt, "name", None)
            cvss_base = getattr(nvt, "cvss_base", None)
            level = LEVELS.get(threat, "informational")
            risk = RISKS.get(
                getattr(nvt, "risk_factor", "none").lower(), 0)

            # Get the vulnerability description, falling back to the
            # NVT description and summary, or None if all are empty.
            description = opv.description
            if not description:
                description = nvt.description
            if not description:
                description = nvt.summary
            if not description:
                description = None

            # Extract the CVEs and Bugtraq IDs, dropping the
            # "no entry" placeholder markers.
            cve = nvt.cve.split(", ") if nvt.cve else []
            if "NOCVE" in cve:
                cve.remove("NOCVE")
            bid = []
            if nvt.bid:
                bid.extend("BID-" + x for x in nvt.bid.split(", "))
            if nvt.bugtraq:
                bid.extend("BID-" + x for x in nvt.bugtraq.split(", "))
            if "NOBID" in bid:
                # BUGFIX: remove the marker from 'bid', not 'cve';
                # the original called cve.remove("NOBID"), which
                # raised ValueError and discarded the whole result.
                bid.remove("NOBID")

            # Extract the notes and add them to the description text.
            if opv.notes and description is not None:
                description += "\n" + "\n".join(
                    " - " + note.text for note in opv.notes
                )

            # Extract the reference URLs from the description text.
            # URLs appear after a "URL:" marker and are terminated by
            # a comma, a newline, or the end of the text.
            references = []
            if description is not None:
                p = description.find("URL:")
                while p >= 0:
                    p += 4
                    q2 = description.find("\n", p)
                    q1 = description.find(",", p, q2)
                    if q1 > p:
                        q = q1
                    else:
                        q = q2
                    if q < p:
                        # No terminator found: take the rest of the text.
                        q = len(description)
                    url = description[p:q].strip()
                    try:
                        url = parse_url(url).url
                        references.append(url)
                    except Exception:
                        Logger.log_error(format_exc())
                    p = description.find("URL:", q)

            # Prepare the vulnerability properties.
            kwargs = {
                "title": name,
                "description": description,
                "references": references,
                "level": level,
                "risk": risk,
                "severity": risk,
                "impact": risk,
                "cvss_base": cvss_base,
                "cve": cve,
                "bid": bid,
                "tool_id": "openvas_plugin_%s" % oid,
                "custom_id": vid,
            }

            # If we have the OpenVAS plugin database, look up the plugin ID
            # that reported this vulnerability and create the vulnerability
            # using a specific class. Otherwise use the vulnerability class
            # for uncategorized vulnerabilities.
            classname = "UncategorizedVulnerability"
            if oid in use_openvas_db:
                classname = use_openvas_db[oid][0][0]

            # Create the Vulnerability object.
            try:
                clazz = globals()[classname]
                vuln = clazz(target, **kwargs)
            except Exception:
                t = format_exc()
                Logger.log_error_more_verbose(
                    "Could not load vulnerability of type: %s" % classname)
                Logger.log_error_more_verbose(t)
                vuln = UncategorizedVulnerability(target, **kwargs)
            results.append(vuln)

        # Skip this result on error.
        except Exception as e:
            t = format_exc()
            Logger.log_error_verbose(
                "Error parsing OpenVAS results: %s" % str(e))
            Logger.log_error_more_verbose(t)

    # BUGFIX: the docstring promises list(Data) but the original
    # never returned the accumulated results.
    return results
def parse_results(openvas_results, ip=None):
    """
    Convert the OpenVAS scan results to the GoLismero data model.

    :param openvas_results: OpenVAS scan results.
    :type openvas_results: list(OpenVASResult)

    :param ip: (Optional) IP address to link the vulnerabilities to.
    :type ip: IP | None

    :returns: Scan results converted to the GoLismero data model,
        or None if the OpenVAS plugin database is missing.
    :rtype: list(Data) | None
    """

    # This is where we'll store the results.
    results = []

    # Remember the hosts we've seen so we don't create them twice.
    hosts_seen = {}

    # Maps of OpenVAS levels to GoLismero levels.
    LEVELS = {
        'debug': 'informational',
        'log': 'informational',
        'low': "low",
        'medium': 'middle',
        'high': "high",
    }
    RISKS = {
        'none': 0,
        'debug': 0,
        'log': 0,
        'low': 1,
        'medium': 2,
        'high': 3,
        'critical': 4,
    }

    # Do we have the OpenVAS plugin database?
    if not os.path.exists(openvas_db):
        Logger.log_error(
            "OpenVAS plugin not initialized, please run setup.py")
        return

    # Load the database.
    with open(openvas_db, "rb") as f:
        use_openvas_db = Pickler.load(f)

    # Get the configuration.
    import_log = Config.audit_config.boolean(
        Config.plugin_args.get("import_log", "no"))
    import_debug = Config.audit_config.boolean(
        Config.plugin_args.get("import_debug", "no"))

    # For each OpenVAS result...
    for opv in openvas_results:
        try:

            # Get the host.
            host = opv.host

            # Skip if we don't have a target host.
            if host is None:
                continue

            # Get the threat level.
            threat = getattr(opv, "threat", "log").lower()

            # Discard log and debug entries, keep only the vulnerabilities.
            if threat == "log" and not import_log:
                continue
            if threat == "debug" and not import_debug:
                continue

            # Get or create the vulnerable resource.
            target = ip
            if host in hosts_seen:
                target = hosts_seen[host]
            elif not ip or ip.address != host:
                try:
                    target = IP(host)
                except ValueError:
                    target = Domain(host)
                hosts_seen[host] = target
                results.append(target)

            # Extract the relevant information from the results.
            nvt = opv.nvt
            vid = opv.id
            oid = int(nvt.oid.split(".")[-1])
            name = getattr(nvt, "name", None)
            cvss_base = getattr(nvt, "cvss_base", None)
            level = LEVELS.get(threat, "informational")
            risk = RISKS.get(
                getattr(nvt, "risk_factor", "none").lower(), 0)

            # Get the vulnerability description, falling back to the
            # NVT description and summary, or None if all are empty.
            description = opv.raw_description
            if not description:
                description = nvt.description
            if not description:
                description = nvt.summary
            if not description:
                description = None

            # Extract the CVEs and Bugtraq IDs, dropping the
            # "no entry" placeholder markers.
            cve = nvt.cve.split(", ") if nvt.cve else []
            if "NOCVE" in cve:
                cve.remove("NOCVE")
            bid = []
            if nvt.bid:
                bid.extend("BID-" + x for x in nvt.bid.split(", "))
            if nvt.bugtraq:
                bid.extend("BID-" + x for x in nvt.bugtraq.split(", "))
            if "NOBID" in bid:
                # BUGFIX: remove the marker from 'bid', not 'cve';
                # the original called cve.remove("NOBID"), which
                # raised ValueError and discarded the whole result.
                bid.remove("NOBID")

            # Extract the notes and add them to the description text.
            if opv.notes and description is not None:
                description += "\n" + "\n".join(
                    " - " + note.text for note in opv.notes
                )

            # Extract the reference URLs from the description text.
            # URLs appear after a "URL:" marker and are terminated by
            # a comma, a newline, or the end of the text.
            references = []
            if description is not None:
                p = description.find("URL:")
                while p >= 0:
                    p += 4
                    q2 = description.find("\n", p)
                    q1 = description.find(",", p, q2)
                    if q1 > p:
                        q = q1
                    else:
                        q = q2
                    if q < p:
                        # No terminator found: take the rest of the text.
                        q = len(description)
                    url = description[p:q].strip()
                    try:
                        url = parse_url(url).url
                        references.append(url)
                    except Exception:
                        Logger.log_error(format_exc())
                    p = description.find("URL:", q)

            # Prepare the vulnerability properties.
            kwargs = {
                "title": name,
                "description": description,
                "references": references,
                "level": level,
                "risk": risk,
                "severity": risk,
                "impact": risk,
                "cvss_base": cvss_base,
                "cve": cve,
                "bid": bid,
                "tool_id": "openvas_plugin_%s" % oid,
                "custom_id": vid,
            }

            # If we have the OpenVAS plugin database, look up the plugin ID
            # that reported this vulnerability and create the vulnerability
            # using a specific class. Otherwise use the vulnerability class
            # for uncategorized vulnerabilities.
            classname = "UncategorizedVulnerability"
            if oid in use_openvas_db:
                classname = use_openvas_db[oid][0][0]

            # Create the Vulnerability object.
            try:
                clazz = globals()[classname]
                vuln = clazz(target, **kwargs)
            except Exception:
                t = format_exc()
                Logger.log_error_more_verbose(
                    "Could not load vulnerability of type: %s" % classname)
                Logger.log_error_more_verbose(t)
                vuln = UncategorizedVulnerability(target, **kwargs)
            results.append(vuln)

        # Skip this result on error.
        except Exception as e:
            t = format_exc()
            Logger.log_error_verbose(
                "Error parsing OpenVAS results: %s" % str(e))
            Logger.log_error_more_verbose(t)

    # BUGFIX: the docstring promises list(Data) but the original
    # never returned the accumulated results.
    return results
def _benchmark():
    """
    Parse a representative URL exercising the path, a query string
    (including a value-less "orphan" parameter) and a fragment.

    :returns: The normalized URL.
    :rtype: str
    """
    # BUGFIX: the original literal contained a mojibake pilcrow
    # character followed by "m" - the byte sequence of "&param"
    # decoded with the wrong charset. Restored to "&param=value".
    return parse_url(
        "http://example.com/path"
        "?query=string&param=value&orphan#fragment_id"
    ).url