def filtered_freq_generator(freq_list):
    already_tested = ScalableBloomFilter()

    for freq in freq_list:
        if freq not in already_tested:
            already_tested.add(freq)
            yield freq
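A minimal usage sketch of the generator above. The import path is an assumption: the standalone pybloom package exposes a ScalableBloomFilter class, while w3af keeps its own implementation in the project's bloomfilter module.

# Sketch only -- adjust the import to wherever ScalableBloomFilter lives
# in your project (e.g. pybloom, or w3af's bloomfilter module).
from pybloom import ScalableBloomFilter


def filtered_freq_generator(freq_list):
    already_tested = ScalableBloomFilter()

    for freq in freq_list:
        if freq not in already_tested:
            already_tested.add(freq)
            yield freq


# Only the first occurrence of each item is yielded, duplicates are dropped.
print list(filtered_freq_generator(['a', 'b', 'a', 'c', 'b']))
# ['a', 'b', 'c']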
def __init__(self):
    CrawlPlugin.__init__(self)

    # Internal variables
    self._already_crawled = ScalableBloomFilter()
    self._already_verified = ScalableBloomFilter()

    # User configured parameters
    self._max_depth = 3
def test_bloom_filter(self):
    f = ScalableBloomFilter()

    for i in xrange(20000):
        data = (i, i)
        f.add(data)

    for i in xrange(20000):
        data = (i, i)
        data in f
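The bare "data in f" lookups above exercise membership checks without asserting on the result. A hedged sketch of an assertion-based variant follows, assuming the same ScalableBloomFilter class as in the other snippets: a Bloom filter gives no false negatives, so every added item must test as present, while a small false-positive rate on unseen items is expected.

# Sketch only -- assumes ScalableBloomFilter is importable as in the
# snippets above (e.g. from pybloom, or the project's bloomfilter module).
from pybloom import ScalableBloomFilter

f = ScalableBloomFilter()

for i in xrange(20000):
    f.add((i, i))

# No false negatives: everything that was added must be reported as present.
for i in xrange(20000):
    assert (i, i) in f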
def __init__(self):
    CrawlPlugin.__init__(self)

    # User configured parameters
    self._wordlist = os.path.join(ROOT_PATH, "plugins", "crawl",
                                  "content_negotiation",
                                  "common_filenames.db")

    # Internal variables
    self._already_tested_dir = ScalableBloomFilter()
    self._already_tested_resource = ScalableBloomFilter()
    self._content_negotiation_enabled = None
    self._to_bruteforce = Queue.Queue()

    # I want to try 3 times to see if the remote host is vulnerable,
    # detection is not thaaaat accurate!
    self._tries_left = 3
def __init__(self):
    CrawlPlugin.__init__(self)

    # Internal variables
    self._analyzed_dirs = ScalableBloomFilter()
    self._analyzed_filenames = ScalableBloomFilter()

    self._dvcs = {}
    self._dvcs['git repository'] = {}
    self._dvcs['git ignore'] = {}
    self._dvcs['hg repository'] = {}
    self._dvcs['hg ignore'] = {}
    self._dvcs['bzr repository'] = {}
    self._dvcs['bzr ignore'] = {}
    self._dvcs['svn repository'] = {}
    self._dvcs['svn ignore'] = {}
    self._dvcs['cvs repository'] = {}
    self._dvcs['cvs ignore'] = {}

    self._dvcs['git repository']['filename'] = '.git/index'
    self._dvcs['git repository']['function'] = self.git_index

    self._dvcs['git ignore']['filename'] = '.gitignore'
    self._dvcs['git ignore']['function'] = self.ignore_file

    self._dvcs['hg repository']['filename'] = '.hg/dirstate'
    self._dvcs['hg repository']['function'] = self.hg_dirstate

    self._dvcs['hg ignore']['filename'] = '.hgignore'
    self._dvcs['hg ignore']['function'] = self.ignore_file

    self._dvcs['bzr repository']['filename'] = '.bzr/checkout/dirstate'
    self._dvcs['bzr repository']['function'] = self.bzr_checkout_dirstate

    self._dvcs['bzr ignore']['filename'] = '.bzrignore'
    self._dvcs['bzr ignore']['function'] = self.ignore_file

    self._dvcs['svn repository']['filename'] = '.svn/entries'
    self._dvcs['svn repository']['function'] = self.svn_entries

    self._dvcs['svn ignore']['filename'] = '.svnignore'
    self._dvcs['svn ignore']['function'] = self.ignore_file

    self._dvcs['cvs repository']['filename'] = 'CVS/Entries'
    self._dvcs['cvs repository']['function'] = self.cvs_entries

    self._dvcs['cvs ignore']['filename'] = '.cvsignore'
    self._dvcs['cvs ignore']['function'] = self.ignore_file
def __init__(self):
    GrepPlugin.__init__(self)

    # Internal variables
    self._comments = DiskDict(table_prefix='html_comments')
    self._already_reported = ScalableBloomFilter()
    self._end_was_called = False
def __init__(self):
    CrawlPlugin.__init__(self)

    self._headers = None
    self._first_time = True
    self._fuzz_images = False
    self._seen = ScalableBloomFilter()
def __init__(self):
    GrepPlugin.__init__(self)

    self._already_reported = ScalableBloomFilter()

    # regex to split between words
    self._split_re = re.compile('[^\w]')
def __init__(self):
    CrawlPlugin.__init__(self)

    self._already_visited = ScalableBloomFilter()

    # User options
    self._fuzz_images = False
    self._max_digit_sections = 4
def __init__(self):
    CrawlPlugin.__init__(self)

    # internal variables
    self._exec = True
    self._already_analyzed = ScalableBloomFilter()

    # User configured parameters
    self._db_file = os.path.join(ROOT_PATH, 'plugins', 'crawl', 'pykto',
                                 'scan_database.db')
    self._extra_db_file = os.path.join(ROOT_PATH, 'plugins', 'crawl',
                                       'pykto', 'w3af_scan_database.db')
    self._cgi_dirs = ['/cgi-bin/']
    self._admin_dirs = ['/admin/', '/adm/']
    self._users = ['adm', 'bin', 'daemon', 'ftp', 'guest', 'listen', 'lp',
                   'mysql', 'noaccess', 'nobody', 'nobody4', 'nuucp',
                   'operator', 'root', 'smmsp', 'smtp', 'sshd', 'sys',
                   'test', 'unknown']
    self._nuke = ['/', '/postnuke/', '/postnuke/html/', '/modules/',
                  '/phpBB/', '/forum/']
    self._mutate_tests = False
def __init__(self):
    InfrastructurePlugin.__init__(self)

    # Internal variables
    self._first_exec = True
    self._already_queried = ScalableBloomFilter()
    self._can_resolve_domain_names = False
def __init__(self, grep_plugins, w3af_core):
    """
    :param grep_plugins: Instances of grep plugins in a list
    :param w3af_core: The w3af core that we'll use for status reporting
    """
    # max_in_queue_size is the number of items that will be stored in-memory
    # in the consumer queue
    #
    # Any items exceeding max_in_queue_size will be stored on-disk, which
    # is slow but will prevent any high memory usage imposed by this part
    # of the framework
    max_in_queue_size = 20

    # thread_pool_size defines how many threads we'll use to run grep plugins
    thread_pool_size = 2

    # max_pool_queued_tasks defines how many tasks we'll keep in memory waiting
    # for a worker from the pool to be available
    max_pool_queued_tasks = thread_pool_size * 3

    super(grep, self).__init__(grep_plugins,
                               w3af_core,
                               create_pool=False,
                               #max_pool_queued_tasks=max_pool_queued_tasks,
                               #thread_pool_size=thread_pool_size,
                               thread_name='Grep',
                               max_in_queue_size=max_in_queue_size)

    self._already_analyzed = ScalableBloomFilter()
class frontpage_version(InfrastructurePlugin):
    """
    Search the FrontPage Server Info file and, if found, determine its version.

    :author: Viktor Gazdag ( [email protected] )
    """
    VERSION_RE = re.compile('FPVersion="(.*?)"', re.IGNORECASE)
    ADMIN_URL_RE = re.compile('FPAdminScriptUrl="(.*?)"', re.IGNORECASE)
    AUTHOR_URL_RE = re.compile('FPAuthorScriptUrl="(.*?)"', re.IGNORECASE)

    def __init__(self):
        InfrastructurePlugin.__init__(self)

        # Internal variables
        self._analyzed_dirs = ScalableBloomFilter()

    @runonce(exc_class=RunOnce)
    def discover(self, fuzzable_request):
        """
        For every directory, fetch a list of files and analyze the response.

        :param fuzzable_request: A fuzzable_request instance that contains
                                 (among other things) the URL to test.
        """
        for domain_path in fuzzable_request.get_url().get_directories():

            if domain_path in self._analyzed_dirs:
                continue

            # Save the domain_path so I know I'm not working in vain
            self._analyzed_dirs.add(domain_path)

            # Request the file
            frontpage_info_url = domain_path.url_join("_vti_inf.html")
            try:
                response = self._uri_opener.GET(frontpage_info_url,
                                                cache=True)
            except BaseFrameworkException, w3:
                fmt = 'Failed to GET Frontpage Server _vti_inf.html file: "%s"'\
                      '. Exception: "%s".'
                om.out.debug(fmt % (frontpage_info_url, w3))
            else:
                # Check if it's a FrontPage Info file
                if not is_404(response):
                    fr = FuzzableRequest(response.get_uri())
                    self.output_queue.put(fr)

                    self._analyze_response(response)
def __init__(self):
    InfrastructurePlugin.__init__(self)

    # Internal variables
    self._already_tested = ScalableBloomFilter()

    # On real web applications, if we can't trigger an error in the first
    # MAX_TESTS tests, it simply won't happen and we have to stop testing.
    self.MAX_TESTS = 25
def __init__(self):
    CrawlPlugin.__init__(self)

    # Internal variables
    self._analyzed_dirs = ScalableBloomFilter()

    # -rw-r--r-- 1 andresr w3af 8139 Apr 12 13:23 foo.zip
    regex_str = '[a-z-]{10}\s*\d+\s*(.*?)\s+(.*?)\s+\d+\s+\w+\s+\d+\s+[0-9:]{4,5}\s+(.*)'
    self._listing_parser_re = re.compile(regex_str)
class blank_body(GrepPlugin):
    """
    Find responses with empty body.

    :author: Andres Riancho ([email protected])
    """
    METHODS = ('GET', 'POST')
    HTTP_CODES = (401, 304, 302, 301, 204, 405)

    def __init__(self):
        GrepPlugin.__init__(self)
        self.already_reported = ScalableBloomFilter()

    def grep(self, request, response):
        """
        Plugin entry point, find the blank bodies and report them.

        :param request: The HTTP request object.
        :param response: The HTTP response object
        :return: None
        """
        if response.get_body() == '' and request.get_method() in self.METHODS\
            and response.get_code() not in self.HTTP_CODES\
            and not response.get_headers().icontains('location')\
            and response.get_url().uri2url() not in self.already_reported:

            self.already_reported.add(response.get_url().uri2url())

            desc = 'The URL: "%s" returned an empty body, this could indicate'\
                   ' an application error.'
            desc = desc % response.get_url()

            i = Info('Blank http response body', desc, response.id,
                     self.get_name())
            i.set_url(response.get_url())

            self.kb_append(self, 'blank_body', i)

    def get_long_desc(self):
        """
        :return: A DETAILED description of the plugin functions and features.
        """
        return """
def __init__(self):
    #
    # Set the opener, I need it to perform some tests and gain
    # the knowledge about the server's 404 response bodies.
    #
    self._uri_opener = None
    self._worker_pool = None

    #
    # Internal variables
    #
    self._already_analyzed = False
    self._404_bodies = []
    self._lock = thread.allocate_lock()
    self._fingerprinted_paths = ScalableBloomFilter()
    self._directory_uses_404_codes = ScalableBloomFilter()

    # It is OK to store 200 here, I'm only storing path+filename as the key,
    # and bool as the value.
    self.is_404_LRU = LRU(200)
def __init__(self):
    self._variants = CachedDiskDict(max_in_memory=self.MAX_IN_MEMORY,
                                    table_prefix='variant_db')
    self._variants_eq = ScalableBloomFilter()
    self._variants_form = CachedDiskDict(max_in_memory=self.MAX_IN_MEMORY,
                                         table_prefix='variant_db_form')

    self.params_max_variants = cf.cf.get('params_max_variants')
    self.path_max_variants = cf.cf.get('path_max_variants')
    self.max_equal_form_variants = cf.cf.get('max_equal_form_variants')

    self._db_lock = threading.RLock()
def __init__(self):
    InfrastructurePlugin.__init__(self)

    # Already analyzed extensions
    self._already_analyzed_ext = ScalableBloomFilter()

    # Internal DB
    self._db_file = os.path.join(ROOT_PATH, 'plugins', 'infrastructure',
                                 'php_eggs', 'eggs.json')

    # Get data from external JSON file and fill EGG_DB array
    data = self.read_jsondata(self._db_file)
    self.EGG_DB = self.fill_egg_array(data)
def __init__(self):
    GrepPlugin.__init__(self)

    # For more info regarding this regular expression, please see:
    # https://sourceforge.net/mailarchive/forum.php?thread_name=1955593874.20090122023644%40mlists.olympos.org&forum_name=w3af-develop
    regex_str = '(?<!\.)(?<!\d)(?:(?:10|127)\.(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)|192\.168|169\.'
    regex_str += '254|172\.0?(?:1[6-9]|2[0-9]|3[01]))(?:\.(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-'
    regex_str += '9]?)){2}(?!\d)(?!\.)'
    self._private_ip_address = re.compile(regex_str)
    self._regex_list = [self._private_ip_address, ]

    self._already_inspected = ScalableBloomFilter()
    self._ignore_if_match = None
def __init__(self):
    InfrastructurePlugin.__init__(self)

    # Internal variables
    self._exec = True
    self._already_tested = ScalableBloomFilter()

    # Methods
    self._supported_methods = self.DAV_METHODS | self.COMMON_METHODS | \
                              self.UNCOMMON_METHODS | self.PROPOSED_METHODS | \
                              self.EXTRA_METHODS | self.VERSION_CONTROL

    # User configured variables
    self._exec_one_time = True
    self._report_dav_only = True
def __init__(self):
    CrawlPlugin.__init__(self)

    # Internal variables
    self._compiled_ignore_re = None
    self._compiled_follow_re = None
    self._broken_links = DiskSet(table_prefix='web_spider')
    self._first_run = True
    self._target_urls = []
    self._target_domain = None
    self._already_filled_form = ScalableBloomFilter()
    self._variant_db = VariantDB()

    # User configured variables
    self._ignore_regex = ''
    self._follow_regex = '.*'
    self._only_forward = False
    self._compile_re()
def __init__(self):
    #
    # Set the opener, I need it to perform some tests and gain
    # the knowledge about the server's 404 response bodies.
    #
    self._uri_opener = None
    self._worker_pool = None

    #
    # Internal variables
    #
    self._already_analyzed = False
    self._404_responses = DiskDeque(maxsize=MAX_404_RESPONSES)
    self._lock = thread.allocate_lock()
    self._directory_uses_404_codes = ScalableBloomFilter()

    # It is OK to store 200 here, I'm only storing path+filename as the key,
    # and bool as the value.
    self.is_404_LRU = SynchronizedLRUDict(250)
def __init__(self):
    CrawlPlugin.__init__(self)

    # internal variables
    self._exec = True
    self._already_analyzed = ScalableBloomFilter()

    # User configured parameters
    self._db_file = os.path.join(ROOT_PATH, "plugins", "crawl", "pykto",
                                 "scan_database.db")
    self._extra_db_file = os.path.join(ROOT_PATH, "plugins", "crawl",
                                       "pykto", "w3af_scan_database.db")
    self._cgi_dirs = ["/cgi-bin/"]
    self._admin_dirs = ["/admin/", "/adm/"]
    self._users = [
        "adm",
        "bin",
        "daemon",
        "ftp",
        "guest",
        "listen",
        "lp",
        "mysql",
        "noaccess",
        "nobody",
        "nobody4",
        "nuucp",
        "operator",
        "root",
        "smmsp",
        "smtp",
        "sshd",
        "sys",
        "test",
        "unknown",
    ]
    self._nuke = ["/", "/postnuke/", "/postnuke/html/", "/modules/",
                  "/phpBB/", "/forum/"]
    self._mutate_tests = False
def __init__(self):
    AuditPlugin.__init__(self)

    # Internal variables
    self._already_tested_dirs = ScalableBloomFilter()
class find_backdoors(CrawlPlugin):
    """
    Find web backdoors and web shells.

    :author: Andres Riancho ([email protected])
    """
    WEBSHELL_DB = os.path.join(ROOT_PATH, 'plugins', 'crawl',
                               'find_backdoors', 'web_shells.txt')

    def __init__(self):
        CrawlPlugin.__init__(self)

        # Internal variables
        self._analyzed_dirs = ScalableBloomFilter()

    def crawl(self, fuzzable_request):
        """
        For every directory, fetch a list of shell files and analyze the
        response.

        :param fuzzable_request: A fuzzable_request instance that contains
                                 (among other things) the URL to test.
        """
        domain_path = fuzzable_request.get_url().get_domain_path()

        if domain_path not in self._analyzed_dirs:
            self._analyzed_dirs.add(domain_path)

            # Read the web shell database
            web_shells = self._iter_web_shells()

            # Send the requests using threads:
            args_iter = (domain_path.url_join(fname) for fname in web_shells)
            self.worker_pool.map(self._check_if_exists, args_iter)

    def _iter_web_shells(self):
        """
        :yield: lines from the web shell DB
        """
        for line in file(self.WEBSHELL_DB):
            if line.startswith('#'):
                continue

            if not line:
                continue

            yield line.strip()

    def _check_if_exists(self, web_shell_url):
        """
        Check if the file exists.

        :param web_shell_url: The URL to check
        """
        try:
            response = self._uri_opener.GET(web_shell_url, cache=True)
        except BaseFrameworkException:
            om.out.debug('Failed to GET webshell:' + web_shell_url)
        else:
            if self._is_possible_backdoor(response):
                desc = 'A web backdoor was found at: "%s"; this could ' \
                       'indicate that the server has been compromised.'
                desc = desc % response.get_url()

                v = Vuln('Potential web backdoor', desc, severity.HIGH,
                         response.id, self.get_name())
                v.set_url(response.get_url())

                kb.kb.append(self, 'backdoors', v)
                om.out.vulnerability(v.get_desc(), severity=v.get_severity())

                fr = FuzzableRequest.from_http_response(response)
                self.output_queue.put(fr)

    def _is_possible_backdoor(self, response):
        """
        Heuristic to infer if the content of <response> has the pattern of a
        backdoor response.

        :param response: HTTPResponse object
        :return: A bool value
        """
        if is_404(response):
            return False

        body_text = response.get_body()
        dom = response.get_dom()
        if dom is not None:
            for ele, attrs in BACKDOOR_COLLECTION.iteritems():
                for attrname, attr_vals in attrs.iteritems():
                    # Set of lowered attribute values
                    dom_attr_vals = \
                        set(n.get(attrname).lower() for n in
                            (dom.xpath('//%s[@%s]' % (ele, attrname))))
                    # If at least one element is in the intersection, return True
                    if dom_attr_vals.intersection(set(attr_vals)):
                        return True

        # If no regex matched then try with keywords. At least 2 should be
        # contained in 'body_text' to succeed.
        times = 0
        for back_kw in KNOWN_OFFENSIVE_WORDS:
            if re.search(back_kw, body_text, re.I):
                times += 1
                if times == 2:
                    return True

        return False

    def get_long_desc(self):
        """
        :return: A DETAILED description of the plugin functions and features.
        """
        return """
def __init__(self):
    GrepPlugin.__init__(self)
    self.already_reported = ScalableBloomFilter()
class strange_parameters(GrepPlugin): """ Grep the HTML response and find URIs that have strange parameters. :author: Andres Riancho (([email protected])) """ def __init__(self): GrepPlugin.__init__(self) # Internal variables self._already_reported = ScalableBloomFilter() def grep(self, request, response): """ Plugin entry point. :param request: The HTTP request object. :param response: The HTTP response object :return: None, all results are saved in the kb. """ try: dp = parser_cache.dpc.get_document_parser_for(response) except BaseFrameworkException: return # Note: # - With parsed_references I'm 100% that it's really something in the # HTML that the developer intended to add. # # - The re_references are the result of regular expressions, which in # some cases are just false positives. # parsed_references, _ = dp.get_references() analyzers = {self._analyze_SQL, self._analyze_strange} for ref in parsed_references: for token in ref.querystring.iter_tokens(): token_name = token.get_name() token_value = token.get_value() if (ref.uri2url(), token_name) in self._already_reported: continue for analyzer in analyzers: if analyzer(request, response, ref, token_name, token_value): # Don't repeat findings self._already_reported.add((ref.uri2url(), token_name)) break def _analyze_strange(self, request, response, ref, token_name, token_value): if self._is_strange(request, token_name, token_value): desc = ('The URI: "%s" has a parameter named: "%s" with value:' ' "%s", which is very uncommon. and requires manual' ' verification.') desc %= (response.get_uri(), token_name, token_value) i = Info('Uncommon query string parameter', desc, response.id, self.get_name()) i['parameter_value'] = token_value i.add_to_highlight(token_value) i.set_uri(ref) self.kb_append(self, 'strange_parameters', i) return True return False def _analyze_SQL(self, request, response, ref, token_name, token_value): """ To find this kind of vulns http://thedailywtf.com/Articles/Oklahoma- Leaks-Tens-of-Thousands-of-Social-Security-Numbers,-Other- Sensitive-Data.aspx :return: True if the parameter value contains SQL sentences """ for match in SQL_RE.findall(token_value): if request.sent(match): continue desc = ('The URI: "%s" has a parameter named: "%s" with value:' ' "%s", which is a SQL query.') desc %= (response.get_uri(), token_name, token_value) v = Vuln('Parameter has SQL sentence', desc, severity.LOW, response.id, self.get_name()) v['parameter_value'] = token_value v.add_to_highlight(token_value) v.set_uri(ref) self.kb_append(self, 'strange_parameters', v) return True return False def _is_strange(self, request, parameter, value): """ :return: True if the parameter value is strange """ decoded_parameter = urllib.unquote(parameter) # We don't care about URLs, these are most likely OK if decoded_parameter.startswith('http://'): return False if decoded_parameter.startswith('https://'): return False if 'wicket:' in parameter or 'wicket:' in decoded_parameter: # # The wicket framework uses, by default, strange URLs like this: # # https://www.DOMAIN.com/ # ?wicket:bookmarkablePage=:com.DOMAIN.web.pages.SignInPage # &wicket:interface=:0:signInForm::IFormSubmitListener:: # ;jsessionid=7AC76A46A86BBC3F5253E374241BC892 # # Which are strange in all cases, except from wicket! 
# return False # Seems to be a function _strange_parameter_re = ['\w+\(.*?\)'] for regex in _strange_parameter_re: for match in re.findall(regex, value): if not request.sent(match): return True split_value = [x for x in STRANGE_RE.split(value) if x != ''] if len(split_value) > 4: if not request.sent(value): return True return False def get_long_desc(self): """ :return: A DETAILED description of the plugin functions and features. """ return """
def __init__(self):
    CrawlPlugin.__init__(self)

    self._first_time = True
    self._fuzz_images = False
    self._seen = ScalableBloomFilter()
def __init__(self):
    GrepPlugin.__init__(self)

    # Internal variables
    self._comments = DiskDict()
    self._already_reported_interesting = ScalableBloomFilter()
class find_vhosts(InfrastructurePlugin): """ Modify the HTTP Host header and try to find virtual hosts. :author: Andres Riancho ([email protected]) """ def __init__(self): InfrastructurePlugin.__init__(self) # Internal variables self._first_exec = True self._already_queried = ScalableBloomFilter() self._can_resolve_domain_names = False def discover(self, fuzzable_request): """ Find virtual hosts. :param fuzzable_request: A fuzzable_request instance that contains (among other things) the URL to test. """ analysis_result = self._analyze(fuzzable_request) self._report_results(fuzzable_request, analysis_result) def _analyze(self, fuzzable_request): vhost_list = [] if self._first_exec: self._first_exec = False vhost_list.extend(self._generic_vhosts(fuzzable_request)) # I also test for ""dead links"" that the web programmer left in the page # For example, If w3af finds a link to "http://corporative.intranet.corp/" # it will try to resolve the dns name, if it fails, it will try to request # that page from the server vhost_list.extend(self._get_dead_links(fuzzable_request)) return vhost_list def _report_results(self, fuzzable_request, analysis_result): """ Report our findings """ reported = set() for vhost, request_id in analysis_result: if vhost not in reported: reported.add(vhost) domain = fuzzable_request.get_url().get_domain() desc = 'Found a new virtual host at the target web server, the ' \ 'virtual host name is: "%s". To access this site' \ ' you might need to change your DNS resolution settings in' \ ' order to point "%s" to the IP address of "%s".' desc = desc % (vhost, vhost, domain) v = Vuln.from_fr('Virtual host identified', desc, severity.LOW, request_id, self.get_name(), fuzzable_request) kb.kb.append(self, 'find_vhosts', v) om.out.information(v.get_desc()) def _get_dead_links(self, fuzzable_request): """ Find every link on a HTML document verify if the domain is reachable or not; after that, verify if the web found a different name for the target site or if we found a new site that is linked. If the link points to a dead site then report it (it could be pointing to some private address or something...) """ # Get some responses to compare later base_url = fuzzable_request.get_url().base_url() original_response = self._uri_opener.GET(fuzzable_request.get_uri(), cache=True) base_response = self._uri_opener.GET(base_url, cache=True) base_resp_body = base_response.get_body() try: dp = parser_cache.dpc.get_document_parser_for(original_response) except BaseFrameworkException: # Failed to find a suitable parser for the document return [] # Set the non existant response non_existant_response = self._get_non_exist(fuzzable_request) nonexist_resp_body = non_existant_response.get_body() # Note: # - With parsed_references I'm 100% that it's really something in the HTML # that the developer intended to add. # # - The re_references are the result of regular expressions, which in some cases # are just false positives. # # In this case, and because I'm only going to use the domain name of the URL # I'm going to trust the re_references also. 
parsed_references, re_references = dp.get_references() parsed_references.extend(re_references) res = [] vhosts = self._verify_link_domain(parsed_references) for domain, vhost_response in self._send_in_threads(base_url, vhosts): vhost_resp_body = vhost_response.get_body() if relative_distance_lt(vhost_resp_body, base_resp_body, 0.35) and \ relative_distance_lt(vhost_resp_body, nonexist_resp_body, 0.35): res.append((domain, vhost_response.id)) else: desc = 'The content of "%s" references a non existant domain:'\ ' "%s". This can be a broken link, or an internal domain'\ ' name.' desc = desc % (fuzzable_request.get_url(), domain) i = Info('Internal hostname in HTML link', desc, original_response.id, self.get_name()) i.set_url(fuzzable_request.get_url()) kb.kb.append(self, 'find_vhosts', i) om.out.information(i.get_desc()) return res def _verify_link_domain(self, parsed_references): """ Verify each link in parsed_references and yield the ones that can NOT be resolved using DNS. """ for link in parsed_references: domain = link.get_domain() if domain not in self._already_queried: self._already_queried.add(domain) try: # raises exception when it's not found # socket.gaierror: (-5, 'No address associated with hostname') socket.gethostbyname(domain) except: yield domain def _generic_vhosts(self, fuzzable_request): """ Test some generic virtual hosts, only do this once. """ # Get some responses to compare later base_url = fuzzable_request.get_url().base_url() original_response = self._uri_opener.GET(base_url, cache=True) orig_resp_body = original_response.get_body() non_existant_response = self._get_non_exist(fuzzable_request) nonexist_resp_body = non_existant_response.get_body() res = [] vhosts = self._get_common_virtualhosts(base_url) for vhost, vhost_response in self._send_in_threads(base_url, vhosts): vhost_resp_body = vhost_response.get_body() # If they are *really* different (not just different by some chars) if relative_distance_lt(vhost_resp_body, orig_resp_body, 0.35) and \ relative_distance_lt(vhost_resp_body, nonexist_resp_body, 0.35): res.append((vhost, vhost_response.id)) return res def _send_in_threads(self, base_url, vhosts): base_url_repeater = repeat(base_url) args_iterator = izip(base_url_repeater, vhosts) http_get = return_args(one_to_many(self._http_get_vhost)) pool_results = self.worker_pool.imap_unordered(http_get, args_iterator) for ((base_url, vhost), ), vhost_response in pool_results: yield vhost, vhost_response def _http_get_vhost(self, base_url, vhost): """ Performs an HTTP GET to a URL using a specific vhost. :return: HTTPResponse object. """ headers = Headers([('Host', vhost)]) return self._uri_opener.GET(base_url, cache=False, headers=headers) def _get_non_exist(self, fuzzable_request): base_url = fuzzable_request.get_url().base_url() non_existant_domain = 'iDoNotExistPleaseGoAwayNowOrDie' + rand_alnum(4) return self._http_get_vhost(base_url, non_existant_domain) def _get_common_virtualhosts(self, base_url): """ :param base_url: The target URL object. :return: A list of possible domain names that could be hosted in the same web server that "domain". 
""" domain = base_url.get_domain() root_domain = base_url.get_root_domain() common_virtual_hosts = [ 'intranet', 'intra', 'extranet', 'extra', 'test', 'test1', 'old', 'new', 'admin', 'adm', 'webmail', 'services', 'console', 'apps', 'mail', 'corporate', 'ws', 'webservice', 'private', 'secure', 'safe', 'hidden', 'public' ] for subdomain in common_virtual_hosts: # intranet yield subdomain # intranet.www.targetsite.com yield subdomain + '.' + domain # intranet.targetsite.com yield subdomain + '.' + root_domain # intranet.targetsite yield subdomain + '.' + root_domain.split('.')[0] def get_long_desc(self): """ :return: A DETAILED description of the plugin functions and features. """ return """
class dot_net_errors(InfrastructurePlugin):
    """
    Request specially crafted URLs that generate ASP.NET errors in order
    to gather information.

    :author: Andres Riancho ([email protected])
    """
    SPECIAL_CHARS = ['|', '~']

    RUNTIME_ERROR = '<h2> <i>Runtime Error</i> </h2></span>'
    REMOTE_MACHINE = ('<b>Details:</b> To enable the details of this'
                      ' specific error message to be viewable on'
                      ' remote machines')

    def __init__(self):
        InfrastructurePlugin.__init__(self)

        # Internal variables
        self._already_tested = ScalableBloomFilter()

        # On real web applications, if we can't trigger an error in the first
        # MAX_TESTS tests, it simply won't happen and we have to stop testing.
        self.MAX_TESTS = 25

    def discover(self, fuzzable_request, debugging_id):
        """
        Requests the special filenames.

        :param debugging_id: A unique identifier for this call to discover()
        :param fuzzable_request: A fuzzable_request instance that contains
                                 (among other things) the URL to test.
        """
        if len(self._already_tested) >= self.MAX_TESTS:
            return

        if fuzzable_request.get_url() in self._already_tested:
            return

        self._already_tested.add(fuzzable_request.get_url())

        self.worker_pool.map(self._send_and_check,
                             self._generate_urls(fuzzable_request.get_url()),
                             chunksize=1)

    def _generate_urls(self, original_url):
        """
        Generate new URLs based on original_url.

        :param original_url: The original url that has to be modified in
                             order to trigger errors in the remote application.
        """
        filename = original_url.get_file_name()

        if not filename:
            return

        if '.' not in filename:
            return

        split_filename = filename.split('.')
        extension = split_filename[-1:][0]
        name = '.'.join(split_filename[0:-1])

        for char in self.SPECIAL_CHARS:
            new_filename = name + char + '.' + extension

            try:
                new_url = original_url.url_join(new_filename)
            except ValueError:
                # When the filename has a colon the url_join() will fail with
                # ValueError
                continue

            yield new_url

    def _send_and_check(self, url):
        response = self._uri_opener.GET(url, cache=True)

        if self.RUNTIME_ERROR not in response.body:
            return

        if self.REMOTE_MACHINE in response.body:
            return

        desc = ('Detailed information about ASP.NET error messages can be'
                ' viewed from remote clients. The URL: "%s" discloses'
                ' detailed error messages.')
        desc %= response.get_url()

        v = Vuln('Information disclosure via .NET errors', desc,
                 severity.LOW, response.id, self.get_name())

        kb.kb.append(self, 'dot_net_errors', v)

    def get_plugin_deps(self):
        """
        :return: A list with the names of the plugins that should be run
                 before the current one.
        """
        return ['grep.error_pages']

    def get_long_desc(self):
        """
        :return: A DETAILED description of the plugin functions and features.
        """
        return """
def __init__(self):
    GrepPlugin.__init__(self)
    self.already_reported = ScalableBloomFilter()
def __init__(self):
    CrawlPlugin.__init__(self)

    # Internal variables
    self._analyzed_dirs = ScalableBloomFilter()
class digit_sum(CrawlPlugin): """ Take an URL with a number (index2.asp) and try to find related files (index1.asp, index3.asp). :author: Andres Riancho ([email protected]) """ def __init__(self): CrawlPlugin.__init__(self) self._already_visited = ScalableBloomFilter() # User options self._fuzz_images = False self._max_digit_sections = 4 def crawl(self, fuzzable_request, debugging_id): """ Searches for new URLs by adding and subtracting numbers to the file and the parameters. :param debugging_id: A unique identifier for this call to discover() :param fuzzable_request: A fuzzable_request instance that contains (among other things) the URL to test. """ # If the fuzzable request sends post-data in any way, we don't want to # start fuzzing the URL, it simply doesn't make any sense. if fuzzable_request.get_data( ) or fuzzable_request.get_method() != 'GET': return url = fuzzable_request.get_url() headers = Headers([('Referer', url.url_string)]) fuzzable_request.get_headers().update(headers) original_response = self._uri_opener.send_mutant(fuzzable_request, cache=True) if original_response.is_text_or_html() or self._fuzz_images: fr_generator = self._mangle_digits(fuzzable_request) response_repeater = repeat(original_response) args = izip(fr_generator, response_repeater) self.worker_pool.map_multi_args(self._do_request, args) # I add myself so the next call to this plugin wont find me ... # Example: index1.html ---> index2.html --!!--> index1.html self._already_visited.add(fuzzable_request.get_uri()) def _do_request(self, fuzzable_request, original_resp): """ Send the request. :param fuzzable_request: The modified fuzzable request :param original_resp: The response for the original request that was sent. """ response = self._uri_opener.send_mutant(fuzzable_request, cache=True) if is_404(response): return # We have different cases: # - If the URLs are different, then there is nothing to think # about, we simply found something new! if response.get_url() != original_resp.get_url(): self.output_queue.put(fuzzable_request) # - If the content type changed, then there is no doubt that # we've found something new! elif response.doc_type != original_resp.doc_type: self.output_queue.put(fuzzable_request) # - If we changed the query string parameters, we have to check # the content elif fuzzy_not_equal(response.get_clear_text_body(), original_resp.get_clear_text_body(), 0.8): # In this case what might happen is that the number we changed # is "out of range" and when requesting that it will trigger an # error in the web application, or show us a non-interesting # response that holds no content. # # We choose to return these to the core because they might help # with the code coverage efforts. Think about something like: # foo.aspx?id=OUT_OF_RANGE&foo=inject_here # vs. # foo.aspx?id=IN_RANGE&foo=inject_here # # This relates to the EXPECTED_URLS in test_digit_sum.py self.output_queue.put(fuzzable_request) def _mangle_digits(self, fuzzable_request): """ Mangle the digits (if any) in the fr URL. 
:param fuzzable_request: The original FuzzableRequest :return: A generator which returns mangled fuzzable requests """ # First i'll mangle the digits in the URL filename filename = fuzzable_request.get_url().get_file_name() domain_path = fuzzable_request.get_url().get_domain_path() for fname in self._do_combinations(filename): fr_copy = copy.deepcopy(fuzzable_request) fr_copy.set_url(domain_path.url_join(fname)) if fr_copy.get_uri() not in self._already_visited: self._already_visited.add(fr_copy.get_uri()) yield fr_copy # Now i'll mangle the query string variables data_container = fuzzable_request.get_querystring() for _, token in data_container.iter_bound_tokens(): for modified_value in self._do_combinations(token.get_value()): fr_copy = copy.deepcopy(fuzzable_request) qs = fr_copy.get_querystring() qs_token = qs.set_token(token.get_path()) qs_token.set_value(modified_value) if fr_copy.get_uri() not in self._already_visited: self._already_visited.add(fr_copy.get_uri()) yield fr_copy def _do_combinations(self, a_string): """ >>> ds = digit_sum() >>> ds._do_combinations( 'abc123' ) ['abc124', 'abc122'] >>> ds._do_combinations( 'abc123def56' ) ['abc124def56', 'abc122def56', 'abc123def57', 'abc123def55'] """ res = [] split = self._find_digits(a_string) if len(split) <= 2 * self._max_digit_sections: for i in xrange(len(split)): if split[i].isdigit(): split[i] = str(int(split[i]) + 1) res.append(''.join(split)) split[i] = str(int(split[i]) - 2) res.append(''.join(split)) # restore the initial value for next loop split[i] = str(int(split[i]) + 1) return res def _find_digits(self, a_string): """ Finds digits in a string and returns a list with string sections. >>> ds = digit_sum() >>> ds._find_digits('foo45') ['foo', '45'] >>> ds._find_digits('f001bar112') ['f', '001', 'bar', '112'] :return: A list of strings. """ # regexes are soooooooooooooo cool ! return [x for x in DIGIT_REGEX.split(a_string) if x != ''] def get_options(self): """ :return: A list of option objects for this plugin. """ ol = OptionList() d = 'Apply URL fuzzing to all URLs, including images, videos, zip, etc.' h = 'It\'s safe to leave this option as the default.' o = opt_factory('fuzzImages', self._fuzz_images, d, 'boolean', help=h) ol.add(o) d = 'Set the top number of sections to fuzz' h = 'It\'s safe to leave this option as the default. For example, with'\ ' maxDigitSections = 1, this string wont be fuzzed: abc123def234 ;'\ ' but this one will abc23ldd.' o = opt_factory('maxDigitSections', self._max_digit_sections, d, 'integer', help=h) ol.add(o) return ol def set_options(self, options_list): """ This method sets all the options that are configured using the user interface generated by the framework using the result of get_options(). :param options_list: A dictionary with the options for the plugin. :return: No value is returned. """ self._fuzz_images = options_list['fuzzImages'].get_value() self._max_digit_sections = options_list['maxDigitSections'].get_value() def get_long_desc(self): """ :return: A DETAILED description of the plugin functions and features. """ return """
class dot_listing(CrawlPlugin):
    """
    Search for .listing files and extract new filenames from them.

    :author: Tomas Velazquez ( [email protected] )
    """
    # -rw-r--r-- 1 andresr w3af 8139 Apr 12 13:23 foo.zip
    regex_str = '[a-z-]{10}\s*\d+\s*(.*?)\s+(.*?)\s+\d+\s+\w+\s+\d+\s+[0-9:]{4,5}\s+(.*)'
    LISTING_PARSER_RE = re.compile(regex_str)

    def __init__(self):
        CrawlPlugin.__init__(self)

        # Internal variables
        self._analyzed_dirs = ScalableBloomFilter()

    def crawl(self, fuzzable_request):
        """
        For every directory, fetch the .listing file and analyze the response.

        :param fuzzable_request: A fuzzable_request instance that contains
                                 (among other things) the URL to test.
        """
        for domain_path in fuzzable_request.get_url().get_directories():
            if domain_path not in self._analyzed_dirs:
                self._analyzed_dirs.add(domain_path)
                self._check_and_analyze(domain_path)

    def _check_and_analyze(self, domain_path):
        """
        Check if a .listing filename exists in the domain_path.

        :return: None, everything is saved to the self.out_queue.
        """
        # Request the file
        url = domain_path.url_join('.listing')
        try:
            response = self._uri_opener.GET(url, cache=True)
        except BaseFrameworkException, w3:
            msg = 'Failed to GET .listing file: "%s". Exception: "%s".'
            om.out.debug(msg % (url, w3))
            return

        # Check if it's a .listing file
        if is_404(response):
            return

        fr = FuzzableRequest(response.get_url())
        self.output_queue.put(fr)

        parsed_url_set = set()
        users = set()
        groups = set()

        extracted_info = self._extract_info_from_listing(response.get_body())
        for username, group, filename in extracted_info:
            if filename != '.' and filename != '..':
                parsed_url_set.add(domain_path.url_join(filename))
                users.add(username)
                groups.add(group)

        self.worker_pool.map(self.http_get_and_parse, parsed_url_set)

        if parsed_url_set:
            desc = 'A .listing file was found at: "%s". The contents' \
                   ' of this file disclose filenames.'
            desc = desc % (response.get_url())

            v = Vuln('.listing file found', desc, severity.LOW, response.id,
                     self.get_name())
            v.set_url(response.get_url())

            kb.kb.append(self, 'dot_listing', v)
            om.out.vulnerability(v.get_desc(), severity=v.get_severity())

        real_users = set([u for u in users if not u.isdigit()])
        real_groups = set([g for g in groups if not g.isdigit()])

        if real_users or real_groups:
            desc = 'A .listing file which leaks operating system usernames' \
                   ' and groups was identified at %s. The leaked users are %s,' \
                   ' and the groups are %s. This information can be used' \
                   ' during a bruteforce attack against the Web application,' \
                   ' SSH or FTP services.'
            desc = desc % (response.get_url(),
                           ', '.join(real_users),
                           ', '.join(real_groups))

            v = Vuln('Operating system username and group leak', desc,
                     severity.LOW, response.id, self.get_name())
            v.set_url(response.get_url())

            kb.kb.append(self, 'dot_listing', v)
            om.out.vulnerability(v.get_desc(), severity=v.get_severity())
class allowed_methods(InfrastructurePlugin): """ Enumerate the allowed methods of an URL. :author: Andres Riancho ([email protected]) """ BAD_CODES = set([ response_codes.UNAUTHORIZED, response_codes.NOT_IMPLEMENTED, response_codes.METHOD_NOT_ALLOWED, response_codes.FORBIDDEN ]) DAV_METHODS = set([ 'DELETE', 'PROPFIND', 'PROPPATCH', 'COPY', 'MOVE', 'LOCK', 'UNLOCK', 'MKCOL' ]) COMMON_METHODS = set(['OPTIONS', 'GET', 'HEAD', 'POST', 'TRACE', 'PUT']) UNCOMMON_METHODS = set([ '*', 'SUBSCRIPTIONS', 'NOTIFY', 'DEBUG', 'TRACK', 'POLL', 'PIN', 'INVOKE', 'SUBSCRIBE', 'UNSUBSCRIBE' ]) # Methods taken from http://www.w3.org/Protocols/HTTP/Methods.html PROPOSED_METHODS = set([ 'CHECKOUT', 'SHOWMETHOD', 'LINK', 'UNLINK', 'CHECKIN', 'TEXTSEARCH', 'SPACEJUMP', 'SEARCH', 'REPLY' ]) EXTRA_METHODS = set([ 'CONNECT', 'RMDIR', 'MKDIR', 'REPORT', 'ACL', 'DELETE', 'INDEX', 'LABEL', 'INVALID' ]) VERSION_CONTROL = set([ 'VERSION_CONTROL', 'CHECKIN', 'UNCHECKOUT', 'PATCH', 'MERGE', 'MKWORKSPACE', 'MKACTIVITY', 'BASELINE_CONTROL' ]) def __init__(self): InfrastructurePlugin.__init__(self) # Internal variables self._exec = True self._already_tested = ScalableBloomFilter() # Methods self._supported_methods = self.DAV_METHODS | self.COMMON_METHODS | \ self.UNCOMMON_METHODS | self.PROPOSED_METHODS | \ self.EXTRA_METHODS | self.VERSION_CONTROL # User configured variables self._exec_one_time = True self._report_dav_only = True def discover(self, fuzzable_request): """ Uses several techniques to try to find out what methods are allowed for an URL. :param fuzzable_request: A fuzzable_request instance that contains (among other things) the URL to test. """ if not self._exec: # This will remove the plugin from the infrastructure # plugins to be run. raise RunOnce() # Run the plugin. if self._exec_one_time: self._exec = False domain_path = fuzzable_request.get_url().get_domain_path() if domain_path not in self._already_tested: self._already_tested.add(domain_path) allowed_methods, id_list = self._identify_allowed_methods( domain_path) self._analyze_methods(domain_path, allowed_methods, id_list) def _identify_allowed_methods(self, url): # First, try to check available methods using OPTIONS, # if OPTIONS isn't enabled, do it manually allowed_options, id_options = self._identify_with_OPTIONS(url) allowed_bf, id_bf = self._identify_with_bruteforce(url) allowed_methods = allowed_options + allowed_bf id_list = id_options + id_bf # Added this to make the output a little bit more readable. allowed_methods.sort() return allowed_methods, id_list def _identify_with_OPTIONS(self, url): """ Find out what methods are allowed using OPTIONS :param url: Where to check. """ allowed_methods = [] id_list = [] try: res = self._uri_opener.OPTIONS(url) except: pass else: headers = res.get_lower_case_headers() id_list.append(res.id) for header_name in ['allow', 'public']: if header_name in headers: allowed_methods.extend(headers[header_name].split(',')) allowed_methods = [x.strip() for x in allowed_methods] allowed_methods = list(set(allowed_methods)) return allowed_methods, id_list def _identify_with_bruteforce(self, url): id_list = [] allowed_methods = [] # # Before doing anything else, I'll send a request with a # non-existant method if that request succeds, then all will... 
# non_exist_response = self._uri_opener.ARGENTINA(url) get_response = self._uri_opener.GET(url) if non_exist_response.get_code() not in self.BAD_CODES\ and get_response.get_body() == non_exist_response.get_body(): desc = 'The remote Web server has a custom configuration, in'\ ' which any not implemented methods that are invoked are'\ ' defaulted to GET instead of returning a "Not Implemented"'\ ' response.' response_ids = [non_exist_response.get_id(), get_response.get_id()] i = Info('Non existent methods default to GET', desc, response_ids, self.get_name()) i.set_url(url) kb.kb.append(self, 'custom-configuration', i) # # It makes no sense to continue working, all methods will # appear as enabled because of this custom configuration. # return [], [non_exist_response.id, get_response.id] # 'DELETE' is not tested! I don't want to remove anything... # 'PUT' is not tested! I don't want to overwrite anything... methods_to_test = self._supported_methods.copy() # remove dangerous methods. methods_to_test.remove('DELETE') methods_to_test.remove('PUT') for method in methods_to_test: method_functor = getattr(self._uri_opener, method) try: response = apply(method_functor, (url, ), {}) except: pass else: code = response.get_code() if code not in self.BAD_CODES: allowed_methods.append(method) id_list.append(response.id) return allowed_methods, id_list def _analyze_methods(self, url, allowed_methods, id_list): # Check for DAV if set(allowed_methods).intersection(self.DAV_METHODS): # dav is enabled! # Save the results in the KB so that other plugins can use this # information desc = 'The URL "%s" has the following allowed methods. These'\ ' include DAV methods and should be disabled: %s' desc = desc % (url, ', '.join(allowed_methods)) i = Info('DAV methods enabled', desc, id_list, self.get_name()) i.set_url(url) i['methods'] = allowed_methods kb.kb.append(self, 'dav-methods', i) else: # Save the results in the KB so that other plugins can use this # information. Do not remove these information, other plugins # REALLY use it ! desc = 'The URL "%s" has the following enabled HTTP methods: %s' desc = desc % (url, ', '.join(allowed_methods)) i = Info('Allowed HTTP methods', desc, id_list, self.get_name()) i.set_url(url) i['methods'] = allowed_methods kb.kb.append(self, 'methods', i) def end(self): """ Print the results. """ # First I get the data from the kb all_info_obj = kb.kb.get('allowed_methods', 'methods') dav_info_obj = kb.kb.get('allowed_methods', 'dav-methods') # Now I transform it to something I can use with group_by_min_key allMethods = [] for i in all_info_obj: allMethods.append((i.get_url(), i['methods'])) davMethods = [] for i in dav_info_obj: davMethods.append((i.get_url(), i['methods'])) # Now I work the data... to_show, method_type = davMethods, ' DAV' if not self._report_dav_only: to_show, method_type = allMethods, '' # Make it hashable tmp = [] for url, methodList in to_show: tmp.append((url, ', '.join(methodList))) result_dict, itemIndex = group_by_min_key(tmp) for k in result_dict: if itemIndex == 0: # Grouped by URLs msg = 'The URL: "%s" has the following' + \ method_type + ' methods enabled:' om.out.information(msg % k) else: # Grouped by Methods msg = 'The methods: ' + k + \ ' are enabled on the following URLs:' om.out.information(msg) for i in result_dict[k]: om.out.information('- ' + i) def get_options(self): """ :return: A list of option objects for this plugin. 
""" ol = OptionList() d1 = 'Execute plugin only one time' h1 = 'Generally the methods allowed for a URL are configured system'\ ' wide, so executing this plugin only once is the faster choice.'\ ' The most accurate choice is to run it against every URL.' o = opt_factory('execOneTime', self._exec_one_time, d1, 'boolean', help=h1) ol.add(o) d2 = 'Only report findings if uncommon methods are found' o = opt_factory('reportDavOnly', self._report_dav_only, d2, 'boolean') ol.add(o) return ol def set_options(self, options_list): """ This method sets all the options that are configured using the user interface generated by the framework using the result of get_options(). :param OptionList: A dictionary with the options for the plugin. :return: No value is returned. """ self._exec_one_time = options_list['execOneTime'].get_value() self._report_dav_only = options_list['reportDavOnly'].get_value() def get_long_desc(self): """ :return: A DETAILED description of the plugin functions and features. """ return """
def __init__(self):
    GrepPlugin.__init__(self)
    self._already_visited = ScalableBloomFilter()
def __init__(self):
    GrepPlugin.__init__(self)

    # User configured variables
    self._only_target_domain = True
    self._already_reported = ScalableBloomFilter()
class find_backdoors(CrawlPlugin): """ Find web backdoors and web shells. :author: Andres Riancho ([email protected]) """ WEBSHELL_DB = os.path.join(CRAWL_PATH, 'find_backdoors', 'web_shells.txt') SIGNATURE_DB = os.path.join(CRAWL_PATH, 'find_backdoors', 'signatures.txt') def __init__(self): CrawlPlugin.__init__(self) # Internal variables self._analyzed_dirs = ScalableBloomFilter() self._signature_re = None def setup(self): with self._plugin_lock: if self._signature_re is not None: return signatures = self._read_signatures() self._signature_re = multi_re(signatures, hint_len=2) def _read_signatures(self): for line in file(self.SIGNATURE_DB): line = line.strip() if not line: continue if line.startswith('#'): continue yield (line, 'Backdoor signature') def crawl(self, fuzzable_request): """ For every directory, fetch a list of shell files and analyze the response. :param fuzzable_request: A fuzzable_request instance that contains (among other things) the URL to test. """ domain_path = fuzzable_request.get_url().get_domain_path() if domain_path not in self._analyzed_dirs: self._analyzed_dirs.add(domain_path) self.setup() # Read the web shell database web_shells = self._iter_web_shells() # Send the requests using threads: args_iter = (domain_path.url_join(fname) for fname in web_shells) self.worker_pool.map(self._check_if_exists, args_iter) def _iter_web_shells(self): """ :yield: lines from the web shell DB """ for line in file(self.WEBSHELL_DB): line = line.strip() if line.startswith('#'): continue if not line: continue yield line def _check_if_exists(self, web_shell_url): """ Check if the file exists. :param web_shell_url: The URL to check """ try: response = self._uri_opener.GET(web_shell_url, cache=True) except BaseFrameworkException: om.out.debug('Failed to GET webshell:' + web_shell_url) else: if response.get_code() == 200: signature = self._match_signature(response) if signature is None: return desc = ( u'An HTTP response matching the web backdoor signature' u' "%s" was found at: "%s"; this could indicate that the' u' server has been compromised.') desc %= (signature, response.get_url()) # It's probability is higher if we found a long signature _severity = severity.HIGH if len( signature) > 8 else severity.MEDIUM v = Vuln(u'Potential web backdoor', desc, _severity, response.id, self.get_name()) v.set_url(response.get_url()) kb.kb.append(self, 'backdoors', v) om.out.vulnerability(v.get_desc(), severity=v.get_severity()) fr = FuzzableRequest.from_http_response(response) self.output_queue.put(fr) else: return def _match_signature(self, response): """ Heuristic to infer if the content of <response> has the pattern of a backdoor response. :param response: HTTPResponse object :return: A bool value """ body_text = response.get_body() for match, _, _, _ in self._signature_re.query(body_text): match_string = match.group(0) return match_string return None def get_long_desc(self): """ :return: A DETAILED description of the plugin functions and features. """ return """
class pykto(CrawlPlugin): """ A nikto port to python. :author: Andres Riancho ([email protected]) """ def __init__(self): CrawlPlugin.__init__(self) # internal variables self._exec = True self._already_analyzed = ScalableBloomFilter() # User configured parameters self._db_file = os.path.join(ROOT_PATH, "plugins", "crawl", "pykto", "scan_database.db") self._extra_db_file = os.path.join(ROOT_PATH, "plugins", "crawl", "pykto", "w3af_scan_database.db") self._cgi_dirs = ["/cgi-bin/"] self._admin_dirs = ["/admin/", "/adm/"] self._users = [ "adm", "bin", "daemon", "ftp", "guest", "listen", "lp", "mysql", "noaccess", "nobody", "nobody4", "nuucp", "operator", "root", "smmsp", "smtp", "sshd", "sys", "test", "unknown", ] self._nuke = ["/", "/postnuke/", "/postnuke/html/", "/modules/", "/phpBB/", "/forum/"] self._mutate_tests = False def crawl(self, fuzzable_request): """ Runs pykto to the site. :param fuzzable_request: A fuzzable_request instance that contains (among other things) the URL to test. """ if not self._exec and not self._mutate_tests: # dont run anymore raise RunOnce() else: # Run the basic scan (only once) url = fuzzable_request.get_url().base_url() if url not in self._already_analyzed: self._already_analyzed.add(url) self._run(url) self._exec = False # And now mutate if the user configured it... if self._mutate_tests: # Tests need to be mutated url = fuzzable_request.get_url().get_domain_path() if url not in self._already_analyzed: # Save the directories I already have tested in order to # avoid testing them more than once... self._already_analyzed.add(url) self._run(url) def _run(self, url): """ Really run the plugin. :param url: The URL object I have to test. """ config = Config(self._cgi_dirs, self._admin_dirs, self._nuke, self._mutate_tests, self._users) for db_file in [self._db_file, self._extra_db_file]: parser = NiktoTestParser(db_file, config, url) # Send the requests using threads: self.worker_pool.map_multi_args(self._send_and_check, parser.test_generator(), chunksize=10) def _send_and_check(self, nikto_test): """ This method sends the request to the server. :return: True if the requested URI responded as expected. """ # # Small performance improvement. If all we want to know is if the # file exists or not, lets use HEAD instead of GET. In 99% of the # cases this will work as expected and we'll have a significant # performance improvement. # if nikto_test.is_vulnerable.checks_only_response_code(): try: http_response = self._uri_opener.HEAD(nikto_test.uri) except Exception: return else: if not nikto_test.is_vulnerable.check(http_response): return False function_ptr = getattr(self._uri_opener, nikto_test.method) try: http_response = function_ptr(nikto_test.uri) except BaseFrameworkException, e: msg = 'An exception was raised while requesting "%s", the error' ' message is: "%s".' om.out.error(msg % (nikto_test.uri, e)) return False if nikto_test.is_vulnerable.check(http_response) and not is_404(http_response): vdesc = 'pykto plugin found a vulnerability at URL: "%s".' ' Vulnerability description: "%s".' vdesc = vdesc % (http_response.get_url(), nikto_test.message) v = Vuln("Insecure resource", vdesc, severity.LOW, http_response.id, self.get_name()) v.set_uri(http_response.get_uri()) v.set_method(nikto_test.method) kb.kb.append(self, "vuln", v) om.out.vulnerability(v.get_desc(), severity=v.get_severity()) fr = FuzzableRequest.from_http_response(http_response) self.output_queue.put(fr)
class find_dvcs(CrawlPlugin): """ Search Git, Mercurial (HG), Bazaar (BZR), Subversion (SVN) and CVS repositories and checks for files containing :author: Adam Baldwin ([email protected]) :author: Tomas Velazquez (tomas.velazquezz - gmail.com) :author: Andres Riancho ([email protected]) """ BAD_HTTP_CODES = {301, 302, 307} def __init__(self): CrawlPlugin.__init__(self) # Internal variables self._analyzed_dirs = ScalableBloomFilter() self._analyzed_filenames = ScalableBloomFilter() self._dvcs = [ DVCSTest('.git/index', 'git repository', self.git_index), DVCSTest('.gitignore', 'git ignore', self.ignore_file), DVCSTest('.hg/dirstate', 'hg repository', self.hg_dirstate), DVCSTest('.hgignore', 'hg ignore', self.ignore_file), DVCSTest('.bzr/checkout/dirstate', 'bzr repository', self.bzr_checkout_dirstate), DVCSTest('.bzrignore', 'bzr ignore', self.ignore_file), DVCSTest('.svn/entries', 'svn repository', self.svn_entries), DVCSTest('.svn/wc.db', 'svn repository db', self.svn_wc_db), DVCSTest('.svnignore', 'svn ignore', self.ignore_file), DVCSTest('CVS/Entries', 'cvs repository', self.cvs_entries), DVCSTest('.cvsignore', 'cvs ignore', self.ignore_file) ] def crawl(self, fuzzable_request, debugging_id): """ For every directory, fetch a list of files and analyze the response. :param debugging_id: A unique identifier for this call to discover() :param fuzzable_request: A fuzzable_request instance that contains (among other things) the URL to test. """ domain_path = fuzzable_request.get_url().get_domain_path() if domain_path in self._analyzed_dirs: return self._analyzed_dirs.add(domain_path) test_generator = self._url_generator(domain_path) self.worker_pool.map_multi_args(self._send_and_check, test_generator) def _url_generator(self, domain_path): """ Based on different URLs with directories, generate the URLs that need to be tested. :return: URLs """ for dvcs_test in self._dvcs: repo_url = domain_path.url_join(dvcs_test.filename) yield (repo_url, dvcs_test.method, dvcs_test.name, domain_path) def _clean_filenames(self, filenames): """ Filter some characters from filenames. :return: A clear list of filenames. """ resources = set() for filename in filenames: # Sometimes we get random bytes from the .git/index because of # git versions we don't fully support, so we ignore any encoding # errors filename = smart_unicode(filename, errors='ignore') if filename.startswith('/'): filename = filename[1:] if filename.startswith('./'): filename = filename[2:] if filename.endswith('/'): filename = filename[:-1] resources.add(filename) return resources def _send_and_check(self, repo_url, repo_get_files, repo, domain_path): """ Check if a repository index exists in the domain_path. :return: None, everything is saved to the self.out_queue. """ # Here we use the new http_get instead of http_get_and_parse because # we want to check BAD_HTTP_CODES and the response body (see below) # before we send the response to the core http_response = self.http_get(repo_url, binary_response=True, respect_size_limit=False, grep=False) if is_404(http_response): return if http_response.get_code() in self.BAD_HTTP_CODES: return if not http_response.get_body(): return try: filenames = repo_get_files(http_response.get_raw_body()) except Exception, e: # We get here when the HTTP response is NOT a 404, but the response # body couldn't be properly parsed. This is usually because of a false # positive in the is_404 function, OR a new version-format of the file # to be parsed. # # Log in order to be able to improve the framework. 
args = (e, repo_get_files.__name__, repo_url) om.out.debug('Got a "%s" exception while running "%s" on "%s"' % args) return parsed_url_set = set() for filename in self._clean_filenames(filenames): test_url = domain_path.url_join(filename) if test_url in self._analyzed_filenames: continue parsed_url_set.add(test_url) self._analyzed_filenames.add(filename) if not parsed_url_set: return self.worker_pool.map(self.http_get_and_parse, parsed_url_set) # After performing the checks (404, redirects, body is not empty, body # can be parsed, body actually had filenames inside) send the URL to the # core fr = FuzzableRequest(repo_url, method='GET') self.output_queue.put(fr) # Now we send this finding to the report for manual analysis desc = ('A %s was found at: "%s"; this could indicate that a %s is' ' accessible. You might be able to download the Web' ' application source code.') desc %= (repo, http_response.get_url(), repo) v = Vuln('Source code repository', desc, severity.MEDIUM, http_response.id, self.get_name()) v.set_url(http_response.get_url()) kb.kb.append(self, repo, v) om.out.vulnerability(v.get_desc(), severity=v.get_severity())
class find_dvcs(CrawlPlugin): """ Search Git, Mercurial (HG), Bazaar (BZR), Subversion (SVN) and CVS repositories and checks for files containing :author: Adam Baldwin ([email protected]) :author: Tomas Velazquez (tomas.velazquezz - gmail.com) """ def __init__(self): CrawlPlugin.__init__(self) # Internal variables self._analyzed_dirs = ScalableBloomFilter() self._analyzed_filenames = ScalableBloomFilter() self._dvcs = { 'git repository': {}, 'git ignore': {}, 'hg repository': {}, 'hg ignore': {}, 'bzr repository': {}, 'bzr ignore': {}, 'svn repository': {}, 'svn ignore': {}, 'cvs repository': {}, 'cvs ignore': {} } self._dvcs['git repository']['filename'] = '.git/index' self._dvcs['git repository']['function'] = self.git_index self._dvcs['git ignore']['filename'] = '.gitignore' self._dvcs['git ignore']['function'] = self.ignore_file self._dvcs['hg repository']['filename'] = '.hg/dirstate' self._dvcs['hg repository']['function'] = self.hg_dirstate self._dvcs['hg ignore']['filename'] = '.hgignore' self._dvcs['hg ignore']['function'] = self.ignore_file self._dvcs['bzr repository']['filename'] = '.bzr/checkout/dirstate' self._dvcs['bzr repository']['function'] = self.bzr_checkout_dirstate self._dvcs['bzr ignore']['filename'] = '.bzrignore' self._dvcs['bzr ignore']['function'] = self.ignore_file self._dvcs['svn repository']['filename'] = '.svn/entries' self._dvcs['svn repository']['function'] = self.svn_entries self._dvcs['svn ignore']['filename'] = '.svnignore' self._dvcs['svn ignore']['function'] = self.ignore_file self._dvcs['cvs repository']['filename'] = 'CVS/Entries' self._dvcs['cvs repository']['function'] = self.cvs_entries self._dvcs['cvs ignore']['filename'] = '.cvsignore' self._dvcs['cvs ignore']['function'] = self.ignore_file def crawl(self, fuzzable_request): """ For every directory, fetch a list of files and analyze the response. :param fuzzable_request: A fuzzable_request instance that contains (among other things) the URL to test. """ domain_path = fuzzable_request.get_url().get_domain_path() if domain_path not in self._analyzed_dirs: self._analyzed_dirs.add(domain_path) test_generator = self._url_generator(domain_path) self.worker_pool.map_multi_args(self._send_and_check, test_generator) def _url_generator(self, domain_path): """ Based on different URLs with directories, generate the URLs that need to be tested. :return: URLs """ for repo in self._dvcs.keys(): repo_url = domain_path.url_join(self._dvcs[repo]['filename']) function = self._dvcs[repo]['function'] yield repo_url, function, repo, domain_path def _clean_filenames(self, filenames): """ Filter some characters from filenames. :return: A clear list of filenames. """ resources = set() for line in filenames: if line.startswith('/'): line = line[1:] if line.startswith('./'): line = line[2:] if line.endswith('/'): line = line[:-1] resources.add(line) return resources def _send_and_check(self, repo_url, repo_get_files, repo, domain_path): """ Check if a repository index exists in the domain_path. :return: None, everything is saved to the self.out_queue. 
""" http_response = self.http_get_and_parse(repo_url) if is_404(http_response): return filenames = repo_get_files(http_response.get_body()) parsed_url_set = set() for filename in self._clean_filenames(filenames): test_url = domain_path.url_join(filename) if test_url not in self._analyzed_filenames: parsed_url_set.add(test_url) self._analyzed_filenames.add(filename) self.worker_pool.map(self.http_get_and_parse, parsed_url_set) if parsed_url_set: desc = 'A %s was found at: "%s"; this could indicate that'\ ' a %s is accessible. You might be able to download'\ ' the Web application source code.' desc %= repo, http_response.get_url(), repo v = Vuln('Source code repository', desc, severity.MEDIUM, http_response.id, self.get_name()) v.set_url(http_response.get_url()) kb.kb.append(self, repo, v) om.out.vulnerability(v.get_desc(), severity=v.get_severity()) def git_index(self, body): """ Analyze the contents of the Git index and extract filenames. :param body: The contents of the file to analyze. :return: A list of filenames found. """ filenames = set() signature = 'DIRC' offset = 12 if body[:4] != signature: return set() version, = struct.unpack('>I', body[4:8]) index_entries, = struct.unpack('>I', body[8:12]) if version == 2: filename_offset = 62 elif version == 3: filename_offset = 63 else: return filenames for _ in range(0, index_entries): offset += filename_offset - 1 length, = struct.unpack('>B', body[offset:offset + 1]) if length > (len(body) - offset): return set() filename = body[offset + 1:offset + 1 + length] padding = 8 - ((filename_offset + length) % 8) filenames.add(filename) offset += length + 1 + padding return filenames def hg_dirstate(self, body): """ Analyze the contents of the HG dirstate and extract filenames. :param body: The contents of the file to analyze. :return: A list of filenames found. """ filenames = set() offset = 53 while offset < len(body): length, = struct.unpack('>I', body[offset:offset + 4]) if length > (len(body) - offset): return set() offset += 4 filename = body[offset:offset + length] offset += length + 13 filenames.add(filename) return filenames def bzr_checkout_dirstate(self, body): """ Analyze the contents of the BZR dirstate and extract filenames. :param body: The contents of the file to analyze. :return: A list of filenames found. """ filenames = set() header = '#bazaar dirstate flat format ' if body[0:29] != header: return set() body = body.split('\x00') found = True for offset in range(0, len(body)): filename = body[offset - 2] if body[offset] == 'd': if found: filenames.add(filename) found = not found elif body[offset] == 'f': if found: filenames.add(filename) found = not found return filenames def svn_entries(self, body): """ Analyze the contents of the SVN entries and extract filenames. :param body: The contents of the file to analyze. :return: A list of filenames found. """ filenames = set() lines = body.split('\n') offset = 29 while offset < len(lines): line = lines[offset].strip() filename = lines[offset - 1].strip() if line == 'file': filenames.add(filename) offset += 34 elif line == 'dir': filenames.add(filename) offset += 3 return filenames def cvs_entries(self, body): """ Analyze the contents of the CVS entries and extract filenames. :param body: The contents of the file to analyze. :return: A list of filenames found. 
""" filenames = set() for line in body.split('\n'): if '/' in line: slashes = line.split('/') if len(slashes) != 6: continue filenames.add(slashes[1]) return filenames def filter_special_character(self, line): """ Analyze the possible regexp contents and extract filenames or directories without regexp. :param line: A regexp filename or directory. :return: A real filename or directory. """ special_characters = ['*', '?', '[', ']', ':'] for char in special_characters: if char in line: l = line.split(char)[0] if '/' in l: line = '/'.join(l.split('/')[:-1]) else: line = '' break return line def ignore_file(self, body): """ Analyze the contents of the Git, HG, BZR, SVN and CVS ignore file and extract filenames. :param body: The contents of the file to analyze. :return: A list of filenames found. """ filenames = set() for line in body.split('\n'): line = line.strip() if line.startswith('#') or line == '': continue line = self.filter_special_character(line) if not line: continue if line.startswith('/') or line.startswith('^'): line = line[1:] if line.endswith('/') or line.endswith('$'): line = line[:-1] filenames.add(line) return filenames def get_long_desc(self): """ :return: A DETAILED description of the plugin functions and features. """ return """
class dot_net_errors(InfrastructurePlugin): """ Request specially crafted URLs that generate ASP.NET errors in order to gather information. :author: Andres Riancho (([email protected])) """ def __init__(self): InfrastructurePlugin.__init__(self) # Internal variables self._already_tested = ScalableBloomFilter() # On real web applications, if we can't trigger an error in the first # MAX_TESTS tests, it simply won't happen and we have to stop testing. self.MAX_TESTS = 25 def discover(self, fuzzable_request): """ Requests the special filenames. :param fuzzable_request: A fuzzable_request instance that contains (among other things) the URL to test. """ if len(self._already_tested) < self.MAX_TESTS \ and fuzzable_request.get_url() not in self._already_tested: self._already_tested.add(fuzzable_request.get_url()) test_generator = self._generate_urls(fuzzable_request.get_url()) self.worker_pool.map(self._send_and_check, test_generator, chunksize=1) def _generate_urls(self, original_url): """ Generate new URLs based on original_url. :param original_url: The original url that has to be modified in order to trigger errors in the remote application. """ special_chars = ['|', '~'] filename = original_url.get_file_name() if filename != '' and '.' in filename: splitted_filename = filename.split('.') extension = splitted_filename[-1:][0] name = '.'.join(splitted_filename[0:-1]) for char in special_chars: new_filename = name + char + '.' + extension new_url = original_url.url_join(new_filename) yield new_url def _send_and_check(self, url): response = self._uri_opener.GET(url, cache=True) viewable_remote_machine = '<b>Details:</b> To enable the details of this' viewable_remote_machine += ' specific error message to be viewable on' viewable_remote_machine += ' remote machines' if viewable_remote_machine not in response.body\ and '<h2> <i>Runtime Error</i> </h2></span>' in response.body: desc = 'Detailed information about ASP.NET error messages can be'\ ' viewed from remote sites. The URL: "%s" discloses'\ ' detailed error messages.' desc = desc % response.get_url() v = Vuln('Information disclosure via .NET errors', desc, severity.LOW, response.id, self.get_name()) kb.kb.append(self, 'dot_net_errors', v) def get_plugin_deps(self): """ :return: A list with the names of the plugins that should be run before the current one. """ return ['grep.error_pages'] def get_long_desc(self): """ :return: A DETAILED description of the plugin functions and features. """ return """
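_generate_urls() only inserts a special character ('|' or '~') between the file name and its extension. A self-contained sketch of that mutation with the standard library instead of w3af's URL class (the target URL below is just an example):

try:
    from urlparse import urlsplit, urlunsplit          # Python 2
except ImportError:
    from urllib.parse import urlsplit, urlunsplit      # Python 3

def generate_error_urls(original_url, special_chars=('|', '~')):
    # Yield URLs with a special character inserted before the extension.
    scheme, netloc, path, query, fragment = urlsplit(original_url)
    directory, _, filename = path.rpartition('/')
    if '.' not in filename:
        return
    name, _, extension = filename.rpartition('.')
    for char in special_chars:
        new_path = '%s/%s%s.%s' % (directory, name, char, extension)
        yield urlunsplit((scheme, netloc, new_path, query, fragment))

for url in generate_error_urls('http://target/app/page.aspx'):
    print(url)    # http://target/app/page|.aspx and http://target/app/page~.aspx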
def __init__(self):
    GrepPlugin.__init__(self)

    # Internal variables
    self._already_reported = ScalableBloomFilter()
def __init__(self):
    GrepPlugin.__init__(self)

    # User configured variables
    self._only_target_domain = True
    self._already_reported = ScalableBloomFilter()
class dot_listing(CrawlPlugin): """ Search for .listing files and extracts new filenames from it. :author: Tomas Velazquez ( [email protected] ) """ # -rw-r--r-- 1 andresr w3af 8139 Apr 12 13:23 foo.zip regex_str = r'[a-z-]{10}\s*\d+\s*(.*?)\s+(.*?)\s+\d+\s+\w+\s+\d+\s+[0-9:]{4,5}\s+(.*)' LISTING_PARSER_RE = re.compile(regex_str) def __init__(self): CrawlPlugin.__init__(self) # Internal variables self._analyzed_dirs = ScalableBloomFilter() def crawl(self, fuzzable_request): """ For every directory, fetch the .listing file and analyze the response. :param fuzzable_request: A fuzzable_request instance that contains (among other things) the URL to test. """ directories_to_check = [] for domain_path in fuzzable_request.get_url().get_directories(): if domain_path in self._analyzed_dirs: continue self._analyzed_dirs.add(domain_path) directories_to_check.append(domain_path) # Send the requests using threads self.worker_pool.map(self._check_and_analyze, directories_to_check) def _check_and_analyze(self, domain_path): """ Check if a .listing filename exists in the domain_path. :return: None, everything is saved to the self.out_queue. """ url = domain_path.url_join('.listing') response = self._uri_opener.GET(url, cache=True) if is_404(response): return parsed_url_set = set() users = set() groups = set() # Check if it's a .listing file extracted_info = self._extract_info_from_listing(response.get_body()) for username, group, filename in extracted_info: if filename in ('.', '..'): continue parsed_url_set.add(domain_path.url_join(filename)) users.add(username) groups.add(group) self.worker_pool.map(self.http_get_and_parse, parsed_url_set) if parsed_url_set: desc = ('A .listing file was found at: "%s". The contents' ' of this file disclose filenames.') desc %= (response.get_url()) v = Vuln('.listing file found', desc, severity.LOW, response.id, self.get_name()) v.set_url(response.get_url()) kb.kb.append(self, 'dot_listing', v) om.out.vulnerability(v.get_desc(), severity=v.get_severity()) fr = FuzzableRequest(response.get_url()) self.output_queue.put(fr) real_users = set([u for u in users if not u.isdigit()]) real_groups = set([g for g in groups if not g.isdigit()]) if real_users or real_groups: desc = ('A .listing file which leaks operating system user names' ' and groups was identified at %s. The leaked users are %s,' ' and the groups are %s. This information can be used' ' during a bruteforce attack of the Web application,' ' SSH or FTP services.') desc %= (response.get_url(), ', '.join(real_users), ', '.join(real_groups)) v = Vuln('Operating system username and group leak', desc, severity.LOW, response.id, self.get_name()) v.set_url(response.get_url()) kb.kb.append(self, 'dot_listing', v) om.out.vulnerability(v.get_desc(), severity=v.get_severity()) def _extract_info_from_listing(self, listing_file_content): """ Extract info from .listing file content, each line looks like: -rw-r--r-- 1 andresr w3af 8139 Apr 12 13:23 foo.zip We're going to extract "andresr" (user), "w3af" (group) and "foo.zip" (file). :return: A list with the information extracted from the listing_file_content """ for user, group, filename in self.LISTING_PARSER_RE.findall(listing_file_content): yield user, group, filename.strip() def get_long_desc(self): """ :return: A DETAILED description of the plugin functions and features. """ return """
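LISTING_PARSER_RE pulls the owner, group and file name out of ls -l style lines. A quick, self-contained check of that same regular expression against the sample line quoted in the class:

import re

regex_str = (r'[a-z-]{10}\s*\d+\s*(.*?)\s+(.*?)\s+\d+\s+'
             r'\w+\s+\d+\s+[0-9:]{4,5}\s+(.*)')
LISTING_PARSER_RE = re.compile(regex_str)

sample = '-rw-r--r--   1 andresr    w3af         8139 Apr 12 13:23 foo.zip'

for user, group, filename in LISTING_PARSER_RE.findall(sample):
    print(user + ':' + group + ' -> ' + filename.strip())
# Prints: andresr:w3af -> foo.zip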
class get_emails(GrepPlugin): """ Find email accounts. :author: Andres Riancho ([email protected]) """ def __init__(self): GrepPlugin.__init__(self) # User configured variables self._only_target_domain = True self._already_reported = ScalableBloomFilter() def grep(self, request, response): """ Plugin entry point, get the emails and save them to the kb. :param request: The HTTP request :param request: The HTTP response :return: None """ self._grep_worker(request, response, 'emails', response.get_url().get_root_domain()) if not self._only_target_domain: self._grep_worker(request, response, 'external_emails') def _grep_worker(self, request, response, kb_key, domain=None): """ Helper method for using in self.grep() :param request: The HTTP request :param response: The HTTP response :param kb_key: Knowledge base dict key :param domain: Target domain for get_emails filter :return: None """ try: dp = parser_cache.dpc.get_document_parser_for(response) except BaseFrameworkException: msg = 'Failed to get document parser for "%s" at get_emails.' om.out.debug(msg % response.get_url()) return emails = set(dp.get_emails(domain)) for mail_address in emails: # Reduce false positives # if request.sent(mail_address): # continue # Email address are case insensitive mail_address = mail_address.lower() url = response.get_url() uniq_key = (mail_address, url) if uniq_key in self._already_reported: continue # Avoid dups self._already_reported.add(uniq_key) # Create a new info object, and report it desc = 'The mail account: "%s" was found at "%s".' desc = desc % (mail_address, url) v = Vuln('Email address disclosure', desc, severity.INFORMATION, response.id, self.get_name()) v.add_to_highlight(mail_address) v.set_url(url) v[EmailInfoSet.ITAG] = mail_address v['user'] = mail_address.split('@')[0] self.kb_append_uniq_group('emails', kb_key, v, group_klass=EmailInfoSet) def set_options(self, options_list): self._only_target_domain = options_list['only_target_domain'].get_value() def get_options(self): """ :return: A list of option objects for this plugin. """ ol = OptionList() d1 = 'Only search emails for domain of target' o1 = opt_factory('only_target_domain', self._only_target_domain, d1, 'boolean') ol.add(o1) return ol def get_long_desc(self): """ :return: A DETAILED description of the plugin functions and features. """ return """
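Duplicate findings are avoided by lower-casing the address and keying on the (address, URL) pair. A minimal sketch of that bookkeeping, with a plain set standing in for the ScalableBloomFilter:

already_reported = set()    # stand-in for ScalableBloomFilter()

def report_once(mail_address, url):
    # Return True only the first time an (address, URL) pair is seen.
    mail_address = mail_address.lower()     # addresses are case insensitive
    uniq_key = (mail_address, url)
    if uniq_key in already_reported:
        return False
    already_reported.add(uniq_key)
    return True

print(report_once('Admin@example.com', 'http://target/contact'))    # True
print(report_once('admin@example.com', 'http://target/contact'))    # False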
class url_fuzzer(CrawlPlugin): """ Try to find backups, and other related files. :author: Andres Riancho ([email protected]) """ _appendables = ('~', '.tar.gz', '.gz', '.7z', '.cab', '.tgz', '.gzip', '.bzip2', '.inc', '.zip', '.rar', '.jar', '.java', '.class', '.properties', '.bak', '.bak1', '.bkp', '.back', '.backup', '.backup1', '.old', '.old1', '.$$$' ) _backup_exts = ('tar.gz', '7z', 'gz', 'cab', 'tgz', 'gzip', 'bzip2', 'zip', 'rar' ) _file_types = ( 'inc', 'fla', 'jar', 'war', 'java', 'class', 'properties', 'bak', 'bak1', 'backup', 'backup1', 'old', 'old1', 'c', 'cpp', 'cs', 'vb', 'phps', 'disco', 'ori', 'orig', 'original' ) def __init__(self): CrawlPlugin.__init__(self) self._first_time = True self._fuzz_images = False self._seen = ScalableBloomFilter() def crawl(self, fuzzable_request): """ Searches for new Url's using fuzzing. :param fuzzable_request: A fuzzable_request instance that contains (among other things) the URL to test. """ url = fuzzable_request.get_url() self._headers = Headers([('Referer', url.url_string)]) if self._first_time: self._verify_head_enabled(url) self._first_time = False # First we need to delete fragments and query strings from URL. url = url.uri2url() # And we mark this one as a "do not return" URL, because the # core already found it using another technique. self._seen.add(url) self._verify_head_enabled(url) if self._head_enabled(): response = self._uri_opener.HEAD( url, cache=True, headers=self._headers) else: response = self._uri_opener.GET( url, cache=True, headers=self._headers) if response.is_text_or_html() or self._fuzz_images: mutants_chain = chain(self._mutate_by_appending(url), self._mutate_path(url), self._mutate_file_type(url), self._mutate_domain_name(url)) url_repeater = repeat(url) args = izip(url_repeater, mutants_chain) self.worker_pool.map_multi_args(self._do_request, args) def _do_request(self, url, mutant): """ Perform a simple GET to see if the result is an error or not, and then run the actual fuzzing. """ response = self._uri_opener.GET( mutant, cache=True, headers=self._headers) if not (is_404(response) or response.get_code() in (403, 401) or self._return_without_eval(mutant)): for fr in self._create_fuzzable_requests(response): self.output_queue.put(fr) # # Save it to the kb (if new)! # if response.get_url() not in self._seen and response.get_url().get_file_name(): desc = 'A potentially interesting file was found at: "%s".' desc = desc % response.get_url() i = Info('Potentially interesting file', desc, response.id, self.get_name()) i.set_url(response.get_url()) kb.kb.append(self, 'files', i) om.out.information(i.get_desc()) # Report only once self._seen.add(response.get_url()) def _return_without_eval(self, uri): """ This method tries to lower the false positives. """ if not uri.has_query_string(): return False uri.set_file_name(uri.get_file_name() + rand_alnum(7)) try: response = self._uri_opener.GET(uri, cache=True, headers=self._headers) except BaseFrameworkException, e: msg = 'An exception was raised while requesting "%s", the error' msg += 'message is: "%s"' om.out.error(msg % (uri, e)) else:
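url_fuzzer builds its candidates by appending suffixes such as '~' and '.bak' to the original URL and by swapping the extension for a backup one. A rough sketch of both mutations on plain string URLs, intentionally naive (it assumes the last dot belongs to the file name):

from itertools import chain

APPENDABLES = ('~', '.tar.gz', '.zip', '.bak', '.old')
BACKUP_EXTS = ('tar.gz', 'zip', 'rar', 'gz')

def mutate_by_appending(url):
    # Candidate backup copies made by appending common suffixes.
    for suffix in APPENDABLES:
        yield url + suffix

def mutate_file_type(url):
    # Candidates made by swapping the extension for a backup extension.
    base, dot, _old_ext = url.rpartition('.')
    if dot:
        for ext in BACKUP_EXTS:
            yield base + '.' + ext

for candidate in chain(mutate_by_appending('http://target/app/index.php'),
                       mutate_file_type('http://target/app/index.php')):
    print(candidate)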
def __init__(self):
    CrawlPlugin.__init__(self)

    # Internal variables
    self._already_tested = ScalableBloomFilter()
class find_dvcs(CrawlPlugin): """ Search Git, Mercurial (HG), Bazaar (BZR), Subversion (SVN) and CVS repositories and checks for files containing :author: Adam Baldwin ([email protected]) :author: Tomas Velazquez (tomas.velazquezz - gmail.com) """ def __init__(self): CrawlPlugin.__init__(self) # Internal variables self._analyzed_dirs = ScalableBloomFilter() self._analyzed_filenames = ScalableBloomFilter() self._dvcs = { 'git repository': {}, 'git ignore': {}, 'hg repository': {}, 'hg ignore': {}, 'bzr repository': {}, 'bzr ignore': {}, 'svn repository': {}, 'svn ignore': {}, 'cvs repository': {}, 'cvs ignore': {} } self._dvcs['git repository']['filename'] = '.git/index' self._dvcs['git repository']['function'] = self.git_index self._dvcs['git ignore']['filename'] = '.gitignore' self._dvcs['git ignore']['function'] = self.ignore_file self._dvcs['hg repository']['filename'] = '.hg/dirstate' self._dvcs['hg repository']['function'] = self.hg_dirstate self._dvcs['hg ignore']['filename'] = '.hgignore' self._dvcs['hg ignore']['function'] = self.ignore_file self._dvcs['bzr repository']['filename'] = '.bzr/checkout/dirstate' self._dvcs['bzr repository']['function'] = self.bzr_checkout_dirstate self._dvcs['bzr ignore']['filename'] = '.bzrignore' self._dvcs['bzr ignore']['function'] = self.ignore_file self._dvcs['svn repository']['filename'] = '.svn/entries' self._dvcs['svn repository']['function'] = self.svn_entries self._dvcs['svn ignore']['filename'] = '.svnignore' self._dvcs['svn ignore']['function'] = self.ignore_file self._dvcs['cvs repository']['filename'] = 'CVS/Entries' self._dvcs['cvs repository']['function'] = self.cvs_entries self._dvcs['cvs ignore']['filename'] = '.cvsignore' self._dvcs['cvs ignore']['function'] = self.ignore_file def crawl(self, fuzzable_request): """ For every directory, fetch a list of files and analyze the response. :param fuzzable_request: A fuzzable_request instance that contains (among other things) the URL to test. """ domain_path = fuzzable_request.get_url().get_domain_path() if domain_path not in self._analyzed_dirs: self._analyzed_dirs.add(domain_path) test_generator = self._url_generator(domain_path) self.worker_pool.map_multi_args(self._send_and_check, test_generator) def _url_generator(self, domain_path): """ Based on different URLs with directories, generate the URLs that need to be tested. :return: URLs """ for repo in self._dvcs.keys(): repo_url = domain_path.url_join(self._dvcs[repo]['filename']) _function = self._dvcs[repo]['function'] yield repo_url, _function, repo, domain_path def _clean_filenames(self, filenames): """ Filter some characters from filenames. :return: A clear list of filenames. """ resources = set() for filename in filenames: # Sometimes we get random bytes from the .git/index because of # git versions we don't fully support, so we ignore any encoding # errors filename = smart_unicode(filename, errors='ignore') if filename.startswith('/'): filename = filename[1:] if filename.startswith('./'): filename = filename[2:] if filename.endswith('/'): filename = filename[:-1] resources.add(filename) return resources def _send_and_check(self, repo_url, repo_get_files, repo, domain_path): """ Check if a repository index exists in the domain_path. :return: None, everything is saved to the self.out_queue. 
""" http_response = self.http_get_and_parse(repo_url, binary_response=True, respect_size_limit=False) if is_404(http_response): return try: filenames = repo_get_files(http_response.get_raw_body()) except Exception, e: # We get here when the HTTP response is NOT a 404, but the response # body couldn't be properly parsed. This is usually because of a false # positive in the is_404 function, OR a new version-format of the file # to be parsed. # # Log in order to be able to improve the framework. args = (e, repo_get_files.__name__, repo_url) om.out.debug('Got a "%s" exception while running "%s" on "%s"' % args) else:
class wsdl_finder(CrawlPlugin): """ Find web service definitions files. :author: Andres Riancho ([email protected]) """ WSDL = ('?wsdl', '?WSDL') def __init__(self): CrawlPlugin.__init__(self) # Internal variables self._already_tested = ScalableBloomFilter() def crawl(self, fuzzable_request): """ If url not in _tested, append a ?WSDL and check the response. :param fuzzable_request: A fuzzable_request instance that contains (among other things) the URL to test. """ url = fuzzable_request.get_url().uri2url() url_string = url.url_string if url_string not in self._already_tested: self._already_tested.add(url_string) wsdl_url_generator = self.wsdl_url_generator(url_string) self.worker_pool.map(self._do_request, wsdl_url_generator, chunksize=1) def wsdl_url_generator(self, url_string): for wsdl_parameter in self.WSDL: url_to_request = url_string + wsdl_parameter url_instance = URL(url_to_request) yield url_instance def _do_request(self, url_to_request): """ Perform an HTTP request to the url_to_request parameter. :return: None. """ try: self._uri_opener.GET(url_to_request, cache=True) except BaseFrameworkException: om.out.debug('Failed to request the WSDL file: ' + url_to_request) else: # The response is analyzed by the wsdlGreper plugin pass def get_plugin_deps(self): """ :return: A list with the names of the plugins that should be run before the current one. """ return ['grep.wsdl_greper'] def get_long_desc(self): """ :return: A DETAILED description of the plugin functions and features. """ return """
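The generator above is simple enough to exercise on its own; the only assumption is that the query string was already removed (uri2url() did that), so the ?wsdl suffix can be appended directly to the URL string:

WSDL = ('?wsdl', '?WSDL')

def wsdl_url_generator(url_string):
    # url_string must not already carry a query string
    for wsdl_parameter in WSDL:
        yield url_string + wsdl_parameter

for candidate in wsdl_url_generator('http://target/service.asmx'):
    print(candidate)    # http://target/service.asmx?wsdl and ...?WSDL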
class dav(AuditPlugin): """ Verify if the WebDAV module is properly configured. :author: Andres Riancho ([email protected]) """ CONTENT_TYPE = Headers([('content-type', 'application/xml; charset="utf-8"')]) def __init__(self): AuditPlugin.__init__(self) # Internal variables self._already_tested_dirs = ScalableBloomFilter() def audit(self, freq, orig_response, debugging_id): """ Searches for file upload vulns using PUT method. :param freq: A FuzzableRequest :param orig_response: The HTTP response associated with the fuzzable request :param debugging_id: A unique identifier for this call to audit() """ # Start domain_path = freq.get_url().get_domain_path() if domain_path not in self._already_tested_dirs: self._already_tested_dirs.add(domain_path) # # Send the three requests in different threads, store the # apply_result objects in order to be able to "join()" in the # next for loop # # TODO: This seems to be a fairly common use case: Send args to N # functions that need to be run in different threads. If possible # code this into threadpool.py in order to make this code clearer results = [] for func in [self._PUT, self._PROPFIND, self._SEARCH]: apply_res = self.worker_pool.apply_async(func, (domain_path,)) results.append(apply_res) for apply_res in results: apply_res.get() #pylint: disable=C0103 def _SEARCH(self, domain_path): """ Test SEARCH method. """ content = "<?xml version='1.0'?>\r\n" content += "<g:searchrequest xmlns:g='DAV:'>\r\n" content += "<g:sql>\r\n" content += "Select 'DAV:displayname' from scope()\r\n" content += "</g:sql>\r\n" content += "</g:searchrequest>\r\n" res = self._uri_opener.SEARCH(domain_path, data=content, headers=self.CONTENT_TYPE) content_matches = '<a:response>' in res or '<a:status>' in res or \ 'xmlns:a="DAV:"' in res if content_matches and res.get_code() in xrange(200, 300): msg = 'Directory listing with HTTP SEARCH method was found at' \ 'directory: "%s".' % domain_path v = Vuln('Insecure DAV configuration', msg, severity.MEDIUM, res.id, self.get_name()) v.set_url(res.get_url()) v.set_method('SEARCH') self.kb_append(self, 'dav', v) #pylint: disable=C0103 def _PROPFIND(self, domain_path): """ Test PROPFIND method """ content = "<?xml version='1.0'?>\r\n" content += "<a:propfind xmlns:a='DAV:'>\r\n" content += "<a:prop>\r\n" content += "<a:displayname:/>\r\n" content += "</a:prop>\r\n" content += "</a:propfind>\r\n" headers = copy.deepcopy(self.CONTENT_TYPE) headers['Depth'] = '1' res = self._uri_opener.PROPFIND(domain_path, data=content, headers=headers) if "D:href" in res and res.get_code() in xrange(200, 300): msg = 'Directory listing with HTTP PROPFIND method was found at' \ ' directory: "%s".' % domain_path v = Vuln('Insecure DAV configuration', msg, severity.MEDIUM, res.id, self.get_name()) v.set_url(res.get_url()) v.set_method('PROPFIND') self.kb_append(self, 'dav', v) #pylint: disable=C0103 def _PUT(self, domain_path): """ Tests PUT method. """ # upload url = domain_path.url_join(rand_alpha(5)) rnd_content = rand_alnum(6) headers = Headers([('content-type', 'text/plain')]) put_response = self._uri_opener.PUT(url, data=rnd_content, headers=headers) # check if uploaded res = self._uri_opener.GET(url, cache=True) if res.get_body() == rnd_content: msg = 'File upload with HTTP PUT method was found at resource:' \ ' "%s". A test file was uploaded to: "%s".' 
msg = msg % (domain_path, res.get_url()) v = Vuln('Publicly writable directory', msg, severity.HIGH, [put_response.id, res.id], self.get_name()) v.set_url(url) v.set_method('PUT') self.kb_append(self, 'dav', v) # Report some common errors elif put_response.get_code() == 500: msg = 'DAV seems to be incorrectly configured. The web server' \ ' answered with a 500 error code. In most cases, this means'\ ' that the DAV extension failed in some way. This error was'\ ' found at: "%s".' % put_response.get_url() i = Info('DAV incorrect configuration', msg, res.id, self.get_name()) i.set_url(url) i.set_method('PUT') self.kb_append(self, 'dav', i) # Report some common errors elif put_response.get_code() == 403: # handle false positive when PUT method is not supported # https://github.com/andresriancho/w3af/pull/2724/files if 'supported' in put_response.get_body().lower(): return msg = 'DAV seems to be correctly configured and allowing you to'\ ' use the PUT method but the directory does not have the'\ ' right permissions that would allow the web server to'\ ' write to it. This error was found at: "%s".' msg = msg % put_response.get_url() i = Info('DAV incorrect configuration', msg, [put_response.id, res.id], self.get_name()) i.set_url(url) i.set_method('PUT') self.kb_append(self, 'dav', i) def get_plugin_deps(self): """ :return: A list with the names of the plugins that should be run before the current one. """ return ['infrastructure.allowed_methods', 'infrastructure.server_header'] def get_long_desc(self): """ :return: A DETAILED description of the plugin functions and features. """ return """
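The PUT check is a two-step round trip: upload a random file, then fetch it back and compare bodies. A standalone sketch of that round trip with the third-party requests library (w3af uses its own URL opener, so this is only an approximation, and the directory URL is hypothetical):

import random
import string

import requests    # assumption: not what w3af uses, just for the sketch

def check_put_upload(directory_url):
    # Upload random content with PUT, then GET it back to confirm the write.
    name = ''.join(random.choice(string.ascii_lowercase) for _ in range(5))
    content = ''.join(random.choice(string.ascii_letters + string.digits)
                      for _ in range(6))
    url = directory_url.rstrip('/') + '/' + name

    put_response = requests.put(url, data=content,
                                headers={'Content-Type': 'text/plain'})
    get_response = requests.get(url)

    return put_response.status_code, get_response.text == content

# Usage sketch against a hypothetical host:
# print(check_put_upload('http://target/uploads/'))

Comparing against the exact random content keeps false positives down: a server that answers 200 with an error page will not echo the uploaded bytes back.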
class Fingerprint404(object): """ Read the 404 page(s) returned by the server. :author: Andres Riancho ([email protected]) """ _instance = None def __init__(self): # # Set the opener, I need it to perform some tests and gain # the knowledge about the server's 404 response bodies. # self._uri_opener = None self._worker_pool = None # # Internal variables # self._already_analyzed = False self._lock = thread.allocate_lock() self._directory_uses_404_codes = ScalableBloomFilter() self._clean_404_response_db_calls = 0 # # There are two different 404 response databases, the base one is # created during the scan initialization and will not be modified # during the scan. The extended 404 DB is used during the scan to # store new knowledge about the 404 responses which are captured. # self._base_404_responses = deque(maxlen=MAX_404_RESPONSES) self._extended_404_responses = deque(maxlen=MAX_404_RESPONSES) # It is OK to store 1000 here, I'm only storing path+filename as the key, # and bool as the value. self.is_404_LRU = SynchronizedLRUDict(1000) def cleanup(self): self._base_404_responses = None self._extended_404_responses = None self.is_404_LRU = None self._already_analyzed = False self._directory_uses_404_codes = None self._clean_404_response_db_calls = 0 def set_url_opener(self, urlopener): self._uri_opener = urlopener def set_worker_pool(self, worker_pool): self._worker_pool = worker_pool def generate_404_knowledge(self, url): """ Based on a URL, request something that we know is going to be a 404. Afterwards analyze the 404's and summarise them. :return: A list with 404 bodies. """ # # This is the case when nobody has properly configured # the object in order to use it. # if self._uri_opener is None: msg = ('404 fingerprint database was incorrectly initialized.' ' URL opener is None.') raise RuntimeError(msg) # Get the filename extension and create a 404 for it extension = url.get_extension() domain_path = url.get_domain_path() # # This is a list of the most common handlers, in some configurations, # the 404 depends on the handler, so I want to make sure that I catch # the 404 for each one # handlers = { 'py', 'php', 'asp', 'aspx', 'do', 'jsp', 'rb', 'action', 'gif', 'htm', 'pl', 'cgi', 'xhtml', 'htmls', 'foobar' } if extension: handlers.add(extension) test_urls = [] for handler_ext in handlers: rand_alnum_file = rand_alnum(8) + '.' + handler_ext url404 = domain_path.url_join(rand_alnum_file) test_urls.append(url404) # Also keep in mind that in some cases we don't have an extension, so # we need to create a URL with just a filename if not extension: rand_alnum_file = rand_alnum(8) url404 = domain_path.url_join(rand_alnum_file) test_urls.append(url404) imap_unordered = self._worker_pool.imap_unordered not_exist_resp_lst = [] for not_exist_resp in imap_unordered(self._send_404, test_urls): four_oh_data = FourOhFourResponse(not_exist_resp) not_exist_resp_lst.append(four_oh_data) # # Populate the self._directory_uses_404_codes with the information # we just retrieved from the application # if not_exist_resp.get_code() == 404: url_404 = not_exist_resp.get_uri() path_extension = (url_404.get_domain_path(), url_404.get_extension()) # No need to check if the ScalableBloomFilter contains the key # It is a "set", adding duplicates is a no-op. self._directory_uses_404_codes.add(path_extension) # # Sort the HTTP responses by length to try to have the same DB on # each call to generate_404_knowledge(). 
This is required because of # the imap_unordered() above, which will yield the responses in # unexpected order each time we call it. # def sort_by_response_length(a, b): return cmp(len(a.body), len(b.body)) not_exist_resp_lst.sort(sort_by_response_length) # # I have the 404 responses in not_exist_resp_lst, but maybe they # all look the same, so I'll filter the ones that look alike. # # Just add the first one to the 404 responses list, since that one is # "unique" # if len(not_exist_resp_lst): four_oh_data = not_exist_resp_lst[0] self._append_to_base_404_responses(four_oh_data) # And now add the unique responses for i in not_exist_resp_lst: for j in self._base_404_responses: if i is j: break if fuzzy_equal(i.body, j.body, IS_EQUAL_RATIO): # i (or something really similar) already exists in the # self._base_404_responses, no need to compare any further break else: # None of the 404_responses match the item from not_exist_resp_lst # This means that this item is new and we should store it in the # 404_responses db self._append_to_base_404_responses(i) msg = 'The base 404 response DB contains responses with IDs: %s' args = (', '.join( str(r.id) for r in copy.copy(self._base_404_responses))) om.out.debug(msg % args) def _append_to_base_404_responses(self, data): self._base_404_responses.append(data) msg = ('Added 404 data for "%s" (id:%s, len:%s) to the base' ' 404 result database (size: %s/%s)') args = (data.url, data.id, len(data.body), len(self._base_404_responses), MAX_404_RESPONSES) om.out.debug(msg % args) def _append_to_extended_404_responses(self, data): self._extended_404_responses.append(data) msg = ('Added 404 data for "%s" (id:%s, len:%s)) to the extended' ' 404 result database (size: %s/%s)') args = (data.url, data.id, len(data.body), len(self._base_404_responses), MAX_404_RESPONSES) om.out.debug(msg % args) self.clean_404_response_db() def get_404_responses(self): all_404 = itertools.chain(copy.copy(self._base_404_responses), copy.copy(self._extended_404_responses)) for resp_404 in all_404: yield resp_404 def clean_404_response_db(self): """ During the scan, and because I chose to remove the very broad 404 database lock, the 404 response database might become untidy: the same HTTP response might be appended to the DB multiple times. An untidy DB triggers more comparisons between HTTP responses, which is CPU-intensive. This method cleans the DB every N calls to reduce any duplicates. :return: None. The extended DB is modified. """ self._clean_404_response_db_calls += 1 if self._clean_404_response_db_calls % CLEAN_DB_EVERY != 0: return removed_items = 0 extended_404_response_copy = copy.copy(self._extended_404_responses) for i in extended_404_response_copy: for j in extended_404_response_copy: if i is j: continue if not fuzzy_equal(i.body, j.body, IS_EQUAL_RATIO): continue # i (or something really similar) already exists in # self._extended_404_responses, no need to compare any further # just remove it and continue with the next try: self._extended_404_responses.remove(i) except ValueError: # The 404 response DB might have been changed by another thread break else: msg = ( 'Removed 404 response for "%s" (id: %s) from the 404 DB' ' because it matches 404 response "%s" (id: %s)') args = (i.url, i.id, j.url, j.id) om.out.debug(msg % args) removed_items += 1 break msg = 'Called clean 404 response DB. Removed %s duplicates from DB.' 
args = (removed_items, ) om.out.debug(msg % args) msg = 'The extended 404 response DB contains responses with IDs: %s' args = (', '.join( str(r.id) for r in copy.copy(self._extended_404_responses))) om.out.debug(msg % args) @retry(tries=2, delay=0.5, backoff=2) def _send_404(self, url404, debugging_id=None): """ Sends a GET request to url404. :return: The HTTP response body. """ # I don't use the cache, because the URLs are random and the only thing # that cache does is to fill up disk space try: response = self._uri_opener.GET(url404, cache=False, grep=False, debugging_id=debugging_id) except HTTPRequestException, hre: message = 'Exception found while detecting 404: "%s"' om.out.debug(message % hre) raise FourOhFourDetectionException(message % hre) return response
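The whole 404 database hinges on fuzzy_equal() deciding whether two response bodies "look alike" at IS_EQUAL_RATIO. A rough stand-in built on difflib, with an illustrative threshold rather than w3af's exact value:

import difflib

IS_EQUAL_RATIO = 0.90    # illustrative threshold, not necessarily w3af's

def fuzzy_equal(body_a, body_b, ratio=IS_EQUAL_RATIO):
    # True when two response bodies are 'similar enough'.
    return difflib.SequenceMatcher(None, body_a, body_b).ratio() >= ratio

not_found_page = ('<html><head><title>404</title></head><body>'
                  '<h1>Sorry, the page /abc123.php was not found here</h1>'
                  '</body></html>')
candidate_page = ('<html><head><title>404</title></head><body>'
                  '<h1>Sorry, the page /xyz987.php was not found here</h1>'
                  '</body></html>')
real_page = ('<html><head><title>Admin</title></head><body>'
             '<h1>Administration panel - please log in</h1>'
             '</body></html>')

print(fuzzy_equal(not_found_page, candidate_page))    # True  -> looks like a 404
print(fuzzy_equal(not_found_page, real_page))         # False -> a real resource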
class get_emails(GrepPlugin): """ Find email accounts. :author: Andres Riancho ([email protected]) """ def __init__(self): GrepPlugin.__init__(self) # User configured variables self._only_target_domain = True self._already_reported = ScalableBloomFilter() def grep(self, request, response): """ Plugin entry point, get the emails and save them to the kb. :param request: The HTTP request :param request: The HTTP response :return: None """ self._grep_worker(request, response, 'emails', response.get_url().get_root_domain()) if not self._only_target_domain: self._grep_worker(request, response, 'external_emails') def _grep_worker(self, request, response, kb_key, domain=None): """ Helper method for using in self.grep() :param request: The HTTP request :param response: The HTTP response :param kb_key: Knowledge base dict key :param domain: Target domain for get_emails filter :return: None """ try: dp = parser_cache.dpc.get_document_parser_for(response) except BaseFrameworkException: msg = 'Failed to get document parser for "%s" at get_emails.' om.out.debug(msg % response.get_url()) return emails = set(dp.get_emails(domain)) for mail_address in emails: # Reduce false positives if request.sent(mail_address): continue # Email address are case insensitive mail_address = mail_address.lower() url = response.get_url() uniq_key = (mail_address, url) if uniq_key in self._already_reported: continue # Avoid dups self._already_reported.add(uniq_key) # Create a new info object, and report it desc = 'The mail account: "%s" was found at "%s".' desc = desc % (mail_address, url) i = Info('Email address disclosure', desc, response.id, self.get_name()) i.add_to_highlight(mail_address) i.set_url(url) i[EmailInfoSet.ITAG] = mail_address i['user'] = mail_address.split('@')[0] self.kb_append_uniq_group('emails', kb_key, i, group_klass=EmailInfoSet) def set_options(self, options_list): self._only_target_domain = options_list['only_target_domain'].get_value() def get_options(self): """ :return: A list of option objects for this plugin. """ ol = OptionList() d1 = 'Only search emails for domain of target' o1 = opt_factory('only_target_domain', self._only_target_domain, d1, 'boolean') ol.add(o1) return ol def get_long_desc(self): """ :return: A DETAILED description of the plugin functions and features. """ return """
def __init__(self):
    super(shell_shock, self).__init__()
    self.already_tested_urls = ScalableBloomFilter()
class web_spider(CrawlPlugin): """ Crawl the web application. :author: Andres Riancho ([email protected]) """ UNAUTH_FORBID = {http_constants.UNAUTHORIZED, http_constants.FORBIDDEN} def __init__(self): CrawlPlugin.__init__(self) # Internal variables self._compiled_ignore_re = None self._compiled_follow_re = None self._broken_links = DiskSet(table_prefix='web_spider') self._first_run = True self._target_urls = [] self._target_domain = None self._already_filled_form = ScalableBloomFilter() self._variant_db = VariantDB() # User configured variables self._ignore_regex = '' self._follow_regex = '.*' self._only_forward = False self._compile_re() def crawl(self, fuzzable_req): """ Searches for links on the html. :param fuzzable_req: A fuzzable_req instance that contains (among other things) the URL to test. """ self._handle_first_run() # # If it is a form, then smart_fill the parameters to send something that # makes sense and will allow us to cover more code. # data_container = fuzzable_req.get_raw_data() if isinstance(data_container, Form): if fuzzable_req.get_url() in self._already_filled_form: return self._already_filled_form.add(fuzzable_req.get_url()) data_container.smart_fill() # Send the HTTP request resp = self._uri_opener.send_mutant(fuzzable_req) # Nothing to do here... if resp.get_code() == http_constants.UNAUTHORIZED: return # Nothing to do here... if resp.is_image(): return # And we don't trust what comes from the core, check if 404 if is_404(resp): return self._extract_html_forms(resp, fuzzable_req) self._extract_links_and_verify(resp, fuzzable_req) def _extract_html_forms(self, resp, fuzzable_req): """ Parses the HTTP response body and extract HTML forms, resulting forms are put() on the output queue. """ # Try to find forms in the document try: dp = parser_cache.dpc.get_document_parser_for(resp) except BaseFrameworkException: # Failed to find a suitable parser for the document return # Create one FuzzableRequest for each form variant mode = cf.cf.get('form_fuzzing_mode') for form_params in dp.get_forms(): if not self._should_analyze_url(form_params.get_action()): continue headers = fuzzable_req.get_headers() for form_params_variant in form_params.get_variants(mode): data_container = dc_from_form_params(form_params_variant) # Now data_container is one of Multipart of URLEncoded form # instances, which is a DataContainer. Much better than the # FormParameters instance we had before in form_params_variant r = FuzzableRequest.from_form(data_container, headers=headers) self.output_queue.put(r) def _handle_first_run(self): if self._first_run: # I have to set some variables, in order to be able to code # the "only_forward" feature self._first_run = False self._target_urls = [i.uri2url() for i in cf.cf.get('targets')] # The following line triggered lots of bugs when the "stop" button # was pressed and the core did this: "cf.cf.save('targets', [])" # # self._target_domain = cf.cf.get('targets')[0].get_domain() # # Changing it to something awful but bug-free. 
targets = cf.cf.get('targets') if not targets: return else: self._target_domain = targets[0].get_domain() def _urls_to_verify_generator(self, resp, fuzzable_req): """ Yields tuples containing: * Newly found URL * The FuzzableRequest instance passed as parameter * The HTTPResponse generated by the FuzzableRequest * Boolean indicating if we trust this reference or not :param resp: HTTP response object :param fuzzable_req: The HTTP request that generated the response """ gen = itertools.chain(self._body_url_generator(resp, fuzzable_req), headers_url_generator(resp, fuzzable_req)) for ref, fuzzable_req, original_resp, possibly_broken in gen: if self._should_verify_extracted_url(ref, original_resp): yield ref, fuzzable_req, original_resp, possibly_broken def _body_url_generator(self, resp, fuzzable_req): """ Yields tuples containing: * Newly found URL * The FuzzableRequest instance passed as parameter * The HTTPResponse generated by the FuzzableRequest * Boolean indicating if we trust this reference or not The newly found URLs are extracted from the http response body using one of the framework's parsers. :param resp: HTTP response object :param fuzzable_req: The HTTP request that generated the response """ # # Note: I WANT to follow links that are in the 404 page. # try: doc_parser = parser_cache.dpc.get_document_parser_for(resp) except BaseFrameworkException, w3: om.out.debug('Failed to find a suitable document parser. ' 'Exception "%s"' % w3) else:
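Besides the only_forward and target-domain checks, web_spider gates every extracted URL through the user-configured ignore/follow regular expressions. A small sketch of that filter on its own, with example patterns:

import re

def should_follow(url, compiled_ignore_re, compiled_follow_re):
    # Follow a URL only if it matches follow_regex and not ignore_regex.
    if compiled_ignore_re.match(url):
        return False
    return bool(compiled_follow_re.match(url))

ignore_re = re.compile('.*logout.*')
follow_re = re.compile('.*')

print(should_follow('http://target/app/home', ignore_re, follow_re))           # True
print(should_follow('http://target/app/logout?next=/', ignore_re, follow_re))  # False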
class shell_shock(AuditPlugin): """ Find shell shock vulnerabilities. :author: Andres Riancho ([email protected]) """ DELAY_TESTS = [ PingDelay('() { test; }; ping -c %s 127.0.0.1'), ExactDelay('() { test; }; sleep %s') ] def __init__(self): super(shell_shock, self).__init__() self.already_tested_urls = ScalableBloomFilter() def audit(self, freq, orig_response): """ Tests an URL for shell shock vulnerabilities. :param freq: A FuzzableRequest """ url = freq.get_url() # Here the script is vulnerable, not a specific parameter, so we # run unique tests per URL if url not in self.already_tested_urls: self.already_tested_urls.add(url) # We are implementing these methods for detecting shell-shock vulns # if you know about other methods, or have improvements on these # please let us know. Pull-requests are also welcome. for detection_method in [ self._with_header_echo_injection, #self._with_body_echo_injection, self._with_time_delay ]: if detection_method(freq): break def _with_header_echo_injection(self, freq): """ We're sending a payload that will trigger the injection of various headers in the HTTP response body. :param freq: A FuzzableRequest :return: True if a vulnerability was found """ injected_header = 'shellshock' injected_value = 'check' payload = '() { :;}; echo "%s: %s"' % (injected_header, injected_value) for mutant in self.create_mutants(freq, TEST_HEADERS): mutant.set_token_value(payload) response = self._uri_opener.send_mutant(mutant) header_value, header_name = response.get_headers().iget( injected_header) if header_value is not None and injected_value in header_value.lower( ): desc = u'Shell shock was found at: %s' % mutant.found_at() v = Vuln.from_mutant(u'Shell shock vulnerability', desc, severity.HIGH, [response.id], self.get_name(), mutant) self.kb_append_uniq(self, 'shell_shock', v) return True def _with_body_echo_injection(self, freq): """ We're sending a payload that will trigger the injection of new lines that will make the response transition from "headers" to "body". :param freq: A FuzzableRequest :return: True if a vulnerability was found """ raise NotImplementedError def create_mutants(self, freq, headers_name): for header_name in headers_name: headers = freq.get_headers() headers[header_name] = '' freq.set_headers(headers) fuzzer_config = {'fuzzable_headers': [header_name]} mutant = HeadersMutant.create_mutants(freq, [''], [header_name], False, fuzzer_config)[0] yield mutant def _with_time_delay(self, freq): """ Tests an URLs for shell shock vulnerabilities using time delays. :param freq: A FuzzableRequest :return: True if a vulnerability was found """ for mutant in self.create_mutants(freq, TEST_HEADERS): for delay_obj in self.DELAY_TESTS: ed = ExactDelayController(mutant, delay_obj, self._uri_opener) success, responses = ed.delay_is_controlled() if success: mutant.set_token_value(delay_obj.get_string_for_delay(3)) desc = u'Shell shock was found at: %s' % mutant.found_at() v = Vuln.from_mutant(u'Shell shock vulnerability', desc, severity.HIGH, [r.id for r in responses], self.get_name(), mutant) self.kb_append_uniq(self, 'shell_shock', v) return True def get_long_desc(self): """ :return: A DETAILED description of the plugin functions and features. """ return """
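The header-echo detection sends '() { :;}; echo "shellshock: check"' in a request header and then looks for that header echoed back in the response. A minimal sketch of the response-side check, with a plain dict standing in for w3af's Headers object:

def found_header_echo(response_headers,
                      injected_header='shellshock',
                      injected_value='check'):
    # Case-insensitive lookup of the header the payload tries to inject.
    for name, value in response_headers.items():
        if name.lower() == injected_header and injected_value in value.lower():
            return True
    return False

payload = '() { :;}; echo "%s: %s"' % ('shellshock', 'check')

print(found_header_echo({'Content-Type': 'text/html',
                         'shellshock': 'check'}))        # True -> vulnerable
print(found_header_echo({'Content-Type': 'text/html'}))  # False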
def __init__(self):
    GrepPlugin.__init__(self)
    self._already_inspected = ScalableBloomFilter()
    self._ignore_if_match = None
class html_comments(GrepPlugin): """ Extract and analyze HTML comments. :author: Andres Riancho ([email protected]) """ HTML_RE = re.compile('<[a-zA-Z]*.*?>.*?</[a-zA-Z]>') INTERESTING_WORDS = ( # In English 'user', 'pass', 'xxx', 'fix', 'bug', 'broken', 'oops', 'hack', 'caution', 'todo', 'note', 'warning', '!!!', '???', 'shit', 'pass', 'password', 'passwd', 'pwd', 'secret', 'stupid', # In Spanish 'tonto', 'porqueria', 'cuidado', 'usuario', u'contraseña', 'puta', 'email', 'security', 'captcha', 'pinga', 'cojones', # some in Portuguese 'banco', 'bradesco', 'itau', 'visa', 'bancoreal', u'transfêrencia', u'depósito', u'cartão', u'crédito', 'dados pessoais' ) _multi_in = multi_in([' %s ' % w for w in INTERESTING_WORDS]) def __init__(self): GrepPlugin.__init__(self) # Internal variables self._comments = DiskDict() self._already_reported_interesting = ScalableBloomFilter() def grep(self, request, response): """ Plugin entry point, parse those comments! :param request: The HTTP request object. :param response: The HTTP response object :return: None """ if not response.is_text_or_html(): return try: dp = parser_cache.dpc.get_document_parser_for(response) except BaseFrameworkException: return for comment in dp.get_comments(): # These next two lines fix this issue: # audit.ssi + grep.html_comments + web app with XSS = false positive if request.sent(comment): continue if self._is_new(comment, response): self._interesting_word(comment, request, response) self._html_in_comment(comment, request, response) def _interesting_word(self, comment, request, response): """ Find interesting words in HTML comments """ comment = comment.lower() for word in self._multi_in.query(comment): if (word, response.get_url()) not in self._already_reported_interesting: desc = 'A comment with the string "%s" was found in: "%s".'\ ' This could be interesting.' desc = desc % (word, response.get_url()) i = Info.from_fr('Interesting HTML comment', desc, response.id, self.get_name(), request) i.add_to_highlight(word) kb.kb.append(self, 'interesting_comments', i) om.out.information(i.get_desc()) self._already_reported_interesting.add((word, response.get_url())) def _html_in_comment(self, comment, request, response): """ Find HTML code in HTML comments """ html_in_comment = self.HTML_RE.search(comment) if html_in_comment and \ (comment, response.get_url()) not in self._already_reported_interesting: # There is HTML code in the comment. comment = comment.strip() comment = comment.replace('\n', '') comment = comment.replace('\r', '') comment = comment[:40] desc = 'A comment with the string "%s" was found in: "%s".'\ ' This could be interesting.' desc = desc % (comment, response.get_url()) i = Info.from_fr('HTML comment contains HTML code', desc, response.id, self.get_name(), request) i.set_uri(response.get_uri()) i.add_to_highlight(html_in_comment.group(0)) kb.kb.append(self, 'html_comment_hides_html', i) om.out.information(i.get_desc()) self._already_reported_interesting.add((comment, response.get_url())) def _is_new(self, comment, response): """ Make sure that we perform a thread safe check on the self._comments dict, in order to avoid duplicates. 
""" with self._plugin_lock: #pylint: disable=E1103 comment_data = self._comments.get(comment, None) if comment_data is None: self._comments[comment] = [(response.get_url(), response.id), ] return True else: if response.get_url() not in [x[0] for x in comment_data]: comment_data.append((response.get_url(), response.id)) self._comments[comment] = comment_data return True #pylint: enable=E1103 return False def end(self): """ This method is called when the plugin wont be used anymore. :return: None """ inform = [] for comment in self._comments.iterkeys(): urls_with_this_comment = self._comments[comment] stick_comment = ' '.join(comment.split()) if len(stick_comment) > 40: msg = 'A comment with the string "%s..." (and %s more bytes)'\ ' was found on these URL(s):' om.out.information( msg % (stick_comment[:40], str(len(stick_comment) - 40))) else: msg = 'A comment containing "%s" was found on these URL(s):' om.out.information(msg % (stick_comment)) for url, request_id in urls_with_this_comment: inform.append('- ' + url + ' (request with id: ' + str(request_id) + ')') inform.sort() for i in inform: om.out.information(i) self._comments.cleanup() def get_long_desc(self): """ :return: A DETAILED description of the plugin functions and features. """ return """