def _send_and_check(self, repo_url, repo_get_files, repo, domain_path):
    '''
    Check if a repository index exists in the domain_path.

    :param repo_url: The URL where the repository index might live.
    :param repo_get_files: Callable that extracts filenames from the
                           repository index response body.
    :param repo: Human-readable repository name, used both as the kb key
                 and in the vulnerability description.
    :param domain_path: URL object for the directory being tested.
    :return: None, everything is saved to the self.out_queue.
    '''
    http_response = self.http_get_and_parse(repo_url)

    if not is_404(http_response):
        filenames = repo_get_files(http_response.get_body())
        parsed_url_set = set()

        # Only queue filenames we have not analyzed before
        for filename in self._clean_filenames(filenames):
            test_url = domain_path.url_join(filename)
            if test_url not in self._analyzed_filenames:
                parsed_url_set.add(test_url)
                self._analyzed_filenames.add(filename)

        # Fetch (and parse) the referenced files in parallel
        self.worker_pool.map(self.http_get_and_parse, parsed_url_set)

        if parsed_url_set:
            desc = 'A %s was found at: "%s"; this could indicate that'\
                   ' a %s is accessible. You might be able to download'\
                   ' the Web application source code.'
            desc = desc % (repo, http_response.get_url(), repo)

            v = Vuln('Source code repository', desc, severity.MEDIUM,
                     http_response.id, self.get_name())
            v.set_url(http_response.get_url())

            kb.kb.append(self, repo, v)
            om.out.vulnerability(v.get_desc(), severity=v.get_severity())
def _do_request(self, url, mutant):
    '''
    Perform a simple GET to see if the result is an error or not, and then
    run the actual fuzzing.
    '''
    response = self._uri_opener.GET(mutant, cache=True,
                                    headers=self._headers)

    # Skip 404s, auth-protected responses and anything the eval filter
    # rejects (same short-circuit order as before, De Morgan'd)
    is_interesting = not is_404(response) \
        and response.get_code() not in (403, 401) \
        and not self._return_without_eval(mutant)

    if is_interesting:
        for fuzzable_req in self._create_fuzzable_requests(response):
            self.output_queue.put(fuzzable_req)

        #
        # Save it to the kb (if new)!
        #
        resp_url = response.get_url()
        if resp_url not in self._seen and resp_url.get_file_name():
            desc = 'A potentially interesting file was found at: "%s".'
            desc = desc % resp_url

            info_inst = Info('Potentially interesting file', desc,
                             response.id, self.get_name())
            info_inst.set_url(resp_url)

            kb.kb.append(self, 'files', info_inst)
            om.out.information(info_inst.get_desc())

            # Report only once
            self._seen.add(resp_url)
def _classic_worker(self, gh, search_term):
    '''
    Perform the searches and store the results in the kb.

    :param gh: The GHDB entry that originated this search term.
    :param search_term: The google dork to send to the search engine.
    '''
    google_list = self._google_se.get_n_results(search_term, 9)

    for result in google_list:
        # I found a vuln in the site!
        response = self._uri_opener.GET(result.URL, cache=True)
        if not is_404(response):
            desc = 'ghdb plugin found a vulnerability at URL: "%s".' \
                   ' According to GHDB the vulnerability description'\
                   ' is "%s".'
            desc = desc % (response.get_url(), gh.desc)

            v = Vuln('Google hack database match', desc, severity.MEDIUM,
                     response.id, self.get_name())
            v.set_url(response.get_url())
            v.set_method('GET')

            kb.kb.append(self, 'vuln', v)
            # BUGFIX: report with the vulnerability's own severity
            # (MEDIUM) instead of the hard-coded severity.LOW that
            # contradicted the Vuln instance created above.
            om.out.vulnerability(v.get_desc(), severity=v.get_severity())

            # Create the fuzzable requests
            for fr in self._create_fuzzable_requests(response):
                self.output_queue.put(fr)
def _confirm_file_upload(self, path, mutant, http_response):
    '''
    Confirms if the file was uploaded to path

    :param path: The URL where we suspect that a file was uploaded to.
    :param mutant: The mutant that originated the file on the remote end
    :param http_response: The HTTP response associated with sending mutant
    '''
    # cache=False: we need a fresh response to confirm the upload is
    # really there right now
    get_response = self._uri_opener.GET(path, cache=False)

    if not is_404(get_response) and self._has_no_bug(mutant):
        # This is necessary, if I don't do this, the session
        # saver will break cause REAL file objects can't
        # be picked
        mutant.set_mod_value('<file_object>')

        desc = 'A file upload to a directory inside the webroot' \
               ' was found at: %s' % mutant.found_at()

        v = Vuln.from_mutant('Insecure file upload', desc, severity.HIGH,
                             [http_response.id, get_response.id],
                             self.get_name(), mutant)

        v['file_dest'] = get_response.get_url()
        v['file_vars'] = mutant.get_file_vars()

        self.kb_append_uniq(self, 'file_upload', v)
        return
def grep(self, request, response):
    '''
    Plugin entry point. Get responses, analyze words, create dictionary.

    :param request: The HTTP request object.
    :param response: The HTTP response object
    :return: None.
    '''
    # Nothing to do until the site's language has been identified
    if not self.got_lang():
        return

    # I added the 404 code here to avoid doing some is_404 lookups
    if response.get_code() not in [500, 401, 403, 404] \
    and not is_404(response) \
    and request.get_method() in ['POST', 'GET']:

        # Run the plugins
        data = self._run_plugins(response)

        # The kb read-merge-write below must be atomic across threads
        with self._plugin_lock:
            old_data = kb.kb.raw_read('password_profiling',
                                      'password_profiling')

            new_data = self.merge_maps(old_data, data, request,
                                       self.captured_lang)

            new_data = self._trim_data(new_data)

            # save the updated map
            kb.kb.raw_write(self, 'password_profiling', new_data)
def _confirm_file_upload(self, path, mutant, http_response):
    """
    Confirms if the file was uploaded to path

    :param path: The URL where we suspect that a file was uploaded to.
    :param mutant: The mutant that originated the file on the remote end
    :param http_response: The HTTP response associated with sending mutant
    """
    # cache=False: a fresh response is needed to confirm the upload
    get_response = self._uri_opener.GET(path, cache=False)

    if not is_404(get_response) and self._has_no_bug(mutant):
        # This is necessary, if I don't do this, the session
        # saver will break cause REAL file objects can't
        # be picked
        mutant.set_mod_value("<file_object>")

        desc = "A file upload to a directory inside the webroot" \
               " was found at: %s" % mutant.found_at()

        v = Vuln.from_mutant(
            "Insecure file upload",
            desc,
            severity.HIGH,
            [http_response.id, get_response.id],
            self.get_name(),
            mutant,
        )

        v["file_dest"] = get_response.get_url()
        v["file_vars"] = mutant.get_file_vars()

        self.kb_append_uniq(self, "file_upload", v)
        return
def _force_disclosures(self, domain_path, potentially_vulnerable_paths):
    '''
    GET each potentially vulnerable path and report the first full path
    disclosure that is found.

    :param domain_path: The path to wordpress' root directory
    :param potentially_vulnerable_paths: A list with the paths I'll URL-join
                                         with @domain_path, GET and parse.
    '''
    for vuln_path in potentially_vulnerable_paths:
        vuln_url = domain_path.url_join(vuln_path)
        http_response = self._uri_opener.GET(vuln_url, cache=True)

        if is_404(http_response):
            continue

        if 'Fatal error: ' in http_response.get_body():
            desc = 'Analyze the HTTP response body to find the full path'\
                   ' where wordpress was installed.'
            info_inst = Info('WordPress path disclosure', desc,
                             http_response.id, self.get_name())
            info_inst.set_url(vuln_url)

            kb.kb.append(self, 'info', info_inst)
            om.out.information(info_inst.get_desc())
            # One disclosure is enough, stop probing
            break
def audit(self, freq, orig_response):
    '''
    Searches for file upload vulns using a POST to author.dll.

    :param freq: A FuzzableRequest
    :param orig_response: The HTTP response associated with freq (unused
                          here, part of the audit plugin interface).
    '''
    domain_path = freq.get_url().get_domain_path()

    if kb.kb.get(self, 'frontpage'):
        # Nothing to do, I have found vuln(s) and I should stop on first
        msg = 'Not verifying if I can upload files to: "%s" using'\
              ' author.dll. Because I already found a vulnerability.'
        # BUGFIX: the original logged the raw template, leaving an
        # un-interpolated "%s" in the debug output.
        om.out.debug(msg % domain_path)
        return

    # I haven't found any vulns yet, OR i'm trying to find every
    # directory where I can write a file.
    if domain_path not in self._already_tested:
        self._already_tested.add(domain_path)

        # Find a file that doesn't exist and then try to upload it
        for _ in xrange(3):
            rand_file = rand_alpha(5) + '.html'
            rand_path_file = domain_path.url_join(rand_file)
            res = self._uri_opener.GET(rand_path_file)
            if is_404(res):
                upload_id = self._upload_file(domain_path, rand_file)
                self._verify_upload(domain_path, rand_file, upload_id)
                break
        else:
            # for/else: we never hit a 404 in three tries
            msg = 'frontpage plugin failed to find a 404 page. This is'\
                  ' mostly because of an error in 404 page detection.'
            om.out.error(msg)
def _extract_paths(self, domain_path):
    '''
    :param domain_path: The URL object pointing to the current wordpress
                        installation
    :return: A list with the paths that might trigger full path disclosures

    TODO: Will fail if WordPress is running on a Windows server due to
          paths manipulation.
    '''
    theme_paths = []
    wp_root_response = self._uri_opener.GET(domain_path, cache=True)

    if is_404(wp_root_response):
        return theme_paths

    # The active theme name is part of the style.css URL on the root page
    theme_regexp = '%swp-content/themes/(.*)/style.css' % domain_path
    theme_mo = re.search(theme_regexp, wp_root_response.get_body(),
                         re.IGNORECASE)

    if theme_mo:
        theme_name = theme_mo.group(1)
        # header.php / footer.php of the active theme are the usual
        # suspects for a full path disclosure
        theme_paths = ['wp-content/themes/%s/%s.php' % (theme_name, fname)
                       for fname in ('header', 'footer')]

    return theme_paths
def discover(self, fuzzable_request):
    '''
    For every directory, fetch a list of files and analyze the response.

    :param fuzzable_request: A fuzzable_request instance that contains
                             (among other things) the URL to test.
    '''
    for domain_path in fuzzable_request.get_url().get_directories():

        if domain_path not in self._analyzed_dirs:

            # Save the domain_path so I know I'm not working in vane
            self._analyzed_dirs.add(domain_path)

            # Request the file
            frontpage_info_url = domain_path.url_join("_vti_inf.html")
            try:
                response = self._uri_opener.GET(frontpage_info_url,
                                                cache=True)
            except w3afException, w3:
                # Best effort: log and keep going with the next directory
                msg = 'Failed to GET Frontpage Server _vti_inf.html file: "'
                msg += frontpage_info_url + \
                    '". Exception: "' + str(w3) + '".'
                om.out.debug(msg)
            else:
                # Check if it's a Frontpage Info file
                if not is_404(response):
                    for fr in self._create_fuzzable_requests(response):
                        self.output_queue.put(fr)

                    self._analyze_response(response)
def crawl(self, fuzzable_request):
    '''
    Finds the version of a WordPress installation.
    :param fuzzable_request: A fuzzable_request instance that contains
    (among other things) the URL to test.
    '''
    if not self._exec:
        # This will remove the plugin from the crawl plugins to be run.
        raise w3afRunOnce()

    #
    # Check if the server is running wp
    #
    domain_path = fuzzable_request.get_url().get_domain_path()

    # Main scan URL passed from w3af + unique wp file
    wp_unique_url = domain_path.url_join('wp-login.php')
    response = self._uri_opener.GET(wp_unique_url, cache=True)

    # If wp_unique_url is a 404 there is no wordpress here
    if is_404(response):
        return

    # It was possible to analyze wp-login.php, don't run again
    self._exec = False

    # Analyze the identified wordpress installation
    self._fingerprint_wordpress(domain_path, wp_unique_url, response)

    # Extract the links
    for fuzzable_req in self._create_fuzzable_requests(response):
        self.output_queue.put(fuzzable_req)
def _is_possible_backdoor(self, response):
    '''
    Heuristic to infer if the content of <response> has the pattern of a
    backdoor response.

    :param response: HTTPResponse object
    :return: A bool value
    '''
    if is_404(response):
        return False

    body_text = response.get_body()
    dom = response.get_dom()

    if dom is not None:
        for ele, attrs in BACKDOOR_COLLECTION.iteritems():
            for attrname, attr_vals in attrs.iteritems():
                # Set of lowered attribute values present in the DOM
                dom_attr_vals = set(
                    n.get(attrname).lower()
                    for n in dom.xpath('//%s[@%s]' % (ele, attrname)))

                # If at least one elem in intersection return True.
                # BUGFIX: the original tested "dom_attr_vals and
                # set(attr_vals)", which is true whenever BOTH sets are
                # merely non-empty; the comment (and the heuristic)
                # require an actual intersection.
                if dom_attr_vals.intersection(attr_vals):
                    return True

    # If no DOM match then try with keywords. At least 2 should be
    # contained in 'body_text' to succeed.
    times = 0
    for back_kw in KNOWN_OFFENSIVE_WORDS:
        if re.search(back_kw, body_text, re.I):
            times += 1
            if times == 2:
                return True

    return False
def _extract_paths(self, domain_path):
    '''
    :param domain_path: The URL object pointing to the current wordpress
                        installation
    :return: A list with the paths that might trigger full path disclosures

    TODO: Will fail if WordPress is running on a Windows server due to
          paths manipulation.
    '''
    theme_paths = []
    wp_root_response = self._uri_opener.GET(domain_path, cache=True)

    if not is_404(wp_root_response):
        response_body = wp_root_response.get_body()

        # The active theme name appears in the style.css link on the
        # root page
        theme_regexp = '%swp-content/themes/(.*)/style.css' % domain_path
        theme = re.search(theme_regexp, response_body, re.IGNORECASE)
        if theme:
            theme_name = theme.group(1)

            # header.php / footer.php of the active theme are the usual
            # suspects for a full path disclosure
            for fname in ('header', 'footer'):
                path_fname = 'wp-content/themes/%s/%s.php' % (
                    theme_name, fname)
                theme_paths.append(path_fname)

    return theme_paths
def _setup_404_detection(self): # # NOTE: I need to perform this test here in order to avoid some weird # thread locking that happens when the webspider calls is_404, and # because I want to initialize the is_404 database in a controlled # try/except block. # from core.controllers.core_helpers.fingerprint_404 import is_404 for url in cf.cf.get("targets"): try: response = self._w3af_core.uri_opener.GET(url, cache=True) is_404(response) except w3afMustStopByUserRequest: raise except Exception, e: msg = "Failed to initialize the 404 detection, original" ' exception was: "%s".' raise w3afMustStopException(msg % e)
def grep(self, request, response):
    '''
    Plugin entry point, search for meta tags.

    :param request: The HTTP request object.
    :param response: The HTTP response object
    :return: None
    '''
    uri = response.get_uri()

    # Only inspect text/html responses, and each URI only once
    if not response.is_text_or_html() or uri in self._already_inspected\
    or is_404(response):
        return

    self._already_inspected.add(uri)

    try:
        dp = parser_cache.dpc.get_document_parser_for(response)
    except w3afException:
        # If the response can not be parsed there is nothing to inspect
        return

    meta_tag_list = dp.get_meta_tags()

    for tag in meta_tag_list:
        tag_name = self._find_name(tag)
        for key, val in tag.items():

            for word in self.INTERESTING_WORDS:

                # Check if we have something interesting
                # and WHERE that thing actually is
                where = content = None
                if (word in key):
                    where = 'name'
                    content = key
                elif (word in val):
                    where = 'value'
                    content = val

                # Now... if we found something, report it =)
                if where is not None:
                    # The attribute is interesting!
                    fmt = 'The URI: "%s" sent a <meta> tag with attribute'\
                          ' %s set to "%s" which looks interesting.'
                    desc = fmt % (response.get_uri(), where, content)

                    # Add the known purpose of the tag, if we have one
                    if self.INTERESTING_WORDS.get(tag_name, None):
                        usage = self.INTERESTING_WORDS[tag_name]
                        desc += ' The tag is used for %s.' % usage

                    i = Info('Interesting META tag', desc, response.id,
                             self.get_name())
                    i.set_uri(response.get_uri())
                    i.add_to_highlight(where, content)

                    self.kb_append_uniq(self, 'meta_tags', i, 'URL')
def _setup_404_detection(self): # # NOTE: I need to perform this test here in order to avoid some weird # thread locking that happens when the webspider calls is_404, and # because I want to initialize the is_404 database in a controlled # try/except block. # from core.controllers.core_helpers.fingerprint_404 import is_404 for url in cf.cf.get('targets'): try: response = self._w3af_core.uri_opener.GET(url, cache=True) is_404(response) except w3afMustStopByUserRequest: raise except Exception, e: msg = 'Failed to initialize the 404 detection, original' \ ' exception was: "%s".' raise w3afMustStopException(msg % e)
def _verify_reference(self, reference, original_request,
                      original_url, possibly_broken):
    '''
    This method GET's every new link and parses it in order to get
    new links and forms.

    :param reference: The new URL to GET and parse.
    :param original_request: The request where the reference was found.
    :param original_url: The URL where the reference was found.
    :param possibly_broken: True when the reference extraction method is
                            not 100% reliable and the link may be broken.
    '''
    # Send the base URL as the Referer: using the full original_url
    # would "break" the cache=True in most cases, while the base URL
    # does not and is friendlier than simply omitting the referer.
    #
    # BUGFIX: removed the dead "headers = {'Referer': original_url}"
    # dict assignment that was immediately overwritten below.
    referer = original_url.base_url().url_string
    headers = Headers([('Referer', referer)])

    try:
        resp = self._uri_opener.GET(reference, cache=True,
                                    headers=headers, follow_redir=False)
    except w3afMustStopOnUrlError:
        pass
    else:
        fuzz_req_list = []

        if is_404(resp):
            # Note: I WANT to follow links that are in the 404 page, but
            # if the page I fetched is a 404 then it should be ignored.
            #
            # add_self will be True when the response code is 401 or 403
            # which is something needed for other plugins to keep poking
            # at that URL
            #
            # add_self will be False in all the other cases, for example
            # in the case where the response code is a 404, because we don't
            # want to return a 404 to the core.
            add_self = resp.get_code() in self.NOT_404
            fuzz_req_list = self._create_fuzzable_requests(
                resp, request=original_request, add_self=add_self)

            if not possibly_broken and not add_self:
                t = (resp.get_url(), original_request.get_uri())
                self._broken_links.add(t)
        else:
            om.out.debug('Adding relative reference "%s" '
                         'to the result.' % reference)
            frlist = self._create_fuzzable_requests(
                resp, request=original_request)
            fuzz_req_list.extend(frlist)

        # Process the list.
        for fuzz_req in fuzz_req_list:
            fuzz_req.set_referer(referer)
            self.output_queue.put(fuzz_req)
def _verify_reference(self, reference, original_request, original_url, possibly_broken): ''' This method GET's every new link and parses it in order to get new links and forms. ''' # # Remember that this "breaks" the cache=True in most cases! # headers = { 'Referer': original_url } # # But this does not, and it is friendlier that simply ignoring the # referer # referer = original_url.base_url().url_string headers = Headers([('Referer', referer)]) try: resp = self._uri_opener.GET(reference, cache=True, headers=headers, follow_redir=False) except w3afMustStopOnUrlError: pass else: fuzz_req_list = [] if is_404(resp): # Note: I WANT to follow links that are in the 404 page, but # if the page I fetched is a 404 then it should be ignored. # # add_self will be True when the response code is 401 or 403 # which is something needed for other plugins to keep poking # at that URL # # add_self will be False in all the other cases, for example # in the case where the response code is a 404, because we don't # want to return a 404 to the core. add_self = resp.get_code() in self.NOT_404 fuzz_req_list = self._create_fuzzable_requests(resp, request=original_request, add_self=add_self) if not possibly_broken and not add_self: t = (resp.get_url(), original_request.get_uri()) self._broken_links.add(t) else: om.out.debug('Adding relative reference "%s" ' 'to the result.' % reference) frlist = self._create_fuzzable_requests(resp, request=original_request) fuzz_req_list.extend(frlist) # Process the list. for fuzz_req in fuzz_req_list: fuzz_req.set_referer(referer) self.output_queue.put(fuzz_req)
def _send_and_check(self, url):
    '''
    GET the given URL and, unless it is a 404, analyze it as a RIA
    (gears manifest / crossdomain / clientaccesspolicy) XML file.
    '''
    http_response = self._uri_opener.GET(url, cache=True)
    if is_404(http_response):
        return

    file_name = url.get_file_name()

    om.out.debug('Checking response for %s in ria_enumerator.'
                 % http_response)

    self._analyze_gears_manifest(url, http_response, file_name)
    self._analyze_crossdomain_clientaccesspolicy(url, http_response,
                                                 file_name)
def _extract_urls(self, fuzzable_request, response):
    '''
    Extract information from the server-status page and return fuzzable
    requests to the caller.

    :param fuzzable_request: The request that triggered the server-status
                             response.
    :param response: The server-status HTTP response to parse.
    :return: A list of fuzzable requests extracted from the page.
    '''
    res = self._create_fuzzable_requests(response)

    # Now really parse the file and create custom made fuzzable requests
    regex = '<td>.*?<td nowrap>(.*?)</td><td nowrap>.*? (.*?) HTTP/1'
    for domain, path in re.findall(regex, response.get_body()):

        if 'unavailable' in domain:
            domain = response.get_url().get_domain()

        # Check if the requested domain and the found one are equal.
        if domain == response.get_url().get_domain():
            found_url = response.get_url(
            ).get_protocol() + '://' + domain + path
            found_url = URL(found_url)

            # They are equal, request the URL and create the fuzzable
            # requests
            tmp_res = self._uri_opener.GET(found_url, cache=True)
            if not is_404(tmp_res):
                res.extend(self._create_fuzzable_requests(tmp_res))
        else:
            # This is a shared hosting server
            self._shared_hosting_hosts.append(domain)

    # Now that we are outside the for loop, we can report the possible vulns
    if len(self._shared_hosting_hosts):
        desc = 'The web application under test seems to be in a shared'\
               ' hosting.'
        v = Vuln.from_fr('Shared hosting', desc, severity.MEDIUM,
                         response.id, self.get_name(), fuzzable_request)

        # De-duplicate before reporting
        self._shared_hosting_hosts = list(
            set(self._shared_hosting_hosts))
        v['also_in_hosting'] = self._shared_hosting_hosts

        kb.kb.append(self, 'shared_hosting', v)
        om.out.vulnerability(v.get_desc(), severity=v.get_severity())

        msg = 'This list of domains, and the domain of the web application'\
              ' under test, all point to the same server:'
        om.out.vulnerability(msg, severity=severity.MEDIUM)
        for url in self._shared_hosting_hosts:
            om.out.vulnerability('- ' + url, severity=severity.MEDIUM)

    return res
def _extract_urls(self, fuzzable_request, response):
    '''
    Extract information from the server-status page and return fuzzable
    requests to the caller.

    :param fuzzable_request: The request that triggered the server-status
                             response.
    :param response: The server-status HTTP response to parse.
    :return: A list of fuzzable requests extracted from the page.
    '''
    res = self._create_fuzzable_requests(response)

    # Now really parse the file and create custom made fuzzable requests
    regex = '<td>.*?<td nowrap>(.*?)</td><td nowrap>.*? (.*?) HTTP/1'
    for domain, path in re.findall(regex, response.get_body()):

        if 'unavailable' in domain:
            domain = response.get_url().get_domain()

        # Check if the requested domain and the found one are equal.
        if domain == response.get_url().get_domain():
            found_url = response.get_url().get_protocol(
            ) + '://' + domain + path
            found_url = URL(found_url)

            # They are equal, request the URL and create the fuzzable
            # requests
            tmp_res = self._uri_opener.GET(found_url, cache=True)
            if not is_404(tmp_res):
                res.extend(self._create_fuzzable_requests(tmp_res))
        else:
            # This is a shared hosting server
            self._shared_hosting_hosts.append(domain)

    # Now that we are outside the for loop, we can report the possible vulns
    if len(self._shared_hosting_hosts):
        desc = 'The web application under test seems to be in a shared'\
               ' hosting.'
        v = Vuln.from_fr('Shared hosting', desc, severity.MEDIUM,
                         response.id, self.get_name(), fuzzable_request)

        # De-duplicate before reporting
        self._shared_hosting_hosts = list(set(self._shared_hosting_hosts))
        v['also_in_hosting'] = self._shared_hosting_hosts

        kb.kb.append(self, 'shared_hosting', v)
        om.out.vulnerability(v.get_desc(), severity=v.get_severity())

        msg = 'This list of domains, and the domain of the web application'\
              ' under test, all point to the same server:'
        om.out.vulnerability(msg, severity=severity.MEDIUM)
        for url in self._shared_hosting_hosts:
            om.out.vulnerability('- ' + url, severity=severity.MEDIUM)

    return res
def _do_request(self, fuzzable_request, original_resp, headers):
    '''
    Send the request.

    :param fuzzable_request: The modified fuzzable request
    :param original_resp: The response for the original request that was
                          sent.
    :param headers: The headers to use when sending the new request.
    '''
    response = self._uri_opener.GET(fuzzable_request.get_uri(),
                                    cache=True,
                                    headers=headers)

    # add is True when the response looks new / interesting enough to be
    # returned to the core
    add = False

    if not is_404(response):
        # We have different cases:
        #     - If the URLs are different, then there is nothing to think
        #       about, we simply found something new!
        if response.get_url() != original_resp.get_url():
            add = True

        #     - If the content type changed, then there is no doubt that
        #       we've found something new!
        elif response.doc_type != original_resp.doc_type:
            add = True

        #     - If we changed the query string parameters, we have to check
        #       the content
        elif relative_distance_lt(response.get_clear_text_body(),
                                  original_resp.get_clear_text_body(),
                                  0.8):
            # In this case what might happen is that the number we changed
            # is "out of range" and when requesting that it will trigger an
            # error in the web application, or show us a non-interesting
            # response that holds no content.
            #
            # We choose to return these to the core because they might help
            # with the code coverage efforts. Think about something like:
            #     foo.aspx?id=OUT_OF_RANGE&foo=inject_here
            # vs.
            #     foo.aspx?id=IN_RANGE&foo=inject_here
            #
            # This relates to the EXPECTED_URLS in test_digit_sum.py
            add = True

    if add:
        for fr in self._create_fuzzable_requests(response):
            self.output_queue.put(fr)
def crawl(self, fuzzable_request):
    '''
    Get the robots.txt file and parse it.

    :param fuzzable_request: A fuzzable_request instance that contains
                             (among other things) the URL to test.
    '''
    dirs = []
    base_url = fuzzable_request.get_url().base_url()
    robots_url = base_url.url_join('robots.txt')
    http_response = self._uri_opener.GET(robots_url, cache=True)

    if not is_404(http_response):
        # Save it to the kb!
        desc = ('A robots.txt file was found at: "%s", this file might'
                ' expose private URLs and requires a manual review. The'
                ' scanner will add all URLs listed in this files to the'
                ' analysis queue.') % robots_url

        info_inst = Info('robots.txt file', desc, http_response.id,
                         self.get_name())
        info_inst.set_url(robots_url)

        kb.kb.append(self, 'robots.txt', info_inst)
        om.out.information(info_inst.get_desc())

        # Work with it...
        dirs.append(robots_url)
        for line in http_response.get_body().split('\n'):
            line = line.strip()

            # Skip blanks and comments
            if not len(line) or line[0] == '#':
                continue

            upper_line = line.upper()
            if upper_line.startswith('ALLOW') or \
               upper_line.startswith('DISALLOW'):
                url = line[line.find(':') + 1:].strip()
                try:
                    url = base_url.url_join(url)
                except:
                    # Simply ignore the invalid URL
                    pass
                else:
                    dirs.append(url)

    self.worker_pool.map(self.http_get_and_parse, dirs)
def _compare_dir(self, arg, directory, flist):
    '''
    This function is the callback function called from os.path.walk,
    python's help says:

    walk(top, func, arg)
        Directory tree walk with callback function.

        For each directory in the directory tree rooted at top (including
        top itself, but excluding '.' and '..'), call func(arg, dirname,
        fnames). dirname is the name of the directory, and fnames a list
        of the names of the files and subdirectories in dirname (excluding
        '.' and '..').  func may modify the fnames list in-place (e.g. via
        del or slice assignment), and walk will only recurse into the
        subdirectories whose names remain in fnames; this can be used to
        implement a filter, or to impose a specific order of visiting.  No
        semantics are defined for, or required of, arg, beyond that arg is
        always passed to func.  It can be used, e.g., to pass a filename
        pattern, or a mutable object designed to accumulate statistics.
        Passing None for arg is common.
    '''
    if self._first:
        # Remember the local tree root so relative paths can be computed
        # on subsequent callbacks
        self._first = False
        self._start_path = directory

    relative_dir = directory.replace(self._start_path, '')
    if relative_dir and not relative_dir.endswith('/'):
        relative_dir += '/'

    remote_root = self._remote_url_path
    remote_root_with_local_path = remote_root.url_join(relative_dir)

    for fname in flist:
        if os.path.isfile(directory + os.path.sep + fname):
            url = remote_root_with_local_path.url_join(fname)
            response = self._uri_opener.GET(url, cache=True)
            if not is_404(response):
                if response.is_text_or_html():
                    for fr in self._create_fuzzable_requests(response):
                        self.output_queue.put(fr)
                # Compare the remote content with the local file
                self._check_content(response, directory + os.path.sep + fname)
                self._exist_remote.append(url)
            else:
                self._not_exist_remote.append(url)
def _compare_dir(self, arg, directory, flist):
    '''
    This function is the callback function called from os.path.walk,
    python's help says:

    walk(top, func, arg)
        Directory tree walk with callback function.

        For each directory in the directory tree rooted at top (including
        top itself, but excluding '.' and '..'), call func(arg, dirname,
        fnames). dirname is the name of the directory, and fnames a list
        of the names of the files and subdirectories in dirname (excluding
        '.' and '..').  func may modify the fnames list in-place (e.g. via
        del or slice assignment), and walk will only recurse into the
        subdirectories whose names remain in fnames; this can be used to
        implement a filter, or to impose a specific order of visiting.  No
        semantics are defined for, or required of, arg, beyond that arg is
        always passed to func.  It can be used, e.g., to pass a filename
        pattern, or a mutable object designed to accumulate statistics.
        Passing None for arg is common.
    '''
    if self._first:
        # Remember the local tree root so relative paths can be computed
        # on subsequent callbacks
        self._first = False
        self._start_path = directory

    relative_dir = directory.replace(self._start_path, '')
    if relative_dir and not relative_dir.endswith('/'):
        relative_dir += '/'

    remote_root = self._remote_url_path
    remote_root_with_local_path = remote_root.url_join(relative_dir)

    for fname in flist:
        if os.path.isfile(directory + os.path.sep + fname):
            url = remote_root_with_local_path.url_join(fname)
            response = self._uri_opener.GET(url, cache=True)
            if not is_404(response):
                if response.is_text_or_html():
                    for fr in self._create_fuzzable_requests(response):
                        self.output_queue.put(fr)
                # Compare the remote content with the local file
                self._check_content(
                    response, directory + os.path.sep + fname)
                self._exist_remote.append(url)
            else:
                self._not_exist_remote.append(url)
def discover(self, fuzzable_request):
    '''
    Identify server software using favicon.

    :param fuzzable_request: A fuzzable_request instance that contains
                             (among other things) the URL to test.
    '''
    domain_path = fuzzable_request.get_url().get_domain_path()

    # TODO: Maybe I should also parse the html to extract the favicon location?
    favicon_url = domain_path.url_join('favicon.ico')
    response = self._uri_opener.GET(favicon_url, cache=True)

    if is_404(response):
        return

    remote_fav_md5 = hashlib.md5(response.get_body()).hexdigest()

    # check if MD5 is matched in database/list
    for md5part, favicon_desc in self._read_favicon_db():
        if md5part == remote_fav_md5:
            desc = 'Favicon.ico file was identified as "%s".' % favicon_desc
            info_inst = Info('Favicon identification', desc, response.id,
                             self.get_name())
            info_inst.set_url(favicon_url)

            kb.kb.append(self, 'info', info_inst)
            om.out.information(info_inst.get_desc())
            break
    else:
        #
        # Report to the kb that we failed to ID this favicon.ico
        # and that the md5 should be sent to the developers.
        #
        desc = ('Favicon identification failed. If the remote site is'
                ' using framework that is being exposed by its favicon,'
                ' please send an email to [email protected]'
                ' including this md5 hash "%s" and the'
                ' name of the server or Web application it represents.'
                ' New fingerprints make this plugin more powerful and '
                ' accurate.') % remote_fav_md5

        info_inst = Info('Favicon identification failed', desc,
                         response.id, self.get_name())
        info_inst.set_url(favicon_url)

        kb.kb.append(self, 'info', info_inst)
        om.out.information(info_inst.get_desc())
def http_get_and_parse(self, url):
    '''
    Perform an HTTP GET to url, extract URLs from the HTTP response and put
    them into the plugin's output queue.

    :return: The http response that was parsed
    '''
    try:
        http_response = self._uri_opener.GET(url, cache=True)
    except w3afException:
        # Best effort: a failed GET simply yields no new requests
        return None

    if not is_404(http_response):
        for fuzzable_req in self._create_fuzzable_requests(http_response):
            self.output_queue.put(fuzzable_req)

    return http_response
def crawl(self, fuzzable_request):
    '''
    Find users in a WordPress installation
    :param fuzzable_request: A fuzzable_request instance that contains
                             (among other things) the URL to test.
    '''
    if not self._exec:
        raise w3afRunOnce()

    # Check if there is a wordpress installation in this directory
    domain_path = fuzzable_request.get_url().get_domain_path()
    wp_login_url = domain_path.url_join('wp-login.php')
    wp_login_response = self._uri_opener.GET(wp_login_url, cache=True)

    # If wp-login.php is not a 404 we assume wordpress is installed
    if not is_404(wp_login_response):
        self._enum_users(fuzzable_request)
def grep(self, request, response):
    '''
    Plugin entry point, search for the code disclosures.

    Unit tests are available at plugins/grep/tests.

    :param request: The HTTP request object.
    :param response: The HTTP response object
    :return: None
    '''
    if response.is_text_or_html() and \
       response.get_url() not in self._already_added:
        match, lang = is_source_file(response.get_body())

        if match:
            # Check also for 404
            if not is_404(response):
                desc = 'The URL: "%s" has a %s code disclosure vulnerability.'
                desc = desc % (response.get_url(), lang)

                v = Vuln('Code disclosure vulnerability', desc,
                         severity.LOW, response.id, self.get_name())
                v.set_url(response.get_url())
                v.add_to_highlight(match.group())

                self.kb_append_uniq(self, 'code_disclosure', v, 'URL')
                self._already_added.add(response.get_url())
            else:
                self._first_404 = False

                desc = 'The URL: "%s" has a %s code disclosure'\
                       ' vulnerability in the customized 404 script.'
                # BUGFIX: the original interpolated v.get_url() here,
                # before the Vuln instance "v" existed, which raised a
                # NameError; use the response URL instead.
                desc = desc % (response.get_url(), lang)

                v = Vuln('Code disclosure vulnerability in 404 page', desc,
                         severity.LOW, response.id, self.get_name())
                v.set_url(response.get_url())
                v.add_to_highlight(match.group())

                self.kb_append_uniq(self, 'code_disclosure', v, 'URL')
def _check_existance(self, original_response, mutant):
    '''
    Actually check if the mutated URL exists.

    :return: None, all important data is put() to self.output_queue
    '''
    response = self._uri_opener.send_mutant(mutant)

    # Guard clauses, same short-circuit order as before
    if is_404(response):
        return

    if not relative_distance_lt(original_response.body,
                                response.body, 0.85):
        return

    # Verify against something random
    rand = rand_alpha()
    rand_mutant = mutant.copy()
    rand_mutant.set_mod_value(rand)
    rand_response = self._uri_opener.send_mutant(rand_mutant)

    if relative_distance_lt(response.body, rand_response.body, 0.85):
        for fuzzable_req in self._create_fuzzable_requests(response):
            self.output_queue.put(fuzzable_req)
def discover(self, fuzzable_request):
    '''
    Get the server-status and parse it.

    :param fuzzable_request: A fuzzable_request instance that contains
                             (among other things) the URL to test.
    '''
    base_url = fuzzable_request.get_url().base_url()
    server_status_url = base_url.url_join('server-status')
    response = self._uri_opener.GET(server_status_url, cache=True)

    # Guard clause: nothing to do for 404s or 400-403 responses
    # (De Morgan of the original combined condition)
    if is_404(response) or response.get_code() in range(400, 404):
        return

    if 'apache' in response.get_body().lower():
        msg = 'Apache server-status module is enabled and accessible.'
        msg += ' The URL is: "%s"' % response.get_url()
        om.out.information(msg)

    self._extract_server_version(fuzzable_request, response)

    return self._extract_urls(fuzzable_request, response)
def crawl(self, fuzzable_request):
    '''
    :param fuzzable_request: A fuzzable_request instance that contains
                             (among other things) the URL to test.
    '''
    if not self._exec:
        raise w3afRunOnce()

    # Check if there is a wordpress installation in this directory:
    # wp-login.php is unique to wordpress, so a non-404 answer means
    # wordpress is present.
    domain_path = fuzzable_request.get_url().get_domain_path()
    login_probe_url = domain_path.url_join('wp-login.php')
    response = self._uri_opener.GET(login_probe_url, cache=True)

    if is_404(response):
        return

    # Only run once
    self._exec = False

    extracted_paths = self._extract_paths(domain_path)
    self._force_disclosures(domain_path,
                            self.CHECK_PATHS + extracted_paths)
def grep(self, request, response):
    '''
    Get the page indicated by the fuzzable_request and determine the
    language using the preposition list.

    :param request: The HTTP request object.
    :param response: The HTTP response object
    '''
    with self._plugin_lock:
        # Same short-circuit order as before: plugin still enabled,
        # textual response, and not a 404.
        if not self._exec:
            return
        if not response.is_text_or_html():
            return
        if is_404(response):
            return

        body = response.get_clear_text_body().lower()
        guessed_lang = guess_language.guessLanguage(body)

        if guessed_lang != 'UNKNOWN':
            # Only run until we find the page language
            self._exec = False

            msg = 'The page is written in: "%s".'
            om.out.information(msg % guessed_lang)
            kb.kb.raw_write(self, 'lang', guessed_lang)
            return

        # None means "I'm still trying"
        kb.kb.raw_write(self, 'lang', None)

        # Keep running until self._tries_left is zero
        self._tries_left -= 1

        if self._tries_left == 0:
            msg = 'Could not determine the site language using the'\
                  ' first 25 HTTP responses, not enough text to make'\
                  ' a good analysis.'
            om.out.debug(msg)

            # unknown means I'll stop testing because I don't
            # have any idea about the target's language
            kb.kb.raw_write(self, 'lang', 'unknown')
            self._exec = False
def _enum_users(self, fuzzable_request):
    # Only run once
    self._exec = False

    # First user ID, will be incremented until 404
    uid = 0

    # Save the last title for non-redirection scenario
    self._title_cache = ''

    # Tolerance for user ID gaps in the sequence (these gaps appear
    # when users are deleted and new users created)
    gap_tolerance = 10
    gap = 0

    domain_path = fuzzable_request.get_url().get_domain_path()

    # Loop over author ids; every hit resets the gap counter, every
    # miss widens it until the tolerance is exceeded.
    while gap <= gap_tolerance:
        uid += 1
        gap += 1

        domain_path.querystring = {u'author': u'%s' % uid}
        response_author = self._uri_opener.GET(domain_path, cache=True)

        if is_404(response_author):
            continue

        if response_author.was_redirected():
            if self._extract_from_redir(response_author):
                gap = 0
                continue

        if self._extract_from_body(response_author):
            gap = 0
def crawl(self, fuzzable_request):
    '''
    GET some files and parse them.

    :param fuzzable_request: A fuzzable_request instance that contains
                             (among other things) the URL to test.
    '''
    base_url = fuzzable_request.get_url().base_url()

    for url, re_obj in self.ORACLE_DATA:
        oracle_discovery_URL = base_url.url_join(url)
        response = self._uri_opener.GET(oracle_discovery_URL, cache=True)

        if is_404(response):
            continue

        # Extract the links and send to core
        for fr in self._create_fuzzable_requests(response):
            self.output_queue.put(fr)

        # BUGFIX: the original called re_obj.search(body, re.DOTALL).
        # On a compiled pattern the second positional argument of
        # search() is the *start position*, not a flags value, so
        # re.DOTALL (== 16) silently skipped the first 16 bytes of the
        # body. Flags must be supplied at compile time, so we go
        # through the module-level re.search() with the pattern source
        # and merge DOTALL into the pattern's existing flags.
        # pylint: disable=E1101
        # E1101: Instance of 'str' has no 'search' member
        mo = re.search(re_obj.pattern, response.get_body(),
                       re_obj.flags | re.DOTALL)

        if mo:
            desc = '"%s" version "%s" was detected at "%s".'
            desc = desc % (mo.group(1).title(),
                           mo.group(2).title(),
                           response.get_url())
            i = Info('Oracle Application Server', desc,
                     response.id, self.get_name())
            i.set_url(response.get_url())
            kb.kb.append(self, 'oracle_discovery', i)
            om.out.information(i.get_desc())
        else:
            msg = 'oracle_discovery found the URL: "%s" but failed to'\
                  ' parse it as an Oracle page. The first 50 bytes of'\
                  ' the response body is: "%s".'
            body_start = response.get_body()[:50]
            om.out.debug(msg % (response.get_url(), body_start))
def grep(self, request, response):
    '''
    Plugin entry point, search for motw.

    :param request: The HTTP request object.
    :param response: The HTTP response object
    :return: None
    '''
    if not response.is_text_or_html() or is_404(response):
        return

    motw_match = self._motw_re.search(response.get_body())
    if motw_match is None:
        return

    # This int() can't fail because the regex validated the data before
    declared_length = int(motw_match.group(1))
    actual_length = len(motw_match.group(2))

    if declared_length <= actual_length:
        desc = 'The URL: "%s" contains a valid mark of the web.'
        desc = desc % response.get_url()
        i = self.create_info(desc, response, motw_match)
    else:
        desc = 'The URL: "%s" will be executed in Local Machine'\
               ' Zone security context because the indicated length'\
               ' is greater than the actual URL length.'
        desc = desc % response.get_url()
        i = self.create_info(desc, response, motw_match)
        i['local_machine'] = True

    kb.kb.append(self, 'motw', i)
def crawl(self, fuzzable_request):
    '''
    Get the sitemap.xml file and parse it.

    :param fuzzable_request: A fuzzable_request instance that contains
                             (among other things) the URL to test.
    :return: None, discovered requests are put() to self.output_queue.
    '''
    base_url = fuzzable_request.get_url().base_url()
    sitemap_url = base_url.url_join('sitemap.xml')
    response = self._uri_opener.GET(sitemap_url, cache=True)

    # Remember that HTTPResponse objects have a faster "__in__" than
    # the one in strings; so string in response.get_body() is slower than
    # string in response
    if '</urlset>' in response and not is_404(response):
        om.out.debug('Analyzing sitemap.xml file.')

        for fr in self._create_fuzzable_requests(response):
            self.output_queue.put(fr)

        om.out.debug('Parsing xml file with xml.dom.minidom.')
        try:
            dom = xml.dom.minidom.parseString(response.get_body())
        except:
            # Any parse failure (malformed XML, encoding issues) is
            # surfaced as a framework exception.
            raise w3afException('Error while parsing sitemap.xml')
        else:
            raw_url_list = dom.getElementsByTagName("loc")
            # NOTE(review): parsed_url_list is initialized here but
            # nothing is ever appended to it in the visible code, and
            # successfully parsed URL objects are discarded at the end
            # of each iteration. This method looks truncated -- TODO
            # confirm against upstream whether valid URLs should be
            # collected and requested.
            parsed_url_list = []
            for url in raw_url_list:
                try:
                    # <loc> text node -> URL object; invalid entries
                    # are logged and skipped.
                    url = url.childNodes[0].data
                    url = URL(url)
                except ValueError, ve:
                    om.out.debug(
                        'Sitemap file had an invalid URL: "%s"' % ve)
                except:
                    om.out.debug('Sitemap file had an invalid format')
def _exists_in_target(self, url):
    '''
    Check if a resource still exists in the target web site.

    :param url: The resource to verify.
    :return: None, the result is stored in self.output_queue
    '''
    # Each URL is only verified once per scan.
    if url in self._already_verified:
        return

    self._already_verified.add(url)

    response = self._uri_opener.GET(url, cache=True)

    if is_404(response):
        msg = 'The URL: "' + url + '" was found at archive.org and was'
        msg += ' DELETED from the target site.'
        om.out.debug(msg)
    else:
        msg = 'The URL: "' + url + '" was found at archive.org and is'
        msg += ' STILL AVAILABLE in the target site.'
        om.out.debug(msg)

        for fr in self._create_fuzzable_requests(response):
            self.output_queue.put(fr)