def __init__(self, crawl_infrastructure_plugins, w3af_core,
             max_discovery_time):
    """
    Build the consumer that runs the crawl and infrastructure plugins.

    :param crawl_infrastructure_plugins: List with the CrawlInfrastructure
                                         plugin instances to run
    :param w3af_core: The w3af core, used for status reporting
    :param max_discovery_time: Upper bound (in seconds) for the discovery
                               phase; coerced with int()
    """
    super(CrawlInfrastructure, self).__init__(
        crawl_infrastructure_plugins,
        w3af_core,
        thread_name=self.get_name(),
        max_pool_queued_tasks=100)

    self._max_discovery_time = int(max_discovery_time)

    # Plain state flags / containers
    self._running = True
    self._report_max_time = True
    self._disabled_plugins = set()

    # Used to filter the fuzzable requests found by the plugins
    self._variant_db = VariantDB()
    self._reported_found_urls = ScalableBloomFilter()

    # Replace the in_queue inherited from BaseConsumer with an ordered
    # queue for the crawling process. OrderedCachedQueue's documentation
    # explains why the order matters.
    ordered_queue_name = self.get_name() + 'In'
    self.in_queue = OrderedCachedQueue(maxsize=10, name=ordered_queue_name)
def __init__(self):
    """
    Initialise the spider's internal state and the user configurable
    options, then compile the follow / ignore regular expressions.
    """
    CrawlPlugin.__init__(self)

    # Internal variables
    self._compiled_ignore_re = None
    self._compiled_follow_re = None
    self._broken_links = DiskSet()
    self._first_run = True
    self._known_variants = VariantDB()
    self._already_filled_form = ScalableBloomFilter()

    # Target information is filled in during the first crawl() call.
    # Safe defaults are set here because the first-run handler returns
    # early when the configured targets list is empty, and the link
    # filtering code reads self._target_domain afterwards; without the
    # defaults that read raises AttributeError.
    self._target_urls = []
    self._target_domain = None

    # User configured variables
    self._ignore_regex = ''
    self._follow_regex = '.*'
    self._only_forward = False

    # Must run after the regex strings above have been assigned
    self._compile_re()
def __init__(self):
    """
    Set the default values for the user options and the internal state,
    then compile the follow / ignore regular expressions.
    """
    CrawlPlugin.__init__(self)

    # Options the user can configure
    self._only_forward = False
    self._ignore_extensions = []
    self._follow_regex = '.*'
    self._ignore_regex = ''

    # Internal state: plain values first ...
    self._first_run = True
    self._target_domain = None
    self._target_urls = []
    self._compiled_ignore_re = self._compiled_follow_re = None

    # ... then the storage / filter helpers
    self._broken_links = DiskSet(table_prefix='web_spider')
    self._already_filled_form = ScalableBloomFilter()
    self._variant_db = VariantDB()

    # Called last, once the regex strings have been assigned
    self._compile_re()
def __init__(self, crawl_infrastructure_plugins, w3af_core,
             max_discovery_time):
    """
    Build the consumer that runs the crawl_infrastructure plugins.

    :param crawl_infrastructure_plugins: List with the crawl_infrastructure
                                         plugin instances to run
    :param w3af_core: The w3af core, used for status reporting
    :param max_discovery_time: Upper bound (in seconds) for the discovery
                               phase; coerced with int()
    """
    super(crawl_infrastructure, self).__init__(
        crawl_infrastructure_plugins,
        w3af_core,
        thread_name='CrawlInfra')

    self._max_discovery_time = int(max_discovery_time)

    # Plain state flags / containers
    self._report_max_time = True
    self._running = True
    self._disabled_plugins = set()

    # Used to filter the fuzzable requests found by the plugins
    self._variant_db = VariantDB()
    self._already_seen_urls = ScalableBloomFilter()
def setUp(self):
    """
    Test fixture: call create_temp_dir() and store a new VariantDB
    instance in self.vdb.
    """
    # NOTE(review): presumably VariantDB needs the temp dir to exist before
    # it is instantiated -- confirm before reordering these two calls.
    create_temp_dir()
    self.vdb = VariantDB()
class TestVariantDB(unittest.TestCase):
    """
    Tests for VariantDB.append() limits and for the strings generated by
    clean_fuzzable_request().
    """

    def setUp(self):
        create_temp_dir()
        self.vdb = VariantDB()

    #
    # Assertion helpers (not collected by unittest: no "test" prefix)
    #
    def _assert_stored(self, url_string):
        """Assert that the variant db accepts a request for url_string."""
        self.assertTrue(self.vdb.append(fr(URL(url_string))))

    def _assert_discarded(self, url_string):
        """Assert that the variant db rejects a request for url_string."""
        self.assertFalse(self.vdb.append(fr(URL(url_string))))

    def _assert_path_limit(self, url_fmt):
        """
        Assert that PATH_MAX_VARIANTS URLs built from url_fmt are accepted
        and that the next one is rejected.
        """
        for index in xrange(PATH_MAX_VARIANTS):
            self._assert_stored(url_fmt % index)

        self._assert_discarded(url_fmt % (PATH_MAX_VARIANTS + 1))

    def _assert_clean(self, url_string, expected):
        """Assert the clean_fuzzable_request() output for url_string."""
        cleaned = clean_fuzzable_request(fr(URL(url_string)))
        self.assertEqual(cleaned, expected)

    #
    # Query string parameter variants
    #
    def test_db_int(self):
        url_fmt = 'http://w3af.org/foo.htm?id=%s'

        for number in xrange(PARAMS_MAX_VARIANTS):
            self._assert_stored(url_fmt % number)

        self._assert_discarded(url_fmt % (PARAMS_MAX_VARIANTS + 1))

    def test_db_int_int(self):
        url_fmt = 'http://w3af.org/foo.htm?id=%s&bar=1'

        for number in xrange(PARAMS_MAX_VARIANTS):
            self._assert_stored(url_fmt % number)

        self._assert_discarded(url_fmt % (PARAMS_MAX_VARIANTS + 1))

    def test_db_int_int_var(self):
        url_fmt = 'http://w3af.org/foo.htm?id=%s&bar=%s'
        overflow = PARAMS_MAX_VARIANTS + 1

        for number in xrange(PARAMS_MAX_VARIANTS):
            self._assert_stored(url_fmt % (number, number))

        self._assert_discarded(url_fmt % (overflow, overflow))

    def test_db_int_str(self):
        url_fmt = 'http://w3af.org/foo.htm?id=%s&bar=%s'
        overflow = PARAMS_MAX_VARIANTS + 1

        for number in xrange(PARAMS_MAX_VARIANTS):
            self._assert_stored(url_fmt % (number, 'abc' * number))

        self._assert_discarded(url_fmt % (overflow, 'abc' * overflow))

    def test_db_int_str_then_int_int(self):
        url_fmt = 'http://w3af.org/foo.htm?id=%s&bar=%s'
        overflow = PARAMS_MAX_VARIANTS + 1

        # Add (int, str)
        for number in xrange(PARAMS_MAX_VARIANTS):
            self._assert_stored(url_fmt % (number, 'abc' * number))

        # Add (int, int)
        for number in xrange(PARAMS_MAX_VARIANTS):
            self._assert_stored(url_fmt % (number, number))

        self._assert_discarded(url_fmt % (overflow, overflow))
        self._assert_discarded(url_fmt % (overflow, 'spameggs'))

    #
    # clean_fuzzable_request() output
    #
    def test_clean_fuzzable_request_simple(self):
        self._assert_clean('http://w3af.org/',
                           u'(GET)-http://w3af.org/')

    def test_clean_fuzzable_request_file(self):
        self._assert_clean('http://w3af.org/index.php',
                           u'(GET)-http://w3af.org/%s.php' % FILENAME_TOKEN)

    def test_clean_fuzzable_request_directory_file(self):
        self._assert_clean(
            'http://w3af.org/foo/index.php',
            u'(GET)-http://w3af.org/foo/%s.php' % FILENAME_TOKEN)

    def test_clean_fuzzable_request_directory_file_int(self):
        self._assert_clean('http://w3af.org/foo/index.php?id=2',
                           u'(GET)-http://w3af.org/foo/index.php?id=number')

    def test_clean_fuzzable_request_int(self):
        self._assert_clean('http://w3af.org/index.php?id=2',
                           u'(GET)-http://w3af.org/index.php?id=number')

    def test_clean_fuzzable_request_int_str(self):
        self._assert_clean(
            'http://w3af.org/index.php?id=2&foo=bar',
            u'(GET)-http://w3af.org/index.php?id=number&foo=string')

    def test_clean_fuzzable_request_int_str_empty(self):
        self._assert_clean(
            'http://w3af.org/index.php?id=2&foo=bar&spam=',
            u'(GET)-http://w3af.org/index.php?id=number&foo=string&spam=string')

    def test_clean_fuzzable_request_directory_file_no_params(self):
        self._assert_clean(
            'http://w3af.org/foo/index.php',
            u'(GET)-http://w3af.org/foo/%s.php' % FILENAME_TOKEN)

    def test_clean_fuzzable_request_directory(self):
        self._assert_clean('http://w3af.org/foo/',
                           u'(GET)-http://w3af.org/%s/' % PATH_TOKEN)

    def test_clean_fuzzable_request_directory_parent_path(self):
        self._assert_clean('http://w3af.org/spam/foo/',
                           u'(GET)-http://w3af.org/spam/%s/' % PATH_TOKEN)

    def test_clean_form_fuzzable_request(self):
        target = URL("http://www.w3af.com/")
        host_headers = Headers([('Host', 'www.w3af.com')])
        body = KeyValueContainer(init_val=[('data', ['23'])])

        post_request = FuzzableRequest(target,
                                       headers=host_headers,
                                       method='POST',
                                       post_data=body)

        expected = u'(POST)-http://www.w3af.com/!data=number'
        self.assertEqual(clean_fuzzable_request(post_request), expected)

    def test_clean_form_fuzzable_request_form(self):
        form_params = FormParameters()

        for field_name, field_value in (("username", "abc"),
                                        ("address", "")):
            form_params.add_field_by_attr_items([("name", field_name),
                                                 ("value", field_value)])

        form_params.set_action(URL('http://example.com/?id=1'))
        form_params.set_method('post')

        form_request = FuzzableRequest.from_form(
            dc_from_form_params(form_params))

        expected = (u'(POST)-http://example.com/'
                    u'?id=number!username=string&address=string')
        self.assertEqual(clean_fuzzable_request(form_request), expected)

    #
    # Path / filename variants
    #
    def test_db_many_files_in_root(self):
        self._assert_path_limit('http://w3af.org/foo%s.htm')

    def test_db_many_files_in_root_without_extension(self):
        self._assert_path_limit('http://w3af.org/foo%s')

    def test_db_many_files_different_extensions_in_root(self):
        self._assert_path_limit('http://w3af.org/foo%s.htm')

        # Now a different extension
        self._assert_path_limit('http://w3af.org/foo%s.jpeg')

    def test_db_many_paths_in_root(self):
        self._assert_path_limit('http://w3af.org/foo%s/')

    def test_db_many_paths_in_other_directories(self):
        self._assert_path_limit('http://w3af.org/foo/bar%s/')

        # Now a different parent directory
        self._assert_path_limit('http://w3af.org/spam/bar%s/')

    def test_db_many_files_other_directories(self):
        self._assert_path_limit('http://w3af.org/spam/foo%s.htm')

        # Now a different parent path and the same extension
        self._assert_path_limit('http://w3af.org/eggs/foo%s.htm')

    def test_db_many_files_different_path_length_directories(self):
        self._assert_path_limit('http://w3af.org/spam/foo%s.htm')

        # Now a different parent path and the same extension
        #
        # Note the /bar/ here! This is what makes this test different
        self._assert_path_limit('http://w3af.org/eggs/bar/foo%s.htm')

    #
    # Exact duplicates
    #
    def test_db_same_without_qs(self):
        same_url = URL('http://w3af.org/spam/foo.htm')

        self.assertTrue(self.vdb.append(fr(same_url)))
        self.assertFalse(self.vdb.append(fr(same_url)))

    def test_db_same_with_qs(self):
        same_url = URL('http://w3af.org/spam/foo.htm?id=2&abc=333')

        self.assertTrue(self.vdb.append(fr(same_url)))
        self.assertFalse(self.vdb.append(fr(same_url)))
class TestVariantDB(unittest.TestCase):
    """
    Tests for VariantDB.need_more_variants() / append() and for the strings
    generated by VariantDB._clean_reference().
    """

    def setUp(self):
        create_temp_dir()
        self.vdb = VariantDB()

    #
    # Assertion helpers (not collected by unittest: no "test" prefix)
    #
    def _add_variant(self, url_string):
        """Assert that url_string is still needed, then store it."""
        variant = URL(url_string)
        self.assertTrue(self.vdb.need_more_variants(variant))
        self.vdb.append(variant)

    def _assert_saturated(self, url_string):
        """Assert that no more variants like url_string are needed."""
        self.assertFalse(self.vdb.need_more_variants(URL(url_string)))

    def _assert_reference(self, url_string, expected):
        """Assert the _clean_reference() output for url_string."""
        self.assertEqual(self.vdb._clean_reference(URL(url_string)),
                         expected)

    #
    # Variant limits
    #
    def test_db_int(self):
        url_fmt = 'http://w3af.org/foo.htm?id=%s'

        for number in xrange(DEFAULT_MAX_VARIANTS):
            self._add_variant(url_fmt % number)

        self._assert_saturated(url_fmt % (DEFAULT_MAX_VARIANTS + 1))

    def test_db_int_int(self):
        url_fmt = 'http://w3af.org/foo.htm?id=%s&bar=1'

        for number in xrange(DEFAULT_MAX_VARIANTS):
            self._add_variant(url_fmt % number)

        self._assert_saturated(url_fmt % (DEFAULT_MAX_VARIANTS + 1))

    def test_db_int_int_var(self):
        url_fmt = 'http://w3af.org/foo.htm?id=%s&bar=%s'
        overflow = DEFAULT_MAX_VARIANTS + 1

        for number in xrange(DEFAULT_MAX_VARIANTS):
            self._add_variant(url_fmt % (number, number))

        self._assert_saturated(url_fmt % (overflow, overflow))

    def test_db_int_str(self):
        url_fmt = 'http://w3af.org/foo.htm?id=%s&bar=%s'
        overflow = DEFAULT_MAX_VARIANTS + 1

        for number in xrange(DEFAULT_MAX_VARIANTS):
            self._add_variant(url_fmt % (number, 'abc' * number))

        self._assert_saturated(url_fmt % (overflow, 'abc' * overflow))

    def test_db_int_str_then_int_int(self):
        url_fmt = 'http://w3af.org/foo.htm?id=%s&bar=%s'
        overflow = DEFAULT_MAX_VARIANTS + 1

        # Add (int, str)
        for number in xrange(DEFAULT_MAX_VARIANTS):
            self._add_variant(url_fmt % (number, 'abc' * number))

        # Please note that in this case I'm asking for (int, int) and I
        # added (int, str) before
        int_int_url = URL(url_fmt % (overflow, overflow))
        self.assertTrue(self.vdb.need_more_variants(int_int_url))

        # Add (int, int)
        for number in xrange(DEFAULT_MAX_VARIANTS):
            self._add_variant(url_fmt % (number, number))

        self._assert_saturated(url_fmt % (overflow, overflow))

    #
    # _clean_reference() output
    #
    def test_clean_reference_simple(self):
        self._assert_reference('http://w3af.org/',
                               u'http://w3af.org/')

    def test_clean_reference_file(self):
        self._assert_reference('http://w3af.org/index.php',
                               u'http://w3af.org/index.php')

    def test_clean_reference_directory_file(self):
        self._assert_reference('http://w3af.org/foo/index.php',
                               u'http://w3af.org/foo/index.php')

    def test_clean_reference_directory_file_int(self):
        self._assert_reference('http://w3af.org/foo/index.php?id=2',
                               u'http://w3af.org/foo/index.php?id=number')

    def test_clean_reference_int(self):
        self._assert_reference('http://w3af.org/index.php?id=2',
                               u'http://w3af.org/index.php?id=number')

    def test_clean_reference_int_str(self):
        self._assert_reference(
            'http://w3af.org/index.php?id=2&foo=bar',
            u'http://w3af.org/index.php?id=number&foo=string')

    def test_clean_reference_int_str_empty(self):
        self._assert_reference(
            'http://w3af.org/index.php?id=2&foo=bar&spam=',
            u'http://w3af.org/index.php?id=number&foo=string&spam=string')
def setUp(self):
    """
    Test fixture: reset MiscSettings to its default values, call
    create_temp_dir() and store a new VariantDB instance in self.vdb.
    """
    # NOTE(review): presumably the defaults are restored so settings changed
    # by another test can not leak into this one -- confirm.
    MiscSettings().set_default_values()
    create_temp_dir()
    self.vdb = VariantDB()
class TestVariantDB(unittest.TestCase):
    """
    Tests for VariantDB.need_more_variants() / append() and for the strings
    generated by VariantDB._clean_reference() and
    VariantDB._clean_fuzzable_request().
    """

    def setUp(self):
        create_temp_dir()
        self.vdb = VariantDB()

    #
    # Assertion helpers (not collected by unittest: no "test" prefix)
    #
    def _add_variant(self, url_string):
        """Assert that url_string is still needed, then store it."""
        variant = URL(url_string)
        self.assertTrue(self.vdb.need_more_variants(variant))
        self.vdb.append(variant)

    def _assert_saturated(self, url_string):
        """Assert that no more variants like url_string are needed."""
        self.assertFalse(self.vdb.need_more_variants(URL(url_string)))

    def _assert_reference(self, url_string, expected):
        """Assert the _clean_reference() output for url_string."""
        self.assertEqual(self.vdb._clean_reference(URL(url_string)),
                         expected)

    #
    # Variant limits
    #
    def test_db_int(self):
        url_fmt = 'http://w3af.org/foo.htm?id=%s'

        for number in xrange(DEFAULT_MAX_VARIANTS):
            self._add_variant(url_fmt % number)

        self._assert_saturated(url_fmt % (DEFAULT_MAX_VARIANTS + 1))

    def test_db_int_int(self):
        url_fmt = 'http://w3af.org/foo.htm?id=%s&bar=1'

        for number in xrange(DEFAULT_MAX_VARIANTS):
            self._add_variant(url_fmt % number)

        self._assert_saturated(url_fmt % (DEFAULT_MAX_VARIANTS + 1))

    def test_db_int_int_var(self):
        url_fmt = 'http://w3af.org/foo.htm?id=%s&bar=%s'
        overflow = DEFAULT_MAX_VARIANTS + 1

        for number in xrange(DEFAULT_MAX_VARIANTS):
            self._add_variant(url_fmt % (number, number))

        self._assert_saturated(url_fmt % (overflow, overflow))

    def test_db_int_str(self):
        url_fmt = 'http://w3af.org/foo.htm?id=%s&bar=%s'
        overflow = DEFAULT_MAX_VARIANTS + 1

        for number in xrange(DEFAULT_MAX_VARIANTS):
            self._add_variant(url_fmt % (number, 'abc' * number))

        self._assert_saturated(url_fmt % (overflow, 'abc' * overflow))

    def test_db_int_str_then_int_int(self):
        url_fmt = 'http://w3af.org/foo.htm?id=%s&bar=%s'
        overflow = DEFAULT_MAX_VARIANTS + 1

        # Add (int, str)
        for number in xrange(DEFAULT_MAX_VARIANTS):
            self._add_variant(url_fmt % (number, 'abc' * number))

        # Please note that in this case I'm asking for (int, int) and I
        # added (int, str) before
        int_int_url = URL(url_fmt % (overflow, overflow))
        self.assertTrue(self.vdb.need_more_variants(int_int_url))

        # Add (int, int)
        for number in xrange(DEFAULT_MAX_VARIANTS):
            self._add_variant(url_fmt % (number, number))

        self._assert_saturated(url_fmt % (overflow, overflow))

    #
    # _clean_reference() output
    #
    def test_clean_reference_simple(self):
        self._assert_reference('http://w3af.org/',
                               u'(GET)-http://w3af.org/')

    def test_clean_reference_file(self):
        self._assert_reference('http://w3af.org/index.php',
                               u'(GET)-http://w3af.org/index.php')

    def test_clean_reference_directory_file(self):
        self._assert_reference('http://w3af.org/foo/index.php',
                               u'(GET)-http://w3af.org/foo/index.php')

    def test_clean_reference_directory_file_int(self):
        self._assert_reference(
            'http://w3af.org/foo/index.php?id=2',
            u'(GET)-http://w3af.org/foo/index.php?id=number')

    def test_clean_reference_int(self):
        self._assert_reference('http://w3af.org/index.php?id=2',
                               u'(GET)-http://w3af.org/index.php?id=number')

    def test_clean_reference_int_str(self):
        self._assert_reference(
            'http://w3af.org/index.php?id=2&foo=bar',
            u'(GET)-http://w3af.org/index.php?id=number&foo=string')

    def test_clean_reference_int_str_empty(self):
        self._assert_reference(
            'http://w3af.org/index.php?id=2&foo=bar&spam=',
            u'(GET)-http://w3af.org/index.php?id=number&foo=string&spam=string')

    #
    # _clean_fuzzable_request() output
    #
    def test_clean_form_fuzzable_request(self):
        target = URL("http://www.w3af.com/")
        host_headers = Headers([('Host', 'www.w3af.com')])
        body = KeyValueContainer(init_val=[('data', ['23'])])

        post_request = FuzzableRequest(target,
                                       headers=host_headers,
                                       method='POST',
                                       post_data=body)

        expected = u'(POST)-http://www.w3af.com/!data=number'
        self.assertEqual(self.vdb._clean_fuzzable_request(post_request),
                         expected)

    def test_clean_form_fuzzable_request_form(self):
        form_params = FormParameters()

        for field_name, field_value in (("username", "abc"),
                                        ("address", "")):
            form_params.add_input([("name", field_name),
                                   ("value", field_value)])

        form_params.set_action(URL('http://example.com/?id=1'))
        form_params.set_method('post')

        form_request = FuzzableRequest.from_form(
            dc_from_form_params(form_params))

        expected = u'(POST)-http://example.com/?id=number!username=string&address=string'
        self.assertEqual(self.vdb._clean_fuzzable_request(form_request),
                         expected)
class web_spider(CrawlPlugin):
    """
    Crawl the web application.

    :author: Andres Riancho ([email protected])
    """
    # NOTE(review): not referenced by the methods visible here; presumably
    # the response codes which must not be handled as a 404 -- confirm
    # against the code that uses it.
    NOT_404 = set([http_constants.UNAUTHORIZED,
                   http_constants.FORBIDDEN])

    def __init__(self):
        """
        Initialise the internal state and the user configurable options,
        then compile the follow / ignore regular expressions.
        """
        CrawlPlugin.__init__(self)

        # Internal variables
        self._compiled_ignore_re = None
        self._compiled_follow_re = None
        self._broken_links = DiskSet()
        self._first_run = True
        self._known_variants = VariantDB()
        self._already_filled_form = ScalableBloomFilter()

        # Filled in by _handle_first_run(). Defaults are required because
        # that method leaves the domain untouched when there are no
        # targets, and _urls_to_verify_generator() reads the attribute
        # afterwards (it used to raise AttributeError in that case).
        self._target_urls = []
        self._target_domain = None

        # User configured variables
        self._ignore_regex = ''
        self._follow_regex = '.*'
        self._only_forward = False

        self._compile_re()

    def crawl(self, fuzzable_req):
        """
        Searches for links on the html.

        :param fuzzable_req: A fuzzable_req instance that contains
                             (among other things) the URL to test.
        """
        self._handle_first_run()

        #
        # If it is a form, then smart_fill the parameters to send something
        # that makes sense and will allow us to cover more code.
        #
        if isinstance(fuzzable_req, HTTPPostDataRequest):
            if fuzzable_req.get_url() in self._already_filled_form:
                return

            fuzzable_req = self._fill_form(fuzzable_req)

        # Send the HTTP request
        resp = self._uri_opener.send_mutant(fuzzable_req)

        # Nothing to do here...
        if resp.get_code() == 401:
            return

        fuzz_req_list = self._create_fuzzable_requests(resp,
                                                       request=fuzzable_req,
                                                       add_self=False)

        for fr in fuzz_req_list:
            self.output_queue.put(fr)

        self._extract_links_and_verify(resp, fuzzable_req)

    def _handle_first_run(self):
        """
        On the first call only: store the information about the configured
        targets which is required by the "only_forward" feature and by the
        domain filter.
        """
        if not self._first_run:
            return

        self._first_run = False

        # The targets are read once and checked before use: the core does
        # "cf.cf.save('targets', [])" when the "stop" button is pressed,
        # and indexing the list blindly triggered lots of bugs.
        targets = cf.cf.get('targets') or []

        self._target_urls = [target.get_domain_path() for target in targets]

        if targets:
            self._target_domain = targets[0].get_domain()

    def _urls_to_verify_generator(self, resp, fuzzable_req):
        """
        Yield the references found in the response which should be
        requested by the crawler.

        :param resp: HTTP response object
        :param fuzzable_req: The HTTP request that generated the response
        :yield: (ref, fuzzable_req, original_url, possibly_broken) tuples,
                where possibly_broken is True when the reference was only
                found using regular expressions.
        """
        #
        # Note: I WANT to follow links that are in the 404 page.
        #
        # Only these content types are parsed: I don't want the SGML parser
        # to analyze an image file, it's useless and consumes CPU power.
        #
        parseable = (resp.is_text_or_html() or
                     resp.is_pdf() or
                     resp.is_swf())
        if not parseable:
            return

        original_url = resp.get_redir_uri()

        try:
            doc_parser = parser_cache.dpc.get_document_parser_for(resp)
        except BaseFrameworkException as w3:
            om.out.debug('Failed to find a suitable document parser. '
                         'Exception "%s"' % w3)
            return

        # Note:
        #   - With parsed_refs I'm 100% that it's really something in the
        #     HTML that the developer intended to add.
        #
        #   - The re_refs are the result of regular expressions, which in
        #     some cases are just false positives.
        parsed_refs, re_refs = doc_parser.get_references()

        # I also want to analyze all directories, if the URL I just
        # fetched is:
        #   http://localhost/a/b/c/f00.php
        # I want to GET:
        #   http://localhost/a/b/c/
        #   http://localhost/a/b/
        #   http://localhost/a/
        #   http://localhost/
        # And analyze the responses...
        dirs = resp.get_url().get_directories()
        only_re_refs = set(re_refs) - set(dirs + parsed_refs)

        all_refs = itertools.chain(dirs, parsed_refs, re_refs)

        for ref in unique_justseen(sorted(all_refs)):

            # Ignore myself
            if ref == resp.get_uri():
                continue

            # I don't want w3af sending requests to 3rd parties!
            if ref.get_domain() != self._target_domain:
                continue

            # Filter the URL's according to the configured regexs
            urlstr = ref.url_string
            if not self._compiled_follow_re.match(urlstr) or \
            self._compiled_ignore_re.match(urlstr):
                continue

            if self._only_forward and not self._is_forward(ref):
                continue

            # Skip the references for which enough variants are known
            if not self._need_more_variants(ref):
                continue

            self._known_variants.append(ref)

            # Broken links are reported for the parsed references only;
            # the ones which come just from the regular expressions are
            # flagged so the caller does NOT report them as broken.
            possibly_broken = ref in only_re_refs
            yield ref, fuzzable_req, original_url, possibly_broken
class TestVariantDB(unittest.TestCase):
    """
    Tests for VariantDB.need_more_variants() / append() and for the strings
    generated by VariantDB._clean_reference() and
    VariantDB._clean_fuzzable_request().
    """

    def setUp(self):
        create_temp_dir()
        self.vdb = VariantDB()

    #
    # Assertion helpers (not collected by unittest: no "test" prefix)
    #
    def _store_needed(self, url_string):
        """Assert that url_string is still needed, then store it."""
        needed_url = URL(url_string)
        self.assertTrue(self.vdb.need_more_variants(needed_url))
        self.vdb.append(needed_url)

    def _assert_not_needed(self, url_string):
        """Assert that no more variants like url_string are needed."""
        self.assertFalse(self.vdb.need_more_variants(URL(url_string)))

    def _check_clean_reference(self, url_string, expected):
        """Assert the _clean_reference() output for url_string."""
        self.assertEqual(self.vdb._clean_reference(URL(url_string)),
                         expected)

    #
    # Variant limits
    #
    def test_db_int(self):
        url_fmt = 'http://w3af.org/foo.htm?id=%s'

        for value in xrange(DEFAULT_MAX_VARIANTS):
            self._store_needed(url_fmt % value)

        self._assert_not_needed(url_fmt % (DEFAULT_MAX_VARIANTS + 1))

    def test_db_int_int(self):
        url_fmt = 'http://w3af.org/foo.htm?id=%s&bar=1'

        for value in xrange(DEFAULT_MAX_VARIANTS):
            self._store_needed(url_fmt % value)

        self._assert_not_needed(url_fmt % (DEFAULT_MAX_VARIANTS + 1))

    def test_db_int_int_var(self):
        url_fmt = 'http://w3af.org/foo.htm?id=%s&bar=%s'
        beyond_limit = DEFAULT_MAX_VARIANTS + 1

        for value in xrange(DEFAULT_MAX_VARIANTS):
            self._store_needed(url_fmt % (value, value))

        self._assert_not_needed(url_fmt % (beyond_limit, beyond_limit))

    def test_db_int_str(self):
        url_fmt = 'http://w3af.org/foo.htm?id=%s&bar=%s'
        beyond_limit = DEFAULT_MAX_VARIANTS + 1

        for value in xrange(DEFAULT_MAX_VARIANTS):
            self._store_needed(url_fmt % (value, 'abc' * value))

        self._assert_not_needed(
            url_fmt % (beyond_limit, 'abc' * beyond_limit))

    def test_db_int_str_then_int_int(self):
        url_fmt = 'http://w3af.org/foo.htm?id=%s&bar=%s'
        beyond_limit = DEFAULT_MAX_VARIANTS + 1

        # Add (int, str)
        for value in xrange(DEFAULT_MAX_VARIANTS):
            self._store_needed(url_fmt % (value, 'abc' * value))

        # Please note that in this case I'm asking for (int, int) and I
        # added (int, str) before
        int_int_url = URL(url_fmt % (beyond_limit, beyond_limit))
        self.assertTrue(self.vdb.need_more_variants(int_int_url))

        # Add (int, int)
        for value in xrange(DEFAULT_MAX_VARIANTS):
            self._store_needed(url_fmt % (value, value))

        self._assert_not_needed(url_fmt % (beyond_limit, beyond_limit))

    #
    # _clean_reference() output
    #
    def test_clean_reference_simple(self):
        self._check_clean_reference('http://w3af.org/',
                                    u'(GET)-http://w3af.org/')

    def test_clean_reference_file(self):
        self._check_clean_reference('http://w3af.org/index.php',
                                    u'(GET)-http://w3af.org/index.php')

    def test_clean_reference_directory_file(self):
        self._check_clean_reference(
            'http://w3af.org/foo/index.php',
            u'(GET)-http://w3af.org/foo/index.php')

    def test_clean_reference_directory_file_int(self):
        self._check_clean_reference(
            'http://w3af.org/foo/index.php?id=2',
            u'(GET)-http://w3af.org/foo/index.php?id=number')

    def test_clean_reference_int(self):
        self._check_clean_reference(
            'http://w3af.org/index.php?id=2',
            u'(GET)-http://w3af.org/index.php?id=number')

    def test_clean_reference_int_str(self):
        self._check_clean_reference(
            'http://w3af.org/index.php?id=2&foo=bar',
            u'(GET)-http://w3af.org/index.php?id=number&foo=string')

    def test_clean_reference_int_str_empty(self):
        self._check_clean_reference(
            'http://w3af.org/index.php?id=2&foo=bar&spam=',
            u'(GET)-http://w3af.org/index.php?id=number&foo=string&spam=string')

    #
    # _clean_fuzzable_request() output
    #
    def test_clean_form_fuzzable_request(self):
        target = URL("http://www.w3af.com/")
        host_headers = Headers([('Host', 'www.w3af.com')])
        body = KeyValueContainer(init_val=[('data', ['23'])])

        post_request = FuzzableRequest(target,
                                       headers=host_headers,
                                       method='POST',
                                       post_data=body)

        expected = u'(POST)-http://www.w3af.com/!data=number'
        self.assertEqual(self.vdb._clean_fuzzable_request(post_request),
                         expected)

    def test_clean_form_fuzzable_request_form(self):
        form_params = FormParameters()

        for field_name, field_value in (("username", "abc"),
                                        ("address", "")):
            form_params.add_input([("name", field_name),
                                   ("value", field_value)])

        form_params.set_action(URL('http://example.com/?id=1'))
        form_params.set_method('post')

        form_request = FuzzableRequest.from_form(
            dc_from_form_params(form_params))

        expected = u'(POST)-http://example.com/?id=number!username=string&address=string'
        self.assertEqual(self.vdb._clean_fuzzable_request(form_request),
                         expected)
class TestVariantDB(unittest.TestCase):
    """
    Exercise VariantDB.append() against the configured variant limits and
    check the strings generated by clean_fuzzable_request().

    Test methods are described with comments instead of docstrings so that
    test runners keep identifying them by method name.
    """

    def setUp(self):
        MiscSettings().set_default_values()
        create_temp_dir()
        self.vdb = VariantDB()

    #
    # Private helpers shared by the tests
    #
    def _append_url(self, url_string):
        """
        :param url_string: The URL (as a string) to wrap in a request
        :return: Whatever VariantDB.append() returns for that request
        """
        return self.vdb.append(fr(URL(url_string)))

    def _assert_limit_enforced(self, url_fmt, limit):
        """
        Append `limit` URLs built from url_fmt asserting that each one is
        accepted, then assert that one more URL is rejected.

        :param url_fmt: Format string with exactly one %s placeholder
        :param limit: Number of variants which are expected to be accepted
        """
        for index in xrange(limit):
            self.assertTrue(self._append_url(url_fmt % index))

        self.assertFalse(self._append_url(url_fmt % (limit + 1,)))

    def _assert_cleaned(self, url_string, expected):
        """
        :param url_string: The URL (as a string) to clean
        :param expected: The string clean_fuzzable_request() must generate
        """
        cleaned = clean_fuzzable_request(fr(URL(url_string)))
        self.assertEqual(cleaned, expected)

    def _post_form_request(self, action, fields):
        """
        :param action: The form action URL (as a string)
        :param fields: Iterable with (name, value) tuples, one per form field
        :return: A FuzzableRequest created from a POST form
        """
        params = FormParameters()

        for field_name, field_value in fields:
            params.add_field_by_attr_items([("name", field_name),
                                            ("value", field_value)])

        params.set_action(URL(action))
        params.set_method('post')

        return FuzzableRequest.from_form(dc_from_form_params(params))

    #
    # Query string variants
    #
    def test_db_int(self):
        self._assert_limit_enforced('http://w3af.org/foo.htm?id=%s',
                                    PARAMS_MAX_VARIANTS)

    def test_db_int_int(self):
        self._assert_limit_enforced('http://w3af.org/foo.htm?id=%s&bar=1',
                                    PARAMS_MAX_VARIANTS)

    def test_db_int_int_var(self):
        template = 'http://w3af.org/foo.htm?id=%s&bar=%s'
        overflow = PARAMS_MAX_VARIANTS + 1

        for index in xrange(PARAMS_MAX_VARIANTS):
            self.assertTrue(self._append_url(template % (index, index)))

        self.assertFalse(self._append_url(template % (overflow, overflow)))

    def test_db_int_str(self):
        template = 'http://w3af.org/foo.htm?id=%s&bar=%s'
        overflow = PARAMS_MAX_VARIANTS + 1

        for index in xrange(PARAMS_MAX_VARIANTS):
            self.assertTrue(
                self._append_url(template % (index, 'abc' * index)))

        self.assertFalse(
            self._append_url(template % (overflow, 'abc' * overflow)))

    def test_db_int_str_then_int_int(self):
        template = 'http://w3af.org/foo.htm?id=%s&bar=%s'
        overflow = PARAMS_MAX_VARIANTS + 1

        # Add (int, str)
        for index in xrange(PARAMS_MAX_VARIANTS):
            self.assertTrue(
                self._append_url(template % (index, 'abc' * index)))

        # Add (int, int)
        for index in xrange(PARAMS_MAX_VARIANTS):
            self.assertTrue(self._append_url(template % (index, index)))

        # Both shapes are rejected now
        self.assertFalse(self._append_url(template % (overflow, overflow)))
        self.assertFalse(self._append_url(template % (overflow, 'spameggs')))

    #
    # clean_fuzzable_request() output
    #
    def test_clean_fuzzable_request_simple(self):
        self._assert_cleaned('http://w3af.org/',
                             u'(GET)-http://w3af.org/')

    def test_clean_fuzzable_request_file(self):
        self._assert_cleaned('http://w3af.org/index.php',
                             u'(GET)-http://w3af.org/%s.php' % FILENAME_TOKEN)

    def test_clean_fuzzable_request_directory_file(self):
        self._assert_cleaned(
            'http://w3af.org/foo/index.php',
            u'(GET)-http://w3af.org/foo/%s.php' % FILENAME_TOKEN)

    def test_clean_fuzzable_request_directory_file_int(self):
        self._assert_cleaned(
            'http://w3af.org/foo/index.php?id=2',
            u'(GET)-http://w3af.org/foo/index.php?id=number')

    def test_clean_fuzzable_request_int(self):
        self._assert_cleaned('http://w3af.org/index.php?id=2',
                             u'(GET)-http://w3af.org/index.php?id=number')

    def test_clean_fuzzable_request_int_str(self):
        self._assert_cleaned(
            'http://w3af.org/index.php?id=2&foo=bar',
            u'(GET)-http://w3af.org/index.php?id=number&foo=string')

    def test_clean_fuzzable_request_int_str_empty(self):
        self._assert_cleaned(
            'http://w3af.org/index.php?id=2&foo=bar&spam=',
            u'(GET)-http://w3af.org/index.php?id=number&foo=string&spam=string')

    def test_clean_fuzzable_request_directory_file_no_params(self):
        self._assert_cleaned(
            'http://w3af.org/foo/index.php',
            u'(GET)-http://w3af.org/foo/%s.php' % FILENAME_TOKEN)

    def test_clean_fuzzable_request_directory(self):
        self._assert_cleaned('http://w3af.org/foo/',
                             u'(GET)-http://w3af.org/%s/' % PATH_TOKEN)

    def test_clean_fuzzable_request_directory_parent_path(self):
        self._assert_cleaned('http://w3af.org/spam/foo/',
                             u'(GET)-http://w3af.org/spam/%s/' % PATH_TOKEN)

    def test_clean_form_fuzzable_request(self):
        url = URL("http://www.w3af.com/")
        headers = Headers([('Host', 'www.w3af.com')])
        post_data = KeyValueContainer(init_val=[('data', ['23'])])

        request = FuzzableRequest(url,
                                  headers=headers,
                                  method='POST',
                                  post_data=post_data)

        self.assertEqual(clean_fuzzable_request(request),
                         u'(POST)-http://www.w3af.com/!data=number')

    def test_clean_form_fuzzable_request_form(self):
        request = self._post_form_request('http://example.com/?id=1',
                                          [("username", "abc"),
                                           ("address", "")])

        expected = (u'(POST)-http://example.com/'
                    u'?id=number!username=string&address=string')

        self.assertEqual(clean_fuzzable_request(request), expected)

    #
    # Path and filename variants
    #
    def test_db_many_files_in_root(self):
        self._assert_limit_enforced('http://w3af.org/foo%s.htm',
                                    PATH_MAX_VARIANTS)

    def test_db_many_files_in_root_without_extension(self):
        self._assert_limit_enforced('http://w3af.org/foo%s',
                                    PATH_MAX_VARIANTS)

    def test_db_many_files_different_extensions_in_root(self):
        # The second format only changes the extension
        for template in ('http://w3af.org/foo%s.htm',
                         'http://w3af.org/foo%s.jpeg'):
            self._assert_limit_enforced(template, PATH_MAX_VARIANTS)

    def test_db_many_paths_in_root(self):
        self._assert_limit_enforced('http://w3af.org/foo%s/',
                                    PATH_MAX_VARIANTS)

    def test_db_many_paths_in_other_directories(self):
        # The second format only changes the parent directory
        for template in ('http://w3af.org/foo/bar%s/',
                         'http://w3af.org/spam/bar%s/'):
            self._assert_limit_enforced(template, PATH_MAX_VARIANTS)

    def test_db_many_files_other_directories(self):
        # The second format has a different parent path and the same
        # extension
        for template in ('http://w3af.org/spam/foo%s.htm',
                         'http://w3af.org/eggs/foo%s.htm'):
            self._assert_limit_enforced(template, PATH_MAX_VARIANTS)

    def test_db_many_files_different_path_length_directories(self):
        # The second format has a different parent path and the same
        # extension.
        #
        # Note the /bar/ in the second format! This is what makes this test
        # different from the previous one
        for template in ('http://w3af.org/spam/foo%s.htm',
                         'http://w3af.org/eggs/bar/foo%s.htm'):
            self._assert_limit_enforced(template, PATH_MAX_VARIANTS)

    def test_db_same_without_qs(self):
        target = URL('http://w3af.org/spam/foo.htm')

        # First append is accepted, the exact same request is then rejected
        for check in (self.assertTrue, self.assertFalse):
            check(self.vdb.append(fr(target)))

    def test_db_same_with_qs(self):
        target = URL('http://w3af.org/spam/foo.htm?id=2&abc=333')

        # First append is accepted, the exact same request is then rejected
        for check in (self.assertTrue, self.assertFalse):
            check(self.vdb.append(fr(target)))

    #
    # Non-ascii characters
    #
    def test_encoding_issues_se(self):
        self._assert_cleaned(
            u'http://w3af.org/vård.png',
            u'(GET)-http://w3af.org/file-5692fef3f5dcd97.png')

    def test_encoding_issues_se_with_qs(self):
        self._assert_cleaned(u'http://w3af.org/vård.png?id=1',
                             '(GET)-http://w3af.org/vård.png?id=number')

    def test_encoding_issues_se_filename(self):
        self._assert_cleaned(
            u'http://w3af.org/x.vård',
            '(GET)-http://w3af.org/file-5692fef3f5dcd97.vård')

    def test_encoding_issues_se_path(self):
        self._assert_cleaned(
            u'http://w3af.org/vård/xyz.html',
            '(GET)-http://w3af.org/vård/file-5692fef3f5dcd97.html')

    #
    # Forms
    #
    def test_same_form_different_url(self):
        form_fields = [("username", "abc"), ("address", "")]

        def create_fuzzable_request(_id):
            action = 'http://example.com/product/%s' % _id
            return self._post_form_request(action, form_fields)

        # These two make sure we're returning false in the last call to
        # append because of MAX_EQUAL_FORM_VARIANTS and not any other limits
        self.assertGreater(PARAMS_MAX_VARIANTS, MAX_EQUAL_FORM_VARIANTS)
        self.assertGreater(PATH_MAX_VARIANTS, MAX_EQUAL_FORM_VARIANTS)

        for index in xrange(MAX_EQUAL_FORM_VARIANTS):
            accepted = create_fuzzable_request(index)
            self.assertTrue(self.vdb.append(accepted))

        rejected = create_fuzzable_request(index + 1)
        self.assertFalse(self.vdb.append(rejected))

    def test_same_form_completely_different_url(self):
        form_fields = [("username", "abc"), ("address", "")]

        def create_fuzzable_request(_id):
            # The path gets five more random segments for each _id increment
            segments = [rand_alnum(9) for _ in xrange(_id * 5)]
            action = 'http://example.com/%s' % '/'.join(segments)
            return self._post_form_request(action, form_fields)

        for index in xrange(MAX_EQUAL_FORM_VARIANTS):
            accepted = create_fuzzable_request(index)
            self.assertTrue(self.vdb.append(accepted))

        rejected = create_fuzzable_request(index + 1)
        self.assertFalse(self.vdb.append(rejected))

    def test_different_form_different_url(self):
        def create_fuzzable_request(_id):
            action = 'http://example.com/product/%s' % _id
            form_fields = [("username%s" % _id, "abc"), ("address", "")]
            return self._post_form_request(action, form_fields)

        for index in xrange(MAX_EQUAL_FORM_VARIANTS * 2):
            request = create_fuzzable_request(index)
            self.assertTrue(self.vdb.append(request))

    def test_different_form_same_url(self):
        def create_fuzzable_request(_id):
            form_fields = [("username%s" % _id, "abc"), ("address", "")]
            return self._post_form_request('http://example.com/product/1',
                                           form_fields)

        for index in xrange(MAX_EQUAL_FORM_VARIANTS * 2):
            request = create_fuzzable_request(index)
            self.assertTrue(self.vdb.append(request))

    def test_forms_with_one_parameter_always_more_variants(self):
        def create_fuzzable_request(_id):
            action = 'http://example.com/product/%s' % _id
            return self._post_form_request(action, [("username", "abc")])

        for index in xrange(MAX_EQUAL_FORM_VARIANTS * 2):
            request = create_fuzzable_request(index)
            self.assertTrue(self.vdb.append(request))
class web_spider(CrawlPlugin): """ Crawl the web application. :author: Andres Riancho ([email protected]) """ UNAUTH_FORBID = {http_constants.UNAUTHORIZED, http_constants.FORBIDDEN} def __init__(self): CrawlPlugin.__init__(self) # Internal variables self._compiled_ignore_re = None self._compiled_follow_re = None self._broken_links = DiskSet(table_prefix='web_spider') self._first_run = True self._target_urls = [] self._target_domain = None self._already_filled_form = ScalableBloomFilter() self._variant_db = VariantDB() # User configured variables self._ignore_regex = '' self._follow_regex = '.*' self._only_forward = False self._ignore_extensions = [] self._compile_re() def crawl(self, fuzzable_request, debugging_id): """ Searches for links on the html. :param debugging_id: A unique identifier for this call to discover() :param fuzzable_request: A fuzzable_req instance that contains (among other things) the URL to test. """ self._handle_first_run() # # If it is a form, then smart_fill the parameters to send something that # makes sense and will allow us to cover more code. # data_container = fuzzable_request.get_raw_data() if isinstance(data_container, Form): if fuzzable_request.get_url() in self._already_filled_form: return self._already_filled_form.add(fuzzable_request.get_url()) data_container.smart_fill() # Send the HTTP request resp = self._uri_opener.send_mutant(fuzzable_request) # Nothing to do here... if resp.get_code() == http_constants.UNAUTHORIZED: return # Nothing to do here... if resp.is_image(): return # And we don't trust what comes from the core, check if 404 if is_404(resp): return self._extract_html_forms(resp, fuzzable_request) self._extract_links_and_verify(resp, fuzzable_request) def _extract_html_forms(self, resp, fuzzable_req): """ Parses the HTTP response body and extract HTML forms, resulting forms are put() on the output queue. 
""" # Try to find forms in the document try: dp = parser_cache.dpc.get_document_parser_for(resp) except BaseFrameworkException: # Failed to find a suitable parser for the document return # Create one FuzzableRequest for each form variant mode = cf.cf.get('form_fuzzing_mode') for form_params in dp.get_forms(): # Form exclusion #15161 form_id_json = form_params.get_form_id().to_json() om.out.debug('A new form was found! Form-id is: "%s"' % form_id_json) if not self._should_analyze_url(form_params.get_action()): continue headers = fuzzable_req.get_headers() for form_params_variant in form_params.get_variants(mode): data_container = dc_from_form_params(form_params_variant) # Now data_container is one of Multipart of URLEncoded form # instances, which is a DataContainer. Much better than the # FormParameters instance we had before in form_params_variant r = FuzzableRequest.from_form(data_container, headers=headers) self.output_queue.put(r) def _handle_first_run(self): if not self._first_run: return # I have to set some variables, in order to be able to code # the "only_forward" feature self._first_run = False self._target_urls = [i.uri2url() for i in cf.cf.get('targets')] # The following line triggered lots of bugs when the "stop" button # was pressed and the core did this: "cf.cf.save('targets', [])" # #self._target_domain = cf.cf.get('targets')[0].get_domain() # # Changing it to something awful but bug-free. 
targets = cf.cf.get('targets') if not targets: return self._target_domain = targets[0].get_domain() def _urls_to_verify_generator(self, resp, fuzzable_req): """ Yields tuples containing: * Newly found URL * The FuzzableRequest instance passed as parameter * The HTTPResponse generated by the FuzzableRequest * Boolean indicating if we trust this reference or not :param resp: HTTP response object :param fuzzable_req: The HTTP request that generated the response """ gen = itertools.chain(self._url_path_url_generator(resp, fuzzable_req), self._body_url_generator(resp, fuzzable_req), headers_url_generator(resp, fuzzable_req)) for ref, fuzzable_req, original_resp, possibly_broken in gen: if self._should_verify_extracted_url(ref, original_resp): yield ref, fuzzable_req, original_resp, possibly_broken def _url_path_url_generator(self, resp, fuzzable_req): """ Yields tuples containing: * Newly found URL * The FuzzableRequest instance passed as parameter * The HTTPResponse generated by the FuzzableRequest * Boolean indicating if we trust this reference or not :param resp: HTTP response object :param fuzzable_req: The HTTP request that generated the response """ # Analyze all directories, if the URL w3af just found is: # # http://localhost/a/b/c/f00.php # # I want to GET: # # http://localhost/a/b/c/ # http://localhost/a/b/ # http://localhost/a/ # http://localhost/ # # And analyze the responses... dirs = resp.get_url().get_directories() for ref in unique_justseen(dirs): yield ref, fuzzable_req, resp, False def _body_url_generator(self, resp, fuzzable_req): """ Yields tuples containing: * Newly found URL * The FuzzableRequest instance passed as parameter * The HTTPResponse generated by the FuzzableRequest * Boolean indicating if we trust this reference or not The newly found URLs are extracted from the http response body using one of the framework's parsers. 
:param resp: HTTP response object :param fuzzable_req: The HTTP request that generated the response """ # # Note: I WANT to follow links that are in the 404 page. # try: doc_parser = parser_cache.dpc.get_document_parser_for(resp) except BaseFrameworkException as w3: om.out.debug('Failed to find a suitable document parser. ' 'Exception "%s"' % w3) else: # Note: # # - With parsed_refs I'm 100% that it's really # something in the HTML that the developer intended to add. # # - The re_refs are the result of regular expressions, # which in some cases are just false positives. parsed_refs, re_refs = doc_parser.get_references() dirs = resp.get_url().get_directories() only_re_refs = set(re_refs) - set(dirs + parsed_refs) all_refs = itertools.chain(parsed_refs, re_refs) resp_is_404 = is_404(resp) for ref in unique_justseen(sorted(all_refs)): possibly_broken = resp_is_404 or (ref in only_re_refs) yield ref, fuzzable_req, resp, possibly_broken def _should_analyze_url(self, ref): """ :param ref: A URL instance to match against the user configured filters :return: True if we should navigate to this URL """ # I don't want w3af sending requests to 3rd parties! 
if ref.get_domain() != self._target_domain: msg = 'web_spider will ignore %s (different domain name)' args = (ref.get_domain(),) om.out.debug(msg % args) return False # Filter the URL according to the configured regular expressions if not self._compiled_follow_re.match(ref.url_string): msg = 'web_spider will ignore %s (not match follow regex)' args = (ref.url_string,) om.out.debug(msg % args) return False if self._compiled_ignore_re.match(ref.url_string): msg = 'web_spider will ignore %s (match ignore regex)' args = (ref.url_string,) om.out.debug(msg % args) return False if self._has_ignored_extension(ref): msg = 'web_spider will ignore %s (match ignore extensions)' args = (ref.url_string,) om.out.debug(msg % args) return False # Implementing only forward if self._only_forward and not self._is_forward(ref): msg = 'web_spider will ignore %s (is not forward)' args = (ref.url_string,) om.out.debug(msg % args) return False return True def _has_ignored_extension(self, new_url): if not self._ignore_extensions: return False return new_url.get_extension().lower() in self._ignore_extensions def _should_verify_extracted_url(self, ref, resp): """ :param ref: A newly found URL :param resp: The HTTP response where the URL was found :return: Boolean indicating if I should send this new reference to the core. """ # Ignore myself if ref == resp.get_uri(): return False if not self._should_analyze_url(ref): return False # # I tried to have only one VariantDB in the framework instead of two, # but after some tests and architecture considerations it was better # to duplicate the data. # # In the future I'll run plugins in different processes than the core, # so it makes sense to have independent plugins. 
# # If I remove the web_spider VariantDB and just leave the one in the # core the framework keeps working but this method # (_should_verify_extracted_url) will return True much more often, which # leads to extra HTTP requests for URLs which we already checked and the # core will dismiss anyway # fuzzable_request = FuzzableRequest(ref) if self._variant_db.append(fuzzable_request): return True return False def _extract_links_and_verify(self, resp, fuzzable_req): """ This is a very basic method that will send the work to different threads. Work is generated by the _urls_to_verify_generator :param resp: HTTP response object :param fuzzable_req: The HTTP request that generated the response """ self.worker_pool.map_multi_args( self._verify_reference, self._urls_to_verify_generator(resp, fuzzable_req)) def _verify_reference(self, reference, original_request, original_response, possibly_broken, be_recursive=True): """ The parameters are: * Newly found URL * The FuzzableRequest instance which generated the response where the new URL was found * The HTTPResponse generated by the FuzzableRequest * Boolean indicating if we trust this reference or not This method GET's every new link and parses it in order to get new links and forms. """ # # Remember that this "breaks" the cache=True in most cases! # headers = { 'Referer': original_url } # # But this does not, and it is friendlier than simply ignoring the # referer # referer = original_response.get_url().base_url().url_string headers = Headers([('Referer', referer)]) # Note: We're not grep'ing this HTTP request/response now because it # has high probability of being a 404, and the grep plugins # already got enough 404 responses to analyze (from is_404 for # example). 
If it's not a 404 then we'll push it to the core # and it will come back to this plugin's crawl() where it will # be requested with grep=True resp = self._uri_opener.GET(reference, cache=True, headers=headers, grep=False) if not is_404(resp): msg = '[web_spider] Found new link "%s" at "%s"' args = (reference, original_response.get_url()) om.out.debug(msg % args) fuzz_req = FuzzableRequest(reference, headers=headers) # These next steps are simple, but actually allows me to set the # referer and cookie for the FuzzableRequest instances I'm sending # to the core, which will then allow the fuzzer to create # CookieMutant and HeadersMutant instances. # # Without setting the Cookie, the CookieMutant would never have any # data to modify; remember that cookies are actually set by the # urllib2 cookie handler when the request already exited the # framework. cookie = Cookie.from_http_response(original_response) fuzz_req.set_referer(referer) fuzz_req.set_cookie(cookie) self.output_queue.put(fuzz_req) return # Note: I WANT to follow links that are in the 404 page, but # DO NOT return the 404 itself to the core. # # This will parse the 404 response and add the 404-links in the # output queue, so that the core can get them # if be_recursive: # # Only follow one level of links in 404 pages, this limits the # potential issue when this is found: # # http://foo.com/abc/ => 404 # Body: <a href="def/">link</a> # # Which would lead to this function to perform requests to: # * http://foo.com/abc/ # * http://foo.com/abc/def/ # * http://foo.com/abc/def/def/ # * http://foo.com/abc/def/def/def/ # * ... # # Do not use threads here, it will dead-lock (for unknown # reasons). This is tested in TestDeadLock unittest. 
for args in self._urls_to_verify_generator(resp, original_request): self._verify_reference(*args, be_recursive=False) # Store the broken links if not possibly_broken and resp.get_code() not in self.UNAUTH_FORBID: t = (resp.get_url(), original_request.get_uri()) self._broken_links.add(t) def end(self): """ Called when the process ends, prints out the list of broken links. """ if len(self._broken_links): om.out.information('The following is a list of broken links that' ' were found by the web_spider plugin:') for broken, where in unique_justseen(self._broken_links.ordered_iter()): om.out.information('- %s [ referenced from: %s ]' % (broken, where)) self._broken_links.cleanup() def _is_forward(self, reference): """ Check if the reference is inside the target directories. :return: True if reference is an URL inside the directory structure of at least one of the target URLs. """ for domain_path in self._target_urls: if reference.url_string.startswith(domain_path.url_string): return True return False def get_options(self): """ :return: A list of option objects for this plugin. """ ol = OptionList() d = 'Only crawl links to paths inside the URL given as target.' o = opt_factory('only_forward', self._only_forward, d, BOOL) ol.add(o) d = ('Only crawl links that match this regular expression.' ' Note that ignore_regex has precedence over follow_regex.') o = opt_factory('follow_regex', self._follow_regex, d, REGEX) ol.add(o) d = ('DO NOT crawl links that match this regular expression.' ' Note that ignore_regex has precedence over follow_regex.') o = opt_factory('ignore_regex', self._ignore_regex, d, REGEX) ol.add(o) d = 'DO NOT crawl links that use these extensions.' h = ('This configuration parameter is commonly used to ignore' ' static files such as zip, pdf, jpeg, etc. 
It is possible to' ' ignore these files using `ignore_regex`, but configuring' ' this parameter is easier and performs case insensitive' ' matching.') o = opt_factory('ignore_extensions', self._ignore_extensions, d, LIST, help=h) ol.add(o) return ol def set_options(self, options_list): """ This method sets all the options that are configured using the user interface generated by the framework using the result of get_options(). :param options_list: A dictionary with the options for the plugin. :return: No value is returned. """ self._only_forward = options_list['only_forward'].get_value() self._ignore_regex = options_list['ignore_regex'].get_value() self._follow_regex = options_list['follow_regex'].get_value() self._compile_re() self._ignore_extensions = options_list['ignore_extensions'].get_value() self._ignore_extensions = [ext.lower() for ext in self._ignore_extensions] def _compile_re(self): """ Compile the regular expressions that are going to be used to ignore or follow links. """ if self._ignore_regex: # Compilation of this regex can't fail because it was already # verified as valid at regex_option.py: see REGEX in get_options() self._compiled_ignore_re = re.compile(self._ignore_regex) else: # If the self._ignore_regex is empty then I don't have to ignore # anything. To be able to do that, I simply compile an re with "abc" # as the pattern, which won't match any URL since they will all # start with http:// or https:// self._compiled_ignore_re = re.compile('abc') # Compilation of this regex can't fail because it was already # verified as valid at regex_option.py: see REGEX in get_options() self._compiled_follow_re = re.compile(self._follow_regex) def get_long_desc(self): """ :return: A DETAILED description of the plugin functions and features. """ return """
class TestVariantDB(unittest.TestCase):
    """
    Exercise VariantDB.need_more_variants() / append() with URLs and check
    the strings generated by VariantDB._clean_reference().

    Test methods are described with comments instead of docstrings so that
    test runners keep identifying them by method name.
    """

    def setUp(self):
        create_temp_dir()
        self.vdb = VariantDB()

    #
    # Private helpers shared by the tests
    #
    def _store_wanted(self, url_string):
        """
        Assert that the db still needs variants for url_string and then
        append it.

        :param url_string: The URL (as a string) to store
        """
        url = URL(url_string)
        self.assertTrue(self.vdb.need_more_variants(url))
        self.vdb.append(url)

    def _needs_more(self, url_string):
        """
        :param url_string: The URL (as a string) to query the db with
        :return: Whatever need_more_variants() returns for that URL
        """
        return self.vdb.need_more_variants(URL(url_string))

    def _assert_clean_reference(self, url_string, expected):
        """
        :param url_string: The URL (as a string) to clean
        :param expected: The string _clean_reference() must generate
        """
        cleaned = self.vdb._clean_reference(URL(url_string))
        self.assertEqual(cleaned, expected)

    #
    # need_more_variants() / append()
    #
    def test_db_int(self):
        template = 'http://w3af.org/foo.htm?id=%s'

        for index in xrange(DEFAULT_MAX_VARIANTS):
            self._store_wanted(template % index)

        overflow = template % (DEFAULT_MAX_VARIANTS + 1,)
        self.assertFalse(self._needs_more(overflow))

    def test_db_int_int(self):
        template = 'http://w3af.org/foo.htm?id=%s&bar=1'

        for index in xrange(DEFAULT_MAX_VARIANTS):
            self._store_wanted(template % index)

        overflow = template % (DEFAULT_MAX_VARIANTS + 1,)
        self.assertFalse(self._needs_more(overflow))

    def test_db_int_int_var(self):
        template = 'http://w3af.org/foo.htm?id=%s&bar=%s'
        beyond = DEFAULT_MAX_VARIANTS + 1

        for index in xrange(DEFAULT_MAX_VARIANTS):
            self._store_wanted(template % (index, index))

        self.assertFalse(self._needs_more(template % (beyond, beyond)))

    def test_db_int_str(self):
        template = 'http://w3af.org/foo.htm?id=%s&bar=%s'
        beyond = DEFAULT_MAX_VARIANTS + 1

        for index in xrange(DEFAULT_MAX_VARIANTS):
            self._store_wanted(template % (index, 'abc' * index))

        self.assertFalse(
            self._needs_more(template % (beyond, 'abc' * beyond)))

    def test_db_int_str_then_int_int(self):
        template = 'http://w3af.org/foo.htm?id=%s&bar=%s'
        beyond = DEFAULT_MAX_VARIANTS + 1

        # Add (int, str)
        for index in xrange(DEFAULT_MAX_VARIANTS):
            self._store_wanted(template % (index, 'abc' * index))

        # Please note that in this case I'm asking for (int, int) and I added
        # (int, str) before
        self.assertTrue(self._needs_more(template % (beyond, beyond)))

        # Add (int, int)
        for index in xrange(DEFAULT_MAX_VARIANTS):
            self._store_wanted(template % (index, index))

        self.assertFalse(self._needs_more(template % (beyond, beyond)))

    #
    # _clean_reference() output
    #
    def test_clean_reference_simple(self):
        self._assert_clean_reference('http://w3af.org/',
                                     u'http://w3af.org/')

    def test_clean_reference_file(self):
        self._assert_clean_reference('http://w3af.org/index.php',
                                     u'http://w3af.org/index.php')

    def test_clean_reference_directory_file(self):
        self._assert_clean_reference('http://w3af.org/foo/index.php',
                                     u'http://w3af.org/foo/index.php')

    def test_clean_reference_directory_file_int(self):
        self._assert_clean_reference(
            'http://w3af.org/foo/index.php?id=2',
            u'http://w3af.org/foo/index.php?id=number')

    def test_clean_reference_int(self):
        self._assert_clean_reference('http://w3af.org/index.php?id=2',
                                     u'http://w3af.org/index.php?id=number')

    def test_clean_reference_int_str(self):
        self._assert_clean_reference(
            'http://w3af.org/index.php?id=2&foo=bar',
            u'http://w3af.org/index.php?id=number&foo=string')

    def test_clean_reference_int_str_empty(self):
        self._assert_clean_reference(
            'http://w3af.org/index.php?id=2&foo=bar&spam=',
            u'http://w3af.org/index.php?id=number&foo=string&spam=string')
class TestVariantDB(unittest.TestCase):
    """
    Tests for the boolean returned by VariantDB.append() and for the string
    returned by clean_fuzzable_request().
    """

    def setUp(self):
        MiscSettings().set_default_values()
        create_temp_dir()
        self.vdb = VariantDB()

    #
    # Private helpers
    #
    def _assert_accepted(self, url_fmt, count, args_for=None):
        """
        Append `count` requests and assert that append() is true for each.

        :param url_fmt: Format string used to build each URL
        :param count: How many URLs to append (indexes 0 .. count - 1)
        :param args_for: Optional callable mapping the loop index to the
                         tuple interpolated into url_fmt. When omitted the
                         index itself is the only value interpolated.
        """
        for idx in xrange(count):
            fmt_args = (idx,) if args_for is None else args_for(idx)
            request = fr(URL(url_fmt % fmt_args))
            self.assertTrue(self.vdb.append(request))

    def _assert_rejected(self, raw_url):
        """
        Assert that append() is false for a request built from raw_url.
        """
        self.assertFalse(self.vdb.append(fr(URL(raw_url))))

    def _assert_limit(self, url_fmt, limit, args_for=None):
        """
        Append `limit` requests expecting all of them to be accepted, then
        expect the request built with index `limit + 1` to be rejected.

        The parameters have the same meaning as in _assert_accepted().
        """
        self._assert_accepted(url_fmt, limit, args_for)

        overflow = limit + 1
        fmt_args = (overflow,) if args_for is None else args_for(overflow)
        self._assert_rejected(url_fmt % fmt_args)

    def _assert_cleaned(self, raw_url, expected):
        """
        Assert that clean_fuzzable_request() of a request built from raw_url
        equals expected.
        """
        self.assertEqual(clean_fuzzable_request(fr(URL(raw_url))), expected)

    def _post_form_request(self, action, username_field="username",
                           with_address=True):
        """
        Build a FuzzableRequest from a POST form.

        :param action: The form action, as a string
        :param username_field: Name of the first form field (value "abc")
        :param with_address: When true, a second field named "address" with
                             an empty value is added
        :return: The result of FuzzableRequest.from_form() for that form
        """
        params = FormParameters()
        params.add_field_by_attr_items([("name", username_field),
                                        ("value", "abc")])

        if with_address:
            params.add_field_by_attr_items([("name", "address"),
                                            ("value", "")])

        params.set_action(URL(action))
        params.set_method('post')

        return FuzzableRequest.from_form(dc_from_form_params(params))

    #
    # Query string variants
    #
    def test_db_int(self):
        self._assert_limit('http://w3af.org/foo.htm?id=%s',
                           PARAMS_MAX_VARIANTS)

    def test_db_int_int(self):
        self._assert_limit('http://w3af.org/foo.htm?id=%s&bar=1',
                           PARAMS_MAX_VARIANTS)

    def test_db_int_int_var(self):
        self._assert_limit('http://w3af.org/foo.htm?id=%s&bar=%s',
                           PARAMS_MAX_VARIANTS,
                           lambda idx: (idx, idx))

    def test_db_int_str(self):
        self._assert_limit('http://w3af.org/foo.htm?id=%s&bar=%s',
                           PARAMS_MAX_VARIANTS,
                           lambda idx: (idx, 'abc' * idx))

    def test_db_int_str_then_int_int(self):
        url_fmt = 'http://w3af.org/foo.htm?id=%s&bar=%s'
        overflow = PARAMS_MAX_VARIANTS + 1

        # First round: (int, str) query strings
        self._assert_accepted(url_fmt, PARAMS_MAX_VARIANTS,
                              lambda idx: (idx, 'abc' * idx))

        # Second round: (int, int) query strings
        self._assert_accepted(url_fmt, PARAMS_MAX_VARIANTS,
                              lambda idx: (idx, idx))

        # After both rounds neither kind of query string is accepted
        self._assert_rejected(url_fmt % (overflow, overflow))
        self._assert_rejected(url_fmt % (overflow, 'spameggs'))

    #
    # clean_fuzzable_request()
    #
    def test_clean_fuzzable_request_simple(self):
        self._assert_cleaned('http://w3af.org/',
                             u'(GET)-http://w3af.org/')

    def test_clean_fuzzable_request_file(self):
        self._assert_cleaned(
            'http://w3af.org/index.php',
            u'(GET)-http://w3af.org/%s.php' % FILENAME_TOKEN)

    def test_clean_fuzzable_request_directory_file(self):
        self._assert_cleaned(
            'http://w3af.org/foo/index.php',
            u'(GET)-http://w3af.org/foo/%s.php' % FILENAME_TOKEN)

    def test_clean_fuzzable_request_directory_file_int(self):
        self._assert_cleaned(
            'http://w3af.org/foo/index.php?id=2',
            u'(GET)-http://w3af.org/foo/index.php?id=number')

    def test_clean_fuzzable_request_int(self):
        self._assert_cleaned(
            'http://w3af.org/index.php?id=2',
            u'(GET)-http://w3af.org/index.php?id=number')

    def test_clean_fuzzable_request_int_str(self):
        self._assert_cleaned(
            'http://w3af.org/index.php?id=2&foo=bar',
            u'(GET)-http://w3af.org/index.php?id=number&foo=string')

    def test_clean_fuzzable_request_int_str_empty(self):
        self._assert_cleaned(
            'http://w3af.org/index.php?id=2&foo=bar&spam=',
            u'(GET)-http://w3af.org/index.php?id=number&foo=string&spam=string')

    def test_clean_fuzzable_request_directory_file_no_params(self):
        self._assert_cleaned(
            'http://w3af.org/foo/index.php',
            u'(GET)-http://w3af.org/foo/%s.php' % FILENAME_TOKEN)

    def test_clean_fuzzable_request_directory(self):
        self._assert_cleaned(
            'http://w3af.org/foo/',
            u'(GET)-http://w3af.org/%s/' % PATH_TOKEN)

    def test_clean_fuzzable_request_directory_parent_path(self):
        self._assert_cleaned(
            'http://w3af.org/spam/foo/',
            u'(GET)-http://w3af.org/spam/%s/' % PATH_TOKEN)

    def test_clean_form_fuzzable_request(self):
        post_data = KeyValueContainer(init_val=[('data', ['23'])])
        request = FuzzableRequest(URL("http://www.w3af.com/"),
                                  headers=Headers([('Host', 'www.w3af.com')]),
                                  method='POST',
                                  post_data=post_data)

        expected = u'(POST)-http://www.w3af.com/!data=number'
        self.assertEqual(clean_fuzzable_request(request), expected)

    def test_clean_form_fuzzable_request_form(self):
        request = self._post_form_request('http://example.com/?id=1')

        expected = u'(POST)-http://example.com/' \
                   u'?id=number!username=string&address=string'
        self.assertEqual(clean_fuzzable_request(request), expected)

    #
    # Path and filename variants
    #
    def test_db_many_files_in_root(self):
        self._assert_limit('http://w3af.org/foo%s.htm', PATH_MAX_VARIANTS)

    def test_db_many_files_in_root_without_extension(self):
        self._assert_limit('http://w3af.org/foo%s', PATH_MAX_VARIANTS)

    def test_db_many_files_different_extensions_in_root(self):
        self._assert_limit('http://w3af.org/foo%s.htm', PATH_MAX_VARIANTS)

        # Same file names, different extension
        self._assert_limit('http://w3af.org/foo%s.jpeg', PATH_MAX_VARIANTS)

    def test_db_many_paths_in_root(self):
        self._assert_limit('http://w3af.org/foo%s/', PATH_MAX_VARIANTS)

    def test_db_many_paths_in_other_directories(self):
        self._assert_limit('http://w3af.org/foo/bar%s/', PATH_MAX_VARIANTS)

        # Same directory names, different parent directory
        self._assert_limit('http://w3af.org/spam/bar%s/', PATH_MAX_VARIANTS)

    def test_db_many_files_other_directories(self):
        self._assert_limit('http://w3af.org/spam/foo%s.htm',
                           PATH_MAX_VARIANTS)

        # Different parent path, same extension
        self._assert_limit('http://w3af.org/eggs/foo%s.htm',
                           PATH_MAX_VARIANTS)

    def test_db_many_files_different_path_length_directories(self):
        self._assert_limit('http://w3af.org/spam/foo%s.htm',
                           PATH_MAX_VARIANTS)

        # Different parent path, same extension. The extra /bar/ level is
        # what sets this test apart from the previous one.
        self._assert_limit('http://w3af.org/eggs/bar/foo%s.htm',
                           PATH_MAX_VARIANTS)

    def test_db_same_without_qs(self):
        url = URL('http://w3af.org/spam/foo.htm')

        first_attempt = self.vdb.append(fr(url))
        self.assertTrue(first_attempt)

        second_attempt = self.vdb.append(fr(url))
        self.assertFalse(second_attempt)

    def test_db_same_with_qs(self):
        url = URL('http://w3af.org/spam/foo.htm?id=2&abc=333')

        first_attempt = self.vdb.append(fr(url))
        self.assertTrue(first_attempt)

        second_attempt = self.vdb.append(fr(url))
        self.assertFalse(second_attempt)

    #
    # Non-ASCII URLs
    #
    def test_encoding_issues_se(self):
        self._assert_cleaned(
            u'http://w3af.org/vård.png',
            u'(GET)-http://w3af.org/file-5692fef3f5dcd97.png')

    def test_encoding_issues_se_with_qs(self):
        self._assert_cleaned(
            u'http://w3af.org/vård.png?id=1',
            '(GET)-http://w3af.org/vård.png?id=number')

    def test_encoding_issues_se_filename(self):
        self._assert_cleaned(
            u'http://w3af.org/x.vård',
            '(GET)-http://w3af.org/file-5692fef3f5dcd97.vård')

    def test_encoding_issues_se_path(self):
        self._assert_cleaned(
            u'http://w3af.org/vård/xyz.html',
            '(GET)-http://w3af.org/vård/file-5692fef3f5dcd97.html')

    #
    # Form variants
    #
    def test_same_form_different_url(self):
        action_fmt = 'http://example.com/product/%s'

        # Guard: the final rejection below has to be caused by
        # MAX_EQUAL_FORM_VARIANTS and not by one of the other limits
        self.assertGreater(PARAMS_MAX_VARIANTS, MAX_EQUAL_FORM_VARIANTS)
        self.assertGreater(PATH_MAX_VARIANTS, MAX_EQUAL_FORM_VARIANTS)

        for idx in xrange(MAX_EQUAL_FORM_VARIANTS):
            request = self._post_form_request(action_fmt % idx)
            self.assertTrue(self.vdb.append(request))

        # idx keeps the last value it had inside the loop
        request = self._post_form_request(action_fmt % (idx + 1))
        self.assertFalse(self.vdb.append(request))

    def test_same_form_completely_different_url(self):
        def random_action(_id):
            # _id * 5 random path segments, each from rand_alnum(9)
            segments = [rand_alnum(9) for _ in xrange(_id * 5)]
            return 'http://example.com/%s' % '/'.join(segments)

        for idx in xrange(MAX_EQUAL_FORM_VARIANTS):
            request = self._post_form_request(random_action(idx))
            self.assertTrue(self.vdb.append(request))

        # idx keeps the last value it had inside the loop
        request = self._post_form_request(random_action(idx + 1))
        self.assertFalse(self.vdb.append(request))

    def test_different_form_different_url(self):
        action_fmt = 'http://example.com/product/%s'

        for idx in xrange(MAX_EQUAL_FORM_VARIANTS * 2):
            request = self._post_form_request(action_fmt % idx,
                                              "username%s" % idx)
            self.assertTrue(self.vdb.append(request))

    def test_different_form_same_url(self):
        action = 'http://example.com/product/1'

        for idx in xrange(MAX_EQUAL_FORM_VARIANTS * 2):
            request = self._post_form_request(action, "username%s" % idx)
            self.assertTrue(self.vdb.append(request))

    def test_forms_with_one_parameter_always_more_variants(self):
        action_fmt = 'http://example.com/product/%s'

        for idx in xrange(MAX_EQUAL_FORM_VARIANTS * 2):
            request = self._post_form_request(action_fmt % idx,
                                              with_address=False)
            self.assertTrue(self.vdb.append(request))
class TestVariantDB(unittest.TestCase):
    """
    Tests for the boolean returned by VariantDB.append() and for the string
    returned by clean_fuzzable_request().
    """

    def setUp(self):
        create_temp_dir()
        self.vdb = VariantDB()

    #
    # Private helpers
    #
    def _assert_accepted(self, url_fmt, count, args_for=None):
        """
        Append `count` requests and assert that append() is true for each.

        :param url_fmt: Format string used to build each URL
        :param count: How many URLs to append (indexes 0 .. count - 1)
        :param args_for: Optional callable mapping the loop index to the
                         tuple interpolated into url_fmt. When omitted the
                         index itself is the only value interpolated.
        """
        for idx in xrange(count):
            fmt_args = (idx,) if args_for is None else args_for(idx)
            request = fr(URL(url_fmt % fmt_args))
            self.assertTrue(self.vdb.append(request))

    def _assert_rejected(self, raw_url):
        """
        Assert that append() is false for a request built from raw_url.
        """
        self.assertFalse(self.vdb.append(fr(URL(raw_url))))

    def _assert_limit(self, url_fmt, limit, args_for=None):
        """
        Append `limit` requests expecting all of them to be accepted, then
        expect the request built with index `limit + 1` to be rejected.

        The parameters have the same meaning as in _assert_accepted().
        """
        self._assert_accepted(url_fmt, limit, args_for)

        overflow = limit + 1
        fmt_args = (overflow,) if args_for is None else args_for(overflow)
        self._assert_rejected(url_fmt % fmt_args)

    def _assert_cleaned(self, raw_url, expected):
        """
        Assert that clean_fuzzable_request() of a request built from raw_url
        equals expected.
        """
        self.assertEqual(clean_fuzzable_request(fr(URL(raw_url))), expected)

    #
    # Query string variants
    #
    def test_db_int(self):
        self._assert_limit('http://w3af.org/foo.htm?id=%s',
                           PARAMS_MAX_VARIANTS)

    def test_db_int_int(self):
        self._assert_limit('http://w3af.org/foo.htm?id=%s&bar=1',
                           PARAMS_MAX_VARIANTS)

    def test_db_int_int_var(self):
        self._assert_limit('http://w3af.org/foo.htm?id=%s&bar=%s',
                           PARAMS_MAX_VARIANTS,
                           lambda idx: (idx, idx))

    def test_db_int_str(self):
        self._assert_limit('http://w3af.org/foo.htm?id=%s&bar=%s',
                           PARAMS_MAX_VARIANTS,
                           lambda idx: (idx, 'abc' * idx))

    def test_db_int_str_then_int_int(self):
        url_fmt = 'http://w3af.org/foo.htm?id=%s&bar=%s'
        overflow = PARAMS_MAX_VARIANTS + 1

        # First round: (int, str) query strings
        self._assert_accepted(url_fmt, PARAMS_MAX_VARIANTS,
                              lambda idx: (idx, 'abc' * idx))

        # Second round: (int, int) query strings
        self._assert_accepted(url_fmt, PARAMS_MAX_VARIANTS,
                              lambda idx: (idx, idx))

        # After both rounds neither kind of query string is accepted
        self._assert_rejected(url_fmt % (overflow, overflow))
        self._assert_rejected(url_fmt % (overflow, 'spameggs'))

    #
    # clean_fuzzable_request()
    #
    def test_clean_fuzzable_request_simple(self):
        self._assert_cleaned('http://w3af.org/',
                             u'(GET)-http://w3af.org/')

    def test_clean_fuzzable_request_file(self):
        self._assert_cleaned(
            'http://w3af.org/index.php',
            u'(GET)-http://w3af.org/%s.php' % FILENAME_TOKEN)

    def test_clean_fuzzable_request_directory_file(self):
        self._assert_cleaned(
            'http://w3af.org/foo/index.php',
            u'(GET)-http://w3af.org/foo/%s.php' % FILENAME_TOKEN)

    def test_clean_fuzzable_request_directory_file_int(self):
        self._assert_cleaned(
            'http://w3af.org/foo/index.php?id=2',
            u'(GET)-http://w3af.org/foo/index.php?id=number')

    def test_clean_fuzzable_request_int(self):
        self._assert_cleaned(
            'http://w3af.org/index.php?id=2',
            u'(GET)-http://w3af.org/index.php?id=number')

    def test_clean_fuzzable_request_int_str(self):
        self._assert_cleaned(
            'http://w3af.org/index.php?id=2&foo=bar',
            u'(GET)-http://w3af.org/index.php?id=number&foo=string')

    def test_clean_fuzzable_request_int_str_empty(self):
        self._assert_cleaned(
            'http://w3af.org/index.php?id=2&foo=bar&spam=',
            u'(GET)-http://w3af.org/index.php?id=number&foo=string&spam=string')

    def test_clean_fuzzable_request_directory_file_no_params(self):
        self._assert_cleaned(
            'http://w3af.org/foo/index.php',
            u'(GET)-http://w3af.org/foo/%s.php' % FILENAME_TOKEN)

    def test_clean_fuzzable_request_directory(self):
        self._assert_cleaned(
            'http://w3af.org/foo/',
            u'(GET)-http://w3af.org/%s/' % PATH_TOKEN)

    def test_clean_fuzzable_request_directory_parent_path(self):
        self._assert_cleaned(
            'http://w3af.org/spam/foo/',
            u'(GET)-http://w3af.org/spam/%s/' % PATH_TOKEN)

    def test_clean_form_fuzzable_request(self):
        post_data = KeyValueContainer(init_val=[('data', ['23'])])
        request = FuzzableRequest(URL("http://www.w3af.com/"),
                                  headers=Headers([('Host', 'www.w3af.com')]),
                                  method='POST',
                                  post_data=post_data)

        expected = u'(POST)-http://www.w3af.com/!data=number'
        self.assertEqual(clean_fuzzable_request(request), expected)

    def test_clean_form_fuzzable_request_form(self):
        # POST form with two fields and an action that has a query string
        params = FormParameters()
        params.add_field_by_attr_items([("name", "username"),
                                        ("value", "abc")])
        params.add_field_by_attr_items([("name", "address"),
                                        ("value", "")])
        params.set_action(URL('http://example.com/?id=1'))
        params.set_method('post')

        request = FuzzableRequest.from_form(dc_from_form_params(params))

        expected = u'(POST)-http://example.com/' \
                   u'?id=number!username=string&address=string'
        self.assertEqual(clean_fuzzable_request(request), expected)

    #
    # Path and filename variants
    #
    def test_db_many_files_in_root(self):
        self._assert_limit('http://w3af.org/foo%s.htm', PATH_MAX_VARIANTS)

    def test_db_many_files_in_root_without_extension(self):
        self._assert_limit('http://w3af.org/foo%s', PATH_MAX_VARIANTS)

    def test_db_many_files_different_extensions_in_root(self):
        self._assert_limit('http://w3af.org/foo%s.htm', PATH_MAX_VARIANTS)

        # Same file names, different extension
        self._assert_limit('http://w3af.org/foo%s.jpeg', PATH_MAX_VARIANTS)

    def test_db_many_paths_in_root(self):
        self._assert_limit('http://w3af.org/foo%s/', PATH_MAX_VARIANTS)

    def test_db_many_paths_in_other_directories(self):
        self._assert_limit('http://w3af.org/foo/bar%s/', PATH_MAX_VARIANTS)

        # Same directory names, different parent directory
        self._assert_limit('http://w3af.org/spam/bar%s/', PATH_MAX_VARIANTS)

    def test_db_many_files_other_directories(self):
        self._assert_limit('http://w3af.org/spam/foo%s.htm',
                           PATH_MAX_VARIANTS)

        # Different parent path, same extension
        self._assert_limit('http://w3af.org/eggs/foo%s.htm',
                           PATH_MAX_VARIANTS)

    def test_db_many_files_different_path_length_directories(self):
        self._assert_limit('http://w3af.org/spam/foo%s.htm',
                           PATH_MAX_VARIANTS)

        # Different parent path, same extension. The extra /bar/ level is
        # what sets this test apart from the previous one.
        self._assert_limit('http://w3af.org/eggs/bar/foo%s.htm',
                           PATH_MAX_VARIANTS)

    def test_db_same_without_qs(self):
        url = URL('http://w3af.org/spam/foo.htm')

        first_attempt = self.vdb.append(fr(url))
        self.assertTrue(first_attempt)

        second_attempt = self.vdb.append(fr(url))
        self.assertFalse(second_attempt)

    def test_db_same_with_qs(self):
        url = URL('http://w3af.org/spam/foo.htm?id=2&abc=333')

        first_attempt = self.vdb.append(fr(url))
        self.assertTrue(first_attempt)

        second_attempt = self.vdb.append(fr(url))
        self.assertFalse(second_attempt)