def test_add_HTTPPostDataRequest(self):
    ds = DiskSet()

    uri = URL('http://w3af.org/?id=2')
    hdr = Headers([('Referer', 'http://w3af.org/')])

    pdr1 = HTTPPostDataRequest(uri, method='GET', headers=hdr)

    uri = URL('http://w3af.org/?id=3')
    pdr2 = HTTPPostDataRequest(uri, method='GET', headers=hdr)

    uri = URL('http://w3af.org/?id=7')
    pdr3 = HTTPPostDataRequest(uri, method='FOO', headers=hdr)

    ds.add(pdr1)
    ds.add(pdr2)
    ds.add(pdr2)
    ds.add(pdr1)

    self.assertEqual(ds[0], pdr1)
    self.assertEqual(ds[1], pdr2)
    self.assertFalse(pdr3 in ds)
    self.assertTrue(pdr2 in ds)
    self.assertEqual(len(ds), 2)

    # This forces an internal change in the URL object
    pdr2.get_url().url_string
    self.assertTrue(pdr2 in ds)
def test_add(self):
    ds = DiskSet()
    ds.add(1)
    ds.add(2)
    ds.add(3)
    ds.add(1)

    self.assertEqual(list(ds), [1, 2, 3])
    self.assertEqual(len(ds), 3)
def test_add_urlobject(self):
    ds = DiskSet()

    ds.add(URL('http://w3af.org/?id=2'))
    ds.add(URL('http://w3af.org/?id=3'))
    ds.add(URL('http://w3af.org/?id=3'))

    self.assertEqual(ds[0], URL('http://w3af.org/?id=2'))
    self.assertEqual(ds[1], URL('http://w3af.org/?id=3'))
    self.assertEqual(len(ds), 2)
    self.assertFalse(URL('http://w3af.org/?id=4') in ds)
    self.assertTrue(URL('http://w3af.org/?id=2') in ds)
def test_remove_table(self):
    disk_set = DiskSet()
    disk_set.add(1)
    disk_set.add(2)

    table_name = disk_set.table_name
    db = get_default_temp_db_instance()

    self.assertTrue(db.table_exists(table_name))

    disk_set.cleanup()

    self.assertFalse(db.table_exists(table_name))
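The test above relies on each DiskSet being backed by its own temporary database table, which cleanup() drops. A minimal lifecycle sketch, assuming only the add()/cleanup()/table_name behavior exercised by these tests, that uses try/finally so the backing table is always removed:

# Hypothetical lifecycle sketch: the try/finally guarantees the temporary
# table behind the DiskSet is dropped even if processing raises.
ds = DiskSet()
try:
    for item in [1, 2, 3]:
        ds.add(item)
    # ... work with the set while it is backed by table ds.table_name ...
finally:
    ds.cleanup()  # drops the temporary table, as test_remove_table verifies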
class phpinfo(CrawlPlugin):
    '''
    Search for the PHP info file and, if it is found, determine the version
    of PHP.

    :author: Viktor Gazdag ( [email protected] )
    '''
    '''
    CHANGELOG:
    Feb/17/2009 - Added PHP Settings Audit Checks by Aung Khant
    (aungkhant[at]yehg.net)
    '''

    def __init__(self):
        CrawlPlugin.__init__(self)

        # Internal variables
        self._analyzed_dirs = DiskSet()
        self._has_audited = 0

    def crawl(self, fuzzable_request):
        '''
        For every directory, fetch a list of files and analyze the response.

        :param fuzzable_request: A fuzzable_request instance that contains
                                 (among other things) the URL to test.
        '''
        for domain_path in fuzzable_request.get_url().get_directories():

            if domain_path in self._analyzed_dirs:
                continue
            self._analyzed_dirs.add(domain_path)

            url_repeater = repeat(domain_path)
            args = izip(url_repeater, self._get_potential_phpinfos())

            self.worker_pool.map_multi_args(self._check_and_analyze, args)

    def _check_and_analyze(self, domain_path, php_info_filename):
        '''
        Check if a php_info_filename exists in the domain_path.

        :return: None, everything is put() into the self.output_queue.
        '''
        # Request the file
        php_info_url = domain_path.url_join(php_info_filename)
        try:
            response = self._uri_opener.GET(php_info_url, cache=True)
        except w3afException, w3:
            msg = 'Failed to GET phpinfo file: "%s". Exception: "%s".'
            om.out.debug(msg % (php_info_url, w3))
        else:
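crawl() builds its work items by pairing a constant first argument (the directory) with each candidate filename. A standalone sketch of that izip()/repeat() pattern, with hypothetical stand-in names, since worker_pool.map_multi_args() consumes exactly these argument tuples:

from itertools import izip, repeat

def check_and_analyze(domain_path, filename):
    # Stand-in for the real _check_and_analyze(); just shows the pairing.
    print '%s%s' % (domain_path, filename)

domain_path = 'http://w3af.org/admin/'
candidates = ['phpinfo.php', 'info.php', 'test.php']

# Each tuple is (domain_path, filename); repeat() supplies the constant
# first element, izip() stops when the candidate list is exhausted.
for args in izip(repeat(domain_path), candidates):
    check_and_analyze(*args)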
def __init__(self):
    CrawlPlugin.__init__(self)

    # Internal variables
    self._compiled_ignore_re = None
    self._compiled_follow_re = None
    self._broken_links = DiskSet()
    self._first_run = True
    self._known_variants = VariantDB()
    self._already_filled_form = ScalableBloomFilter()

    # User configured variables
    self._ignore_regex = ''
    self._follow_regex = '.*'
    self._only_forward = False
    self._compile_re()
def test_thread_safe(self):
    ds = DiskSet()

    def worker(range_inst):
        for i in range_inst:
            ds.add(i)

    threads = []
    _min = 0
    add_dups = False

    for _max in xrange(0, 1100, 100):
        th = threading.Thread(target=worker, args=(xrange(_min, _max),))
        threads.append(th)

        # For testing the uniqueness of DiskSets
        add_dups = not add_dups
        if add_dups:
            th = threading.Thread(target=worker, args=(xrange(_min, _max),))
            threads.append(th)

        _min = _max

    for th in threads:
        th.start()

    for th in threads:
        th.join()

    for i in xrange(0, 1000):
        self.assertTrue(i in ds, i)

    ds_as_list = list(ds)
    self.assertEqual(len(ds_as_list), len(set(ds_as_list)))

    ds_as_list.sort()
    self.assertEqual(ds_as_list, range(1000))
class error_500(GrepPlugin):
    '''
    Grep every page for error 500 pages that haven't been identified as bugs
    by other plugins.

    :author: Andres Riancho ([email protected])
    '''

    IGNORE_CODES = (404, 403, 401, 405, 400, 501)
    FALSE_POSITIVE_STRINGS = ('<h1>Bad Request (Invalid URL)</h1>',)

    def __init__(self):
        GrepPlugin.__init__(self)

        self._error_500_responses = DiskSet()

    def grep(self, request, response):
        '''
        Plugin entry point, identify which requests generated a 500 error.

        :param request: The HTTP request object.
        :param response: The HTTP response object
        :return: None
        '''
        if response.is_text_or_html() \
        and response.get_code() > 400 \
        and response.get_code() < 600 \
        and response.get_code() not in self.IGNORE_CODES \
        and not self._is_false_positive(response):
            self._error_500_responses.add((request, response.id))

    def _is_false_positive(self, response):
        '''
        Filters out some false positives like this one:

        This false positive is generated by IIS when I send a URL that's
        "odd". Some examples of URLs that trigger this false positive:
            - http://127.0.0.2/ext.ini.%00.txt
            - http://127.0.0.2/%00/
            - http://127.0.0.2/%0a%0a<script>alert(\Vulnerable\)</script>.jsp

        :return: True if the response is a false positive.
        '''
        for fps in self.FALSE_POSITIVE_STRINGS:
            if fps in response.get_body():
                return True

        return False

    def end(self):
        '''
        This method is called when the plugin won't be used anymore.

        The real job of this plugin is done here, where I will try to see if
        one of the error_500 responses was not identified as a vuln by one of
        my audit plugins.
        '''
        all_vulns = kb.kb.get_all_vulns()
        all_vulns_tuples = [(v.get_uri(), v.get_dc()) for v in all_vulns]

        for request, error_500_response_id in self._error_500_responses:
            if (request.get_uri(), request.get_dc()) not in all_vulns_tuples:
                # Found a 500 error that wasn't identified!
                desc = 'An unidentified web application error (HTTP response'\
                       ' code 500) was found at: "%s". Enable all plugins and'\
                       ' try again, if the vulnerability still is not'\
                       ' identified, please verify manually and report it to'\
                       ' the w3af developers.'
                desc = desc % request.get_url()

                v = Vuln('Unhandled error in web application', desc,
                         severity.MEDIUM, error_500_response_id,
                         self.get_name())
                v.set_uri(request.get_uri())

                self.kb_append_uniq(self, 'error_500', v, 'VAR')

        self._error_500_responses.cleanup()

    def get_long_desc(self):
        '''
        :return: A DETAILED description of the plugin functions and features.
        '''
        return '''
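The correlation in end() boils down to a membership test on (uri, dc) tuples: a stored 500 response is reported only when no known vuln shares its pair. A minimal sketch with plain tuples standing in for the real URI and data container objects:

# Hypothetical data: (uri, data_container) pairs for the known vulns and
# for the 500 responses collected by grep().
all_vulns_tuples = [('/login', 'user=a'), ('/search', 'q=b')]
error_500_pairs = [('/login', 'user=a'), ('/admin', 'id=1')]

unidentified = [pair for pair in error_500_pairs
                if pair not in all_vulns_tuples]
# unidentified == [('/admin', 'id=1')] -> would be reported as 'error_500'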
class dir_file_bruter(CrawlPlugin):
    '''
    Finds Web server directories and files by bruteforcing.

    :author: Jon Rose ( [email protected] )
    :author: Andres Riancho ( [email protected] )
    :author: Tomas Velazquez
    '''
    def __init__(self):
        CrawlPlugin.__init__(self)

        # User configured parameters
        self._dir_list = os.path.join('plugins', 'crawl', 'dir_file_bruter',
                                      'common_dirs_small.db')
        self._file_list = os.path.join('plugins', 'crawl', 'dir_file_bruter',
                                       'common_files_small.db')

        self._bf_directories = True
        self._bf_files = False
        self._be_recursive = False

        # Internal variables
        self._exec = True
        self._already_tested = DiskSet()

    def crawl(self, fuzzable_request):
        '''
        Get the file and parse it.

        :param fuzzable_request: A fuzzable_request instance that contains
                                 (among other things) the URL to test.
        '''
        if not self._exec:
            raise w3afRunOnce()
        else:
            domain_path = fuzzable_request.get_url().get_domain_path()

            # Should I run more than once?
            if not self._be_recursive:
                self._exec = False

            if domain_path not in self._already_tested:
                self._already_tested.add(domain_path)
                self._bruteforce_directories(domain_path)

    def _dir_name_generator(self, base_path):
        '''
        Simple generator that returns the names of the directories and files
        to test. It extracts the information from the user configured
        wordlist parameter.

        @yields: (A string with the directory or file name,
                  a URL object with the dir or file name)
        '''
        if self._bf_directories:
            for directory_name in file(self._dir_list):
                directory_name = directory_name.strip()

                # ignore comments and empty lines
                if directory_name and not directory_name.startswith('#'):
                    try:
                        dir_url = base_path.url_join(directory_name + '/')
                    except ValueError, ve:
                        msg = 'The "%s" line at "%s" generated an ' \
                              'invalid URL: %s'
                        om.out.debug(msg % (directory_name, self._dir_list,
                                            ve))
                    else:
                        yield directory_name, dir_url

        if self._bf_files:
            for file_name in file(self._file_list):
                file_name = file_name.strip()

                # ignore comments and empty lines
                if file_name and not file_name.startswith('#'):
                    try:
                        dir_url = base_path.url_join(file_name)
                    except ValueError, ve:
                        msg = 'The "%s" line at "%s" generated an ' \
                              'invalid URL: %s'
                        om.out.debug(msg % (file_name, self._file_list, ve))
                    else:
                        yield file_name, dir_url
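The wordlist handling in _dir_name_generator() reduces to one idiom: strip each line, then skip blanks and '#' comments. A self-contained sketch of just that filter, with a hypothetical helper name:

def iter_wordlist(wordlist_path):
    # Yield usable entries from a wordlist, ignoring comments and blanks.
    for line in file(wordlist_path):
        line = line.strip()
        if line and not line.startswith('#'):
            yield line

Each yielded name is then url_join()ed onto the base path, with ValueError handled for entries that cannot form a valid URL.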
def test_update(self):
    ds = DiskSet()
    ds.add(1)
    ds.update([2, 3, 1])

    self.assertEqual(list(ds), [1, 2, 3])
class find_captchas(CrawlPlugin):
    '''
    Identify captcha images on web pages.

    :author: Andres Riancho ([email protected])
    '''
    def __init__(self):
        CrawlPlugin.__init__(self)

        self._captchas_found = DiskSet()

    def crawl(self, fuzzable_request):
        '''
        Find CAPTCHA images.

        :param fuzzable_request: A fuzzable_request instance that contains
                                 (among other things) the URL to test.
        '''
        result, captchas = self._identify_captchas(fuzzable_request)

        if result:
            for captcha in captchas:
                desc = 'Found a CAPTCHA image at: "%s".' % captcha.img_src
                response_ids = [response.id for response in
                                captcha.http_responses]

                i = Info('Captcha image detected', desc, response_ids,
                         self.get_name())
                i.set_uri(captcha.img_src)

                kb.kb.append(self, 'CAPTCHA', i)
                om.out.information(i.get_desc())

    def _identify_captchas(self, fuzzable_request):
        '''
        :return: A tuple with the following information:
                     * True indicating that the page has CAPTCHAs
                     * A list with tuples that contain:
                         * The CAPTCHA image source
                         * The http responses used to verify that the image
                           was indeed a CAPTCHA
        '''
        found_captcha = False
        captchas = []

        # GET the document, and fetch the images
        images_1 = self._get_images(fuzzable_request)

        # Re-GET the document, and fetch the images
        images_2 = self._get_images(fuzzable_request)

        # If the number of images in each response is different, don't even
        # bother to perform any analysis since our simplistic approach will
        # fail.
        # TODO: Add something more advanced.
        if len(images_1) == len(images_2):

            not_in_2 = []

            for img_src_1, img_hash_1, http_response_1 in images_1:
                for _, img_hash_2, http_response_2 in images_2:
                    if img_hash_1 == img_hash_2:
                        # The image is in both lists, can't be a CAPTCHA
                        break
                else:
                    not_in_2.append((img_src_1, img_hash_1,
                                     [http_response_1, http_response_2]))

            # Results
            #
            # TODO: This allows for more than one CAPTCHA in the same page.
            #       Does that make sense? When that's found, should I simply
            #       declare defeat and don't report anything?
            for img_src, _, http_responses in not_in_2:
                CaptchaInfo = namedtuple('CaptchaInfo', ['img_src',
                                                         'http_responses'])
                img_src = img_src.uri2url()

                if img_src not in self._captchas_found:
                    self._captchas_found.add(img_src)
                    found_captcha = True

                    captchas.append(CaptchaInfo(img_src, http_responses))

        return found_captcha, captchas

    def _get_images(self, fuzzable_request):
        '''
        Get all img tags and retrieve the src.

        :param fuzzable_request: The request to modify
        :return: A list with tuples containing
                     (img_src, image_hash, http_response)
        '''
        res = []

        try:
            response = self._uri_opener.GET(fuzzable_request.get_uri(),
                                            cache=False)
        except:
            om.out.debug('Failed to retrieve the page for finding captchas.')
        else:
            # Do not use parser_cache here, it's not good since CAPTCHA
            # implementations *might* change the image name for each request
            # of the HTML
            #dp = parser_cache.dpc.get_document_parser_for( response )
            try:
                document_parser = DocumentParser.DocumentParser(response)
            except w3afException:
                return []

            image_path_list = document_parser.get_references_of_tag('img')

            GET = self._uri_opener.GET
            sha1 = hashlib.sha1

            result_iter = self.worker_pool.imap_unordered(GET,
                                                          image_path_list)

            for image_response in result_iter:
                if image_response.is_image():
                    img_src = image_response.get_uri()
                    img_hash = sha1(image_response.get_body()).hexdigest()

                    res.append((img_src, img_hash, response))

        return res

    def end(self):
        self._captchas_found.cleanup()

    def get_long_desc(self):
        '''
        :return: A DETAILED description of the plugin functions and features.
        '''
        return '''
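The detection idea in _identify_captchas() is that a CAPTCHA's bytes change between two fetches of the same page, so its hash appears in the first image list but not in the second. A standalone sketch of that hash-difference logic, with dicts of fabricated hashes standing in for real HTTP responses:

import hashlib

def hash_body(body):
    return hashlib.sha1(body).hexdigest()

# Hypothetical results of fetching the same page twice: the static logo
# hashes identically, the CAPTCHA endpoint serves different bytes each time.
first_fetch = {'/img/logo.png': hash_body('static-bytes'),
               '/img/captcha.php': hash_body('random-bytes-1')}
second_fetch = {'/img/logo.png': hash_body('static-bytes'),
                '/img/captcha.php': hash_body('random-bytes-2')}

candidates = [src for src, img_hash in first_fetch.items()
              if second_fetch.get(src) != img_hash]
# candidates == ['/img/captcha.php']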
class DBKnowledgeBase(BasicKnowledgeBase):
    '''
    This class saves the data that is sent to it by plugins. It is the only
    way in which plugins can exchange information. Data is stored in a DB.

    :author: Andres Riancho ([email protected])
    '''

    def __init__(self):
        super(DBKnowledgeBase, self).__init__()

        self.urls = DiskSet()
        self.fuzzable_requests = DiskSet()

        self.db = get_default_persistent_db_instance()

        columns = [('location_a', 'TEXT'),
                   ('location_b', 'TEXT'),
                   ('uniq_id', 'TEXT'),
                   ('pickle', 'BLOB')]

        self.table_name = rand_alpha(30)
        self.db.create_table(self.table_name, columns)
        self.db.create_index(self.table_name, ['location_a', 'location_b'])
        self.db.create_index(self.table_name, ['uniq_id',])
        self.db.commit()

        # TODO: Why doesn't this work with a WeakValueDictionary?
        self.observers = {}       #WeakValueDictionary()
        self.type_observers = {}  #WeakValueDictionary()
        self.url_observers = []
        self._observer_id = 0

    def clear(self, location_a, location_b):
        location_a = self._get_real_name(location_a)

        query = "DELETE FROM %s WHERE location_a = ? and location_b = ?"
        params = (location_a, location_b)
        self.db.execute(query % self.table_name, params)

    def raw_write(self, location_a, location_b, value):
        '''
        This method saves value to (location_a, location_b) but previously
        clears any pre-existing values.
        '''
        if isinstance(value, Info):
            raise TypeError('Use append or append_uniq to store'
                            ' vulnerabilities')

        location_a = self._get_real_name(location_a)

        self.clear(location_a, location_b)
        self.append(location_a, location_b, value, ignore_type=True)

    def raw_read(self, location_a, location_b):
        '''
        This method reads the value from (location_a, location_b)
        '''
        location_a = self._get_real_name(location_a)
        result = self.get(location_a, location_b, check_types=False)

        if len(result) > 1:
            msg = 'Incorrect use of raw_write/raw_read, found %s rows.'
            raise RuntimeError(msg % result)
        elif len(result) == 0:
            return []
        else:
            return result[0]

    def _get_uniq_id(self, obj):
        if isinstance(obj, Info):
            return obj.get_uniq_id()
        else:
            if isinstance(obj, collections.Iterable):
                concat_all = ''.join([str(i) for i in obj])
                return str(hash(concat_all))
            else:
                return str(hash(obj))

    def append(self, location_a, location_b, value, ignore_type=False):
        '''
        This method appends value under the (location_a, location_b) location.
        '''
        if not ignore_type and not isinstance(value, (Info, Shell)):
            msg = 'You MUST use raw_write/raw_read to store non-info objects'\
                  ' to the KnowledgeBase.'
            raise TypeError(msg)

        location_a = self._get_real_name(location_a)

        uniq_id = self._get_uniq_id(value)
        pickled_obj = cPickle.dumps(value)

        t = (location_a, location_b, uniq_id, pickled_obj)
        query = "INSERT INTO %s VALUES (?, ?, ?, ?)" % self.table_name
        self.db.execute(query, t)

        self._notify(location_a, location_b, value)

    def get(self, location_a, location_b, check_types=True):
        '''
        :param location_a: The plugin that saved the data to the kb.info
                           Typically the name of the plugin, but could also
                           be the plugin instance.

        :param location_b: The name of the variables under which the vuln
                           objects were saved. Typically the same name of the
                           plugin, or something like "vulns", "errors", etc.
                           In most cases this is NOT None. When set to None,
                           a dict with all the vuln objects found by the
                           plugin_name is returned.

        :return: Returns the data that was saved by another plugin.
        '''
        location_a = self._get_real_name(location_a)

        if location_b is None:
            query = 'SELECT pickle FROM %s WHERE location_a = ?'
            params = (location_a,)
        else:
            query = 'SELECT pickle FROM %s WHERE location_a = ?'\
                    ' and location_b = ?'
            params = (location_a, location_b)

        result_lst = []

        results = self.db.select(query % self.table_name, params)
        for r in results:
            obj = cPickle.loads(r[0])

            if check_types and not isinstance(obj, (Info, Shell)):
                raise TypeError('Use raw_write and raw_read to query the'
                                ' knowledge base for non-Info objects')

            result_lst.append(obj)

        return result_lst

    def get_by_uniq_id(self, uniq_id):
        query = 'SELECT pickle FROM %s WHERE uniq_id = ?'
        params = (uniq_id,)

        result = self.db.select_one(query % self.table_name, params)

        if result is not None:
            result = cPickle.loads(result[0])

        return result

    def add_observer(self, location_a, location_b, observer):
        '''
        Add the observer function to the observer list. The function will be
        called when there is a change in (location_a, location_b).

        You can use None in location_a or location_b as wildcards.

        The observer function needs to be a function which takes three
        params:
            * location_a
            * location_b
            * value that's added to the kb location

        :return: None
        '''
        if not isinstance(location_a, (basestring, types.NoneType)) or \
        not isinstance(location_b, (basestring, types.NoneType)):
            raise TypeError('Observer locations need to be strings or None.')

        observer_id = self.get_observer_id()
        self.observers[(location_a, location_b, observer_id)] = observer

    def add_types_observer(self, type_filter, observer):
        '''
        Add the observer function to the list of functions to be called when
        a new object that is of type "type_filter" is added to the KB.

        The type_filter must be one of Info, Vuln or Shell.

        :return: None
        '''
        if type_filter not in (Info, Vuln, Shell):
            msg = 'The type_filter needs to be one of Info, Vuln or Shell'
            raise TypeError(msg)

        observer_id = self.get_observer_id()
        self.type_observers[(type_filter, observer_id)] = observer

    def get_observer_id(self):
        self._observer_id += 1
        return self._observer_id

    def _notify(self, location_a, location_b, value):
        '''
        Call the observer if the location_a/location_b matches with the
        configured observers.

        :return: None
        '''
        # Note that I copy the items list in order to iterate though it
        # without any issues like the size changing
        for (obs_loc_a, obs_loc_b, _), observer in self.observers.items()[:]:

            if obs_loc_a is None and obs_loc_b is None:
                observer(location_a, location_b, value)
                continue

            if obs_loc_a == location_a and obs_loc_b is None:
                observer(location_a, location_b, value)
                continue

            if obs_loc_a == location_a and obs_loc_b == location_b:
                observer(location_a, location_b, value)
                continue

        for (type_filter, _), observer in self.type_observers.items()[:]:
            if isinstance(value, type_filter):
                observer(location_a, location_b, value)

    def get_all_entries_of_class(self, klass):
        '''
        :return: A list of all objects of class == klass that are saved in
                 the kb.
        '''
        query = 'SELECT pickle FROM %s'
        results = self.db.select(query % self.table_name)

        result_lst = []

        for r in results:
            obj = cPickle.loads(r[0])
            if isinstance(obj, klass):
                result_lst.append(obj)

        return result_lst

    def dump(self):
        result_dict = {}

        query = 'SELECT location_a, location_b, pickle FROM %s'
        results = self.db.select(query % self.table_name)

        for location_a, location_b, pickle in results:
            obj = cPickle.loads(pickle)

            if location_a not in result_dict:
                result_dict[location_a] = {location_b: [obj,]}
            elif location_b not in result_dict[location_a]:
                result_dict[location_a][location_b] = [obj,]
            else:
                result_dict[location_a][location_b].append(obj)

        return result_dict

    def cleanup(self):
        '''
        Cleanup internal data.
        '''
        self.db.execute("DELETE FROM %s WHERE 1=1" % self.table_name)

        # Remove the old, create new.
        self.urls.cleanup()
        self.urls = DiskSet()

        self.fuzzable_requests.cleanup()
        self.fuzzable_requests = DiskSet()

        self.observers.clear()

    def remove(self):
        self.db.drop_table(self.table_name)
        self.urls.cleanup()
        self.fuzzable_requests.cleanup()
        self.observers.clear()

    def get_all_known_urls(self):
        '''
        :return: A DiskSet with all the known URLs as URL objects.
        '''
        return self.urls

    def add_url_observer(self, observer):
        self.url_observers.append(observer)

    def _notify_url_observers(self, new_url):
        '''
        Call the observer with new_url.

        :return: None
        '''
        # Note that I copy the items list in order to iterate though it
        # without any issues like the size changing
        for observer in self.url_observers[:]:
            observer(new_url)

    def add_url(self, url):
        '''
        :return: True if the URL was previously unknown
        '''
        if not isinstance(url, URL):
            msg = 'add_url requires a URL as parameter, got %s instead.'
            raise TypeError(msg % type(url))

        self._notify_url_observers(url)
        return self.urls.add(url)

    def get_all_known_fuzzable_requests(self):
        '''
        :return: A DiskSet with all the known FuzzableRequest objects.
        '''
        return self.fuzzable_requests

    def add_fuzzable_request(self, fuzzable_request):
        '''
        :return: True if the FuzzableRequest was previously unknown
        '''
        if not isinstance(fuzzable_request, FuzzableRequest):
            msg = 'add_fuzzable_request requires a FuzzableRequest as'\
                  ' parameter, got %s instead.'
            raise TypeError(msg % type(fuzzable_request))

        self.add_url(fuzzable_request.get_url())
        return self.fuzzable_requests.add(fuzzable_request)
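A minimal usage sketch tying together two behaviors documented above: add_observer() accepts None wildcards that match any location, and add_url() returns a true value only for previously-unknown URLs. The observer function and URLs are hypothetical, and instantiating DBKnowledgeBase is assumed to work as in the constructor above:

def log_change(location_a, location_b, value):
    # Called on append() for every KB write, since both locations are
    # None wildcards.
    print 'kb change: %s/%s -> %r' % (location_a, location_b, value)

kb_instance = DBKnowledgeBase()
kb_instance.add_observer(None, None, log_change)

url = URL('http://w3af.org/')
if kb_instance.add_url(url):
    pass  # first sighting: schedule further work
if not kb_instance.add_url(url):
    pass  # duplicate: nothing to do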