def test_get(self):
    '''DiskDict.get must honor defaults the same way dict.get does.'''
    dd = DiskDict()
    dd[0] = 'abc'

    # Key present: the stored value comes back, default (if any) is ignored
    self.assertEqual(dd.get(0), 'abc')
    self.assertEqual(dd.get(0, 1), 'abc')

    # Key missing: the supplied default comes back
    self.assertEqual(dd.get(1, 2), 2)
class html_comments(GrepPlugin):
    '''
    Extract and analyze HTML comments.

    :author: Andres Riancho ([email protected])
    '''
    # NOTE(review): the closing-tag part only matches single-letter tags
    # (</b>, </i>, ...); kept as-is to avoid changing what gets reported.
    HTML_RE = re.compile('<[a-zA-Z]*.*?>.*?</[a-zA-Z]>')

    # Words that make a comment worth reporting (English, Spanish and
    # some Portuguese terms).
    INTERESTING_WORDS = (
        'user', 'pass', 'xxx', 'fix', 'bug', 'broken', 'oops', 'hack',
        'caution', 'todo', 'note', 'warning', '!!!', '???', 'shit',
        'stupid', 'tonto', 'porqueria', 'ciudado', 'usuario', 'contrase',
        'puta', 'secret', '@', 'email', 'security', 'captcha', 'pinga',
        'cojones',
        # some in Portuguese
        'banco', 'bradesco', 'itau', 'visa', 'bancoreal',
        u'transfêrencia', u'depósito', u'cartão', u'crédito',
        'dados pessoais'
    )

    _multi_in = multi_in(INTERESTING_WORDS)

    def __init__(self):
        GrepPlugin.__init__(self)

        # Internal variables
        self._comments = DiskDict()
        self._already_reported_interesting = ScalableBloomFilter()

    def grep(self, request, response):
        '''
        Plugin entry point, parse those comments!

        :param request: The HTTP request object.
        :param response: The HTTP response object
        :return: None
        '''
        if not response.is_text_or_html():
            return

        try:
            dp = parser_cache.dpc.get_document_parser_for(response)
        except w3afException:
            return

        for comment in dp.get_comments():
            # These next two lines fix this issue:
            # audit.ssi + grep.html_comments + web app with XSS = false positive
            if request.sent(comment):
                continue

            # show nice comments ;)
            comment = comment.strip()

            if self._is_new(comment, response):
                self._interesting_word(comment, request, response)
                self._html_in_comment(comment, request, response)

    def _interesting_word(self, comment, request, response):
        '''
        Find interesting words in HTML comments and report them once per
        (word, URL) pair.
        '''
        comment = comment.lower()

        # BUGFIX: the original code queried response.body here, which made
        # the lower-cased comment dead code and reported words found
        # anywhere in the response instead of inside the comment itself.
        for word in self._multi_in.query(comment):
            if (word, response.get_url()) not in self._already_reported_interesting:
                desc = 'A comment with the string "%s" was found in: "%s".'\
                       ' This could be interesting.'
                desc = desc % (word, response.get_url())

                i = Info('Interesting HTML comment', desc,
                         response.id, self.get_name())
                i.set_dc(request.get_dc())
                i.set_uri(response.get_uri())
                i.add_to_highlight(word)

                kb.kb.append(self, 'interesting_comments', i)
                om.out.information(i.get_desc())

                self._already_reported_interesting.add(
                    (word, response.get_url()))

    def _html_in_comment(self, comment, request, response):
        '''
        Find HTML code in HTML comments (commented-out markup is often
        forgotten functionality).
        '''
        html_in_comment = self.HTML_RE.search(comment)

        if html_in_comment and \
        (comment, response.get_url()) not in self._already_reported_interesting:
            # There is HTML code in the comment.
            comment = comment.replace('\n', '')
            comment = comment.replace('\r', '')

            desc = 'A comment with the string "%s" was found in: "%s".'\
                   ' This could be interesting.'
            desc = desc % (comment, response.get_url())

            i = Info('HTML comment contains HTML code', desc,
                     response.id, self.get_name())
            i.set_dc(request.get_dc())
            i.set_uri(response.get_uri())
            i.add_to_highlight(html_in_comment.group(0))

            kb.kb.append(self, 'html_comment_hides_html', i)
            om.out.information(i.get_desc())

            self._already_reported_interesting.add(
                (comment, response.get_url()))

    def _is_new(self, comment, response):
        '''
        Make sure that we perform a thread safe check on the self._comments
        dict, in order to avoid duplicates.

        :return: True if the (comment, URL) pair was not seen before.
        '''
        with self._plugin_lock:
            #pylint: disable=E1103
            comment_data = self._comments.get(comment, None)
            if comment_data is None:
                self._comments[comment] = [(response.get_url(), response.id), ]
                return True
            else:
                if response.get_url() not in [x[0] for x in comment_data]:
                    comment_data.append((response.get_url(), response.id))
                    self._comments[comment] = comment_data
                    return True
            #pylint: enable=E1103

        return False

    def end(self):
        '''
        This method is called when the plugin wont be used anymore.

        :return: None
        '''
        inform = []
        for comment in self._comments.iterkeys():
            urls_with_this_comment = self._comments[comment]
            stick_comment = ' '.join(comment.split())
            if len(stick_comment) > 40:
                msg = 'A comment with the string "%s..." (and %s more bytes)'\
                      ' was found on these URL(s):'
                om.out.information(
                    msg % (stick_comment[:40], str(len(stick_comment) - 40)))
            else:
                msg = 'A comment containing "%s" was found on these URL(s):'
                om.out.information(msg % (stick_comment))

            for url, request_id in urls_with_this_comment:
                inform.append('- ' + url +
                              ' (request with id: ' + str(request_id) + ')')

            inform.sort()
            for i in inform:
                om.out.information(i)

        self._comments.cleanup()

    def get_long_desc(self):
        '''
        :return: A DETAILED description of the plugin functions and features.
        '''
        # NOTE(review): the long description string was truncated in this
        # copy of the source; reconstructed below — confirm against upstream.
        return '''
        This plugin greps every page for HTML comments; comments that
        contain interesting words (password, user, fixme, ...) or that
        hide HTML code are specially reported.
        '''
class VariantDB(object):
    '''
    Count how many "variants" of each normalized reference have been seen,
    so callers can stop requesting endless variations of the same URL.
    '''

    def __init__(self, max_variants=5):
        # Disk-backed counter: normalized reference -> number of variants seen
        self._disk_dict = DiskDict()
        self._db_lock = threading.RLock()
        self.max_variants = max_variants

    def append(self, reference):
        '''
        Called when a new reference is found and we proved that new
        variants are still needed.

        :param reference: The reference (as a URL object) to add. This
                          method will "normalize" it before adding it to
                          the internal shelve.
        '''
        clean_reference = self._clean_reference(reference)

        with self._db_lock:
            # Idiom: a single get() with a default replaces the explicit
            # None-check branches of the original implementation.
            self._disk_dict[clean_reference] = \
                self._disk_dict.get(clean_reference, 0) + 1

    def _clean_reference(self, reference):
        '''
        This method is VERY dependent on the are_variants method from
        core.data.request.variant_identification , make sure to remember
        that when changing stuff here or there.

        What this method does is to "normalize" any input reference string
        so that they can be compared very simply using string match.
        '''
        res = reference.get_domain_path() + reference.get_file_name()

        if reference.has_query_string():
            res += '?'
            qs = reference.querystring.copy()

            for key in qs:
                value_list = qs[key]
                for i, value in enumerate(value_list):
                    # Collapse concrete values into type tags so that
                    # ?id=1 and ?id=2 normalize to the same string.
                    if value.isdigit():
                        qs[key][i] = 'number'
                    else:
                        qs[key][i] = 'string'

            res += str(qs)

        return res

    def need_more_variants(self, reference):
        '''
        :return: True if there are not enough variants associated with
                 this reference in the DB.
        '''
        clean_reference = self._clean_reference(reference)

        # I believe this is atomic enough...
        count = self._disk_dict.get(clean_reference, 0)
        return count < self.max_variants
class html_comments(GrepPlugin):
    '''
    Extract and analyze HTML comments.

    :author: Andres Riancho ([email protected])
    '''
    # NOTE(review): the closing-tag part only matches single-letter tags
    # (</b>, </i>, ...); kept as-is to avoid changing what gets reported.
    HTML_RE = re.compile('<[a-zA-Z]*.*?>.*?</[a-zA-Z]>')

    # Words that make a comment worth reporting (English, Spanish and
    # some Portuguese terms).
    INTERESTING_WORDS = (
        'user', 'pass', 'xxx', 'fix', 'bug', 'broken', 'oops', 'hack',
        'caution', 'todo', 'note', 'warning', '!!!', '???', 'shit',
        'stupid', 'tonto', 'porqueria', 'ciudado', 'usuario', 'contrase',
        'puta', 'secret', '@', 'email', 'security', 'captcha', 'pinga',
        'cojones',
        # some in Portuguese
        'banco', 'bradesco', 'itau', 'visa', 'bancoreal',
        u'transfêrencia', u'depósito', u'cartão', u'crédito',
        'dados pessoais')

    _multi_in = multi_in(INTERESTING_WORDS)

    def __init__(self):
        GrepPlugin.__init__(self)

        # Internal variables
        self._comments = DiskDict()
        self._already_reported_interesting = ScalableBloomFilter()

    def grep(self, request, response):
        '''
        Plugin entry point, parse those comments!

        :param request: The HTTP request object.
        :param response: The HTTP response object
        :return: None
        '''
        if not response.is_text_or_html():
            return

        try:
            dp = parser_cache.dpc.get_document_parser_for(response)
        except w3afException:
            return

        for comment in dp.get_comments():
            # These next two lines fix this issue:
            # audit.ssi + grep.html_comments + web app with XSS = false positive
            if request.sent(comment):
                continue

            # show nice comments ;)
            comment = comment.strip()

            if self._is_new(comment, response):
                self._interesting_word(comment, request, response)
                self._html_in_comment(comment, request, response)

    def _interesting_word(self, comment, request, response):
        '''
        Find interesting words in HTML comments and report them once per
        (word, URL) pair.
        '''
        comment = comment.lower()

        # BUGFIX: the original code queried response.body here, which made
        # the lower-cased comment dead code and reported words found
        # anywhere in the response instead of inside the comment itself.
        for word in self._multi_in.query(comment):
            if (word, response.get_url()
                ) not in self._already_reported_interesting:
                desc = 'A comment with the string "%s" was found in: "%s".'\
                       ' This could be interesting.'
                desc = desc % (word, response.get_url())

                i = Info('Interesting HTML comment', desc,
                         response.id, self.get_name())
                i.set_dc(request.get_dc())
                i.set_uri(response.get_uri())
                i.add_to_highlight(word)

                kb.kb.append(self, 'interesting_comments', i)
                om.out.information(i.get_desc())

                self._already_reported_interesting.add(
                    (word, response.get_url()))

    def _html_in_comment(self, comment, request, response):
        '''
        Find HTML code in HTML comments (commented-out markup is often
        forgotten functionality).
        '''
        html_in_comment = self.HTML_RE.search(comment)

        if html_in_comment and \
        (comment, response.get_url()) not in self._already_reported_interesting:
            # There is HTML code in the comment.
            comment = comment.replace('\n', '')
            comment = comment.replace('\r', '')

            desc = 'A comment with the string "%s" was found in: "%s".'\
                   ' This could be interesting.'
            desc = desc % (comment, response.get_url())

            i = Info('HTML comment contains HTML code', desc,
                     response.id, self.get_name())
            i.set_dc(request.get_dc())
            i.set_uri(response.get_uri())
            i.add_to_highlight(html_in_comment.group(0))

            kb.kb.append(self, 'html_comment_hides_html', i)
            om.out.information(i.get_desc())

            self._already_reported_interesting.add(
                (comment, response.get_url()))

    def _is_new(self, comment, response):
        '''
        Make sure that we perform a thread safe check on the self._comments
        dict, in order to avoid duplicates.

        :return: True if the (comment, URL) pair was not seen before.
        '''
        with self._plugin_lock:
            #pylint: disable=E1103
            comment_data = self._comments.get(comment, None)
            if comment_data is None:
                self._comments[comment] = [(response.get_url(), response.id), ]
                return True
            else:
                if response.get_url() not in [x[0] for x in comment_data]:
                    comment_data.append((response.get_url(), response.id))
                    self._comments[comment] = comment_data
                    return True
            #pylint: enable=E1103

        return False

    def end(self):
        '''
        This method is called when the plugin wont be used anymore.

        :return: None
        '''
        inform = []
        for comment in self._comments.iterkeys():
            urls_with_this_comment = self._comments[comment]
            stick_comment = ' '.join(comment.split())
            if len(stick_comment) > 40:
                msg = 'A comment with the string "%s..." (and %s more bytes)'\
                      ' was found on these URL(s):'
                om.out.information(
                    msg % (stick_comment[:40], str(len(stick_comment) - 40)))
            else:
                msg = 'A comment containing "%s" was found on these URL(s):'
                om.out.information(msg % (stick_comment))

            for url, request_id in urls_with_this_comment:
                inform.append('- ' + url +
                              ' (request with id: ' + str(request_id) + ')')

            inform.sort()
            for i in inform:
                om.out.information(i)

        self._comments.cleanup()

    def get_long_desc(self):
        '''
        :return: A DETAILED description of the plugin functions and features.
        '''
        # NOTE(review): the long description string was truncated in this
        # copy of the source; reconstructed below — confirm against upstream.
        return '''
        This plugin greps every page for HTML comments; comments that
        contain interesting words (password, user, fixme, ...) or that
        hide HTML code are specially reported.
        '''