def test_iterkeys(self): disk_dict = DiskDict() disk_dict['a'] = 'abc' disk_dict['b'] = 'abc' disk_dict['c'] = 'abc' self.assertEqual(set(disk_dict.iterkeys()), set(['a', 'b', 'c']))
class html_comments(GrepPlugin): """ Extract and analyze HTML comments. :author: Andres Riancho ([email protected]) """ HTML_RE = re.compile('<[a-zA-Z]*.*?>.*?</[a-zA-Z]>') INTERESTING_WORDS = ( # In English 'user', 'pass', 'xxx', 'fix', 'bug', 'broken', 'oops', 'hack', 'caution', 'todo', 'note', 'warning', '!!!', '???', 'shit', 'pass', 'password', 'passwd', 'pwd', 'secret', 'stupid', # In Spanish 'tonto', 'porqueria', 'cuidado', 'usuario', u'contraseña', 'puta', 'email', 'security', 'captcha', 'pinga', 'cojones', # some in Portuguese 'banco', 'bradesco', 'itau', 'visa', 'bancoreal', u'transfêrencia', u'depósito', u'cartão', u'crédito', 'dados pessoais' ) _multi_in = multi_in([' %s ' % w for w in INTERESTING_WORDS]) def __init__(self): GrepPlugin.__init__(self) # Internal variables self._comments = DiskDict() self._already_reported_interesting = ScalableBloomFilter() def grep(self, request, response): """ Plugin entry point, parse those comments! :param request: The HTTP request object. :param response: The HTTP response object :return: None """ if not response.is_text_or_html(): return try: dp = parser_cache.dpc.get_document_parser_for(response) except BaseFrameworkException: return for comment in dp.get_comments(): # These next two lines fix this issue: # audit.ssi + grep.html_comments + web app with XSS = false positive if request.sent(comment): continue if self._is_new(comment, response): self._interesting_word(comment, request, response) self._html_in_comment(comment, request, response) def _interesting_word(self, comment, request, response): """ Find interesting words in HTML comments """ comment = comment.lower() for word in self._multi_in.query(comment): if (word, response.get_url()) not in self._already_reported_interesting: desc = 'A comment with the string "%s" was found in: "%s".'\ ' This could be interesting.' desc = desc % (word, response.get_url()) i = Info('Interesting HTML comment', desc, response.id, self.get_name()) i.set_dc(request.get_dc()) i.set_uri(response.get_uri()) i.add_to_highlight(word) kb.kb.append(self, 'interesting_comments', i) om.out.information(i.get_desc()) self._already_reported_interesting.add((word, response.get_url())) def _html_in_comment(self, comment, request, response): """ Find HTML code in HTML comments """ html_in_comment = self.HTML_RE.search(comment) if html_in_comment and \ (comment, response.get_url()) not in self._already_reported_interesting: # There is HTML code in the comment. comment = comment.strip() comment = comment.replace('\n', '') comment = comment.replace('\r', '') comment = comment[:40] desc = 'A comment with the string "%s" was found in: "%s".'\ ' This could be interesting.' desc = desc % (comment, response.get_url()) i = Info('HTML comment contains HTML code', desc, response.id, self.get_name()) i.set_dc(request.get_dc()) i.set_uri(response.get_uri()) i.add_to_highlight(html_in_comment.group(0)) kb.kb.append(self, 'html_comment_hides_html', i) om.out.information(i.get_desc()) self._already_reported_interesting.add( (comment, response.get_url())) def _is_new(self, comment, response): """ Make sure that we perform a thread safe check on the self._comments dict, in order to avoid duplicates. """ with self._plugin_lock: #pylint: disable=E1103 comment_data = self._comments.get(comment, None) if comment_data is None: self._comments[comment] = [(response.get_url(), response.id), ] return True else: if response.get_url() not in [x[0] for x in comment_data]: comment_data.append((response.get_url(), response.id)) self._comments[comment] = comment_data return True #pylint: enable=E1103 return False def end(self): """ This method is called when the plugin wont be used anymore. :return: None """ inform = [] for comment in self._comments.iterkeys(): urls_with_this_comment = self._comments[comment] stick_comment = ' '.join(comment.split()) if len(stick_comment) > 40: msg = 'A comment with the string "%s..." (and %s more bytes)'\ ' was found on these URL(s):' om.out.information( msg % (stick_comment[:40], str(len(stick_comment) - 40))) else: msg = 'A comment containing "%s" was found on these URL(s):' om.out.information(msg % (stick_comment)) for url, request_id in urls_with_this_comment: inform.append('- ' + url + ' (request with id: ' + str(request_id) + ')') inform.sort() for i in inform: om.out.information(i) self._comments.cleanup() def get_long_desc(self): """ :return: A DETAILED description of the plugin functions and features. """ return """