class html_comments(GrepPlugin): """ Extract and analyze HTML comments. :author: Andres Riancho ([email protected]) """ HTML_RE = re.compile('<[a-zA-Z]*.*?>.*?</[a-zA-Z]>') INTERESTING_WORDS = ( # In English 'user', 'pass', 'xxx', 'fix', 'bug', 'broken', 'oops', 'hack', 'caution', 'todo', 'note', 'warning', '!!!', '???', 'shit', 'pass', 'password', 'passwd', 'pwd', 'secret', 'stupid', # In Spanish 'tonto', 'porqueria', 'cuidado', 'usuario', u'contraseña', 'puta', 'email', 'security', 'captcha', 'pinga', 'cojones', # some in Portuguese 'banco', 'bradesco', 'itau', 'visa', 'bancoreal', u'transfêrencia', u'depósito', u'cartão', u'crédito', 'dados pessoais' ) _multi_in = multi_in([' %s ' % w for w in INTERESTING_WORDS]) def __init__(self): GrepPlugin.__init__(self) # Internal variables self._comments = DiskDict(table_prefix='html_comments') self._already_reported = ScalableBloomFilter() def grep(self, request, response): """ Plugin entry point, parse those comments! :param request: The HTTP request object. :param response: The HTTP response object :return: None """ if not response.is_text_or_html(): return try: dp = parser_cache.dpc.get_document_parser_for(response) except BaseFrameworkException: return for comment in dp.get_comments(): # These next two lines fix this issue: # audit.ssi + grep.html_comments + web app with XSS = false positive if request.sent(comment): continue if self._is_new(comment, response): self._interesting_word(comment, request, response) self._html_in_comment(comment, request, response) def _interesting_word(self, comment, request, response): """ Find interesting words in HTML comments """ comment = comment.lower() for word in self._multi_in.query(comment): if (word, response.get_url()) in self._already_reported: continue desc = ('A comment with the string "%s" was found in: "%s".' 
' This could be interesting.') desc %= (word, response.get_url()) i = Info.from_fr('Interesting HTML comment', desc, response.id, self.get_name(), request) i.add_to_highlight(word) kb.kb.append(self, 'interesting_comments', i) om.out.information(i.get_desc()) self._already_reported.add((word, response.get_url())) def _html_in_comment(self, comment, request, response): """ Find HTML code in HTML comments """ html_in_comment = self.HTML_RE.search(comment) if html_in_comment is None: return if (comment, response.get_url()) in self._already_reported: return # There is HTML code in the comment. comment = comment.strip() comment = comment.replace('\n', '') comment = comment.replace('\r', '') comment = comment[:40] desc = ('A comment with the string "%s" was found in: "%s".' ' This could be interesting.') desc %= (comment, response.get_url()) i = Info.from_fr('HTML comment contains HTML code', desc, response.id, self.get_name(), request) i.set_uri(response.get_uri()) i.add_to_highlight(html_in_comment.group(0)) kb.kb.append(self, 'html_comment_hides_html', i) om.out.information(i.get_desc()) self._already_reported.add((comment, response.get_url())) def _is_new(self, comment, response): """ Make sure that we perform a thread safe check on the self._comments dict, in order to avoid duplicates. """ with self._plugin_lock: #pylint: disable=E1103 comment_data = self._comments.get(comment, None) response_url = response.get_url() if comment_data is None: self._comments[comment] = [(response_url, response.id)] return True else: for saved_url, response_id in comment_data: if response_url == saved_url: return False else: comment_data.append((response_url, response.id)) self._comments[comment] = comment_data return True #pylint: enable=E1103 def end(self): """ This method is called when the plugin wont be used anymore. 
:return: None """ for comment, url_request_id_lst in self._comments.iteritems(): stick_comment = ' '.join(comment.split()) if len(stick_comment) > 40: msg = ('A comment with the string "%s..." (and %s more bytes)' ' was found on these URL(s):') args = (stick_comment[:40], str(len(stick_comment) - 40)) om.out.information(msg % args) else: msg = 'A comment containing "%s" was found on these URL(s):' om.out.information(msg % stick_comment) inform = [] for url, request_id in url_request_id_lst: msg = '- %s (request with id: %s)' inform.append(msg % (url, request_id)) for i in sorted(inform): om.out.information(i) self._comments.cleanup() def get_long_desc(self): """ :return: A DETAILED description of the plugin functions and features. """ return """
class html_comments(GrepPlugin): """ Extract and analyze HTML comments. :author: Andres Riancho ([email protected]) """ HTML_RE = re.compile('<[a-zA-Z]*.*?>.*?</[a-zA-Z]>') INTERESTING_WORDS = ( # In English 'user', 'pass', 'xxx', 'fix', 'bug', 'broken', 'oops', 'hack', 'caution', 'todo', 'note', 'warning', '!!!', '???', 'shit', 'pass', 'password', 'passwd', 'pwd', 'secret', 'stupid', # In Spanish 'tonto', 'porqueria', 'cuidado', 'usuario', u'contraseña', 'puta', 'email', 'security', 'captcha', 'pinga', 'cojones', # some in Portuguese 'banco', 'bradesco', 'itau', 'visa', 'bancoreal', u'transfêrencia', u'depósito', u'cartão', u'crédito', 'dados pessoais') _multi_in = multi_in([' %s ' % w for w in INTERESTING_WORDS]) def __init__(self): GrepPlugin.__init__(self) # Internal variables self._comments = DiskDict(table_prefix='html_comments') self._already_reported = ScalableBloomFilter() def grep(self, request, response): """ Plugin entry point, parse those comments! :param request: The HTTP request object. :param response: The HTTP response object :return: None """ if not response.is_text_or_html(): return try: dp = parser_cache.dpc.get_document_parser_for(response) except BaseFrameworkException: return for comment in dp.get_comments(): # These next two lines fix this issue: # audit.ssi + grep.html_comments + web app with XSS = false positive if request.sent(comment): continue if self._is_new(comment, response): self._interesting_word(comment, request, response) self._html_in_comment(comment, request, response) def _interesting_word(self, comment, request, response): """ Find interesting words in HTML comments """ comment = comment.lower() for word in self._multi_in.query(comment): if (word, response.get_url()) in self._already_reported: continue desc = ('A comment with the string "%s" was found in: "%s".' 
' This could be interesting.') desc %= (word, response.get_url()) v = Vuln.from_fr('Interesting HTML comment', desc, severity.INFORMATION, response.id, self.get_name(), request) v.add_to_highlight(word) kb.kb.append(self, 'interesting_comments', v) self._already_reported.add((word, response.get_url())) def _html_in_comment(self, comment, request, response): """ Find HTML code in HTML comments """ html_in_comment = self.HTML_RE.search(comment) if html_in_comment is None: return if (comment, response.get_url()) in self._already_reported: return # There is HTML code in the comment. comment = comment.strip() comment = comment.replace('\n', '') comment = comment.replace('\r', '') comment = comment[:40] desc = ('A comment with the string "%s" was found in: "%s".' ' This could be interesting.') desc %= (comment, response.get_url()) v = Vuln.from_fr('HTML comment contains HTML code', desc, severity.INFORMATION, response.id, self.get_name(), request) v.set_uri(response.get_uri()) v.add_to_highlight(html_in_comment.group(0)) om.out.vulnerability(v.get_desc(), severity=severity.INFORMATION) kb.kb.append(self, 'html_comment_hides_html', v) self._already_reported.add((comment, response.get_url())) def _is_new(self, comment, response): """ Make sure that we perform a thread safe check on the self._comments dict, in order to avoid duplicates. """ with self._plugin_lock: #pylint: disable=E1103 comment_data = self._comments.get(comment, None) response_url = response.get_url() if comment_data is None: self._comments[comment] = [(response_url, response.id)] return True else: for saved_url, response_id in comment_data: if response_url == saved_url: return False else: comment_data.append((response_url, response.id)) self._comments[comment] = comment_data return True #pylint: enable=E1103 def end(self): """ This method is called when the plugin wont be used anymore. 
:return: None """ for comment, url_request_id_lst in self._comments.iteritems(): stick_comment = ' '.join(comment.split()) if len(stick_comment) > 40: msg = ('A comment with the string "%s..." (and %s more bytes)' ' was found on these URL(s):') args = (stick_comment[:40], str(len(stick_comment) - 40)) om.out.vulnerability(msg % args, severity=severity.INFORMATION) else: msg = 'A comment containing "%s" was found on these URL(s):' om.out.vulnerability(msg % stick_comment, severity=severity.INFORMATION) inform = [] for url, request_id in url_request_id_lst: msg = '- %s (request with id: %s)' inform.append(msg % (url, request_id)) for i in sorted(inform): om.out.vulnerability(i, severity=severity.INFORMATION) self._comments.cleanup() def get_long_desc(self): """ :return: A DETAILED description of the plugin functions and features. """ return """
class CachedDiskDict(object):
    """
    This data structure keeps the `max_in_memory` most frequently accessed
    keys in memory and stores the rest on disk.

    It is ideal for situations where a DiskDict is frequently accessed,
    fast read / writes are required, and items can take considerable amounts
    of memory.
    """
    # Value of the `default` parameter of get() which means "the caller did
    # not provide a default, raise KeyError for missing keys".
    _NO_DEFAULT = -456

    def __init__(self, max_in_memory=50, table_prefix=None):
        """
        :param max_in_memory: The max number of items to keep in memory
        :param table_prefix: Optional name which is included in the prefix
                             passed to the backing DiskDict
        """
        assert max_in_memory > 0, 'In-memory items must be > 0'

        table_prefix = self._get_table_prefix(table_prefix)

        self._max_in_memory = max_in_memory
        self._disk_dict = DiskDict(table_prefix=table_prefix)
        self._in_memory = dict()
        # key -> number of times the key was read, written or tested with `in`
        self._access_count = Counter()

    def cleanup(self):
        """
        Call cleanup() on the backing DiskDict.
        """
        self._disk_dict.cleanup()

    def _get_table_prefix(self, table_prefix):
        """
        :param table_prefix: The caller supplied prefix, or None
        :return: 'cached_disk_dict_' followed by the caller prefix (if any)
                 and the output of rand_alpha(16)
        """
        if table_prefix is None:
            table_prefix = 'cached_disk_dict_%s' % rand_alpha(16)
        else:
            args = (table_prefix, rand_alpha(16))
            table_prefix = 'cached_disk_dict_%s_%s' % args

        return table_prefix

    def get(self, key, default=_NO_DEFAULT):
        """
        :param key: The key to retrieve
        :param default: The value to return when the key does not exist
        :return: The value stored for key, or default
        :raise KeyError: When the key does not exist and no default was given
        """
        try:
            return self[key]
        except KeyError:
            # Compare by value: an identity test against an int literal
            # gives different results in different interpreters
            if default != self._NO_DEFAULT:
                return default

            raise

    def __getitem__(self, key):
        try:
            value = self._in_memory[key]
        except KeyError:
            # This will raise KeyError if k is not found, and that is OK
            # because we don't need to increase the access count when the
            # key doesn't exist
            value = self._disk_dict[key]

        self._increase_access_count(key)
        return value

    def _get_keys_for_memory(self):
        """
        :return: Generate the names of the keys that should be kept in memory.
                 For example, if `max_in_memory` is set to 2 and:

                    _in_memory: {1: None, 2: None}
                    _access_count: {1: 10, 2: 20, 3: 5}
                    _disk_dict: {3: None}

                Then the method will generate [1, 2].
        """
        most_common = self._access_count.most_common(self._max_in_memory)
        return [k for k, _ in most_common]

    def _increase_access_count(self, key):
        """
        Register one more access to key and rebalance memory / disk.

        :param key: The key that was accessed
        """
        self._access_count.update([key])

        keys_for_memory = self._get_keys_for_memory()

        # Evict first, so there is room for the key that might be promoted
        self._move_key_to_disk_if_needed(keys_for_memory)
        self._move_key_to_memory_if_needed(key, keys_for_memory)

    def _move_key_to_disk_if_needed(self, keys_for_memory):
        """
        Analyzes the current access count for the last accessed key and
        checks if any of the keys in memory should be moved to disk.

        At most one key is moved per call.

        :param keys_for_memory: The keys that should be in memory
        :return: The name of the key that was moved to disk, or None if
                 all the keys are still in memory.
        """
        for key in self._in_memory:

            if key in keys_for_memory:
                continue

            # The method returns right after changing the dict, thus the
            # iterator is never advanced over a modified dict
            try:
                value = self._in_memory.pop(key)
            except KeyError:
                return
            else:
                self._disk_dict[key] = value
                return key

    def _move_key_to_memory_if_needed(self, key, keys_for_memory):
        """
        Analyzes the current access count for the last accessed key and
        checks if any of the keys in disk should be moved to memory.

        :param key: The key that was last accessed
        :param keys_for_memory: The keys that should be in memory
        :return: The name of the key that was moved to memory, or None if
                 all the keys are still on disk.
        """
        # The key is already in memory, nothing to do here
        if key in self._in_memory:
            return

        # The key must not be in memory, nothing to do here
        if key not in keys_for_memory:
            return

        try:
            value = self._disk_dict.pop(key)
        except KeyError:
            return
        else:
            self._in_memory[key] = value
            return key

    def __setitem__(self, key, value):
        if key in self._in_memory:
            self._in_memory[key] = value

        elif len(self._in_memory) < self._max_in_memory:
            self._in_memory[key] = value

            # The key might live on disk (memory slots are freed when items
            # are deleted). Remove the old copy, otherwise the key would be
            # yielded twice while iterating and the old value would show up
            # again after deleting the in-memory item.
            try:
                del self._disk_dict[key]
            except KeyError:
                pass

        else:
            self._disk_dict[key] = value

        self._increase_access_count(key)

    def __delitem__(self, key):
        try:
            del self._in_memory[key]
        except KeyError:
            # This will raise KeyError if k is not found, and that is OK
            # because we don't need to increase the access count when the
            # key doesn't exist
            del self._disk_dict[key]

        try:
            del self._access_count[key]
        except KeyError:
            # Another thread removed this key
            pass

    def __contains__(self, key):
        if key in self._in_memory:
            self._increase_access_count(key)
            return True

        if key in self._disk_dict:
            self._increase_access_count(key)
            return True

        return False

    def __iter__(self):
        """
        Decided not to increase the access count when iterating through the
        items. In most cases the iteration will be performed on all items,
        thus increasing the access count +1 for each, which will leave all
        access counts +1, forcing no movements between memory and disk.
        """
        for key in self._in_memory:
            yield key

        for key in self._disk_dict:
            yield key

    def iteritems(self):
        """
        Yield (key, value) tuples, in-memory items first and then the items
        stored on disk. The access count is not modified.
        """
        for key, value in self._in_memory.iteritems():
            yield key, value

        for key, value in self._disk_dict.iteritems():
            yield key, value