def test_1557_random_number_of_results(self):
    """
    Pseudo-random number of vulnerabilities found in audit phase (xss)

    https://github.com/andresriancho/w3af/issues/1557
    """
    script = TEST_SCRIPT_1557 % (OUTPUT_PATH, get_wavsep_http())

    # Fix: the original `file(SCRIPT_PATH, "w").write(script)` leaked the
    # file object and relied on GC to flush/close it before w3af_console
    # reads the script; a context manager guarantees the flush.
    with open(SCRIPT_PATH, "w") as script_file:
        script_file.write(script)

    python_executable = sys.executable

    VULN_STRING = "A Cross Site Scripting vulnerability was found at"
    URL_VULN_RE = re.compile('%s: "(.*?)"' % VULN_STRING)

    all_previous_vulns = []

    # Fewer iterations on CI to keep build times down
    loops = 2 if is_running_on_ci() else 10

    for i in xrange(loops):
        print("Start run #%s" % i)
        found_vulns = set()

        p = subprocess.Popen(
            [python_executable, "w3af_console", "-n", "-s", SCRIPT_PATH],
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            stdin=subprocess.PIPE,
            shell=False,
            universal_newlines=True,
        )
        stdout, stderr = p.communicate()

        i_vuln_count = stdout.count(VULN_STRING)
        print("%s vulnerabilities found" % i_vuln_count)

        # Every run must find at least one vulnerability
        self.assertNotEqual(i_vuln_count, 0, stdout)

        # Collect the set of vulnerable URLs reported in this run
        for line in stdout.split("\n"):
            if VULN_STRING in line:
                found_vulns.add(URL_VULN_RE.search(line).group(1))

        # Every run must report exactly the same set of vulnerable URLs
        # as all of the previous runs (the bug was pseudo-random results)
        for previous_found in all_previous_vulns:
            self.assertEqual(found_vulns, previous_found)

        all_previous_vulns.append(found_vulns)
def test_1557_random_number_of_results(self):
    """
    Pseudo-random number of vulnerabilities found in audit phase (xss)

    https://github.com/andresriancho/w3af/issues/1557
    """
    script = TEST_SCRIPT_1557 % (OUTPUT_PATH, get_wavsep_http())

    # Fix: the original `file(SCRIPT_PATH, 'w').write(script)` leaked the
    # file object and relied on GC to flush/close it before w3af_console
    # reads the script; a context manager guarantees the flush.
    with open(SCRIPT_PATH, 'w') as script_file:
        script_file.write(script)

    python_executable = sys.executable

    VULN_STRING = 'A Cross Site Scripting vulnerability was found at'
    URL_VULN_RE = re.compile('%s: "(.*?)"' % VULN_STRING)

    all_previous_vulns = []

    # Fewer iterations on CI to keep build times down
    loops = 2 if is_running_on_ci() else 10

    for i in xrange(loops):
        print('Start run #%s' % i)
        found_vulns = set()

        p = subprocess.Popen(
            [python_executable, 'w3af_console', '-n', '-s', SCRIPT_PATH],
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            stdin=subprocess.PIPE,
            shell=False,
            universal_newlines=True)
        stdout, stderr = p.communicate()

        i_vuln_count = stdout.count(VULN_STRING)
        print('%s vulnerabilities found' % i_vuln_count)

        # Every run must find at least one vulnerability
        self.assertNotEqual(i_vuln_count, 0, stdout)

        # Collect the set of vulnerable URLs reported in this run
        for line in stdout.split('\n'):
            if VULN_STRING in line:
                found_vulns.add(URL_VULN_RE.search(line).group(1))

        # Every run must report exactly the same set of vulnerable URLs
        # as all of the previous runs (the bug was pseudo-random results)
        for previous_found in all_previous_vulns:
            self.assertEqual(found_vulns, previous_found)

        all_previous_vulns.append(found_vulns)
def _inner_func(*args, **kwds):
    # Only run the decorated function when executing on CI; anywhere
    # else the call is skipped and the wrapper returns None.
    if not is_running_on_ci():
        return None
    return decorated_func(*args, **kwds)
class MultiProcessingDocumentParser(object):
    """
    A document parser that performs all it's tasks in different processes and
    returns results to the main process.

    Also implements a parsing timeout just in case the parser enters an infinite
    loop.

    :author: Andres Riancho ([email protected])
    """
    DEBUG = core_profiling_is_enabled()

    # NOTE: Python 2 integer division; on a single-core box this yields 0,
    # so the `or 1` fallback guarantees at least one worker
    MAX_WORKERS = 2 if is_running_on_ci() else (multiprocessing.cpu_count() / 2) or 1

    # Increasing the timeout when profiling is enabled seems to fix issue #9713
    #
    # https://github.com/andresriancho/w3af/issues/9713
    PROFILING_ENABLED = (user_wants_memory_profiling() or
                         user_wants_pytracemalloc() or
                         user_wants_cpu_profiling())

    # in seconds
    PARSER_TIMEOUT = 60 * 3 if PROFILING_ENABLED else 10

    # Document parsers can go crazy on memory usage when parsing some very
    # specific HTML / PDF documents. Sometimes when this happens the operating
    # system does an out of memory (OOM) kill of a "randomly chosen" process.
    #
    # We limit the memory which can be used by parsing processes to this constant
    #
    # The feature was tested in test_pebble_limit_memory_usage.py
    MEMORY_LIMIT = get_memory_limit()

    def __init__(self):
        # The pool is created lazily by start_workers(); the RLock makes
        # the lazy creation safe when called from multiple threads
        self._pool = None
        self._start_lock = threading.RLock()

    def start_workers(self):
        """
        Start the pool and workers
        :return: The pool instance
        """
        with self._start_lock:
            if self._pool is None:
                # Start the process pool; each worker is recycled after
                # max_tasks parsed responses and has its memory capped
                log_queue = om.manager.get_in_queue()
                self._pool = ProcessPool(self.MAX_WORKERS,
                                         max_tasks=20,
                                         initializer=init_worker,
                                         initargs=(log_queue, self.MEMORY_LIMIT))

        return self._pool

    def stop_workers(self):
        """
        Stop the pool workers
        :return: None
        """
        if self._pool is not None:
            self._pool.stop()
            self._pool.join()
            self._pool = None

    def get_document_parser_for(self, http_response):
        """
        Get a document parser for http_response

        This parses the http_response in a pool worker.

        This method has two features:
            * We can kill the worker if the parser is taking too long
            * We can have different parsers

        :param http_response: The http response instance
        :return: An instance of DocumentParser
        :raises ScanMustStopException: when the pool management thread died
        :raises TimeoutError: when parsing took longer than PARSER_TIMEOUT
        :raises MemoryError: when the worker exceeded MEMORY_LIMIT
        """
        # Start the worker processes if needed
        self.start_workers()

        apply_args = (process_document_parser,
                      http_response,
                      self.DEBUG)

        # Push the task to the workers
        try:
            future = self._pool.schedule(apply_with_return_error,
                                         args=(apply_args,),
                                         timeout=self.PARSER_TIMEOUT)
        except RuntimeError, rte:
            # We get here when the pebble pool management thread dies and
            # suddenly starts answering all calls with:
            #
            # RuntimeError('Unexpected error within the Pool')
            #
            # The scan needs to stop because we can't parse any more
            # HTTP responses, which is a very critical part of the process
            msg = str(rte)
            raise ScanMustStopException(msg)

        try:
            parser_output = future.result()
        except TimeoutError:
            # pebble killed the worker for us; re-raise with a useful message
            msg = ('[timeout] The parser took more than %s seconds'
                   ' to complete parsing of "%s", killed it!')
            args = (self.PARSER_TIMEOUT, http_response.get_url())
            raise TimeoutError(msg % args)

        # We still need to perform some error handling here...
        if isinstance(parser_output, Error):
            if isinstance(parser_output.exc_value, MemoryError):
                msg = ('The parser exceeded the memory usage limit of %s bytes'
                       ' while trying to parse "%s". The parser was stopped in'
                       ' order to prevent OOM issues.')
                args = (self.MEMORY_LIMIT, http_response.get_url())
                om.out.debug(msg % args)
                raise MemoryError(msg % args)

            # Any other worker-side exception is re-raised in this process
            parser_output.reraise()

        # Success!
        return parser_output
class MultiProcessingDocumentParser(object):
    """
    A document parser that performs all it's tasks in different processes and
    returns results to the main process.

    Also implements a parsing timeout just in case the parser enters an infinite
    loop.

    :author: Andres Riancho ([email protected])
    """
    DEBUG = core_profiling_is_enabled()

    # NOTE: Python 2 integer division; `or 1` guarantees at least one worker
    MAX_WORKERS = 2 if is_running_on_ci() else (multiprocessing.cpu_count() / 2) or 1

    # Increasing the timeout when profiling is enabled seems to fix issue #9713
    #
    # https://github.com/andresriancho/w3af/issues/9713
    PROFILING_ENABLED = (user_wants_memory_profiling() or
                         user_wants_pytracemalloc() or
                         user_wants_cpu_profiling())

    # in seconds
    PARSER_TIMEOUT = 60 * 3 if PROFILING_ENABLED else 10

    def __init__(self):
        # Lazily-created process pool (see start_workers)
        self._pool = None
        # Shared dict mapping http-response hash -> worker pid, used by
        # _kill_parser_process to find the worker to SIGTERM
        self._processes = None
        self._start_lock = threading.RLock()

    def start_workers(self):
        """
        Start the pool and workers
        :return: The pool instance
        """
        with self._start_lock:
            if self._pool is None:

                # pylint: disable=E1101
                # Keep track of which pid is processing which http response
                self._processes = manager.dict()
                # pylint: enable=E1101

                # The pool
                log_queue = om.manager.get_in_queue()
                self._pool = ProcessPool(self.MAX_WORKERS,
                                         maxtasksperchild=20,
                                         initializer=init_worker,
                                         initargs=(log_queue,))

        return self._pool

    def stop_workers(self):
        """
        Stop the pool workers
        :return: None
        """
        if self._pool is not None:
            self._pool.terminate()
            self._pool = None

        if self._processes is not None:
            self._processes.clear()
            self._processes = None

    def _kill_parser_process(self, hash_string, http_response):
        """
        Kill the process that's handling the parsing of http_response which
        can be identified by hash_string

        :param hash_string: The hash for the http_response
        :param http_response: The HTTP response which is being parsed
        :return: None
        """
        # Near the timeout error, so we make sure that the pid is still
        # running our "buggy" input
        pid = self._processes.pop(hash_string, None)
        if pid is not None:
            try:
                os.kill(pid, signal.SIGTERM)
            except OSError, ose:
                # Best effort: the worker may have already exited
                msg = ('An error occurred while killing the parser'
                       ' process: "%s"')
                om.out.debug(msg % ose)

        msg = ('[timeout] The parser took more than %s seconds to complete'
               ' parsing of "%s", killed it!')

        if self.PROFILING_ENABLED:
            msg += (' You are running a profiling session which requires more'
                    ' CPU and resources to be run; the'
                    ' MultiProcessingDocumentParser failed to parse the HTML'
                    ' document. Try to increase the PARSER_TIMEOUT and try'
                    ' again.\n\n'
                    'This issue invalidates the profiling session!\n\n'
                    'See issue #9713 for more information'
                    ' https://github.com/andresriancho/w3af/issues/9713')

        # Timeouts are only errors when they invalidate a profiling session
        log_function = om.out.error if self.PROFILING_ENABLED else om.out.debug
        log_function(msg % (self.PARSER_TIMEOUT, http_response.get_url()))
import os
import multiprocessing

from w3af.core.controllers.ci.detect import is_running_on_ci


# All build artifacts land here; CircleCI exposes the directory via env var
ARTIFACT_DIR = os.environ.get('CIRCLE_ARTIFACTS', '/tmp/')
LOG_FILE = os.path.join(ARTIFACT_DIR, 'nosetests.log')

# How many nosetests commands to run at the same time
#
# At CircleCI I've got 32 cores to use, but don't want to use them all with
# nosetests (other important stuff like docker is running too), so I set a fixed
# value
MAX_WORKERS = 20 if is_running_on_ci() else max(multiprocessing.cpu_count() - 1, 2)

# How many tests to send to each process
#
# Usually lower numbers are better here. A high chunk size will usually lead to
# larger delays.
CHUNK_SIZE = 3

# Where the test ids will be stored
ID_FILE = os.path.join(ARTIFACT_DIR, 'noseids.pickle')
JSON_ID_FILE = os.path.join(ARTIFACT_DIR, 'noseids.json')

NOSETESTS = 'nosetests'

# Not using code coverage (--with-cov --cov-report=xml) due to:
class ParserCache(object):
    """
    This class is a document parser cache.

    :author: Andres Riancho ([email protected])
    """
    LRU_LENGTH = 40
    MAX_CACHEABLE_BODY_LEN = 1024 * 1024
    PARSER_TIMEOUT = 60  # in seconds
    DEBUG = False

    # NOTE: Python 2 integer division; `or 1` guarantees at least one worker
    MAX_WORKERS = 2 if is_running_on_ci() else (multiprocessing.cpu_count() / 2) or 1

    def __init__(self):
        # LRU of parsed documents, keyed by get_cache_key()
        self._cache = SynchronizedLRUDict(self.LRU_LENGTH)
        self._pool = None
        # Shared dict mapping http-response hash -> worker pid
        self._processes = None
        self._parser_finished_events = {}
        self._start_lock = threading.RLock()

        # These are here for debugging:
        self._archive = set()
        self._from_LRU = 0.0
        self._calculated_more_than_once = 0.0
        self._total = 0.0

    def start_workers(self):
        """
        Start the pool and workers
        :return: The pool instance
        """
        with self._start_lock:
            if self._pool is None:

                # Keep track of which pid is processing which http response
                # pylint: disable=E1101
                self._processes = manager.dict()
                # pylint: enable=E1101

                # The pool
                log_queue = om.manager.get_in_queue()
                self._pool = ProcessPool(self.MAX_WORKERS,
                                         maxtasksperchild=25,
                                         initializer=init_worker,
                                         initargs=(log_queue,))

        return self._pool

    def stop_workers(self):
        """
        Stop the pool workers
        :return: None
        """
        if self._pool is not None:
            self._pool.terminate()
            self._pool = None
            self._processes = None

        # We don't need this data anymore
        self._cache.clear()

        if self.DEBUG:
            re_calc_rate = (self._calculated_more_than_once / self._total)
            print('parser_cache LRU rate: %s' % (self._from_LRU / self._total))
            print('parser_cache re-calculation rate: %s' % re_calc_rate)
            print('parser_cache size: %s' % self.LRU_LENGTH)

    def get_cache_key(self, http_response):
        """
        Before I used md5, but I realized that it was unnecessary. I
        experimented a little bit with python's hash functions and the builtin
        hash was the fastest.

        At first I thought that the built-in hash wasn't good enough, as it
        could create collisions... but... given that the LRU has only 40
        positions, the real probability of a collision is too low.

        :return: The key to be used in the cache for storing this http_response
        """
        # @see: test_bug_13_Dec_2012 to understand why we concat the uri to the
        #       body before hashing
        uri_str = http_response.get_uri().url_string.encode('utf-8')

        body_str = http_response.body
        if isinstance(body_str, unicode):
            body_str = body_str.encode('utf-8', 'replace')

        _to_hash = body_str + uri_str

        # Added adler32 after finding some hash() collisions in builds
        hash_string = str(hash(_to_hash))
        hash_string += str(zlib.adler32(_to_hash))
        return hash_string

    def should_cache(self, http_response):
        """
        Defines if this http_response parser should be cached or not

        :param http_response: The http response instance
        :return: True if we should cache the parser for this response
        """
        return len(http_response.get_body()) < self.MAX_CACHEABLE_BODY_LEN

    def _test_parse_http_response(self, http_response, *args):
        """
        Left here for testing!
        """
        return DocumentParser(http_response)

    def _parse_http_response_in_worker(self, http_response, hash_string):
        """
        This parses the http_response in a pool worker.

        This has two features:
            * We can kill the worker if the parser is taking too long
            * We can have different parsers

        :return: The DocumentParser instance
        """
        event = multiprocessing.Event()
        self._parser_finished_events[hash_string] = event

        # Start the worker processes if needed
        self.start_workers()

        apply_args = (ProcessDocumentParser,
                      http_response,
                      self._processes,
                      hash_string)

        # Push the task to the workers
        result = self._pool.apply_async(apply_with_return_error,
                                        (apply_args,))

        try:
            parser_output = result.get(timeout=self.PARSER_TIMEOUT)
        except multiprocessing.TimeoutError:
            # Near the timeout error, so we make sure that the pid is still
            # running our "buggy" input
            pid = self._processes.pop(hash_string, None)
            if pid is not None:
                try:
                    os.kill(pid, signal.SIGTERM)
                except OSError, ose:
                    # Best effort: the worker may have already exited
                    msg = 'An error occurred while killing the parser' \
                          ' process: "%s"'
                    om.out.debug(msg % ose)

            msg = '[timeout] The parser took more than %s seconds'\
                  ' to complete parsing of "%s", killed it!'
            om.out.debug(msg % (self.PARSER_TIMEOUT, http_response.get_url()))

            # Act just like when there is no parser
            msg = 'There is no parser for "%s".' % http_response.get_url()
            raise BaseFrameworkException(msg)
        else:
            # NOTE(review): the SOURCE excerpt is truncated at this point —
            # the success branch of this try/except continues beyond the
            # visible chunk and cannot be reconstructed from here.
class MultiProcessingDocumentParser(object):
    """
    A document parser that performs all it's tasks in different processes and
    returns results to the main process.

    Also implements a parsing timeout just in case the parser enters an infinite
    loop.

    :author: Andres Riancho ([email protected])
    """
    # in seconds
    PARSER_TIMEOUT = 10
    DEBUG = core_profiling_is_enabled()

    # NOTE: Python 2 integer division; `or 1` guarantees at least one worker
    MAX_WORKERS = 2 if is_running_on_ci() else (multiprocessing.cpu_count() / 2) or 1

    def __init__(self):
        # Lazily-created process pool (see start_workers)
        self._pool = None
        # Shared dict mapping http-response hash -> worker pid, used by
        # _kill_parser_process to find the worker to SIGTERM
        self._processes = None
        self._start_lock = threading.RLock()

    def start_workers(self):
        """
        Start the pool and workers
        :return: The pool instance
        """
        with self._start_lock:
            if self._pool is None:

                # pylint: disable=E1101
                # Keep track of which pid is processing which http response
                self._processes = manager.dict()
                # pylint: enable=E1101

                # The pool
                log_queue = om.manager.get_in_queue()
                self._pool = ProcessPool(self.MAX_WORKERS,
                                         maxtasksperchild=20,
                                         initializer=init_worker,
                                         initargs=(log_queue,))

        return self._pool

    def stop_workers(self):
        """
        Stop the pool workers
        :return: None
        """
        if self._pool is not None:
            self._pool.terminate()
            self._pool = None
            self._processes.clear()
            self._processes = None

    def _kill_parser_process(self, hash_string, http_response):
        """
        Kill the process that's handling the parsing of http_response which
        can be identified by hash_string

        :param hash_string: The hash for the http_response
        :param http_response: The HTTP response which is being parsed
        :return: None
        """
        # Near the timeout error, so we make sure that the pid is still
        # running our "buggy" input
        pid = self._processes.pop(hash_string, None)
        if pid is not None:
            try:
                os.kill(pid, signal.SIGTERM)
            except OSError, ose:
                # Best effort: the worker may have already exited
                msg = ('An error occurred while killing the parser'
                       ' process: "%s"')
                om.out.debug(msg % ose)

        msg = ('[timeout] The parser took more than %s seconds to complete'
               ' parsing of "%s", killed it!')

        if user_wants_memory_profiling() or user_wants_pytracemalloc():
            msg += (' Keep in mind that you\'re profiling memory usage and'
                    ' there is a known bug where memory profilers break the'
                    ' parser cache. See issue #9713 for more information'
                    ' https://github.com/andresriancho/w3af/issues/9713')

        om.out.debug(msg % (self.PARSER_TIMEOUT, http_response.get_url()))
import os
import multiprocessing

from w3af.core.controllers.ci.detect import is_running_on_ci


# All build artifacts land here; CircleCI exposes the directory via env var
ARTIFACT_DIR = os.environ.get('CIRCLE_ARTIFACTS', '/tmp/')
LOG_FILE = os.path.join(ARTIFACT_DIR, 'nosetests.log')

# How many nosetests commands to run at the same time
#
# At CircleCI I've got 32 cores to use, but don't want to use them all with
# nosetests (other important stuff like docker is running too), so I set a fixed
# value
MAX_WORKERS = 10 if is_running_on_ci() else max(multiprocessing.cpu_count() - 1, 2)

# How many tests to send to each process
#
# Usually lower numbers are better here. A high chunk size will usually lead to
# larger delays.
CHUNK_SIZE = 3

# Where the test ids will be stored
ID_FILE = os.path.join(ARTIFACT_DIR, 'noseids.pickle')
JSON_ID_FILE = os.path.join(ARTIFACT_DIR, 'noseids.json')

NOSETESTS = 'nosetests'

# Not using code coverage (--with-cov --cov-report=xml) due to:
class MultiProcessingDocumentParser(object):
    """
    A document parser that performs all it's tasks in different processes and
    returns results to the main process.

    Also implements a parsing timeout just in case the parser enters an infinite
    loop.

    :author: Andres Riancho ([email protected])
    """
    DEBUG = core_profiling_is_enabled()

    # NOTE: Python 2 integer division; `or 1` guarantees at least one worker
    MAX_WORKERS = 2 if is_running_on_ci() else (multiprocessing.cpu_count() / 2) or 1

    # Increasing the timeout when profiling is enabled seems to fix issue #9713
    #
    # https://github.com/andresriancho/w3af/issues/9713
    PROFILING_ENABLED = (user_wants_memory_profiling() or
                         user_wants_pytracemalloc() or
                         user_wants_cpu_profiling())

    # in seconds
    PARSER_TIMEOUT = 60 * 3 if PROFILING_ENABLED else 10

    def __init__(self):
        # The pool is created lazily by start_workers(); the RLock makes
        # the lazy creation safe when called from multiple threads
        self._pool = None
        self._start_lock = threading.RLock()

    def start_workers(self):
        """
        Start the pool and workers
        :return: The pool instance
        """
        with self._start_lock:
            if self._pool is None:
                # Start the process pool; workers are recycled every
                # max_tasks parsed responses
                log_queue = om.manager.get_in_queue()
                self._pool = ProcessPool(self.MAX_WORKERS,
                                         max_tasks=20,
                                         initializer=init_worker,
                                         initargs=(log_queue,))

        return self._pool

    def stop_workers(self):
        """
        Stop the pool workers
        :return: None
        """
        if self._pool is not None:
            self._pool.stop()
            self._pool.join()
            self._pool = None

    def get_document_parser_for(self, http_response):
        """
        Get a document parser for http_response

        This parses the http_response in a pool worker.

        This method has two features:
            * We can kill the worker if the parser is taking too long
            * We can have different parsers

        :param http_response: The http response instance
        :return: An instance of DocumentParser
        :raises BaseFrameworkException: when parsing hit PARSER_TIMEOUT
        """
        # Start the worker processes if needed
        self.start_workers()

        apply_args = (process_document_parser,
                      http_response,
                      self.DEBUG)

        # Push the task to the workers
        future = self._pool.schedule(apply_with_return_error,
                                     args=(apply_args,),
                                     timeout=self.PARSER_TIMEOUT)

        try:
            parser_output = future.result()
        except TimeoutError:
            # Act just like when there is no parser
            msg = ('[timeout] The parser took more than %s seconds'
                   ' to complete parsing of "%s", killed it!')
            args = (self.PARSER_TIMEOUT, http_response.get_url())
            raise BaseFrameworkException(msg % args)
        else:
            # Worker-side exceptions travel back as Error instances and are
            # re-raised here in the main process
            if isinstance(parser_output, Error):
                parser_output.reraise()

        return parser_output

    def get_tags_by_filter(self, http_response, tags, yield_text=False):
        """
        Return Tag instances for the tags which match the `tags` filter,
        parsing and all lxml stuff is done in another process and the Tag
        instances are sent to the main process (the one calling this method)
        through a pipe

        Some things to note:
            * Not all responses can be parsed, so I need to call DocumentParser
              and handle exceptions

            * The parser selected by DocumentParser might not have tags, and
              it might not have get_tags_by_filter. In this case just return an
              empty list

            * Just like get_document_parser_for we have a timeout in place,
              when we hit the timeout just return an empty list, this is not
              the best thing to do, but makes the plugin code easier to write
              (plugins would ignore this anyways)

        :param tags: The filter
        :param yield_text: Should we yield the tag text?
        :return: A list of Tag instances as defined in sgml.py

        :see: SGMLParser.get_tags_by_filter
        """
        # Start the worker processes if needed
        self.start_workers()

        apply_args = (process_get_tags_by_filter,
                      http_response,
                      tags,
                      yield_text,
                      self.DEBUG)

        # Push the task to the workers
        future = self._pool.schedule(apply_with_return_error,
                                     args=(apply_args,),
                                     timeout=self.PARSER_TIMEOUT)

        try:
            filtered_tags = future.result()
        except TimeoutError:
            # We hit a timeout, return an empty list
            return []
        else:
            # There was an exception in the parser, maybe the HTML was really
            # broken, or it wasn't an HTML at all.
            if isinstance(filtered_tags, Error):
                return []

        return filtered_tags