コード例 #1
0
ファイル: test_strategy.py プロジェクト: johnjohnsp1/w3af
    def test_1557_random_number_of_results(self):
        """
        Pseudo-random number of vulnerabilities found in audit phase (xss)

        Runs the same scan script several times in a w3af_console subprocess
        and asserts that every run reports at least one XSS vulnerability and
        exactly the same set of vulnerable URLs as all the previous runs.

        https://github.com/andresriancho/w3af/issues/1557
        """
        script = TEST_SCRIPT_1557 % (OUTPUT_PATH, get_wavsep_http())

        # Use a context manager so the script is flushed and closed before
        # the subprocess reads it
        with open(SCRIPT_PATH, "w") as script_file:
            script_file.write(script)

        python_executable = sys.executable

        VULN_STRING = "A Cross Site Scripting vulnerability was found at"
        URL_VULN_RE = re.compile('%s: "(.*?)"' % VULN_STRING)
        all_previous_vulns = []

        # Fewer iterations on CI
        loops = 2 if is_running_on_ci() else 10

        for i in range(loops):
            print("Start run #%s" % i)
            found_vulns = set()

            p = subprocess.Popen(
                [python_executable, "w3af_console", "-n", "-s", SCRIPT_PATH],
                stdout=subprocess.PIPE,
                stderr=subprocess.PIPE,
                stdin=subprocess.PIPE,
                shell=False,
                universal_newlines=True,
            )

            # Only stdout is inspected; stderr is drained and ignored
            stdout, _ = p.communicate()
            i_vuln_count = stdout.count(VULN_STRING)
            print("%s vulnerabilities found" % i_vuln_count)

            self.assertNotEqual(i_vuln_count, 0, stdout)

            for line in stdout.split("\n"):
                if VULN_STRING not in line:
                    continue

                # Fail with a readable message instead of an AttributeError
                # when the line does not have the expected format
                match = URL_VULN_RE.search(line)
                self.assertIsNotNone(
                    match,
                    "Unexpected vulnerability line format: %r" % line)
                found_vulns.add(match.group(1))

            # Each run must find the same URLs as every earlier run
            for previous_found in all_previous_vulns:
                self.assertEqual(found_vulns, previous_found)

            all_previous_vulns.append(found_vulns)
コード例 #2
0
    def test_1557_random_number_of_results(self):
        """
        Pseudo-random number of vulnerabilities found in audit phase (xss)

        Runs the same scan script several times in a w3af_console subprocess
        and asserts that every run reports at least one XSS vulnerability and
        exactly the same set of vulnerable URLs as all the previous runs.

        https://github.com/andresriancho/w3af/issues/1557
        """
        script = TEST_SCRIPT_1557 % (OUTPUT_PATH, get_wavsep_http())

        # Use a context manager so the script is flushed and closed before
        # the subprocess reads it
        with open(SCRIPT_PATH, 'w') as script_file:
            script_file.write(script)

        python_executable = sys.executable

        VULN_STRING = 'A Cross Site Scripting vulnerability was found at'
        URL_VULN_RE = re.compile('%s: "(.*?)"' % VULN_STRING)
        all_previous_vulns = []

        # Fewer iterations on CI
        loops = 2 if is_running_on_ci() else 10

        for i in range(loops):
            print('Start run #%s' % i)
            found_vulns = set()

            p = subprocess.Popen(
                [python_executable, 'w3af_console', '-n', '-s', SCRIPT_PATH],
                stdout=subprocess.PIPE,
                stderr=subprocess.PIPE,
                stdin=subprocess.PIPE,
                shell=False,
                universal_newlines=True)

            # Only stdout is inspected; stderr is drained and ignored
            stdout, _ = p.communicate()
            i_vuln_count = stdout.count(VULN_STRING)
            print('%s vulnerabilities found' % i_vuln_count)

            self.assertNotEqual(i_vuln_count, 0, stdout)

            for line in stdout.split('\n'):
                if VULN_STRING not in line:
                    continue

                # Fail with a readable message instead of an AttributeError
                # when the line does not have the expected format
                match = URL_VULN_RE.search(line)
                self.assertIsNotNone(
                    match,
                    'Unexpected vulnerability line format: %r' % line)
                found_vulns.add(match.group(1))

            # Each run must find the same URLs as every earlier run
            for previous_found in all_previous_vulns:
                self.assertEqual(found_vulns, previous_found)

            all_previous_vulns.append(found_vulns)
コード例 #3
0
ファイル: only_ci_decorator.py プロジェクト: 0x554simon/w3af
 def _inner_func(*args, **kwds):
     # Run the decorated function only when is_running_on_ci() is true;
     # otherwise do nothing and return None
     if not is_running_on_ci():
         return None
     return decorated_func(*args, **kwds)
コード例 #4
0
ファイル: mp_document_parser.py プロジェクト: woverines/w3af
class MultiProcessingDocumentParser(object):
    """
    A document parser that performs all its tasks in different processes and
    returns results to the main process.

    Also implements a parsing timeout just in case the parser enters an
    infinite loop.

    :author: Andres Riancho ([email protected])
    """
    DEBUG = core_profiling_is_enabled()

    # Two workers on CI, otherwise half of the CPU count (at least one).
    # Floor division keeps the worker count an integer even when true
    # division is in effect.
    MAX_WORKERS = 2 if is_running_on_ci() else (multiprocessing.cpu_count() //
                                                2) or 1

    # Increasing the timeout when profiling is enabled seems to fix issue #9713
    #
    # https://github.com/andresriancho/w3af/issues/9713
    PROFILING_ENABLED = (user_wants_memory_profiling()
                         or user_wants_pytracemalloc()
                         or user_wants_cpu_profiling())

    # in seconds
    PARSER_TIMEOUT = 60 * 3 if PROFILING_ENABLED else 10

    # Document parsers can go crazy on memory usage when parsing some very
    # specific HTML / PDF documents. Sometimes when this happens the operating
    # system does an out of memory (OOM) kill of a "randomly chosen" process.
    #
    # We limit the memory which can be used by parsing processes to this
    # constant
    #
    # The feature was tested in test_pebble_limit_memory_usage.py
    MEMORY_LIMIT = get_memory_limit()

    def __init__(self):
        # The pool is created on demand by start_workers()
        self._pool = None
        self._start_lock = threading.RLock()

    def start_workers(self):
        """
        Start the pool and workers

        The pool is only created on the first call; later calls return the
        existing instance.

        :return: The pool instance
        """
        with self._start_lock:
            if self._pool is None:

                # Start the process pool
                log_queue = om.manager.get_in_queue()
                self._pool = ProcessPool(self.MAX_WORKERS,
                                         max_tasks=20,
                                         initializer=init_worker,
                                         initargs=(log_queue,
                                                   self.MEMORY_LIMIT))

        return self._pool

    def stop_workers(self):
        """
        Stop the pool workers

        Does nothing when the pool was never started.

        :return: None
        """
        if self._pool is not None:
            self._pool.stop()
            self._pool.join()
            self._pool = None

    def get_document_parser_for(self, http_response):
        """
        Get a document parser for http_response

        This parses the http_response in a pool worker. This method has two
        features:
            * We can kill the worker if the parser is taking too long
            * We can have different parsers

        :param http_response: The http response instance
        :return: An instance of DocumentParser
        :raises ScanMustStopException: When scheduling the task in the pool
                                       raises RuntimeError
        :raises TimeoutError: When parsing takes more than PARSER_TIMEOUT
                              seconds
        :raises MemoryError: When the worker reported a MemoryError
        """
        # Start the worker processes if needed
        self.start_workers()

        apply_args = (process_document_parser, http_response, self.DEBUG)

        # Push the task to the workers
        try:
            future = self._pool.schedule(apply_with_return_error,
                                         args=(apply_args, ),
                                         timeout=self.PARSER_TIMEOUT)
        except RuntimeError as rte:
            # We get here when the pebble pool management thread dies and
            # suddenly starts answering all calls with:
            #
            # RuntimeError('Unexpected error within the Pool')
            #
            # The scan needs to stop because we can't parse any more
            # HTTP responses, which is a very critical part of the process
            msg = str(rte)
            raise ScanMustStopException(msg)

        try:
            parser_output = future.result()
        except TimeoutError:
            msg = ('[timeout] The parser took more than %s seconds'
                   ' to complete parsing of "%s", killed it!')
            args = (self.PARSER_TIMEOUT, http_response.get_url())
            raise TimeoutError(msg % args)

        # We still need to perform some error handling here...
        if isinstance(parser_output, Error):
            if isinstance(parser_output.exc_value, MemoryError):
                msg = ('The parser exceeded the memory usage limit of %s bytes'
                       ' while trying to parse "%s". The parser was stopped in'
                       ' order to prevent OOM issues.')
                args = (self.MEMORY_LIMIT, http_response.get_url())
                om.out.debug(msg % args)
                raise MemoryError(msg % args)

            # Any other error captured in the worker is raised again here
            parser_output.reraise()

        # Success!
        return parser_output
コード例 #5
0
ファイル: mp_document_parser.py プロジェクト: softsky/w3af
class MultiProcessingDocumentParser(object):
    """
    A document parser that performs all its tasks in different processes and
    returns results to the main process.

    Also implements a parsing timeout just in case the parser enters an
    infinite loop.

    :author: Andres Riancho ([email protected])
    """
    DEBUG = core_profiling_is_enabled()

    # Two workers on CI, otherwise half of the CPU count (at least one).
    # Floor division keeps the worker count an integer even when true
    # division is in effect.
    MAX_WORKERS = (2 if is_running_on_ci()
                   else (multiprocessing.cpu_count() // 2) or 1)

    # Increasing the timeout when profiling is enabled seems to fix issue #9713
    #
    # https://github.com/andresriancho/w3af/issues/9713
    PROFILING_ENABLED = (user_wants_memory_profiling() or
                         user_wants_pytracemalloc() or
                         user_wants_cpu_profiling())

    # in seconds
    PARSER_TIMEOUT = 60 * 3 if PROFILING_ENABLED else 10

    def __init__(self):
        # Both are created on demand by start_workers()
        self._pool = None
        self._processes = None
        self._start_lock = threading.RLock()

    def start_workers(self):
        """
        Start the pool and workers

        The pool is only created on the first call; later calls return the
        existing instance.

        :return: The pool instance
        """
        with self._start_lock:
            if self._pool is None:

                # pylint: disable=E1101
                # Keep track of which pid is processing which http response
                self._processes = manager.dict()
                # pylint: enable=E1101

                # The pool
                log_queue = om.manager.get_in_queue()
                self._pool = ProcessPool(self.MAX_WORKERS,
                                         maxtasksperchild=20,
                                         initializer=init_worker,
                                         initargs=(log_queue,))

        return self._pool

    def stop_workers(self):
        """
        Stop the pool workers and forget the pid tracking information

        :return: None
        """
        if self._pool is not None:
            self._pool.terminate()
            self._pool = None

        if self._processes is not None:
            self._processes.clear()
            self._processes = None

    def _kill_parser_process(self, hash_string, http_response):
        """
        Kill the process that's handling the parsing of http_response which
        can be identified by hash_string

        :param hash_string: The hash for the http_response
        :param http_response: The HTTP response which is being parsed
        :return: None
        """
        # Near the timeout error, so we make sure that the pid is still
        # running our "buggy" input
        pid = self._processes.pop(hash_string, None)
        if pid is not None:
            try:
                os.kill(pid, signal.SIGTERM)
            except OSError as ose:
                # Failing to kill the process is only logged
                msg = ('An error occurred while killing the parser'
                       ' process: "%s"')
                om.out.debug(msg % ose)

        msg = ('[timeout] The parser took more than %s seconds to complete'
               ' parsing of "%s", killed it!')

        if self.PROFILING_ENABLED:
            msg += (' You are running a profiling session which requires more'
                    ' CPU and resources to be run; the'
                    ' MultiProcessingDocumentParser failed to parse the HTML'
                    ' document. Try to increase the PARSER_TIMEOUT and try'
                    ' again.\n\n'
                    'This issue invalidates the profiling session!\n\n'
                    'See issue #9713 for more information'
                    ' https://github.com/andresriancho/w3af/issues/9713')

        # Timeouts during a profiling session are reported as errors,
        # otherwise they only go to the debug log
        log_function = om.out.error if self.PROFILING_ENABLED else om.out.debug
        log_function(msg % (self.PARSER_TIMEOUT, http_response.get_url()))
コード例 #6
0
ファイル: constants.py プロジェクト: xiaofengtongxue/w3af
import os
import multiprocessing

from w3af.core.controllers.ci.detect import is_running_on_ci

# Base directory for the files below, taken from the CIRCLE_ARTIFACTS
# environment variable with /tmp/ as the fallback
ARTIFACT_DIR = os.environ.get('CIRCLE_ARTIFACTS', '/tmp/')
LOG_FILE = os.path.join(ARTIFACT_DIR, 'nosetests.log')

# Where the test ids will be stored
ID_FILE = os.path.join(ARTIFACT_DIR, 'noseids.pickle')
JSON_ID_FILE = os.path.join(ARTIFACT_DIR, 'noseids.json')

# How many nosetests commands to run at the same time
#
# At CircleCI I've got 32 cores to use, but don't want to use them all with
# nosetests (other important stuff like docker is running too), so I set a fixed
# value. Everywhere else use all cores but one, with a minimum of two workers.
MAX_WORKERS = (20 if is_running_on_ci()
               else max(multiprocessing.cpu_count() - 1, 2))

# How many tests to send to each process
#
# Usually lower numbers are better here. A high chunk size will usually lead to
# larger delays.
CHUNK_SIZE = 3

NOSETESTS = 'nosetests'

# Not using code coverage (--with-cov --cov-report=xml) due to:
コード例 #7
0
ファイル: parser_cache.py プロジェクト: xenobyte/w3af
class ParserCache(object):
    """
    This class is a document parser cache.

    :author: Andres Riancho ([email protected])
    """
    LRU_LENGTH = 40
    MAX_CACHEABLE_BODY_LEN = 1024 * 1024
    PARSER_TIMEOUT = 60  # in seconds
    DEBUG = False
    MAX_WORKERS = 2 if is_running_on_ci() else (multiprocessing.cpu_count() /
                                                2) or 1

    def __init__(self):
        # Lock taken by start_workers() while the pool is being created
        self._start_lock = threading.RLock()

        # Pool and pid tracking are set up by start_workers()
        self._pool = None
        self._processes = None
        self._parser_finished_events = {}

        # The cache itself, sized with LRU_LENGTH
        self._cache = SynchronizedLRUDict(self.LRU_LENGTH)

        # Counters kept for debugging only, see the DEBUG output in
        # stop_workers()
        self._archive = set()
        self._total = 0.0
        self._from_LRU = 0.0
        self._calculated_more_than_once = 0.0

    def start_workers(self):
        """
        Create the process pool on the first call and return it; later calls
        return the already existing pool.

        :return: The pool instance
        """
        with self._start_lock:
            pool_is_running = self._pool is not None

            if not pool_is_running:
                # Keep track of which pid is processing which http response
                # pylint: disable=E1101
                self._processes = manager.dict()
                # pylint: enable=E1101

                # The pool
                worker_log_queue = om.manager.get_in_queue()
                pool_kwargs = {'maxtasksperchild': 25,
                               'initializer': init_worker,
                               'initargs': (worker_log_queue, )}
                self._pool = ProcessPool(self.MAX_WORKERS, **pool_kwargs)

        return self._pool

    def stop_workers(self):
        """
        Stop the pool workers and clear the cache

        When DEBUG is enabled the cache statistics are printed.

        :return: None
        """
        if self._pool is not None:
            self._pool.terminate()
            self._pool = None
            self._processes = None

        # We don't need this data anymore
        self._cache.clear()

        if self.DEBUG:
            # self._total is 0.0 when nothing was ever counted; divide by 1.0
            # in that case so the rates are reported as 0.0 instead of
            # raising ZeroDivisionError
            total = self._total or 1.0
            re_calc_rate = (self._calculated_more_than_once / total)
            print('parser_cache LRU rate: %s' % (self._from_LRU / total))
            print('parser_cache re-calculation rate: %s' % re_calc_rate)
            print('parser_cache size: %s' % self.LRU_LENGTH)

    def get_cache_key(self, http_response):
        """
        Build the cache key for http_response from its body and URI.

        The builtin hash() replaced md5 because it was the fastest option
        tested; with an LRU of only 40 positions the probability of a
        collision was considered low enough. An adler32 checksum was appended
        later, after some hash() collisions were found in builds.

        :return: The key to be used in the cache for storing this http_response
        """
        # @see: test_bug_13_Dec_2012 to understand why we concat the uri to the
        #       body before hashing
        uri_bytes = http_response.get_uri().url_string.encode('utf-8')

        body_bytes = http_response.body
        if isinstance(body_bytes, unicode):
            body_bytes = body_bytes.encode('utf-8', 'replace')

        hash_input = body_bytes + uri_bytes

        # Key is the builtin hash followed by the adler32 checksum
        key_parts = (str(hash(hash_input)), str(zlib.adler32(hash_input)))
        return ''.join(key_parts)

    def should_cache(self, http_response):
        """
        Decide if the parser for http_response should be stored in the cache,
        based on the length of the response body.

        :param http_response: The http response instance
        :return: True if the body is shorter than MAX_CACHEABLE_BODY_LEN
        """
        body_length = len(http_response.get_body())
        return body_length < self.MAX_CACHEABLE_BODY_LEN

    def _test_parse_http_response(self, http_response, *args):
        """
        Left here for testing!

        Builds a DocumentParser for http_response; any extra positional
        arguments are accepted and ignored.
        """
        document_parser = DocumentParser(http_response)
        return document_parser

    def _parse_http_response_in_worker(self, http_response, hash_string):
        """
        This parses the http_response in a pool worker. This has two features:
            * We can kill the worker if the parser is taking too long
            * We can have different parsers

        :return: The DocumentParser instance
        """
        event = multiprocessing.Event()
        self._parser_finished_events[hash_string] = event

        # Start the worker processes if needed
        self.start_workers()

        apply_args = (ProcessDocumentParser, http_response, self._processes,
                      hash_string)

        # Push the task to the workers
        result = self._pool.apply_async(apply_with_return_error,
                                        (apply_args, ))

        try:
            parser_output = result.get(timeout=self.PARSER_TIMEOUT)
        except multiprocessing.TimeoutError:
            # Near the timeout error, so we make sure that the pid is still
            # running our "buggy" input
            pid = self._processes.pop(hash_string, None)
            if pid is not None:
                try:
                    os.kill(pid, signal.SIGTERM)
                except OSError, ose:
                    msg = 'An error occurred while killing the parser' \
                          ' process: "%s"'
                    om.out.debug(msg % ose)

            msg = '[timeout] The parser took more than %s seconds'\
                  ' to complete parsing of "%s", killed it!'

            om.out.debug(msg % (self.PARSER_TIMEOUT, http_response.get_url()))

            # Act just like when there is no parser
            msg = 'There is no parser for "%s".' % http_response.get_url()
            raise BaseFrameworkException(msg)
        else:
コード例 #8
0
class MultiProcessingDocumentParser(object):
    """
    A document parser that performs all its tasks in different processes and
    returns results to the main process.

    Also implements a parsing timeout just in case the parser enters an
    infinite loop.

    :author: Andres Riancho ([email protected])
    """
    # in seconds
    PARSER_TIMEOUT = 10
    DEBUG = core_profiling_is_enabled()

    # Two workers on CI, otherwise half of the CPU count (at least one).
    # Floor division keeps the worker count an integer even when true
    # division is in effect.
    MAX_WORKERS = 2 if is_running_on_ci() else (multiprocessing.cpu_count() //
                                                2) or 1

    def __init__(self):
        # Both are created on demand by start_workers()
        self._pool = None
        self._processes = None
        self._start_lock = threading.RLock()

    def start_workers(self):
        """
        Start the pool and workers

        The pool is only created on the first call; later calls return the
        existing instance.

        :return: The pool instance
        """
        with self._start_lock:
            if self._pool is None:

                # pylint: disable=E1101
                # Keep track of which pid is processing which http response
                self._processes = manager.dict()
                # pylint: enable=E1101

                # The pool
                log_queue = om.manager.get_in_queue()
                self._pool = ProcessPool(self.MAX_WORKERS,
                                         maxtasksperchild=20,
                                         initializer=init_worker,
                                         initargs=(log_queue, ))

        return self._pool

    def stop_workers(self):
        """
        Stop the pool workers and forget the pid tracking information

        Does nothing when the pool was never started.

        :return: None
        """
        if self._pool is not None:
            self._pool.terminate()
            self._pool = None

            self._processes.clear()
            self._processes = None

    def _kill_parser_process(self, hash_string, http_response):
        """
        Kill the process that's handling the parsing of http_response which
        can be identified by hash_string

        :param hash_string: The hash for the http_response
        :param http_response: The HTTP response which is being parsed
        :return: None
        """
        # Near the timeout error, so we make sure that the pid is still
        # running our "buggy" input
        pid = self._processes.pop(hash_string, None)
        if pid is not None:
            try:
                os.kill(pid, signal.SIGTERM)
            except OSError as ose:
                # Failing to kill the process is only logged
                msg = ('An error occurred while killing the parser'
                       ' process: "%s"')
                om.out.debug(msg % ose)

        msg = ('[timeout] The parser took more than %s seconds to complete'
               ' parsing of "%s", killed it!')

        if user_wants_memory_profiling() or user_wants_pytracemalloc():
            msg += (' Keep in mind that you\'re profiling memory usage and'
                    ' there is a known bug where memory profilers break the'
                    ' parser cache. See issue #9713 for more information'
                    ' https://github.com/andresriancho/w3af/issues/9713')

        om.out.debug(msg % (self.PARSER_TIMEOUT, http_response.get_url()))
コード例 #9
0
ファイル: constants.py プロジェクト: 0x554simon/w3af
import os
import multiprocessing

from w3af.core.controllers.ci.detect import is_running_on_ci


# Base directory for the files below, taken from the CIRCLE_ARTIFACTS
# environment variable with /tmp/ as the fallback
ARTIFACT_DIR = os.environ.get('CIRCLE_ARTIFACTS', '/tmp/')
LOG_FILE = os.path.join(ARTIFACT_DIR, 'nosetests.log')

# Where the test ids will be stored
ID_FILE = os.path.join(ARTIFACT_DIR, 'noseids.pickle')
JSON_ID_FILE = os.path.join(ARTIFACT_DIR, 'noseids.json')

# How many nosetests commands to run at the same time
#
# At CircleCI I've got 32 cores to use, but don't want to use them all with
# nosetests (other important stuff like docker is running too), so I set a fixed
# value. Everywhere else use all cores but one, with a minimum of two workers.
MAX_WORKERS = (10 if is_running_on_ci()
               else max(multiprocessing.cpu_count() - 1, 2))

# How many tests to send to each process
#
# Usually lower numbers are better here. A high chunk size will usually lead to
# larger delays.
CHUNK_SIZE = 3

NOSETESTS = 'nosetests'
# Not using code coverage (--with-cov --cov-report=xml) due to:
コード例 #10
0
ファイル: only_ci_decorator.py プロジェクト: webvul/webfuzzer
 def _inner_func(*args, **kwds):
     # Run the decorated function only when is_running_on_ci() is true;
     # otherwise do nothing and return None
     if not is_running_on_ci():
         return None
     return decorated_func(*args, **kwds)
コード例 #11
0
class MultiProcessingDocumentParser(object):
    """
    A document parser that performs all its tasks in different processes and
    returns results to the main process.

    Also implements a parsing timeout just in case the parser enters an
    infinite loop.

    :author: Andres Riancho ([email protected])
    """
    DEBUG = core_profiling_is_enabled()

    # Two workers on CI, otherwise half of the CPU count (at least one).
    # Floor division keeps the worker count an integer even when true
    # division is in effect.
    MAX_WORKERS = 2 if is_running_on_ci() else (multiprocessing.cpu_count() //
                                                2) or 1

    # Increasing the timeout when profiling is enabled seems to fix issue #9713
    #
    # https://github.com/andresriancho/w3af/issues/9713
    PROFILING_ENABLED = (user_wants_memory_profiling()
                         or user_wants_pytracemalloc()
                         or user_wants_cpu_profiling())

    # in seconds
    PARSER_TIMEOUT = 60 * 3 if PROFILING_ENABLED else 10

    def __init__(self):
        # The pool is created on demand by start_workers()
        self._pool = None
        self._start_lock = threading.RLock()

    def start_workers(self):
        """
        Start the pool and workers

        The pool is only created on the first call; later calls return the
        existing instance.

        :return: The pool instance
        """
        with self._start_lock:
            if self._pool is None:

                # Start the process pool
                log_queue = om.manager.get_in_queue()
                self._pool = ProcessPool(self.MAX_WORKERS,
                                         max_tasks=20,
                                         initializer=init_worker,
                                         initargs=(log_queue, ))

        return self._pool

    def stop_workers(self):
        """
        Stop the pool workers

        Does nothing when the pool was never started.

        :return: None
        """
        if self._pool is not None:
            self._pool.stop()
            self._pool.join()
            self._pool = None

    def _schedule_task(self, apply_args):
        """
        Start the worker processes if needed and push a task to the pool,
        using PARSER_TIMEOUT as the task timeout.

        :param apply_args: Tuple holding the function to run in the worker
                           followed by its arguments
        :return: The object returned by the pool's schedule() call
        """
        # Start the worker processes if needed
        self.start_workers()

        # Push the task to the workers
        return self._pool.schedule(apply_with_return_error,
                                   args=(apply_args, ),
                                   timeout=self.PARSER_TIMEOUT)

    def get_document_parser_for(self, http_response):
        """
        Get a document parser for http_response

        This parses the http_response in a pool worker. This method has two
        features:
            * We can kill the worker if the parser is taking too long
            * We can have different parsers

        :param http_response: The http response instance
        :return: An instance of DocumentParser
        :raises BaseFrameworkException: When parsing takes more than
                                        PARSER_TIMEOUT seconds
        """
        apply_args = (process_document_parser, http_response, self.DEBUG)
        future = self._schedule_task(apply_args)

        try:
            parser_output = future.result()
        except TimeoutError:
            # Act just like when there is no parser
            msg = ('[timeout] The parser took more than %s seconds'
                   ' to complete parsing of "%s", killed it!')

            args = (self.PARSER_TIMEOUT, http_response.get_url())

            raise BaseFrameworkException(msg % args)
        else:
            # Errors captured in the worker are raised again here
            if isinstance(parser_output, Error):
                parser_output.reraise()

        return parser_output

    def get_tags_by_filter(self, http_response, tags, yield_text=False):
        """
        Return Tag instances for the tags which match the `tags` filter,
        parsing and all lxml stuff is done in another process and the Tag
        instances are sent to the main process (the one calling this method)
        through a pipe

        Some things to note:
            * Not all responses can be parsed, so I need to call DocumentParser
              and handle exceptions

            * The parser selected by DocumentParser might not have tags, and
              it might not have get_tags_by_filter. In this case just return an
              empty list

            * Just like get_document_parser_for we have a timeout in place,
              when we hit the timeout just return an empty list, this is not
              the best thing to do, but makes the plugin code easier to write
              (plugins would ignore this anyways)

        :param http_response: The http response instance
        :param tags: The filter
        :param yield_text: Should we yield the tag text?
        :return: A list of Tag instances as defined in sgml.py

        :see: SGMLParser.get_tags_by_filter
        """
        apply_args = (process_get_tags_by_filter, http_response, tags,
                      yield_text, self.DEBUG)
        future = self._schedule_task(apply_args)

        try:
            filtered_tags = future.result()
        except TimeoutError:
            # We hit a timeout, return an empty list
            return []
        else:
            # There was an exception in the parser, maybe the HTML was really
            # broken, or it wasn't an HTML at all.
            if isinstance(filtered_tags, Error):
                return []

        return filtered_tags