Example 1
    def __init__(self, crawl_infrastructure_plugins, w3af_core,
                 max_discovery_time):
        """
        :param crawl_infrastructure_plugins: Instances of CrawlInfrastructure
                                             plugins in a list
        :param w3af_core: The w3af core that we'll use for status reporting
        :param max_discovery_time: The max time (in seconds) to use for the
                                   discovery phase
        """
        super(CrawlInfrastructure, self).__init__(crawl_infrastructure_plugins,
                                                  w3af_core,
                                                  thread_name=self.get_name(),
                                                  max_pool_queued_tasks=100)
        self._max_discovery_time = int(max_discovery_time)

        # For filtering fuzzable requests found by plugins:
        self._variant_db = VariantDB()

        self._disabled_plugins = set()
        self._running = True
        self._report_max_time = True
        self._reported_found_urls = ScalableBloomFilter()

        # Override BaseConsumer.in_queue in order to have an ordered queue for
        # our crawling process.
        #
        # Read OrderedCachedQueue's documentation to understand why order is
        # important
        self.in_queue = OrderedCachedQueue(maxsize=10,
                                           name=self.get_name() + 'In')
Example 2
    def test_exceptions_no_fail_sync_pointer(self):
        q = OrderedCachedQueue(maxsize=2)
        q.put(create_simple_fuzzable_request(1))
        q.get()

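        # The queue is now empty: a non-blocking get() must raise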
        self.assertRaises(Exception, q.get, block=False)

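        # The failed get() must not desynchronize the queue's internal
        # pointers: a subsequent put()/get() cycle still works as expected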
        q.put(create_simple_fuzzable_request(1))
        self.assertEqual(read_fuzzable_request_parameter(q.get()), 1)
Example 3
    def test_put_none_after_fuzzable_request(self):
        q = OrderedCachedQueue(maxsize=2)

        q.put(create_simple_fuzzable_request(1))
        q.put(None)

        # Reads the fuzzable request
        q.get()

        # Reads the None
        self.assertIsNone(q.get())
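
The None in this test matters because the crawl consumer shuts down by sending a sentinel (POISON_PILL in the later examples) through the same queue, so the queue must accept and return it untouched. Below is a minimal sketch of that shutdown pattern, assuming the sentinel is None as this test suggests, with a hypothetical process() handler standing in for the real consumer logic:

import threading
import Queue  # the stdlib queue module under Python 2, as used above

POISON_PILL = None  # assumption: the sentinel is None, as this test suggests

def process(work):
    # Hypothetical handler, stands in for CrawlInfrastructure._consume()
    print('processing %s' % work)

def consumer(in_queue):
    while True:
        work = in_queue.get()
        if work is POISON_PILL:
            in_queue.task_done()
            break
        try:
            process(work)
        finally:
            # task_done() must run even on failure, or join() never returns
            in_queue.task_done()

q = Queue.Queue()
t = threading.Thread(target=consumer, args=(q,))
t.start()

q.put('http://example.com/')
q.put(POISON_PILL)
q.join()  # returns once both items were marked done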
Example 4
    def test_read_in_order(self):
        q = OrderedCachedQueue(maxsize=2)
        hash_list = []

        for i in xrange(5):
            fr = create_simple_fuzzable_request(i)
            hash_list.append(fr.get_hash())
            q.put(fr)

        unordered_hash_list = hash_list[:]
        hash_list.sort()

        self.assertNotEqual(unordered_hash_list, hash_list)

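        # Items must come out sorted by hash, regardless of insertion order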
        for i in xrange(4):
            fr = q.get()
            self.assertEqual(fr.get_hash(), hash_list[i])
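
As a rough mental model of the ordering this test verifies, a priority queue keyed on get_hash() behaves the same way for the put-then-get pattern exercised here. This is an illustration only: the real OrderedCachedQueue also spills items to disk beyond maxsize and supports task_done()/join().

import heapq
import itertools

class ToyOrderedQueue(object):
    """Toy model of the ordering only: get() returns items sorted by
    their get_hash() value.
    """

    def __init__(self):
        self._heap = []
        self._counter = itertools.count()  # tie-breaker for equal hashes

    def put(self, fuzzable_request):
        heapq.heappush(self._heap, (fuzzable_request.get_hash(),
                                    next(self._counter),
                                    fuzzable_request))

    def get(self):
        # Smallest hash first, matching the sorted hash_list above
        return heapq.heappop(self._heap)[2]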
Example 5
    def test_prefer_memory_over_disk(self):
        q = OrderedCachedQueue(maxsize=2)

        # These two go to the in memory queue
        q.put(create_simple_fuzzable_request(1))
        q.put(create_simple_fuzzable_request(2))

        # This one goes to the disk queue
        q.put(create_simple_fuzzable_request(3))

        # Read one from memory
        q.get()
        self.assertEqual(len(q.memory), 1)
        self.assertEqual(len(q.disk), 1)

        # Write one to memory
        q.put(create_simple_fuzzable_request(3))
        self.assertEqual(len(q.memory), 2)
        self.assertEqual(len(q.disk), 1)
Example 6
    def test_put_same_fuzzable_request_twice(self):
        q = OrderedCachedQueue(maxsize=2)

        q.put(create_simple_fuzzable_request(1))
        q.put(create_simple_fuzzable_request(1))

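        # Both puts are stored; the two get() calls return equal requests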
        self.assertEqual(q.get(), q.get())
Example 7
    def test_join_memory_and_disk(self):
        q = OrderedCachedQueue(maxsize=2)
        for x in range(10):
            q.put(create_simple_fuzzable_request(x))

        def queue_get_after_delay(queue):
            time.sleep(1)

            for x in range(2):
                queue.get()
                queue.task_done()

            time.sleep(1)

            for x in range(8):
                queue.get()
                queue.task_done()

        t = threading.Thread(target=queue_get_after_delay,
                             args=(q,))
        t.start()

        start = time.time()

        # This should take at least 2 seconds: join() only returns once
        # task_done() was called for all ten items
        q.join()

        spent = time.time() - start

        self.assertGreater(spent, 2)
Example 8
    def test_join_memory(self):
        q = OrderedCachedQueue(maxsize=2)
        q.put(create_simple_fuzzable_request(1))

        def queue_get_after_delay(queue):
            time.sleep(1)
            queue.get()
            queue.task_done()

        t = threading.Thread(target=queue_get_after_delay,
                             args=(q,))
        t.start()

        start = time.time()

        # This should take at least 1 second
        q.join()

        spent = time.time() - start

        self.assertGreater(spent, 1)
Example 9
    def test_simple_rpm_speed(self):
        q = OrderedCachedQueue()

        self.assertEqual(0.0, q.get_input_rpm())
        self.assertEqual(0.0, q.get_output_rpm())

        for i in xrange(4):
            q.put(create_simple_fuzzable_request(i))
            # 20 RPM
            time.sleep(3)

        self.assertEqual(q.qsize(), 4)

        self.assertGreater(q.get_input_rpm(), 19)
        self.assertLess(q.get_input_rpm(), 20)

        for i in xrange(4):
            q.get()
            # 60 RPM
            time.sleep(1)

        self.assertGreater(q.get_output_rpm(), 59)
        self.assertLess(q.get_output_rpm(), 60)
        self.assertEqual(q.qsize(), 0)
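
get_input_rpm() and get_output_rpm() themselves are not shown in these examples. The arithmetic the test relies on (4 puts over roughly 12 seconds comes out just below 20 RPM) is consistent with a meter that counts events since the queue was created. The sketch below is an assumption about the idea, not the library's actual code:

import time

class RPMMeter(object):
    """Hypothetical rate meter: events per minute since creation.

    Illustration only; the real get_input_rpm()/get_output_rpm()
    implementation in OrderedCachedQueue may differ.
    """

    def __init__(self):
        self._start = time.time()
        self._count = 0

    def tick(self):  # call on every put() or get()
        self._count += 1

    def rpm(self):
        elapsed = time.time() - self._start
        if self._count == 0 or elapsed <= 0:
            return 0.0
        # 4 ticks over ~12 seconds yields just under 20 RPM, matching
        # the assertGreater(19)/assertLess(20) pair in the test above
        return 60.0 * self._count / elapsed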
Example 10
    def test_add_exceed_memory(self):
        q = OrderedCachedQueue(maxsize=2)

        # These two go to the in memory queue
        q.put(create_simple_fuzzable_request(1))
        q.put(create_simple_fuzzable_request(2))

        self.assertEqual(q.qsize(), 2)
        self.assertEqual(len(q.memory), 2)

        # This one goes to the disk queue
        q.put(create_simple_fuzzable_request(3))

        self.assertEqual(q.qsize(), 3)
        self.assertEqual(len(q.memory), 2)
        self.assertEqual(len(q.disk), 1)

        # Get all
        self.assertEqual(read_fuzzable_request_parameter(q.get()), 1)

        self.assertEqual(len(q.memory), 1)
        self.assertEqual(len(q.disk), 1)

        self.assertEqual(read_fuzzable_request_parameter(q.get()), 2)

        self.assertEqual(len(q.memory), 0)
        self.assertEqual(len(q.disk), 1)

        self.assertEqual(read_fuzzable_request_parameter(q.get()), 3)

        self.assertEqual(len(q.memory), 0)
        self.assertEqual(len(q.disk), 0)

        self.assertEqual(q.qsize(), 0)
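
Taken together with Example 5, the memory/disk counters follow a simple spillover rule: puts go to memory while it is below maxsize and overflow to disk, and get() serves memory first. A toy model of just that bookkeeping, as an assumption for illustration (it ignores the hash ordering shown earlier and the real queue's on-disk persistence):

class ToyCachedQueue(object):
    """Toy model of the memory/disk bookkeeping only."""

    def __init__(self, maxsize=2):
        self.maxsize = maxsize
        self.memory = []
        self.disk = []

    def put(self, item):
        if len(self.memory) < self.maxsize:
            self.memory.append(item)
        else:
            self.disk.append(item)

    def get(self):
        if self.memory:
            return self.memory.pop(0)
        return self.disk.pop(0)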
Example 11
class CrawlInfrastructure(BaseConsumer):
    """
    Consumer thread that takes fuzzable requests from the input Queue that is
    seeded by the core, sends each fuzzable request to all crawl and
    infrastructure plugins, get()'s the output from those plugins, and puts it
    back into the input Queue to continue the discovery process.
    """

    def __init__(self, crawl_infrastructure_plugins, w3af_core,
                 max_discovery_time):
        """
        :param crawl_infrastructure_plugins: Instances of CrawlInfrastructure
                                             plugins in a list
        :param w3af_core: The w3af core that we'll use for status reporting
        :param max_discovery_time: The max time (in seconds) to use for the
                                   discovery phase
        """
        super(CrawlInfrastructure, self).__init__(crawl_infrastructure_plugins,
                                                  w3af_core,
                                                  thread_name=self.get_name(),
                                                  max_pool_queued_tasks=100)
        self._max_discovery_time = int(max_discovery_time)

        # For filtering fuzzable requests found by plugins:
        self._variant_db = VariantDB()

        self._disabled_plugins = set()
        self._running = True
        self._report_max_time = True
        self._reported_found_urls = ScalableBloomFilter()

        # Override BaseConsumer.in_queue in order to have an ordered queue for
        # our crawling process.
        #
        # Read OrderedCachedQueue's documentation to understand why order is
        # important
        self.in_queue = OrderedCachedQueue(maxsize=10,
                                           name=self.get_name() + 'In')

    def get_name(self):
        return 'CrawlInfra'

    def run(self):
        """
        Consume the queue items, sending them to the plugins which are then
        going to find vulnerabilities, new URLs, etc.
        """
        while True:

            try:
                work_unit = self.in_queue.get(timeout=0.1)
            except KeyboardInterrupt:
                # https://github.com/andresriancho/w3af/issues/9587
                #
                # If we don't do this, the thread will die and will never
                # process the POISON_PILL, which will end up in an endless
                # wait for .join()
                continue

            except Queue.Empty:
                # pylint: disable=E1120
                try:
                    self._route_all_plugin_results()
                except KeyboardInterrupt:
                    continue
                # pylint: enable=E1120
            else:
                if work_unit == POISON_PILL:

                    self._log_queue_sizes()

                    try:
                        self._process_poison_pill()
                    except Exception as e:
                        msg = 'An exception was found while processing poison pill: "%s"'
                        om.out.debug(msg % e)
                    finally:
                        self._running = False
                        self.in_queue.task_done()
                        break

                else:
                    # The try/finally guarantees task_done() runs even if
                    # _consume() raises
                    try:
                        self._consume(work_unit)
                    finally:
                        self.in_queue.task_done()

                    # Free memory
                    work_unit = None
Example 12
class CrawlInfrastructure(BaseConsumer):
    """
    Consumer thread that takes fuzzable requests from the input Queue that is
    seeded by the core, sends each fuzzable request to all crawl and
    infrastructure plugins, get()'s the output from those plugins, and puts it
    back into the input Queue to continue the discovery process.
    """
    def __init__(self, crawl_infrastructure_plugins, w3af_core,
                 max_discovery_time):
        """
        :param crawl_infrastructure_plugins: Instances of CrawlInfrastructure
                                             plugins in a list
        :param w3af_core: The w3af core that we'll use for status reporting
        :param max_discovery_time: The max time (in seconds) to use for the
                                   discovery phase
        """
        super(CrawlInfrastructure, self).__init__(crawl_infrastructure_plugins,
                                                  w3af_core,
                                                  thread_name=self.get_name(),
                                                  max_pool_queued_tasks=100)
        self._max_discovery_time = int(max_discovery_time)

        # For filtering fuzzable requests found by plugins:
        self._variant_db = VariantDB()

        self._disabled_plugins = set()
        self._running = True
        self._report_max_time = True
        self._reported_found_urls = ScalableBloomFilter()

        # Override BaseConsumer.in_queue in order to have an ordered queue for
        # our crawling process.
        #
        # Read OrderedCachedQueue's documentation to understand why order is
        # important
        self.in_queue = OrderedCachedQueue(maxsize=10,
                                           name=self.get_name() + 'In')

    def get_name(self):
        return 'CrawlInfra'

    def run(self):
        """
        Consume the queue items, sending them to the plugins which are then
        going to find vulnerabilities, new URLs, etc.
        """
        while True:

            try:
                work_unit = self.in_queue.get(timeout=0.1)
            except KeyboardInterrupt:
                # https://github.com/andresriancho/w3af/issues/9587
                #
                # If we don't do this, the thread will die and will never
                # process the POISON_PILL, which will end up in an endless
                # wait for .join()
                continue

            except Queue.Empty:
                # pylint: disable=E1120
                try:
                    self._route_all_plugin_results()
                except KeyboardInterrupt:
                    continue
                # pylint: enable=E1120
            else:
                if work_unit == POISON_PILL:

                    try:
                        # Close the pool and wait for everyone to finish
                        self._threadpool.close()
                        self._threadpool.join()
                        self._threadpool = None

                        self._running = False
                        self._teardown()
                    finally:
                        # Finish this consumer and everyone consuming the output
                        self._out_queue.put(POISON_PILL)
                        self.in_queue.task_done()
                        self.set_has_finished()
                        break

                else:
                    # The try/finally guarantees task_done() runs even if
                    # _consume() raises
                    try:
                        self._consume(work_unit)
                    finally:
                        self.in_queue.task_done()

                    # Free memory
                    work_unit = None

    def _teardown(self, plugin=None):
        """
        End plugins
        """
        to_teardown = self._consumer_plugins

        if plugin is not None:
            to_teardown = [plugin]

        # When we disable a plugin because it raised a RunOnceException,
        # we call .end(), so no need to call the same method twice
        to_teardown = set(to_teardown) - self._disabled_plugins

        msg = 'Starting CrawlInfra consumer _teardown() with %s plugins.'
        om.out.debug(msg % len(to_teardown))

        for plugin in to_teardown:
            om.out.debug('Calling %s.end().' % plugin.get_name())
            start_time = time.time()

            try:
                plugin.end()
            except ScanMustStopException:
                # If we reach this exception here we don't care much
                # since the scan is ending already. The log message stating
                # that the scan will end because of this error was already
                # delivered by the HTTP client.
                #
                # We `pass` instead of `break` because some plugins might
                # still be able to `end()` without sending HTTP requests to
                # the remote server
                msg_fmt = ('Spent %.2f seconds running %s.end() until a'
                           ' scan must stop exception was raised.')
                self._log_end_took(msg_fmt, start_time, plugin)

            except Exception as e:
                msg_fmt = ('Spent %.2f seconds running %s.end() until an'
                           ' unhandled exception was found.')
                self._log_end_took(msg_fmt, start_time, plugin)

                self.handle_exception('crawl', plugin.get_name(),
                                      'plugin.end()', e)

            else:
                msg_fmt = 'Spent %.2f seconds running %s.end().'
                self._log_end_took(msg_fmt, start_time, plugin)

            finally: