def test_process_any_cookies_when_no_processing_resources(self):
        """
        Tests that a cookie queued whilst an earlier cookie is still being processed is processed afterwards.

        The first cookie's rule blocks inside its matching criteria. A second cookie is then queued, the rule is
        swapped for one with a monitored second callable and the block is released. Both cookies must end up
        marked as complete, and the monitor must have been called exactly once.
        """
        processor_manager = BasicProcessorManager(
            self.cookie_jar, ListDataSource(self.rules), ListDataSource(self.enrichment_loaders))

        # Released once for every `mark_as_complete` call made on the cookie jar
        complete = Semaphore(0)

        def on_complete(*args):
            complete.release()

        self.cookie_jar.mark_as_complete = MagicMock(side_effect=on_complete)

        # `rule_lock` holds the matching criteria until the test releases it; `match_lock` (created locked) is
        # released by the matching criteria to signal that it has been entered
        rule_lock = Semaphore(0)
        match_lock = Lock()
        match_lock.acquire()

        def matching_criteria(cookie: Cookie, _: Context) -> bool:
            match_lock.release()
            rule_lock.acquire()
            return True

        self.rules.append(Rule(matching_criteria, lambda *args: True, RULE_IDENTIFIER))

        self.cookie_jar.mark_for_processing(self.cookie.identifier)
        processor_manager.process_any_cookies()
        match_lock.acquire()
        # Processor should have locked at this point - i.e. 0 free processors

        self.cookie_jar.mark_for_processing("/other/cookie")
        processor_manager.process_any_cookies()
        # The fact that there are more cookies should be "remembered" by the processor manager

        # Change the rules for the next cookie to be processed
        self.rules.pop()
        rule_execute_monitor = MagicMock(return_value=False)
        self.rules.append(Rule(lambda *args: True, rule_execute_monitor, RULE_IDENTIFIER))

        # Free the processor to complete the first cookie
        # NOTE(review): released twice - presumably in case the blocking criteria gets entered a second time
        # before the rule swap is seen; confirm
        rule_lock.release()
        rule_lock.release()

        # Wait for both cookies to be processed
        for _ in range(2):
            complete.acquire()

        self.cookie_jar.mark_as_complete.assert_has_calls([call(self.cookie.identifier), call("/other/cookie")])
        # `assertEqual` (rather than a bare `assert`) so that the check is not stripped when running with `-O`
        self.assertEqual(len(self.rules), 1)
        self.assertEqual(rule_execute_monitor.call_count, 1)
    def setUp(self):
        """
        Builds the fixture: temporary rule and enrichment loader directories, started sources built on those
        directories, a mock cookie jar and a processor manager that is triggered by the cookie jar's listener.
        """
        suffix = TestIntegration.__name__
        self.rules_directory = mkdtemp(prefix="rules", suffix=suffix)
        self.enrichment_loaders_directory = mkdtemp(prefix="enrichment_loaders", suffix=suffix)

        context = StubContext()
        self.resource_accessor = context

        # Enrichment loader source, started before anything else uses it
        loaders = EnrichmentLoaderSource(self.enrichment_loaders_directory, context)
        self.enrichment_loader_source = loaders
        loaders.start()

        # Mock cookie jar
        jar = create_magic_mock_cookie_jar()
        self.cookie_jar = jar

        # Rule source
        rules = RuleSource(self.rules_directory, context)
        self.rules_source = rules
        rules.start()

        # Processor manager drawing on the cookie jar and both sources
        self.processor_manager = BasicProcessorManager(jar, rules, loaders)

        def process_on_update(*args):
            # Whatever the cookie jar passes to its listeners is ignored: any notification triggers processing
            self.processor_manager.process_any_cookies()

        jar.add_listener(process_on_update)
    def setUp(self):
        """
        Builds the fixture: a mock cookie jar, empty rule and enrichment loader lists, two sample notifications,
        a cookie and a processor manager whose data sources are built from the two lists.
        """
        self.cookie_jar = create_magic_mock_cookie_jar()

        # Kept on the test case so that tests can add to them after set up
        # NOTE(review): this relies on `ListDataSource` holding a reference to the list, not a copy - confirm
        self.rules = []
        self.enrichment_loaders = []

        self.notifications = [Notification("a", "b"), Notification("c", "d")]
        self.cookie = Cookie(COOKIE_IDENTIFIER)

        self.processor_manager = BasicProcessorManager(
            self.cookie_jar, ListDataSource(self.rules), ListDataSource(self.enrichment_loaders))
class TestBasicProcessorManager(unittest.TestCase):
    """
    Tests for `BasicProcessorManager`.
    """
    def setUp(self):
        """
        Builds the fixture: a mock cookie jar, empty rule and enrichment loader lists, two sample notifications,
        a cookie and a processor manager whose data sources are built from the two lists.
        """
        self.cookie_jar = create_magic_mock_cookie_jar()

        # Kept on the test case so that tests can add rules/loaders after set up
        # NOTE(review): this relies on `ListDataSource` holding a reference to the list, not a copy - confirm
        self.rules = []
        self.enrichment_loaders = []

        self.notifications = [Notification("a", "b"), Notification("c", "d")]
        self.cookie = Cookie(COOKIE_IDENTIFIER)

        self.processor_manager = BasicProcessorManager(
            self.cookie_jar, ListDataSource(self.rules), ListDataSource(self.enrichment_loaders))

    def test_init_with_less_than_one_thread(self):
        """
        Tests that constructing a processor manager with 0 as the fourth argument raises `ValueError`.
        """
        # Both lists are wrapped in `ListDataSource`, matching every other construction in this test case
        self.assertRaises(
            ValueError, BasicProcessorManager, self.cookie_jar, ListDataSource(self.rules),
            ListDataSource(self.enrichment_loaders), 0)

    def test_process_any_cookies_when_no_jobs(self):
        """
        Tests that, with nothing queued, the cookie jar is asked for the next cookie once and nothing is marked
        as complete.
        """
        # Created locked; released by the mock when the cookie jar is asked for the next cookie
        complete = Lock()
        complete.acquire()

        def on_get_next_for_processing(*args):
            complete.release()

        self.cookie_jar.get_next_for_processing = MagicMock(side_effect=on_get_next_for_processing)

        self.processor_manager.process_any_cookies()
        # Blocks until `get_next_for_processing` has been called
        complete.acquire()

        self.cookie_jar.get_next_for_processing.assert_called_once_with()
        self.cookie_jar.mark_as_complete.assert_not_called()

    def test_process_any_cookies_when_jobs(self):
        """
        Tests that every queued cookie gets marked as complete when `process_any_cookies` is called from many
        threads. The test passes by not blocking forever: it has no further assertions.
        """
        # Released once for every `mark_as_complete` call made on the cookie jar
        complete = Semaphore(0)

        def on_complete(*args):
            complete.release()

        self.cookie_jar.mark_as_complete = MagicMock(side_effect=on_complete)

        self.rules.append(Rule(lambda *args: True, lambda *args: True, RULE_IDENTIFIER))

        number_to_process = 100
        for i in range(number_to_process):
            self.cookie_jar.mark_for_processing("%s/%s" % (COOKIE_IDENTIFIER, i))
            Thread(target=self.processor_manager.process_any_cookies).start()

        # Wait for one completion per queued cookie
        for _ in range(number_to_process):
            complete.acquire()

    def test_process_any_cookies_when_no_processing_resources(self):
        """
        Tests that a cookie queued whilst an earlier cookie is still being processed is processed afterwards.

        The first cookie's rule blocks inside its matching criteria. A second cookie is then queued, the rule is
        swapped for one with a monitored second callable and the block is released. Both cookies must end up
        marked as complete, and the monitor must have been called exactly once.
        """
        processor_manager = BasicProcessorManager(
            self.cookie_jar, ListDataSource(self.rules), ListDataSource(self.enrichment_loaders))

        # Released once for every `mark_as_complete` call made on the cookie jar
        complete = Semaphore(0)

        def on_complete(*args):
            complete.release()

        self.cookie_jar.mark_as_complete = MagicMock(side_effect=on_complete)

        # `rule_lock` holds the matching criteria until the test releases it; `match_lock` (created locked) is
        # released by the matching criteria to signal that it has been entered
        rule_lock = Semaphore(0)
        match_lock = Lock()
        match_lock.acquire()

        def matching_criteria(cookie: Cookie, _: Context) -> bool:
            match_lock.release()
            rule_lock.acquire()
            return True

        self.rules.append(Rule(matching_criteria, lambda *args: True, RULE_IDENTIFIER))

        self.cookie_jar.mark_for_processing(self.cookie.identifier)
        processor_manager.process_any_cookies()
        match_lock.acquire()
        # Processor should have locked at this point - i.e. 0 free processors

        self.cookie_jar.mark_for_processing("/other/cookie")
        processor_manager.process_any_cookies()
        # The fact that there are more cookies should be "remembered" by the processor manager

        # Change the rules for the next cookie to be processed
        self.rules.pop()
        rule_execute_monitor = MagicMock(return_value=False)
        self.rules.append(Rule(lambda *args: True, rule_execute_monitor, RULE_IDENTIFIER))

        # Free the processor to complete the first cookie
        # NOTE(review): released twice - presumably in case the blocking criteria gets entered a second time
        # before the rule swap is seen; confirm
        rule_lock.release()
        rule_lock.release()

        # Wait for both cookies to be processed
        for _ in range(2):
            complete.acquire()

        self.cookie_jar.mark_as_complete.assert_has_calls([call(self.cookie.identifier), call("/other/cookie")])
        # `assertEqual` (rather than a bare `assert`) so that the check is not stripped when running with `-O`
        self.assertEqual(len(self.rules), 1)
        self.assertEqual(rule_execute_monitor.call_count, 1)
# Example #5
# 0
def run(config_location):
    """
    Builds, wires together and starts the components of the system.

    :param config_location: directory containing the "setup.conf" configuration file and, optionally, a "since"
    file holding the timestamp (integer seconds) from which retrieval should resume
    """
    # Load config
    config = load_config(os.path.join(config_location, "setup.conf"))

    # Setup measurement logging
    logging_buffer_latency = timedelta(seconds=config.influxdb.buffer_latency)
    influxdb_config = InfluxDBConnectionConfig(config.influxdb.host, config.influxdb.port, config.influxdb.username,
                                               config.influxdb.password, config.influxdb.database)
    logger = InfluxDBLogger(influxdb_config, buffer_latency=logging_buffer_latency)

    # Set HTTP(S) connection pool size (for CouchDB)
    # NOTE This is taken from an environment variable, as it's not
    # something that would probably need tweaking that much:
    pool_size = int(os.environ.get('COOKIEMONSTER_POOL_SIZE', 16))
    patch_connection_pools(maxsize=pool_size)

    # Setup cookie jar
    cookie_jar = RateLimitedBiscuitTin(config.cookie_jar.max_requests_per_second,
                                       config.cookie_jar.url,
                                       config.cookie_jar.database,
                                       config.cookie_jar.buffer_capacity,
                                       config.cookie_jar.buffer_latency,
                                       verify=config.cookie_jar.cacert)
    add_cookie_jar_logging(cookie_jar, logger)
    add_couchdb_logging(cookie_jar, logger)

    # Setup data retrieval manager
    update_mapper = BatonUpdateMapper(config.baton.binaries_location, zone=config.baton.zone)
    retrieval_manager = PeriodicRetrievalManager(config.retrieval.period, update_mapper, logger)

    # # Setup basic Slack client
    # slack = BasicSlackClient(config.slack.token, config.slack.default_channel, config.slack.default_username)
    slack = None

    # Setup rule output log file writer
    rule_log_writer = RuleOutputWriter(config.output.log_file)

    # # Setup basic message queue (e.g. RabbitMQ) client
    # message_queue = BasicMessageQueue(config.message_queue.host, config.message_queue.port,
    #                                   config.message_queue.username, config.message_queue.password)
    message_queue = None

    # Define the context that rules and enrichment loaders has access to
    context = HgiContext(cookie_jar, config, rule_log_writer, slack, message_queue)

    # Setup rules source
    rules_source = RuleSource(config.processing.rules_location, context)
    rules_source.start()

    # Setup enrichment loader source
    enrichment_loader_source = EnrichmentLoaderSource(config.processing.enrichment_loaders_location, context)
    enrichment_loader_source.start()

    # Setup the data processor manager
    processor_manager = BasicProcessorManager(cookie_jar, rules_source, enrichment_loader_source,
                                              config.processing.max_threads, logger)

    # Connect components to the cookie jar
    _connect_retrieval_manager_to_cookie_jar(retrieval_manager, cookie_jar, config.cookie_jar.max_requests_per_second,
                                             logger)
    _connect_retrieval_manager_to_since_file(retrieval_manager, config_location)
    _connect_processor_manager_to_cookie_jar(processor_manager, cookie_jar)

    # Setup the HTTP API
    api = HTTP_API()
    api.inject(APIDependency.CookieJar, cookie_jar)
    api.inject(APIDependency.System, None)
    api.listen(config.api.port)

    # Start the retrieval manager from the last known successful
    # retrieval time (or invocation time, otherwise)
    try:
        with open(os.path.join(config_location, "since"), "r") as f:
            since_time = datetime.fromtimestamp(int(f.read()))
    except (OSError, ValueError, OverflowError):
        # Missing/unreadable file, non-integer content or an out-of-range timestamp: fall back to "now". Kept
        # narrow (not a bare `except`) so that `KeyboardInterrupt`/`SystemExit` are not swallowed
        since_time = datetime.now()

    retrieval_manager.start(since_time)

    # Start processing of any unprocessed cookies
    processor_manager.process_any_cookies()

    # Setup monitors
    ThreadsMonitor(logger, logging_buffer_latency).start()
    CookieJarMonitor(logger, logging_buffer_latency, cookie_jar).start()
# Example #6
# 0
def run(config_location):
    """
    Builds, wires together and starts the components of the system.

    :param config_location: passed as-is to `load_config` to obtain the configuration
    """
    settings = load_config(config_location)

    # Measurement logging, backed by InfluxDB
    influxdb_settings = settings.influxdb
    monitor_period = timedelta(seconds=influxdb_settings.buffer_latency)
    influxdb_connection = InfluxDBConnectionConfig(
        influxdb_settings.host, influxdb_settings.port, influxdb_settings.username, influxdb_settings.password,
        influxdb_settings.database)
    logger = InfluxDBLogger(influxdb_connection, buffer_latency=monitor_period)

    # HTTP connection pool size (for CouchDB). Read from an environment variable rather than from the
    # configuration, as it is not expected to need much tweaking
    patch_http_connection_pool(maxsize=int(environ.get('COOKIEMONSTER_HTTP_POOL_SIZE', 16)))

    # Cookie jar
    jar_settings = settings.cookie_jar
    cookie_jar = RateLimitedBiscuitTin(jar_settings.max_requests_per_second, jar_settings.url, jar_settings.database)
    add_cookie_jar_logging(cookie_jar, logger)

    # Data retrieval manager
    retrieval_manager = PeriodicRetrievalManager(
        settings.retrieval.period,
        BatonUpdateMapper(settings.baton.binaries_location, zone=settings.baton.zone),
        logger)

    # The Slack and message queue clients are currently disabled; what they would be built with is kept below:
    # slack = BasicSlackClient(config.slack.token, config.slack.default_channel, config.slack.default_username)
    # message_queue = BasicMessageQueue(config.message_queue.host, config.message_queue.port,
    #                                   config.message_queue.username, config.message_queue.password)
    slack = None
    message_queue = None

    # Context that rules and enrichment loaders are given access to
    context = HgiContext(cookie_jar, settings, slack, message_queue)

    # Rule and enrichment loader sources
    processing_settings = settings.processing
    rules = RuleSource(processing_settings.rules_location, context)
    rules.start()

    enrichment_loaders = EnrichmentLoaderSource(processing_settings.enrichment_loaders_location, context)
    enrichment_loaders.start()

    # Data processor manager
    processor_manager = BasicProcessorManager(
        cookie_jar, rules, enrichment_loaders, processing_settings.max_threads, logger)

    # Connect the retrieval and processor managers to the cookie jar
    _connect_retrieval_manager_to_cookie_jar(
        retrieval_manager, cookie_jar, settings.cookie_jar.max_requests_per_second, logger)
    _connect_processor_manager_to_cookie_jar(processor_manager, cookie_jar)

    # HTTP API
    http_api = HTTP_API()
    http_api.inject(APIDependency.CookieJar, cookie_jar)
    http_api.listen(settings.api.port)

    # Start retrieval, then process any cookies that are already waiting
    retrieval_manager.start(settings.retrieval.since)
    processor_manager.process_any_cookies()

    # Monitors
    threads_monitor = ThreadsMonitor(logger, monitor_period)
    threads_monitor.start()
    cookie_jar_monitor = CookieJarMonitor(logger, monitor_period, cookie_jar)
    cookie_jar_monitor.start()
class TestIntegration(unittest.TestCase):
    """
    Integration tests for processor.
    """
    _NUMBER_OF_COOKIES = 250
    _NUMBER_OF_PROCESSORS = 10

    def setUp(self):
        """
        Builds the fixture: temporary rule and enrichment loader directories, started sources built on those
        directories, a mock cookie jar and a processor manager that is triggered by the cookie jar's listener.
        """
        self.rules_directory = mkdtemp(prefix="rules", suffix=TestIntegration.__name__)
        self.enrichment_loaders_directory = mkdtemp(prefix="enrichment_loaders", suffix=TestIntegration.__name__)

        self.resource_accessor = StubContext()

        # Setup enrichment
        self.enrichment_loader_source = EnrichmentLoaderSource(
            self.enrichment_loaders_directory, self.resource_accessor)
        self.enrichment_loader_source.start()

        # Setup cookie jar
        self.cookie_jar = create_magic_mock_cookie_jar()

        # Setup rules source
        self.rules_source = RuleSource(self.rules_directory, self.resource_accessor)
        self.rules_source.start()

        # Setup the data processor manager
        self.processor_manager = BasicProcessorManager(
            self.cookie_jar, self.rules_source, self.enrichment_loader_source)

        def cookie_jar_connector(*args):
            # Whatever the cookie jar passes to its listeners is ignored: any notification triggers processing
            self.processor_manager.process_any_cookies()

        self.cookie_jar.add_listener(cookie_jar_connector)

    def tearDown(self):
        """
        Removes the temporary directories created in `setUp`.
        """
        shutil.rmtree(self.rules_directory)
        shutil.rmtree(self.enrichment_loaders_directory)

    def test_with_no_rules_or_enrichments(self):
        """
        Tests that, with no rules or enrichment loaders added, there is one `mark_as_complete` call per cookie
        and no `mark_as_failed` call.
        """
        cookie_ids = _generate_cookie_ids(TestIntegration._NUMBER_OF_COOKIES)
        # NOTE(review): presumably queues the cookies and blocks until the given number have been processed -
        # confirm against `block_until_processed`
        block_until_processed(self.cookie_jar, cookie_ids, TestIntegration._NUMBER_OF_COOKIES)

        self.assertEqual(self.cookie_jar.mark_as_complete.call_count, len(cookie_ids))
        self.cookie_jar.mark_as_failed.assert_not_called()

        # TODO: Call if no rules match and no further enrichments?

    @unittest.skip("Flaky test")
    def test_with_no_rules_but_enrichments(self):
        """
        Tests processing with enrichment loaders but no rules: each cookie is expected to be marked as complete
        twice, and the enrichment loaders are checked against the expected call counts.
        """
        add_data_files(self.enrichment_loader_source, _ENRICHMENT_LOADER_LOCATIONS)

        # Local rather than reassigning `TestIntegration._NUMBER_OF_COOKIES`: changing the class attribute would
        # leak into every test that runs after this one
        number_of_cookies = 1
        cookie_ids = _generate_cookie_ids(number_of_cookies)
        cookie_ids.append(NAME_ENRICHMENT_LOADER_MATCH_COOKIE)
        expected_number_of_times_processed = len(cookie_ids) * 2
        block_until_processed(self.cookie_jar, cookie_ids, expected_number_of_times_processed)

        self.assertEqual(self.cookie_jar.mark_as_complete.call_count, expected_number_of_times_processed)
        self.cookie_jar.mark_as_failed.assert_not_called()

        enrichment_loader_checker = EnrichmentLoaderChecker(self, self.enrichment_loader_source.get_all())
        enrichment_loader_checker.assert_call_counts(
            NO_LOADER_ENRICHMENT_LOADER_ID, expected_number_of_times_processed, 0)
        enrichment_loader_checker.assert_call_counts(
            HASH_ENRICHMENT_LOADER_ID, expected_number_of_times_processed - 1, len(cookie_ids))
        enrichment_loader_checker.assert_call_counts(
            NAME_MATCH_LOADER_ENRICHMENT_LOADER_ID, expected_number_of_times_processed, 1)

        # TODO: Call if no rules match and no further enrichments?

    @unittest.skip("Flaky test")
    def test_with_rules_but_no_enrichments(self):
        """
        Tests processing with rules but no enrichment loaders: each cookie is expected to be marked as complete
        once, and the rules are checked against the expected call counts.
        """
        add_data_files(self.rules_source, _RULE_FILE_LOCATIONS)

        cookie_ids = _generate_cookie_ids(TestIntegration._NUMBER_OF_COOKIES)
        cookie_ids.append(NAME_RULE_MATCH_COOKIE)
        expected_number_of_times_processed = len(cookie_ids)
        block_until_processed(self.cookie_jar, cookie_ids, expected_number_of_times_processed)

        self.assertEqual(self.cookie_jar.mark_as_complete.call_count, expected_number_of_times_processed)
        self.cookie_jar.mark_as_failed.assert_not_called()

        rule_checker = RuleChecker(self, self.rules_source.get_all())
        rule_checker.assert_call_counts(
            ALL_MATCH_RULE_ID, expected_number_of_times_processed, expected_number_of_times_processed)
        rule_checker.assert_call_counts(
            NO_MATCH_RULE_ID, expected_number_of_times_processed, 0)
        rule_checker.assert_call_counts(
            NAME_MATCH_RULE_ID, expected_number_of_times_processed, 1)
        rule_checker.assert_call_counts(
            HASH_ENRICHED_MATCH_RULE_ID, expected_number_of_times_processed, 0)

    @unittest.skip("Flaky test")
    def test_with_rules_and_enrichments(self):
        """
        Tests processing with both rules and enrichment loaders, checking the number of completions and the
        call counts of every rule and enrichment loader.
        """
        add_data_files(self.rules_source, _RULE_FILE_LOCATIONS)
        # `assertEqual` (rather than a bare `assert`) so that the checks are not stripped when running with `-O`
        self.assertEqual(len(self.rules_source.get_all()), len(_RULE_FILE_LOCATIONS))
        add_data_files(self.enrichment_loader_source, _ENRICHMENT_LOADER_LOCATIONS)
        self.assertEqual(len(self.enrichment_loader_source.get_all()), len(_ENRICHMENT_LOADER_LOCATIONS))

        cookie_ids = _generate_cookie_ids(TestIntegration._NUMBER_OF_COOKIES)
        cookie_ids.append(NAME_ENRICHMENT_LOADER_MATCH_COOKIE)
        cookie_ids.append(NAME_RULE_MATCH_COOKIE)
        expected_number_of_times_processed = len(cookie_ids) * 2 + 1
        block_until_processed(self.cookie_jar, cookie_ids, expected_number_of_times_processed)

        self.assertEqual(self.cookie_jar.mark_as_complete.call_count, expected_number_of_times_processed)
        self.cookie_jar.mark_as_failed.assert_not_called()

        rule_checker = RuleChecker(self, self.rules_source.get_all())
        rule_checker.assert_call_counts(
            ALL_MATCH_RULE_ID, expected_number_of_times_processed, expected_number_of_times_processed)
        rule_checker.assert_call_counts(
            NO_MATCH_RULE_ID, expected_number_of_times_processed, 0)
        rule_checker.assert_call_counts(
            NAME_MATCH_RULE_ID, expected_number_of_times_processed, len(_ENRICHMENT_LOADER_LOCATIONS) - 1)
        rule_checker.assert_call_counts(
            HASH_ENRICHED_MATCH_RULE_ID, expected_number_of_times_processed, len(cookie_ids))

        enrichment_loader_checker = EnrichmentLoaderChecker(self, self.enrichment_loader_source.get_all())
        enrichment_loader_checker.assert_call_counts(
            NO_LOADER_ENRICHMENT_LOADER_ID, expected_number_of_times_processed, 0)
        enrichment_loader_checker.assert_call_counts(
            HASH_ENRICHMENT_LOADER_ID, expected_number_of_times_processed - 1, len(cookie_ids))
        enrichment_loader_checker.assert_call_counts(
            NAME_MATCH_LOADER_ENRICHMENT_LOADER_ID, expected_number_of_times_processed, 1)