Ejemplo n.º 1
0
class TestEnrichmentLoaderSource(unittest.TestCase):
    """
    Tests for `EnrichmentLoaderSource`.

    Both tests ask a source constructed with "/" whether a given file path
    counts as one of its data files.
    """
    def setUp(self):
        # A fresh source is built for every test.
        root_directory = "/"
        self.source = EnrichmentLoaderSource(root_directory)

    def test_is_data_file_when_is(self):
        # A path ending in ".loader.py" is expected to be accepted.
        loader_path = "/my/file.loader.py"
        recognised = self.source.is_data_file(loader_path)
        self.assertTrue(recognised)

    def test_is_data_file_when_is_not(self):
        # A plain ".py" path is expected to be rejected.
        ordinary_path = "/my/file.py"
        recognised = self.source.is_data_file(ordinary_path)
        self.assertFalse(recognised)
Ejemplo n.º 2
0
    def setUp(self):
        """
        Builds the processing pipeline used by each test: two temporary
        directories, a stub context, an enrichment loader source, a rule
        source, a mocked cookie jar and a processor manager listening to it.
        """
        # Temporary directories that the two sources are pointed at
        directory_suffix = TestIntegration.__name__
        self.rules_directory = mkdtemp(prefix="rules", suffix=directory_suffix)
        self.enrichment_loaders_directory = mkdtemp(prefix="enrichment_loaders", suffix=directory_suffix)

        # The same stub context is handed to both sources
        context = StubContext()
        self.resource_accessor = context

        # Enrichment loader source
        loader_source = EnrichmentLoaderSource(self.enrichment_loaders_directory, context)
        self.enrichment_loader_source = loader_source
        loader_source.start()

        # Cookie jar
        self.cookie_jar = create_magic_mock_cookie_jar()

        # Rule source
        rule_source = RuleSource(self.rules_directory, context)
        self.rules_source = rule_source
        rule_source.start()

        # Data processor manager, wired to the jar and both sources
        self.processor_manager = BasicProcessorManager(self.cookie_jar, rule_source, loader_source)

        def process_on_notification(*_):
            # Whatever the jar passes to its listeners is ignored
            self.processor_manager.process_any_cookies()

        self.cookie_jar.add_listener(process_on_notification)
Ejemplo n.º 3
0
 def setUp(self):
     """
     Creates the `EnrichmentLoaderSource` under test, constructed with "/".
     """
     root_directory = "/"
     self.source = EnrichmentLoaderSource(root_directory)
Ejemplo n.º 4
0
def run(config_location):
    """
    Builds every component of the system from the configuration and starts them.

    In order: measurement logging, the HTTP(S) connection pool patch, the
    cookie jar, the retrieval manager, the rule/enrichment context, the rule
    and enrichment loader sources, the processor manager, the HTTP API, and
    finally the monitors.

    :param config_location: directory containing the "setup.conf" configuration
    file; the "since" file holding the last retrieval timestamp is also read
    from this directory
    """
    # Load config
    config = load_config(os.path.join(config_location, "setup.conf"))

    # Setup measurement logging
    logging_buffer_latency = timedelta(seconds=config.influxdb.buffer_latency)
    influxdb_config = InfluxDBConnectionConfig(config.influxdb.host, config.influxdb.port, config.influxdb.username,
                                               config.influxdb.password, config.influxdb.database)
    logger = InfluxDBLogger(influxdb_config, buffer_latency=logging_buffer_latency)

    # Set HTTP(S) connection pool size (for CouchDB)
    # NOTE This is taken from an environment variable, as it's not
    # something that would probably need tweaking that much:
    pool_size = int(os.environ.get('COOKIEMONSTER_POOL_SIZE', 16))
    patch_connection_pools(maxsize=pool_size)

    # Setup cookie jar
    cookie_jar = RateLimitedBiscuitTin(config.cookie_jar.max_requests_per_second,
                                       config.cookie_jar.url,
                                       config.cookie_jar.database,
                                       config.cookie_jar.buffer_capacity,
                                       config.cookie_jar.buffer_latency,
                                       verify=config.cookie_jar.cacert)
    add_cookie_jar_logging(cookie_jar, logger)
    add_couchdb_logging(cookie_jar, logger)

    # Setup data retrieval manager
    update_mapper = BatonUpdateMapper(config.baton.binaries_location, zone=config.baton.zone)
    retrieval_manager = PeriodicRetrievalManager(config.retrieval.period, update_mapper, logger)

    # # Setup basic Slack client
    # slack = BasicSlackClient(config.slack.token, config.slack.default_channel, config.slack.default_username)
    slack = None

    # Setup rule output log file writer
    rule_log_writer = RuleOutputWriter(config.output.log_file)

    # # Setup basic message queue (e.g. RabbitMQ) client
    # message_queue = BasicMessageQueue(config.message_queue.host, config.message_queue.port,
    #                                   config.message_queue.username, config.message_queue.password)
    message_queue = None

    # Define the context that rules and enrichment loaders have access to
    # (the Slack and message queue clients are currently disabled, hence `None`)
    context = HgiContext(cookie_jar, config, rule_log_writer, slack, message_queue)

    # Setup rules source
    rules_source = RuleSource(config.processing.rules_location, context)
    rules_source.start()

    # Setup enrichment loader source
    enrichment_loader_source = EnrichmentLoaderSource(config.processing.enrichment_loaders_location, context)
    enrichment_loader_source.start()

    # Setup the data processor manager
    processor_manager = BasicProcessorManager(cookie_jar, rules_source, enrichment_loader_source,
                                              config.processing.max_threads, logger)

    # Connect components to the cookie jar
    _connect_retrieval_manager_to_cookie_jar(retrieval_manager, cookie_jar, config.cookie_jar.max_requests_per_second,
                                             logger)
    _connect_retrieval_manager_to_since_file(retrieval_manager, config_location)
    _connect_processor_manager_to_cookie_jar(processor_manager, cookie_jar)

    # Setup the HTTP API
    api = HTTP_API()
    api.inject(APIDependency.CookieJar, cookie_jar)
    api.inject(APIDependency.System, None)
    api.listen(config.api.port)

    # Start the retrieval manager from the last known successful
    # retrieval time (or invocation time, otherwise)
    try:
        with open(os.path.join(config_location, "since"), "r") as f:
            since_time = datetime.fromtimestamp(int(f.read()))
    except (OSError, ValueError, OverflowError):
        # Only the failures a missing or corrupt "since" file can cause are
        # handled: the file cannot be opened/read (OSError), its content is
        # not an integer (ValueError), or the timestamp is out of range
        # (OverflowError/OSError/ValueError). Anything else, including
        # KeyboardInterrupt and SystemExit, is allowed to propagate.
        since_time = datetime.now()

    retrieval_manager.start(since_time)

    # Start processing of any unprocessed cookies
    processor_manager.process_any_cookies()

    # Setup monitors
    ThreadsMonitor(logger, logging_buffer_latency).start()
    CookieJarMonitor(logger, logging_buffer_latency, cookie_jar).start()
Ejemplo n.º 5
0
def run(config_location):
    """
    Builds every component of the system from the configuration and starts them.

    In order: measurement logging, the HTTP connection pool patch, the cookie
    jar, the retrieval manager, the rule/enrichment context, the rule and
    enrichment loader sources, the processor manager, the HTTP API, and
    finally the monitors.

    :param config_location: passed as-is to `load_config`
    """
    settings = load_config(config_location)

    # Measurement logging
    metrics_buffer_latency = timedelta(seconds=settings.influxdb.buffer_latency)
    metrics_connection = InfluxDBConnectionConfig(
        settings.influxdb.host,
        settings.influxdb.port,
        settings.influxdb.username,
        settings.influxdb.password,
        settings.influxdb.database,
    )
    metrics_logger = InfluxDBLogger(metrics_connection, buffer_latency=metrics_buffer_latency)

    # HTTP connection pool size (for CouchDB); read from the environment
    # rather than the config because it is not expected to need much tuning
    http_pool_size = int(environ.get('COOKIEMONSTER_HTTP_POOL_SIZE', 16))
    patch_http_connection_pool(maxsize=http_pool_size)

    # Cookie jar, with logging attached
    jar = RateLimitedBiscuitTin(
        settings.cookie_jar.max_requests_per_second,
        settings.cookie_jar.url,
        settings.cookie_jar.database,
    )
    add_cookie_jar_logging(jar, metrics_logger)

    # Data retrieval manager
    baton_mapper = BatonUpdateMapper(settings.baton.binaries_location, zone=settings.baton.zone)
    retriever = PeriodicRetrievalManager(settings.retrieval.period, baton_mapper, metrics_logger)

    # The Slack client (BasicSlackClient) and message queue client
    # (BasicMessageQueue, e.g. RabbitMQ) are not constructed at present,
    # so the context is given `None` for both
    slack_client = None
    queue_client = None

    # Context that rules and enrichment loaders have access to
    shared_context = HgiContext(jar, settings, slack_client, queue_client)

    # Rules source
    rule_source = RuleSource(settings.processing.rules_location, shared_context)
    rule_source.start()

    # Enrichment loader source
    loader_source = EnrichmentLoaderSource(settings.processing.enrichment_loaders_location, shared_context)
    loader_source.start()

    # Data processor manager
    processors = BasicProcessorManager(
        jar, rule_source, loader_source, settings.processing.max_threads, metrics_logger)

    # Connect components to the cookie jar
    _connect_retrieval_manager_to_cookie_jar(
        retriever, jar, settings.cookie_jar.max_requests_per_second, metrics_logger)
    _connect_processor_manager_to_cookie_jar(processors, jar)

    # HTTP API
    http_api = HTTP_API()
    http_api.inject(APIDependency.CookieJar, jar)
    http_api.listen(settings.api.port)

    # Start retrieval from the configured point in time
    retriever.start(settings.retrieval.since)

    # Process anything already waiting in the jar
    processors.process_any_cookies()

    # Monitors
    threads_monitor = ThreadsMonitor(metrics_logger, metrics_buffer_latency)
    threads_monitor.start()
    jar_monitor = CookieJarMonitor(metrics_logger, metrics_buffer_latency, jar)
    jar_monitor.start()
Ejemplo n.º 6
0
class TestIntegration(unittest.TestCase):
    """
    Integration tests for processor.

    Each test wires a mocked cookie jar to a `BasicProcessorManager` backed by
    a `RuleSource` and an `EnrichmentLoaderSource` that read from temporary
    directories, then checks how the jar, rules and enrichment loaders were
    called.
    """
    # Number of cookie identifiers generated by default in each test
    _NUMBER_OF_COOKIES = 250
    _NUMBER_OF_PROCESSORS = 10

    def setUp(self):
        """
        Creates the temporary directories and builds the processing pipeline.
        """
        self.rules_directory = mkdtemp(prefix="rules", suffix=TestIntegration.__name__)
        self.enrichment_loaders_directory = mkdtemp(prefix="enrichment_loaders", suffix=TestIntegration.__name__)

        self.resource_accessor = StubContext()

        # Setup enrichment
        self.enrichment_loader_source = EnrichmentLoaderSource(
            self.enrichment_loaders_directory, self.resource_accessor)
        self.enrichment_loader_source.start()

        # Setup cookie jar
        self.cookie_jar = create_magic_mock_cookie_jar()

        # Setup rules source
        self.rules_source = RuleSource(self.rules_directory, self.resource_accessor)
        self.rules_source.start()

        # Setup the data processor manager
        self.processor_manager = BasicProcessorManager(
            self.cookie_jar, self.rules_source, self.enrichment_loader_source)

        def cookie_jar_connector(*args):
            # Arguments supplied by the cookie jar to its listeners are ignored
            self.processor_manager.process_any_cookies()

        self.cookie_jar.add_listener(cookie_jar_connector)

    def tearDown(self):
        """
        Removes the temporary directories created in `setUp`.
        """
        shutil.rmtree(self.rules_directory)
        shutil.rmtree(self.enrichment_loaders_directory)

    def test_with_no_rules_or_enrichments(self):
        cookie_ids = _generate_cookie_ids(TestIntegration._NUMBER_OF_COOKIES)
        block_until_processed(self.cookie_jar, cookie_ids, TestIntegration._NUMBER_OF_COOKIES)

        self.assertEqual(self.cookie_jar.mark_as_complete.call_count, len(cookie_ids))
        self.cookie_jar.mark_as_failed.assert_not_called()

        # TODO: Call if no rules match and no further enrichments?

    @unittest.skip("Flaky test")
    def test_with_no_rules_but_enrichments(self):
        add_data_files(self.enrichment_loader_source, _ENRICHMENT_LOADER_LOCATIONS)

        # This test uses a single generated cookie. The count is kept local so
        # that the class-level `_NUMBER_OF_COOKIES` is left untouched for the
        # other tests, whatever order they run in.
        number_of_cookies = 1
        cookie_ids = _generate_cookie_ids(number_of_cookies)
        cookie_ids.append(NAME_ENRICHMENT_LOADER_MATCH_COOKIE)
        # NOTE(review): the factor of 2 presumably reflects each cookie being
        # processed again after it has been enriched - confirm
        expected_number_of_times_processed = len(cookie_ids) * 2
        block_until_processed(self.cookie_jar, cookie_ids, expected_number_of_times_processed)

        self.assertEqual(self.cookie_jar.mark_as_complete.call_count, expected_number_of_times_processed)
        self.cookie_jar.mark_as_failed.assert_not_called()

        enrichment_loader_checker = EnrichmentLoaderChecker(self, self.enrichment_loader_source.get_all())
        enrichment_loader_checker.assert_call_counts(
            NO_LOADER_ENRICHMENT_LOADER_ID, expected_number_of_times_processed, 0)
        enrichment_loader_checker.assert_call_counts(
            HASH_ENRICHMENT_LOADER_ID, expected_number_of_times_processed - 1, len(cookie_ids))
        enrichment_loader_checker.assert_call_counts(
            NAME_MATCH_LOADER_ENRICHMENT_LOADER_ID, expected_number_of_times_processed, 1)

        # TODO: Call if no rules match and no further enrichments?

    @unittest.skip("Flaky test")
    def test_with_rules_but_no_enrichments(self):
        add_data_files(self.rules_source, _RULE_FILE_LOCATIONS)

        cookie_ids = _generate_cookie_ids(TestIntegration._NUMBER_OF_COOKIES)
        cookie_ids.append(NAME_RULE_MATCH_COOKIE)
        expected_number_of_times_processed = len(cookie_ids)
        block_until_processed(self.cookie_jar, cookie_ids, expected_number_of_times_processed)

        self.assertEqual(self.cookie_jar.mark_as_complete.call_count, expected_number_of_times_processed)
        self.cookie_jar.mark_as_failed.assert_not_called()

        rule_checker = RuleChecker(self, self.rules_source.get_all())
        rule_checker.assert_call_counts(
            ALL_MATCH_RULE_ID, expected_number_of_times_processed, expected_number_of_times_processed)
        rule_checker.assert_call_counts(
            NO_MATCH_RULE_ID, expected_number_of_times_processed, 0)
        rule_checker.assert_call_counts(
            NAME_MATCH_RULE_ID, expected_number_of_times_processed, 1)
        rule_checker.assert_call_counts(
            HASH_ENRICHED_MATCH_RULE_ID, expected_number_of_times_processed, 0)

    @unittest.skip("Flaky test")
    def test_with_rules_and_enrichments(self):
        # Sanity-check the fixtures with unittest assertions rather than bare
        # `assert` statements, which are removed when Python runs with -O
        add_data_files(self.rules_source, _RULE_FILE_LOCATIONS)
        self.assertEqual(len(self.rules_source.get_all()), len(_RULE_FILE_LOCATIONS))
        add_data_files(self.enrichment_loader_source, _ENRICHMENT_LOADER_LOCATIONS)
        self.assertEqual(len(self.enrichment_loader_source.get_all()), len(_ENRICHMENT_LOADER_LOCATIONS))

        cookie_ids = _generate_cookie_ids(TestIntegration._NUMBER_OF_COOKIES)
        cookie_ids.append(NAME_ENRICHMENT_LOADER_MATCH_COOKIE)
        cookie_ids.append(NAME_RULE_MATCH_COOKIE)
        expected_number_of_times_processed = len(cookie_ids) * 2 + 1
        block_until_processed(self.cookie_jar, cookie_ids, expected_number_of_times_processed)

        self.assertEqual(self.cookie_jar.mark_as_complete.call_count, expected_number_of_times_processed)
        self.cookie_jar.mark_as_failed.assert_not_called()

        rule_checker = RuleChecker(self, self.rules_source.get_all())
        rule_checker.assert_call_counts(
            ALL_MATCH_RULE_ID, expected_number_of_times_processed, expected_number_of_times_processed)
        rule_checker.assert_call_counts(
            NO_MATCH_RULE_ID, expected_number_of_times_processed, 0)
        rule_checker.assert_call_counts(
            NAME_MATCH_RULE_ID, expected_number_of_times_processed, len(_ENRICHMENT_LOADER_LOCATIONS) - 1)
        rule_checker.assert_call_counts(
            HASH_ENRICHED_MATCH_RULE_ID, expected_number_of_times_processed, len(cookie_ids))

        enrichment_loader_checker = EnrichmentLoaderChecker(self, self.enrichment_loader_source.get_all())
        enrichment_loader_checker.assert_call_counts(
            NO_LOADER_ENRICHMENT_LOADER_ID, expected_number_of_times_processed, 0)
        enrichment_loader_checker.assert_call_counts(
            HASH_ENRICHMENT_LOADER_ID, expected_number_of_times_processed - 1, len(cookie_ids))
        enrichment_loader_checker.assert_call_counts(
            NAME_MATCH_LOADER_ENRICHMENT_LOADER_ID, expected_number_of_times_processed, 1)