Example 1
    def setUp(self):
        """
        Test setup:

        * Build, if necessary, and start a CouchDB container and
          connect as a BiscuitTin instance
        * Start the HTTP API service on a free port, with the necessary
          dependencies injected
        * Create an HTTP client connection to the API service
        """
        self.couchdb_container = CouchDBContainer()

        # Configuration for Cookie Jar
        self.HOST = self.couchdb_container.couchdb_fqdn
        self.DB = 'elmo-test'

        self.jar = BiscuitTin(self.HOST, self.DB, 1, timedelta(0))

        # Configuration for HTTP service
        self.API_PORT = get_open_port()

        self.api = HTTP_API()
        self.api.inject(APIDependency.CookieJar, self.jar)
        self.api.inject(APIDependency.System, None)
        self.api.listen(self.API_PORT)

        self.http = HTTPConnection('localhost', self.API_PORT)
        self.REQ_HEADER = {'Accept': 'application/json'}

        # Block until service is up (or timeout)
        start_time = finish_time = datetime.now()
        service_up = False
        while finish_time - start_time < timedelta(seconds=5):
            response = None

            try:
                self.http.request('HEAD', '/')
                response = self.http.getresponse()

            except OSError:
                # Service not accepting connections yet; back off briefly
                sleep(0.1)

            finally:
                self.http.close()
                finish_time = datetime.now()

            if isinstance(response, HTTPResponse):
                service_up = True
                break

        if not service_up:
            self.tearDown()
            raise ConnectionError('Couldn\'t start API service in a reasonable amount of time')
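
The get_open_port() helper used above is not part of this listing. A minimal sketch of such a helper, assuming it simply asks the operating system for an ephemeral port, might look like this:

import socket

def get_open_port() -> int:
    """Ask the OS for a free TCP port and return its number (hypothetical helper)."""
    with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
        s.bind(('localhost', 0))   # port 0 lets the OS pick any free port
        return s.getsockname()[1]  # the port number that was assigned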
Example 2
def run(config_location):
    # Load config
    config = load_config(os.path.join(config_location, "setup.conf"))

    # Setup measurement logging
    logging_buffer_latency = timedelta(seconds=config.influxdb.buffer_latency)
    influxdb_config = InfluxDBConnectionConfig(config.influxdb.host, config.influxdb.port, config.influxdb.username,
                                               config.influxdb.password, config.influxdb.database)
    logger = InfluxDBLogger(influxdb_config, buffer_latency=logging_buffer_latency)

    # Set HTTP(S) connection pool size (for CouchDB)
    # NOTE This is taken from an environment variable, as it's unlikely
    # to need much tweaking:
    pool_size = int(os.environ.get('COOKIEMONSTER_POOL_SIZE', 16))
    patch_connection_pools(maxsize=pool_size)

    # Setup cookie jar
    cookie_jar = RateLimitedBiscuitTin(config.cookie_jar.max_requests_per_second,
                                       config.cookie_jar.url,
                                       config.cookie_jar.database,
                                       config.cookie_jar.buffer_capacity,
                                       config.cookie_jar.buffer_latency,
                                       verify=config.cookie_jar.cacert)
    add_cookie_jar_logging(cookie_jar, logger)
    add_couchdb_logging(cookie_jar, logger)

    # Setup data retrieval manager
    update_mapper = BatonUpdateMapper(config.baton.binaries_location, zone=config.baton.zone)
    retrieval_manager = PeriodicRetrievalManager(config.retrieval.period, update_mapper, logger)

    # # Setup basic Slack client
    # slack = BasicSlackClient(config.slack.token, config.slack.default_channel, config.slack.default_username)
    slack = None

    # Setup rule output log file writer
    rule_log_writer = RuleOutputWriter(config.output.log_file)

    # # Setup basic message queue (e.g. RabbitMQ) client
    # message_queue = BasicMessageQueue(config.message_queue.host, config.message_queue.port,
    #                                   config.message_queue.username, config.message_queue.password)
    message_queue = None

    # Define the context that rules and enrichment loaders have access to
    context = HgiContext(cookie_jar, config, rule_log_writer, slack, message_queue)

    # Setup rules source
    rules_source = RuleSource(config.processing.rules_location, context)
    rules_source.start()

    # Setup enrichment loader source
    enrichment_loader_source = EnrichmentLoaderSource(config.processing.enrichment_loaders_location, context)
    enrichment_loader_source.start()

    # Setup the data processor manager
    processor_manager = BasicProcessorManager(cookie_jar, rules_source, enrichment_loader_source,
                                              config.processing.max_threads, logger)

    # Connect components to the cookie jar
    _connect_retrieval_manager_to_cookie_jar(retrieval_manager, cookie_jar, config.cookie_jar.max_requests_per_second,
                                             logger)
    _connect_retrieval_manager_to_since_file(retrieval_manager, config_location)
    _connect_processor_manager_to_cookie_jar(processor_manager, cookie_jar)

    # Setup the HTTP API
    api = HTTP_API()
    api.inject(APIDependency.CookieJar, cookie_jar)
    api.inject(APIDependency.System, None)
    api.listen(config.api.port)

    # Start the retrieval manager from the last known successful
    # retrieval time (or invocation time, otherwise)
    try:
        with open(os.path.join(config_location, "since"), "r") as f:
            since_time = datetime.fromtimestamp(int(f.read()))
    except (OSError, ValueError):
        # Missing or malformed "since" file; fall back to the invocation time
        since_time = datetime.now()

    retrieval_manager.start(since_time)

    # Start processing of any unprocessed cookies
    processor_manager.process_any_cookies()

    # Setup monitors
    ThreadsMonitor(logger, logging_buffer_latency).start()
    CookieJarMonitor(logger, logging_buffer_latency, cookie_jar).start()
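
The listing does not show how run() is invoked. A hypothetical entry point, assuming the configuration directory is passed as the only command-line argument, could look like this:

import sys

if __name__ == '__main__':
    # Hypothetical invocation: the configuration directory is given on the command line
    run(sys.argv[1])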
Example 3
class TestElmo(unittest.TestCase):
    def setUp(self):
        """
        Test set up:

        * Build, if necessary, and start a CouchDB container and
          connect as a BiscuitTin instance
        * Start the HTTP API service on a free port, with the necessary
          dependencies injected
        * Create an HTTP client connection to the API service
        """
        self.couchdb_container = CouchDBContainer()

        # Configuration for Cookie Jar
        self.HOST = self.couchdb_container.couchdb_fqdn
        self.DB = 'elmo-test'

        self.jar = BiscuitTin(self.HOST, self.DB, 1, timedelta(0))

        # Configuration for HTTP service
        self.API_PORT = get_open_port()

        self.api = HTTP_API()
        self.api.inject(APIDependency.CookieJar, self.jar)
        self.api.inject(APIDependency.System, None)
        self.api.listen(self.API_PORT)

        self.http = HTTPConnection('localhost', self.API_PORT)
        self.REQ_HEADER = {'Accept': 'application/json'}

        # Block until service is up (or timeout)
        start_time = finish_time = datetime.now()
        service_up = False
        while finish_time - start_time < timedelta(seconds=5):
            response = None

            try:
                self.http.request('HEAD', '/')
                response = self.http.getresponse()

            except OSError:
                # Service not accepting connections yet; back off briefly
                sleep(0.1)

            finally:
                self.http.close()
                finish_time = datetime.now()

            if isinstance(response, HTTPResponse):
                service_up = True
                break

        if not service_up:
            self.tearDown()
            raise ConnectionError('Couldn\'t start API service in a reasonable amount of time')

    def tearDown(self):
        """ Tear down test set up """
        self.http.close()
        self.api.stop()
        self.couchdb_container.tear_down()

    def test_queue(self):
        """
        HTTP API: GET /queue
        """
        self.http.request('GET', '/queue', headers=self.REQ_HEADER)
        r = self.http.getresponse()

        self.assertEqual(r.status, 200)
        self.assertEqual(r.headers.get_content_type(), 'application/json')

        data = _decode_json_response(r)
        self.assertIn('queue_length', data)
        self.assertEqual(data['queue_length'], self.jar.queue_length()) # Should be 0

        self.http.close()

        # Add item to the queue
        self.jar.mark_for_processing('/foo')

        self.http.request('GET', '/queue', headers=self.REQ_HEADER)
        data = _decode_json_response(self.http.getresponse())
        self.assertEqual(data['queue_length'], self.jar.queue_length()) # Should be 1

    def test_reprocess(self):
        """
        HTTP API: POST /queue/reprocess
        """
        # Add mocked update notifier to Cookie Jar
        dirty_cookie_listener = MagicMock()
        self.jar.add_listener(dirty_cookie_listener)

        cookie_identifier = '/foo'
        request = {'identifier': cookie_identifier}
        self.http.request('POST', '/queue/reprocess', body=json.dumps(request), headers=self.REQ_HEADER)
        r = self.http.getresponse()

        self.assertEqual(r.status, 200)
        self.assertEqual(r.headers.get_content_type(), 'application/json')

        data = _decode_json_response(r)
        self.assertEqual(data, request)

        self.http.close()

        # Check queue has been updated
        self.assertEqual(self.jar.queue_length(), 1)
        self.assertEqual(dirty_cookie_listener.call_count, 1)

    @staticmethod
    def _url_for_identifier(identifier: str):
        """ URL for identifier """
        if identifier[0] == "/":
            return '/cookiejar?identifier={}'.format(identifier)
        else:
            return '/cookiejar/{}'.format(identifier)

    def _fetch_test(self, identifier: str):
        """ Generic fetch test """
        source = 'foobar'
        timestamp = datetime.now().replace(microsecond=0, tzinfo=timezone.utc)
        metadata = Metadata({'foo': 123, 'bar': 'quux'})
        enrichment = Enrichment(source, timestamp, metadata)

        self.jar.enrich_cookie(identifier, enrichment)

        self.http.request('GET', TestElmo._url_for_identifier(identifier), headers=self.REQ_HEADER)
        r = self.http.getresponse()

        self.assertEqual(r.status, 200)
        self.assertEqual(r.headers.get_content_type(), 'application/json')

        data = _decode_json_response(r)

        fetched_identifier = data['identifier']
        # Round-trip through JSON so the custom decoder rebuilds Enrichment objects
        fetched_enrichment = json.loads(json.dumps(data['enrichments']), cls=EnrichmentJSONDecoder)[0]

        self.assertEqual(fetched_identifier, identifier)
        self.assertEqual(fetched_enrichment, enrichment)

    def test_fetch_by_qs(self):
        """
        HTTP API: GET /cookiejar?identifier=<identifier>
        """
        self._fetch_test('/path/to/foo')

    def test_fetch_by_route(self):
        """
        HTTP API: GET /cookiejar/<identifier>
        """
        self._fetch_test('foo_bar')

    def _delete_test(self, identifier: str):
        """ Generic delete test """
        self.jar.mark_for_processing(identifier)
        self.jar.mark_as_complete(identifier)

        cookie = self.jar.fetch_cookie(identifier)
        self.assertIsInstance(cookie, Cookie)

        self.http.request('DELETE', TestElmo._url_for_identifier(identifier), headers=self.REQ_HEADER)
        r = self.http.getresponse()

        self.assertEqual(r.status, 200)
        self.assertEqual(r.headers.get_content_type(), 'application/json')

        data = _decode_json_response(r)
        self.assertEqual(data, {'deleted': identifier})

        deleted_cookie = self.jar.fetch_cookie(identifier)
        self.assertIsNone(deleted_cookie)

    def test_delete_by_qs(self):
        """
        HTTP API: DELETE /cookiejar?identifier=<identifier>
        """
        self._delete_test('/path/to/foo')

    def test_delete_by_route(self):
        """
        HTTP API: DELETE /cookiejar/<identifier>
        """
        self._delete_test('foo_bar')

    def test_thread_dump(self):
        """
        HTTP API: GET /debug/threads

        Note: This test only proves that the endpoint returns an OK
        response and JSON data.
        TODO At least validate the returned data's schema
        """
        self.http.request('GET', '/debug/threads', headers=self.REQ_HEADER)
        r = self.http.getresponse()

        self.assertEqual(r.status, 200)
        self.assertEqual(r.headers.get_content_type(), 'application/json')
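
The tests above rely on a module-level _decode_json_response() helper that is not included in this listing. A minimal sketch, assuming it simply parses the response body as UTF-8 encoded JSON, might be:

import json
from http.client import HTTPResponse

def _decode_json_response(r: HTTPResponse):
    """Read an HTTPResponse body and parse it as JSON (hypothetical helper)."""
    return json.loads(r.read().decode('utf-8'))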
Example 4
def run(config_location):
    # Load config
    config = load_config(config_location)

    # Setup measurement logging
    logging_buffer_latency = timedelta(seconds=config.influxdb.buffer_latency)
    influxdb_config = InfluxDBConnectionConfig(config.influxdb.host, config.influxdb.port, config.influxdb.username,
                                               config.influxdb.password, config.influxdb.database)
    logger = InfluxDBLogger(influxdb_config, buffer_latency=logging_buffer_latency)

    # Set HTTP connection pool size (for CouchDB)
    # NOTE This is taken from an environment variable, as it's unlikely
    # to need much tweaking:
    pool_size = int(environ.get('COOKIEMONSTER_HTTP_POOL_SIZE', 16))
    patch_http_connection_pool(maxsize=pool_size)

    # Setup cookie jar
    cookie_jar = RateLimitedBiscuitTin(config.cookie_jar.max_requests_per_second, config.cookie_jar.url,
                                       config.cookie_jar.database)
    add_cookie_jar_logging(cookie_jar, logger)

    # Setup data retrieval manager
    update_mapper = BatonUpdateMapper(config.baton.binaries_location, zone=config.baton.zone)
    retrieval_manager = PeriodicRetrievalManager(config.retrieval.period, update_mapper, logger)

    # # Setup basic Slack client
    # slack = BasicSlackClient(config.slack.token, config.slack.default_channel, config.slack.default_username)
    #
    # # Setup basic message queue (e.g. RabbitMQ) client
    # message_queue = BasicMessageQueue(config.message_queue.host, config.message_queue.port,
    #                                   config.message_queue.username, config.message_queue.password)
    slack = message_queue = None

    # Define the context that rules and enrichment loaders have access to
    context = HgiContext(cookie_jar, config, slack, message_queue)

    # Setup rules source
    rules_source = RuleSource(config.processing.rules_location, context)
    rules_source.start()

    # Setup enrichment loader source
    enrichment_loader_source = EnrichmentLoaderSource(config.processing.enrichment_loaders_location, context)
    enrichment_loader_source.start()

    # Setup the data processor manager
    processor_manager = BasicProcessorManager(cookie_jar, rules_source, enrichment_loader_source,
                                              config.processing.max_threads, logger)

    # Connect components to the cookie jar
    _connect_retrieval_manager_to_cookie_jar(retrieval_manager, cookie_jar, config.cookie_jar.max_requests_per_second,
                                             logger)
    _connect_processor_manager_to_cookie_jar(processor_manager, cookie_jar)

    # Setup the HTTP API
    api = HTTP_API()
    api.inject(APIDependency.CookieJar, cookie_jar)
    api.listen(config.api.port)

    # Start the retrieval manager
    retrieval_manager.start(config.retrieval.since)

    # Start processing of any unprocessed cookies
    processor_manager.process_any_cookies()

    # Setup monitors
    ThreadsMonitor(logger, logging_buffer_latency).start()
    CookieJarMonitor(logger, logging_buffer_latency, cookie_jar).start()
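
The patch_http_connection_pool() helper used above is not shown in this listing. Assuming the CouchDB client is backed by requests/urllib3, a hypothetical sketch of such a patch could be:

from urllib3 import connectionpool, poolmanager

def patch_http_connection_pool(**constructor_kwargs):
    """Make newly created urllib3 HTTP connection pools use custom defaults,
    e.g. a larger maxsize (hypothetical sketch)."""
    class _PatchedHTTPConnectionPool(connectionpool.HTTPConnectionPool):
        def __init__(self, *args, **kwargs):
            kwargs.update(constructor_kwargs)   # e.g. force maxsize=pool_size
            super().__init__(*args, **kwargs)

    poolmanager.pool_classes_by_scheme['http'] = _PatchedHTTPConnectionPool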