Beispiel #1
0
    def test_run_register_metrics(self):
        """Engine.run() with register_metrics=True must set up the pipeline,
        register performance stats, and run the pipeline exactly once."""
        from baskerville.models.engine import BaskervilleAnalyticsEngine
        expected_stats = 'should return a performance_stats instance'
        with mock.patch.object(
                BaskervilleAnalyticsEngine, '_set_up_pipeline'
        ) as mock__set_up_pipeline, mock.patch.object(
                BaskervilleAnalyticsEngine, '_register_performance_stats'
        ) as mock_register_performance_stats:
            fake_pipeline = mock.MagicMock()
            mock__set_up_pipeline.return_value = fake_pipeline
            mock_register_performance_stats.return_value = expected_stats

            engine = BaskervilleAnalyticsEngine(
                RunType.kafka, self.test_config, register_metrics=True
            )
            # 'performance' only needs to be truthy here so the engine steps
            # into the register_performance_stats branch; it is fully mocked.
            engine.config.engine.metrics = MetricsConfig({'performance': True})
            engine.run()

            mock__set_up_pipeline.assert_called_once()
            mock_register_performance_stats.assert_called_once()
            fake_pipeline.run.assert_called_once()
            self.assertTrue(engine.performance_stats == expected_stats)
Beispiel #2
0
    def test__set_up_pipeline_auto_spark(self):
        """With use_spark enabled, a kafka run type must yield a
        KafkaPipeline from _set_up_pipeline()."""
        from baskerville.models.engine import BaskervilleAnalyticsEngine
        from baskerville.models.pipelines import KafkaPipeline
        with mock.patch.object(KafkaPipeline, '__init__') as mock_init:
            mock_init.return_value = None
            engine = BaskervilleAnalyticsEngine(
                RunType.kafka, self.test_config
            )
            self.assertTrue(engine.run_type == RunType.kafka)

            engine.config.engine.use_spark = True
            pipeline = engine._set_up_pipeline()

            mock_init.assert_called_once()
            self.assertTrue(isinstance(pipeline, KafkaPipeline))
Beispiel #3
0
 def test__set_up_pipeline_manual_raw_logs_path_spark(self):
     """A rawlog run with a raw_logs_path (no host) and spark enabled
     must yield a RawLogPipeline from _set_up_pipeline()."""
     from baskerville.models.engine import BaskervilleAnalyticsEngine
     from baskerville.models.pipelines import RawLogPipeline
     with mock.patch.object(RawLogPipeline, '__init__') as mock_init:
         mock_init.return_value = None
         engine = BaskervilleAnalyticsEngine(RunType.rawlog,
                                             self.test_config)
         self.assertTrue(engine.run_type == RunType.rawlog)

         # manual raw-log configuration: path set, no host, spark on
         manual_conf = engine.config.engine.manual
         manual_conf.host = None
         manual_conf.raw_logs_path = 'some_path'
         manual_conf.chunk_size = 0
         engine.config.engine.use_spark = True

         result = engine._set_up_pipeline()
         mock_init.assert_called_once()
         self.assertTrue(isinstance(result, RawLogPipeline))
Beispiel #4
0
def main():
    """
    Baskerville commandline entry point.

    Parses the CLI arguments, builds the analytics engine and logger,
    optionally starts the simulation, the Prometheus exporter and any
    registered background processes, then runs the engine.
    :return: None
    """
    global baskerville_engine, logger
    parser = argparse.ArgumentParser()

    parser.add_argument(
        "pipeline",
        help="Pipeline to use: es, rawlog, or kafka",
    )
    parser.add_argument(
        "-s", "--simulate", dest="simulate",  action="store_true",
        help="Simulate real-time run using kafka",
    )
    parser.add_argument(
        "-e", "--startexporter", dest="start_exporter",
        action="store_true",
        help="Start the Baskerville Prometheus exporter at the specified "
             "in the configuration port",
    )

    # BUG FIX: this option used to be registered twice, which makes
    # argparse raise ArgumentError ("conflicting option strings") at
    # startup; it is now defined exactly once.
    parser.add_argument(
        "-t", "--testmodel", dest="test_model",
        help="Add a test model in the models table",
        default=False,
        action="store_true"
    )

    parser.add_argument(
        "-c", "--conf", action="store", dest="conf_file",
        default=os.path.join(src_dir, '..', 'conf', 'baskerville.yaml'),
        help="Path to config file"
    )

    args = parser.parse_args()
    conf = parse_config(path=args.conf_file)

    baskerville_engine = BaskervilleAnalyticsEngine(
        args.pipeline, conf, register_metrics=args.start_exporter
    )
    logger = get_logger(
        __name__,
        logging_level=baskerville_engine.config.engine.log_level,
        output_file=baskerville_engine.config.engine.logpath
    )

    # start simulation if specified
    if args.simulate:
        spark = None
        if baskerville_engine.config.engine.use_spark:
            from baskerville.spark import get_spark_session
            spark = get_spark_session()  # baskerville.pipeline.spark

        logger.info('Starting simulation...')
        run_simulation(baskerville_engine.config, spark)

    # start baskerville prometheus exporter if specified
    if args.start_exporter:
        if not baskerville_engine.config.engine.metrics:
            # plain string: the original was an f-string with no placeholders
            raise RuntimeError('Cannot start exporter without metrics config')
        port = baskerville_engine.config.engine.metrics.port
        start_http_server(port)
        logger.info(f'Starting Baskerville Exporter at '
                    f'http://localhost:{port}')

    # populate with test data if specified
    if args.test_model:
        add_model_to_database(conf['database'])

    # start registered helper processes (iterated in reverse registration
    # order — presumably so later-registered processes come up first;
    # TODO(review): confirm the ordering requirement)
    for p in PROCESS_LIST[::-1]:
        print(f"{p.name} starting...")
        p.start()

    logger.info('Starting Baskerville Engine...')
    baskerville_engine.run()
Beispiel #5
0
    def test_register_performance_stats(self):
        """_register_performance_stats() must create a timer metric for each
        configured pipeline method, request_set_cache method, and each active
        feature of the pipeline's feature manager."""
        from baskerville.models.engine import BaskervilleAnalyticsEngine
        with mock.patch.object(BaskervilleAnalyticsEngine,
                               '_set_up_pipeline') as _:
            engine = BaskervilleAnalyticsEngine(RunType.kafka,
                                                self.test_config)

            pipeline = mock.MagicMock()

            feature_a = mock.MagicMock()
            feature_b = mock.MagicMock()
            feature_a.feature_name = 'mock_feature1'
            feature_b.feature_name = 'mock_feature2'
            feature_a.compute = lambda value: value
            feature_b.compute = lambda value: value

            pipeline.feature_manager = mock.MagicMock()
            pipeline.feature_manager.active_features = [feature_a, feature_b]

            # the engine reads __name__ off the wrapped methods, so give the
            # mock attributes explicit names
            pipeline.test_method_name_1.__name__ = 'test_method_name_1'
            pipeline.test_method_name_2.__name__ = 'test_method_name_2'
            pipeline.request_set_cache.test_method_name_3.__name__ = \
                'test_method_name_3'
            pipeline.request_set_cache.test_method_name_4.__name__ = \
                'test_method_name_4'

            engine.pipeline = pipeline

            engine.config.engine.metrics = MetricsConfig({
                'performance': {
                    'pipeline': ['test_method_name_1', 'test_method_name_2'],
                    'request_set_cache':
                    ['test_method_name_3', 'test_method_name_4'],
                    'features':
                    True,
                }
            })
            performance_stats = engine._register_performance_stats()

            prefix = performance_stats._prefix
            registry = performance_stats.registry
            expected_timers = (
                'timer_for_pipeline_test_method_name_1',
                'timer_for_pipeline_test_method_name_2',
                'timer_for_request_set_cache_test_method_name_3',
                'timer_for_request_set_cache_test_method_name_4',
                f'timer_for_feature_{feature_a.feature_name}',
                f'timer_for_feature_{feature_b.feature_name}',
            )
            for timer_name in expected_timers:
                self.assertTrue(f'{prefix}{timer_name}' in registry)
Beispiel #6
0
 def test_instance(self, ):
     """A freshly built rawlog engine must keep its run type."""
     from baskerville.models.engine import BaskervilleAnalyticsEngine
     instance = BaskervilleAnalyticsEngine(RunType.rawlog, self.test_config)
     self.assertTrue(instance.run_type == RunType.rawlog)