def test_parse_config_more_than_one_env_value(self):
    """Every tagged env-var reference inside a single value is expanded."""
    os.environ['TEST_ENV_TAG'] = 'it works!'
    os.environ['OTHER_TEST_TAG'] = 'this works too!'
    raw_yaml = '''
    test1:
        data0: !TEST ${TEST_ENV_TAG}/somethingelse/${OTHER_TEST_TAG}
        data1: !TEST ${OTHER_TEST_TAG}
    '''
    parsed = parse_config(data=raw_yaml, tag='!TEST')
    expected = {
        'test1': {
            'data0': 'it works!/somethingelse/this works too!',
            'data1': 'this works too!'
        }
    }
    self.assertDictEqual(parsed, expected)
def test_parse_config_diff_tag(self):
    """A non-default tag name (!TEST) also triggers env-var substitution."""
    os.environ['TEST_ENV_TAG'] = 'it works!'
    os.environ['OTHER_TEST_TAG'] = 'this works too!'
    raw_yaml = '''
    test1:
        data0: !TEST ${TEST_ENV_TAG}
        data1: !TEST ${OTHER_TEST_TAG}
    '''
    parsed = parse_config(data=raw_yaml, tag='!TEST')
    self.assertDictEqual(
        parsed,
        {
            'test1': {
                'data0': 'it works!',
                'data1': 'this works too!'
            }
        }
    )
def test_parse_config_with_file_path(self):
    """parse_config resolves default !ENV tags when loading from a file path."""
    os.environ['TEST_ENV_TAG'] = 'it works!'
    os.environ['OTHER_TEST_TAG'] = 'this works too!'
    raw_yaml = '''
    test1:
        data0: !ENV ${TEST_ENV_TAG}
        data1: !ENV ${OTHER_TEST_TAG}
    '''
    # Persist the snippet so parse_config exercises its file-reading path.
    with open(self.test_file_name, 'w') as fh:
        fh.write(raw_yaml)
    parsed = parse_config(path=self.test_file_name)
    self.assertDictEqual(
        parsed,
        {
            'test1': {
                'data0': 'it works!',
                'data1': 'this works too!'
            }
        }
    )
def main():
    """
    Baskerville commandline entry point.

    Parses the command-line arguments, builds the analytics engine and the
    module logger (both published via ``global``), optionally starts the
    simulation and/or the Prometheus exporter, then runs the engine.

    :return: None
    """
    global baskerville_engine, logger
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "pipeline",
        help="Pipeline to use: es, rawlog, or kafka",
    )
    parser.add_argument(
        "-s", "--simulate",
        dest="simulate",
        action="store_true",
        help="Simulate real-time run using kafka",
    )
    parser.add_argument(
        "-e", "--startexporter",
        dest="start_exporter",
        action="store_true",
        help="Start the Baskerville Prometheus exporter at the specified "
             "in the configuration port",
    )
    # BUGFIX: "-t"/"--testmodel" was registered twice; the second
    # add_argument call made argparse raise ArgumentError (conflicting
    # option strings) before any argument could be parsed. Register it once.
    parser.add_argument(
        "-t", "--testmodel",
        dest="test_model",
        help="Add a test model in the models table",
        default=False,
        action="store_true"
    )
    parser.add_argument(
        "-c", "--conf",
        action="store",
        dest="conf_file",
        default=os.path.join(src_dir, '..', 'conf', 'baskerville.yaml'),
        help="Path to config file"
    )
    args = parser.parse_args()
    conf = parse_config(path=args.conf_file)

    baskerville_engine = BaskervilleAnalyticsEngine(
        args.pipeline, conf, register_metrics=args.start_exporter
    )
    logger = get_logger(
        __name__,
        logging_level=baskerville_engine.config.engine.log_level,
        output_file=baskerville_engine.config.engine.logpath
    )

    # start simulation if specified
    if args.simulate:
        spark = None
        if baskerville_engine.config.engine.use_spark:
            from baskerville.spark import get_spark_session
            spark = get_spark_session()  # baskerville.pipeline.spark
        logger.info('Starting simulation...')
        run_simulation(baskerville_engine.config, spark)

    # start baskerville prometheus exporter if specified
    if args.start_exporter:
        if not baskerville_engine.config.engine.metrics:
            raise RuntimeError('Cannot start exporter without metrics config')
        port = baskerville_engine.config.engine.metrics.port
        start_http_server(port)
        logger.info(f'Starting Baskerville Exporter at '
                    f'http://localhost:{port}')

    # populate with test data if specified
    if args.test_model:
        add_model_to_database(conf['database'])

    # Start auxiliary processes (reversed so dependents come up last).
    for p in PROCESS_LIST[::-1]:
        print(f"{p.name} starting...")
        p.start()

    logger.info('Starting Baskerville Engine...')
    baskerville_engine.run()
def maintain_db():
    """
    Runs the partitioning and archive scripts
    :return:
    """
    # todo: this can fail silently
    # Locate the project root; falls back to a relative path when the
    # BASKERVILLE_ROOT env var is unset.
    baskerville_root = os.environ.get(
        'BASKERVILLE_ROOT', '../../../../baskerville'
    )
    # we need the current config for the database details
    config = parse_config(path=f'{baskerville_root}/conf/baskerville.yaml')
    logger = get_logger(
        __name__,
        logging_level=config['engine']['log_level'],
        output_file=config['engine']['logpath']
    )
    db_config = DatabaseConfig(config['database']).validate()

    # Only weekly partitioning is supported at the moment.
    if db_config.maintenance.partition_by != 'week':
        raise NotImplementedError(
            f'Partition by {db_config.maintenance.partition_by} '
            f'is not yet implemented'
        )

    # maintainance will run every Sunday, so now should be Sunday night
    # move to the start of Monday
    now = datetime.utcnow()
    y, w, _ = now.isocalendar()
    # Partition window: the ISO week following the current one.
    # NOTE(review): when `now` falls in the last ISO week of the year,
    # w + 1 is out of range — this relies on isoweek normalizing it into
    # the next year; confirm against the isoweek package docs.
    partition_start_week = isoweek.Week(y, w + 1)
    start = datetime.combine(
        partition_start_week.monday(), datetime.min.time()
    )
    end = datetime.combine(
        partition_start_week.sunday(), datetime.max.time()
    )
    logger.info(f'Data Partition Start : {start}')

    # Archive window: data roughly one year (days-in-year) older than `end`.
    diy = get_days_in_year(end.year)
    latest_archive_date = end - timedelta(days=diy)
    latest_archive_year, latest_archive_week, _ = latest_archive_date.isocalendar()
    print(latest_archive_week, latest_archive_year)
    # Step back one ISO week, wrapping into the previous year's last week
    # when already at week 1.
    if latest_archive_week > 1:
        latest_archive_week = latest_archive_week - 1
    else:
        latest_archive_week = isoweek.Week.last_week_of_year(
            latest_archive_year-1
        ).week
        latest_archive_year = latest_archive_year - 1

    week = isoweek.Week(latest_archive_year, latest_archive_week)
    print(week)
    # Partition range: Monday 00:00:00 through Sunday 23:59:59 of the
    # upcoming week.
    db_config.maintenance.data_partition.since = start
    db_config.maintenance.data_partition.until = (
        start + timedelta(days=6)
    ).replace(
        hour=23, minute=59, second=59
    )
    # Archive range: the full ISO week computed above.
    db_config.maintenance.data_archive.since = datetime.combine(
        week.monday(), datetime.min.time()
    )
    db_config.maintenance.data_archive.until = datetime.combine(
        week.sunday(), datetime.max.time()
    )
    print(db_config.maintenance.data_partition)
    print(db_config.maintenance.data_archive)

    # get sql scripts
    partition_sql = get_temporal_partitions(db_config.maintenance)
    archive_sql = get_archive_script(
        latest_archive_date - timedelta(weeks=1),
        latest_archive_date
    )
    logger.debug(partition_sql)
    logger.debug(archive_sql)
    session, engine = set_up_db(db_config.__dict__, create=False)

    try:
        # create partitions
        session.execute(partition_sql)
        session.commit()
        print('Partitioning done')
        # detach partitions over a year and attach them to the archive table
        session.execute(archive_sql)
        session.commit()
        print('Archive done')
    except SQLAlchemyError as e:
        # Roll back the failed transaction and surface the error in the log.
        traceback.print_exc()
        session.rollback()
        logger.error(f'Error executing maintenance: {e}')
    finally:
        session.close()