def get_config():
    """
    Read FlowAuth's configuration from environment variables and docker secrets.

    Returns
    -------
    dict
        Flask-style configuration dict.

    Raises
    ------
    UndefinedConfigOption
        If a required environment variable or docker secret is not set.
    """
    try:
        flowauth_fernet_key = environ["FLOWAUTH_FERNET_KEY"].encode()
        _ = Fernet(flowauth_fernet_key)  # Error if fernet key is bad
        log_level = getattr(
            logging, getenv("FLOWAUTH_LOG_LEVEL", "error").upper(), logging.ERROR
        )
        # Fixed: the original looked DB_URI up twice — getenv("DB_URI", ...)
        # with os.getenv("DB_URI", ...) supplying the default used the same key
        # and same final fallback, so a single lookup is equivalent.
        db_uri = getenv("DB_URI", "sqlite:////tmp/test.db")
        # The URI may contain a '{}' placeholder which is filled with the
        # database password here.
        db_uri = db_uri.format(getenv("FLOWAUTH_DB_PASSWORD", ""))
        return dict(
            PRIVATE_JWT_SIGNING_KEY=load_private_key(
                environ["PRIVATE_JWT_SIGNING_KEY"]
            ),
            LOG_LEVEL=log_level,
            ADMIN_USER=environ["FLOWAUTH_ADMIN_USERNAME"],
            ADMIN_PASSWORD=environ["FLOWAUTH_ADMIN_PASSWORD"],
            SQLALCHEMY_DATABASE_URI=db_uri,
            SQLALCHEMY_ENGINE_OPTIONS=dict(pool_recycle=3600),
            SECRET_KEY=environ["SECRET_KEY"],
            SESSION_PROTECTION="strong",
            SQLALCHEMY_TRACK_MODIFICATIONS=False,
            FLOWAUTH_FERNET_KEY=flowauth_fernet_key,
            # Presence of the variable (even empty) switches the mode on.
            DEMO_MODE=getenv("DEMO_MODE") is not None,
            RESET_DB=getenv("RESET_FLOWAUTH_DB") is not None,
            DB_IS_SET_UP=Event(),
            CACHE_BACKEND=get_cache_backend(),
        )
    except KeyError as e:
        raise UndefinedConfigOption(
            f"Undefined configuration option: '{e.args[0]}'. Please set docker secret or environment variable."
        )
def get_cache_backend() -> CacheRegion:
    """
    Get a dogpilecache cache region.

    Returns
    -------
    CacheRegion
    """
    # Backend is chosen via FLOWAUTH_CACHE_BACKEND; anything other than
    # REDIS or FILE falls through to a process-local in-memory cache.
    chosen = getenv("FLOWAUTH_CACHE_BACKEND", "FILE").upper()
    if chosen == "REDIS":
        backend_name = "dogpile.cache.redis"
        backend_args = {
            "host": environ["FLOWAUTH_REDIS_HOST"],
            "port": int(getenv("FLOWAUTH_REDIS_PORT", "6379")),
            "db": int(getenv("FLOWAUTH_REDIS_DB", "0")),
            "redis_expiration_time": 32,
            "distributed_lock": True,
            "password": getenv("FLOWAUTH_REDIS_PASSWORD", None),
        }
    elif chosen == "FILE":
        backend_name = "dogpile.cache.dbm"
        backend_args = {"filename": environ["FLOWAUTH_CACHE_FILE"]}
    else:
        backend_name = "dogpile.cache.memory"
        backend_args = {}
    region = make_region()
    return region.configure(
        backend=backend_name, expiration_time=30, arguments=backend_args
    )
def get_config():
    """
    Read FlowAPI's configuration from environment variables and docker secrets.

    Returns
    -------
    dict
        Configuration dict for the FlowAPI app.

    Raises
    ------
    UndefinedConfigOption
        If a required environment variable or docker secret is not set.
    """
    try:
        jwt_public_key = load_public_key(environ["PUBLIC_JWT_SIGNING_KEY"])
        # Fixed: logging.getLevelName's name->level direction is deprecated and
        # returns the string "Level <NAME>" for unrecognised names instead of a
        # usable level. Use getattr with a logging.ERROR fallback, matching the
        # pattern used by flowauth's get_config.
        log_level = getattr(
            logging, getenv("FLOWAPI_LOG_LEVEL", "error").upper(), logging.ERROR
        )
        flowmachine_host = environ["FLOWMACHINE_HOST"]
        flowmachine_port = environ["FLOWMACHINE_PORT"]
        flowdb_user = environ["FLOWAPI_FLOWDB_USER"]
        flowdb_password = environ["FLOWAPI_FLOWDB_PASSWORD"]
        flowdb_host = environ["FLOWDB_HOST"]
        flowdb_port = environ["FLOWDB_PORT"]
        flowapi_server_id = environ["FLOWAPI_IDENTIFIER"]
    except KeyError as e:
        raise UndefinedConfigOption(
            f"Undefined configuration option: '{e.args[0]}'. Please set docker secret or environment variable."
        )
    return dict(
        JWT_PUBLIC_KEY=jwt_public_key,
        JWT_ALGORITHM="RS256",
        FLOWAPI_LOG_LEVEL=log_level,
        FLOWMACHINE_HOST=flowmachine_host,
        FLOWMACHINE_PORT=flowmachine_port,
        FLOWDB_DSN=f"postgres://{flowdb_user}:{flowdb_password}@{flowdb_host}:{flowdb_port}/flowdb",
        JWT_DECODE_AUDIENCE=flowapi_server_id,
    )
def main(run_on_schedule: bool = True):
    """
    Main function. Creates output directories, initialises the database, parses a workflows definition
    file to define workflows and configure the available dates sensor, and runs the available dates sensor.

    Parameters
    ----------
    run_on_schedule : bool, default True
        Set run_on_schedule=False to run the sensor only once, ignoring the schedule.
        (useful for testing)
    """
    # Initialise logger
    # TODO: Use structlog (not sure whether it will be possible for the prefect logger)
    log_level = os.environ["AUTOFLOW_LOG_LEVEL"]
    logger = logging.getLogger(__name__)
    handler = logging.StreamHandler()
    formatter = logging.Formatter(
        "[%(asctime)s] %(levelname)s - %(name)s | %(message)s"
    )  # Match prefect format for now
    # Emit log timestamps in UTC rather than local time
    formatter.converter = time.gmtime
    handler.setFormatter(formatter)
    logger.addHandler(handler)
    logger.setLevel(log_level)
    logger.info(f"Log level for logger '{__name__}' set to '{log_level}'.")

    # Make output directories
    outputs_path = Path(os.environ["AUTOFLOW_OUTPUTS_DIR"])
    logger.info(
        f"Creating output directories '{outputs_path/'notebooks'}' and '{outputs_path/'reports'}'."
    )
    # exist_ok=True so re-running against an existing outputs volume is harmless
    (outputs_path / "notebooks").mkdir(exist_ok=True)
    (outputs_path / "reports").mkdir(exist_ok=True)

    # Init DB
    # Note: AUTOFLOW_DB_URI must be an env var so that it can be used in prefect.config, so we read it using os.environ.
    # AUTOFLOW_DB_PASSWORD can (and should) be a docker secret, so we read it using get_secret_or_env_var.
    db_uri = os.environ["AUTOFLOW_DB_URI"]
    logger.info(f"Initialising database '{db_uri}'.")
    # The password is substituted into the URI only after logging it, so it is
    # never written to the logs (presumably the URI carries a '{}' placeholder).
    init_db(db_uri.format(getenv("AUTOFLOW_DB_PASSWORD", "")))

    # Create workflows according to workflow definition file
    inputs_dir = os.environ["AUTOFLOW_INPUTS_DIR"]
    logger.info(f"Creating workflows defined in '{Path(inputs_dir)/'workflows.yml'}'.")
    workflow_storage, sensor_config = parse_workflows_yaml("workflows.yml", inputs_dir)

    # Run available dates sensor
    logger.info("Running available dates sensor.")
    available_dates_sensor.schedule = sensor_config["schedule"]
    available_dates_sensor.run(
        workflow_configs=sensor_config["workflows"],
        cdr_types=sensor_config["cdr_types"],
        workflow_storage=workflow_storage,
        run_on_schedule=run_on_schedule,
    )
def get_session(db_uri: str) -> "sqlalchemy.orm.session.Session":
    """
    Create a sqlalchemy session.

    Parameters
    ----------
    db_uri : str
        Database URI

    Returns
    -------
    Session
        A sqlalchemy session
    """
    # TODO: This seems like the wrong place to be reading a secret / env var,
    # but we can't put a docker secret in the prefect config.
    password = getenv("AUTOFLOW_DB_PASSWORD", "")
    session_factory = sessionmaker(bind=create_engine(db_uri.format(password)))
    return session_factory()
def get_available_dates(
    cdr_types: Optional[Sequence[str]] = None,
) -> List[pendulum.Date]:
    """
    Task to return a union of the dates for which data is available in FlowDB for the
    specified set of CDR types.

    Parameters
    ----------
    cdr_types : list of str, optional
        Subset of CDR types for which to find available dates.
        If not provided, the union of available dates for all CDR types will be returned.

    Returns
    -------
    list of pendulum.Date
        List of available dates, in chronological order
    """
    prefect.context.logger.info(
        f"Getting available dates from FlowAPI at '{prefect.config.flowapi_url}'."
    )
    conn = flowclient.connect(
        url=prefect.config.flowapi_url,
        token=environ["FLOWAPI_TOKEN"],
        ssl_certificate=getenv("SSL_CERTIFICATE_FILE"),
    )
    dates = flowclient.get_available_dates(connection=conn)
    prefect.context.logger.debug(f"Available dates: {dates}")
    if cdr_types is None:
        prefect.context.logger.debug(
            "No CDR types provided. Will return available dates for all CDR types."
        )
        cdr_types = dates.keys()
    else:
        prefect.context.logger.debug(
            f"Returning available dates for CDR types {cdr_types}."
        )
        unknown_cdr_types = set(cdr_types).difference(dates.keys())
        if unknown_cdr_types:
            warnings.warn(f"No data available for CDR types {unknown_cdr_types}.")
    # Fixed: the original used unbound set.union(*[...]), which raises
    # TypeError when the argument list is empty (e.g. every requested CDR type
    # is unknown — a case the code warns about above but then crashed on).
    # set().union(...) correctly yields an empty set in that case.
    dates_union = set().union(
        *(
            {pendulum.parse(date, exact=True) for date in dates[cdr_type]}
            for cdr_type in cdr_types
            if cdr_type in dates
        )
    )
    return sorted(dates_union)
def _do_connect(
    *,
    log_level: Optional[str] = None,
    flowdb_port: Optional[int] = None,
    flowdb_user: Optional[str] = None,
    flowdb_password: Optional[str] = None,
    flowdb_host: Optional[str] = None,
    flowdb_connection_pool_size: Optional[int] = None,
    flowdb_connection_pool_overflow: Optional[int] = None,
    redis_host: Optional[str] = None,
    redis_port: Optional[int] = None,
    redis_password: Optional[str] = None,
    conn: Optional[Connection] = None,
) -> Tuple[Connection, ThreadPoolExecutor, StrictRedis]:
    """
    Connects flowmachine to a database, and performs initial set-up routines.
    You may provide a Settings object here, which can specify the database
    you wish to connect to, logging behaviour, available tables and so on.

    Parameters
    ----------
    log_level : str, default "error"
        Level to log at
    flowdb_port : int, default 9000
        Port number to connect to flowdb
    flowdb_user : str, default "flowmachine"
        Name of user to connect to flowdb as
    flowdb_password : str
        Password to connect to flowdb
    flowdb_host : str, default "localhost"
        Hostname of flowdb server
    flowdb_connection_pool_size : int, default 5
        Default number of database connections to use
    flowdb_connection_pool_overflow : int, default 1
        Number of extra database connections to allow
    redis_host : str, default "localhost"
        Hostname for redis server.
    redis_port : int, default 6379
        Port the redis server is available on
    redis_password : str
        Password for the redis instance
    conn : flowmachine.core.Connection
        Optionally provide an existing Connection object to use, overriding any the db options specified here.

    Returns
    -------
    Connection

    Notes
    -----
    All parameters can also be provided as environment variables.
    If a parameter is provided, and an environment variable is set,
    then the provided value is used. If neither is provided, the defaults as given
    in the docstring are used.

    Parameters can _also_ be set using Docker secrets, in which case a file with the name
    of the parameter in upper case should be present at /run/secrets/THE_PARAM.

    If a secret is available, the secret takes precedence over both the environment
    variable, and the default.
    """
    try:
        # Precedence for each setting: explicit argument wins, otherwise fall
        # back to the secret/environment variable (or its documented default).
        # environ[...] lookups (passwords) raise KeyError when absent, which is
        # converted to a friendlier ValueError below.
        log_level = (
            getenv("FLOWMACHINE_LOG_LEVEL", "error") if log_level is None else log_level
        )
        flowdb_port = int(
            getenv("FLOWDB_PORT", "9000") if flowdb_port is None else flowdb_port
        )
        flowdb_user = (
            getenv("FLOWMACHINE_FLOWDB_USER", "flowmachine")
            if flowdb_user is None
            else flowdb_user
        )
        flowdb_password = (
            environ["FLOWMACHINE_FLOWDB_PASSWORD"]
            if flowdb_password is None
            else flowdb_password
        )
        flowdb_host = (
            getenv("FLOWDB_HOST", "localhost") if flowdb_host is None else flowdb_host
        )
        # NOTE(review): here int() is applied only to the env-var fallback
        # (unlike flowdb_port above, where the passed value is re-coerced too) —
        # a passed flowdb_connection_pool_size is used as-is.
        flowdb_connection_pool_size = (
            int(getenv("DB_CONNECTION_POOL_SIZE", "5"))
            if flowdb_connection_pool_size is None
            else flowdb_connection_pool_size
        )
        flowdb_connection_pool_overflow = int(
            getenv("DB_CONNECTION_POOL_OVERFLOW", "1")
            if flowdb_connection_pool_overflow is None
            else flowdb_connection_pool_overflow
        )
        redis_host = (
            getenv("REDIS_HOST", "localhost") if redis_host is None else redis_host
        )
        redis_port = int(
            getenv("REDIS_PORT", "6379") if redis_port is None else redis_port
        )
        redis_password = (
            environ["REDIS_PASSWORD"] if redis_password is None else redis_password
        )
    except KeyError as e:
        raise ValueError(
            f"You must provide a secret named {e.args[0]}, set an environment variable named {e.args[0]}, or provide the value as a parameter."
        )
    set_log_level("flowmachine.debug", log_level)
    if conn is None:
        conn = Connection(
            host=flowdb_host,
            port=flowdb_port,
            user=flowdb_user,
            password=flowdb_password,
            database="flowdb",
            pool_size=flowdb_connection_pool_size,
            overflow=flowdb_connection_pool_overflow,
        )
    redis_connection = redis.StrictRedis(
        host=redis_host, port=redis_port, password=redis_password
    )
    thread_pool = ThreadPoolExecutor(flowdb_connection_pool_size)
    # Bare attribute access, result discarded — evaluated for its side effect.
    # NOTE(review): presumably this warms/validates the flowdb connection by
    # triggering the available-dates lookup; confirm against
    # Connection.available_dates.
    conn.available_dates
    print(f"FlowMachine version: {flowmachine.__version__}")
    print(
        f"Flowdb running on: {flowdb_host}:{flowdb_port}/flowdb (connecting user: {flowdb_user})"
    )
    return conn, thread_pool, redis_connection