def create_scraper_monitor_flow():
    """Build the scheduled "MonitorFailingScrapers" flow.

    The flow has a single step: ``run_monitor_scrapers`` is called with the
    secret read from ``SLACK_WEBHOOK_URL``.

    Returns:
        The constructed ``Flow`` with its cron schedule attached.
    """
    # Cron: minute 0 of hour 15, every day. The original note describes this
    # as 15 UTC (or 10 ET).
    daily_at_fifteen = CronSchedule("0 15 * * *")

    with Flow("MonitorFailingScrapers", daily_at_fifteen) as flow:
        webhook = EnvVarSecret("SLACK_WEBHOOK_URL")
        run_monitor_scrapers(webhook)

    return flow
def create_flow_for_table(table_name):
    """Build the "clean-sql-<table_name>" flow for one table.

    The flow calls ``truncate_table`` on ``data.<table_name>`` and then
    ``reset_sequence`` on ``data.<table_name>_id_seq``.

    Args:
        table_name: Unqualified table name; it is prefixed with ``data.``
            and also used in the flow name.

    Returns:
        The constructed ``Flow``, scheduled at minute 50 of every second hour.
    """
    qualified_table = f"data.{table_name}"
    id_sequence = f"{qualified_table}_id_seq"
    every_other_hour = CronSchedule("50 */2 * * *")

    with Flow(f"clean-sql-{table_name}", every_other_hour) as flow:
        db_uri = EnvVarSecret("COVID_DB_CONN_URI")
        truncated = truncate_table(db_uri, qualified_table)
        # The truncate result is handed to reset_sequence as its third
        # argument -- presumably so the reset runs after the truncate;
        # confirm against reset_sequence's definition.
        reset_sequence(db_uri, id_sequence, truncated)

    return flow
def create_cdc_single_state_flow():
    """Build the unscheduled, per-state flow for ``CDCCovidDataTracker``.

    The flow takes a ``state`` parameter, creates the scraper for it and runs
    fetch -> normalize -> validate -> put against that scraper. Scraper
    creation is ordered after Sentry initialisation.

    Returns:
        The constructed ``Flow``, named after the scraper class.
    """
    with Flow(CDCCovidDataTracker.__name__) as flow:
        state = prefect.Parameter("state")
        db_uri = EnvVarSecret("COVID_DB_CONN_URI")
        dsn = EnvVarSecret("SENTRY_DSN")
        sentry_ready = initialize_sentry(dsn)

        scraper = create_scraper(CDCCovidDataTracker, state=state)
        fetch_step = fetch(scraper)
        normalize_step = normalize(scraper)
        validate_step = validate(scraper)
        put_step = put(scraper, db_uri)

        # Every step receives the same scraper object, so the stage order
        # is declared explicitly as (downstream, upstream) pairs.
        ordering = (
            (scraper, sentry_ready),
            (normalize_step, fetch_step),
            (validate_step, normalize_step),
            (put_step, validate_step),
        )
        for downstream, upstream in ordering:
            downstream.set_upstream(upstream)

    return flow
def create_flow_for_scraper(ix: int, cls: Type[DatasetBase]):
    """Build the scheduled flow for one scraper class.

    The flow creates the scraper and runs fetch -> normalize -> validate ->
    put against it. Scraper creation is ordered after Sentry initialisation.

    Args:
        ix: Index of the scraper; ``ix % 60`` becomes the minute field of
            the cron schedule.
        cls: Scraper class to instantiate; its ``__name__`` is the flow name.

    Returns:
        The constructed ``Flow``, scheduled every fourth hour at minute
        ``ix % 60``.
    """
    minute = ix % 60
    every_four_hours = CronSchedule(f"{minute} */4 * * *")

    with Flow(cls.__name__, every_four_hours) as flow:
        db_uri = EnvVarSecret("COVID_DB_CONN_URI")
        dsn = EnvVarSecret("SENTRY_DSN")
        sentry_ready = initialize_sentry(dsn)

        scraper = create_scraper(cls)
        fetch_step = fetch(scraper)
        normalize_step = normalize(scraper)
        validate_step = validate(scraper)
        put_step = put(scraper, db_uri)

        # Every step receives the same scraper object, so the stage order
        # is declared explicitly as (downstream, upstream) pairs.
        ordering = (
            (scraper, sentry_ready),
            (normalize_step, fetch_step),
            (validate_step, normalize_step),
            (put_step, validate_step),
        )
        for downstream, upstream in ordering:
            downstream.set_upstream(upstream)

    return flow
def test_run_secret_with_cast(monkeypatch):
    """With ``cast=int``, ``run()`` yields the integer form of the variable."""
    monkeypatch.setenv("FOO", "1")
    secret = EnvVarSecret(env_var="FOO", cast=int)

    result = secret.run()

    assert result == 1
def test_run_secret_without_env_var_set_returns_none_even_if_cast_set(
    monkeypatch,
):
    """With ``FOO`` unset, ``run()`` gives ``None`` even when ``cast=int``."""
    monkeypatch.delenv("FOO", raising=False)
    secret = EnvVarSecret(env_var="FOO", cast=int)

    result = secret.run()

    assert result is None
def test_default_cast_is_none():
    """``cast`` is ``None`` when the constructor is not given one."""
    secret = EnvVarSecret(env_var="FOO")

    cast_attr = secret.cast

    assert cast_attr is None
def test_run_secret(monkeypatch):
    """Without a cast, ``run()`` returns the variable's raw string value."""
    monkeypatch.setenv("FOO", "1")
    secret = EnvVarSecret(env_var="FOO")

    result = secret.run()

    assert result == "1"
def test_secret_name_set_at_runtime(monkeypatch):
    """The variable name can be passed positionally to ``run()``."""
    monkeypatch.setenv("FOO", "1")
    secret = EnvVarSecret()

    result = secret.run("FOO")

    assert result == "1"
def test_name_can_be_customized():
    """An explicit ``name`` is stored independently of ``env_var``."""
    secret = EnvVarSecret(env_var="FOO", name="BAR")

    observed = (secret.env_var, secret.name)

    assert observed[0] == "FOO"
    assert observed[1] == "BAR"
def test_run_secret_without_name_set_returns_none(monkeypatch):
    """``run()`` gives ``None`` when ``FOO`` is absent from the environment."""
    monkeypatch.delenv("FOO", raising=False)
    secret = EnvVarSecret(name="FOO")

    result = secret.run()

    assert result is None
def test_run_secret_with_cast_datetime(monkeypatch):
    """``cast=pendulum.parse`` turns the stored string into a datetime."""
    monkeypatch.setenv("FOO", "2019-01-02 03:04:05")
    secret = EnvVarSecret(env_var="FOO", cast=pendulum.parse)
    expected = pendulum.datetime(2019, 1, 2, 3, 4, 5)

    result = secret.run()

    assert result == expected
def test_init_with_name():
    """The ``name`` keyword given to the constructor is kept on the task."""
    secret = EnvVarSecret(name="FOO")

    stored_name = secret.name

    assert stored_name == "FOO"
blob_client = client.get_blob_client( container=container, blob=f"{file_name}_{datetime.datetime.now()}") upload_file_path = os.path.join(path, file_name) image_content_setting = ContentSettings(content_type='image/jpeg') logger, add_utility = prefect.context.get("logger"), logger_helper() logger.info(f"Uploading file - {file_name}") with open(upload_file_path, "rb") as data: blob_client.upload_blob("hello", overwrite=True, content_settings=image_content_setting) # FLOW DEFINITIONS with Flow("Upload to Azure") as flow: connection = EnvVarSecret("BLOB_STORAGE_KEY")( upstream_tasks=[file_name, file_path, blob_container]) client = start_azure_client(connection=connection) single_or_multiple = file_count_check(upstream_tasks=[client]) with case(single_or_multiple, True): upload = upload_image(client=client, file_name=file_name, container=blob_container, path=file_path) with case(single_or_multiple, False): upload_all_images_in_folder(client=client, file_name=file_name, container=blob_container, path=file_path) if __name__ == "__main__":
def test_run_secret_without_name_set_raises(monkeypatch):
    """With ``raise_if_missing=True``, an unset ``FOO`` raises ``ValueError``."""
    monkeypatch.delenv("FOO", raising=False)
    strict_secret = EnvVarSecret(name="FOO", raise_if_missing=True)

    with pytest.raises(ValueError, match="variable not set"):
        strict_secret.run()
def test_run_secret_with_new_name_at_runtime_and_raise_missing(monkeypatch):
    """A runtime ``name`` override is the variable checked for presence.

    ``FOO`` is set and is the name given at construction, but ``run`` is
    asked for ``BAR``; with ``raise_if_missing=True`` that must raise.
    """
    monkeypatch.setenv("FOO", "1")
    # Clear BAR explicitly so the outcome does not depend on whatever the
    # developer's or CI machine's ambient environment happens to contain.
    monkeypatch.delenv("BAR", raising=False)
    strict_secret = EnvVarSecret(name="FOO", raise_if_missing=True)

    with pytest.raises(ValueError, match="variable not set"):
        strict_secret.run(name="BAR")
def test_secret_raises_if_no_name_provided():
    """``run()`` raises ``ValueError`` when no name was given anywhere."""
    unnamed_secret = EnvVarSecret()

    with pytest.raises(ValueError, match="secret name must be provided"):
        unnamed_secret.run()
def test_create_envvarsecret_requires_env_var():
    """Constructing without ``env_var`` raises ``TypeError``."""
    expected_message = "required positional argument: 'env_var'"

    with pytest.raises(TypeError, match=expected_message):
        EnvVarSecret()
retry_delay=timedelta(minutes=1), nout=2, trigger=triggers.all_finished, ) def create_parquet(_success): ts = prefect.context.scheduled_start_time dt_str = pd.to_datetime(ts).strftime("%Y-%m-%dT%H") vintage_fn = FN_STR.format(dt_str) + ".parquet" fn = FN_STR.format("") + ".parquet" df = pd.read_csv(CSV_FN, parse_dates=["dt"]) df.to_parquet(DATA_PATH / vintage_fn, index=False) df.to_parquet(DATA_PATH / fn, index=False) return vintage_fn, fn @task def get_gcs_cmd(fn): return f"gsutil acl ch -u AllUsers:R gs://can-scrape-outputs/final/{fn}" shell = ShellTask() with Flow("UpdateParquetFiles", CronSchedule("10 */2 * * *")) as f: connstr = EnvVarSecret("COVID_DB_CONN_URI") success = export_to_csv(connstr) vintage_fn, fn = create_parquet(success) shell(get_gcs_cmd(vintage_fn)) shell(get_gcs_cmd(fn)) f.register(project_name="can-scrape")
def test_run_secret_without_env_var_set_raises_with_cast(monkeypatch):
    """An unset ``FOO`` raises ``ValueError`` even when ``cast=int`` is given."""
    monkeypatch.delenv("FOO", raising=False)
    strict_secret = EnvVarSecret(
        env_var="FOO",
        raise_if_missing=True,
        cast=int,
    )

    with pytest.raises(ValueError, match="variable not set"):
        strict_secret.run()
def test_name_defaults_to_env_var():
    """When only ``env_var`` is given, ``name`` takes the same value."""
    secret = EnvVarSecret(env_var="FOO")

    observed = (secret.env_var, secret.name)

    assert observed[0] == "FOO"
    assert observed[1] == "FOO"
sql_task = SqlServerFetch(db_name=prefect.config.sql_server.database, user=prefect.config.sql_server.user, host=prefect.config.sql_server.server, query=get_manual_override_rows, fetch='many', fetch_count=3, result=result_formatter, name="SQL-stuff" # commit: bool = False, ) #-------------------------------------------------------------- # Flow context #-------------------------------------------------------------- with Flow("github_flow") as f: password = EnvVarSecret(prefect.config.sql_server.password_var) logger = prefect.context.get("logger") thing = Parameter("thing", default=["Thing 1"]) d = dog(thing) s = sql_task(password=password) v = view_sql(s) #-------------------------------------------------------------- # Closing Details #-------------------------------------------------------------- f.run_config = LocalRun(env={ "PREFECT__USER_CONFIG_PATH": '/Users/peytonrunyan/TRP/prefect/config.toml'
def test_run_secret_with_new_name_at_runtime(monkeypatch):
    """A runtime ``name`` override replaces the constructor's name.

    ``FOO`` is set and is the name given at construction, but ``run`` is
    asked for ``BAR``, so the result must be ``None`` rather than ``"1"``.
    """
    monkeypatch.setenv("FOO", "1")
    # Clear BAR explicitly so the outcome does not depend on whatever the
    # developer's or CI machine's ambient environment happens to contain.
    monkeypatch.delenv("BAR", raising=False)
    secret = EnvVarSecret(name="FOO")

    result = secret.run(name="BAR")

    assert result is None