def create_scraper_monitor_flow():
    """Build the scheduled flow that monitors failing scrapers.

    The flow is named ``MonitorFailingScrapers``. It reads the
    ``SLACK_WEBHOOK_URL`` environment secret and hands it to
    ``run_monitor_scrapers``.

    Returns:
        The constructed ``Flow`` object.
    """
    # Daily at 15:00 UTC (noted by the original author as 10 ET).
    daily_schedule = CronSchedule("0 15 * * *")

    with Flow("MonitorFailingScrapers", daily_schedule) as monitor_flow:
        webhook_secret = EnvVarSecret("SLACK_WEBHOOK_URL")
        run_monitor_scrapers(webhook_secret)

    return monitor_flow
def create_flow_for_table(table_name):
    """Build the scheduled cleanup flow for one table in the ``data`` schema.

    The flow truncates ``data.<table_name>`` and then resets the
    ``data.<table_name>_id_seq`` sequence; the truncate result is passed to
    the reset task so the reset is wired downstream of the truncate.

    Args:
        table_name: Unqualified table name; also used in the flow name.

    Returns:
        The constructed ``Flow`` named ``clean-sql-<table_name>``.
    """
    qualified_table = f"data.{table_name}"
    id_sequence = f"{qualified_table}_id_seq"
    # Cron: minute 50 of every second hour.
    cleanup_schedule = CronSchedule("50 */2 * * *")

    with Flow(f"clean-sql-{table_name}", cleanup_schedule) as cleanup_flow:
        db_uri = EnvVarSecret("COVID_DB_CONN_URI")
        truncated = truncate_table(db_uri, qualified_table)
        reset_sequence(db_uri, id_sequence, truncated)

    return cleanup_flow
def create_cdc_single_state_flow():
    """Build an unscheduled flow running ``CDCCovidDataTracker`` for one state.

    The flow takes a ``state`` parameter, initializes Sentry from the
    ``SENTRY_DSN`` secret, creates the scraper, and chains the
    fetch -> normalize -> validate -> put tasks, with ``put`` receiving the
    ``COVID_DB_CONN_URI`` secret.

    Returns:
        The constructed ``Flow`` named after the scraper class.
    """
    with Flow(CDCCovidDataTracker.__name__) as cdc_flow:
        state_param = prefect.Parameter("state")
        db_uri = EnvVarSecret("COVID_DB_CONN_URI")
        dsn_secret = EnvVarSecret("SENTRY_DSN")
        sentry_ready = initialize_sentry(dsn_secret)

        scraper = create_scraper(CDCCovidDataTracker, state=state_param)
        fetch_step = fetch(scraper)
        normalize_step = normalize(scraper)
        validate_step = validate(scraper)
        put_step = put(scraper, db_uri)

        # Every step only takes the scraper as data input, so the execution
        # order has to be declared explicitly as (upstream, downstream) pairs.
        ordering = (
            (sentry_ready, scraper),
            (fetch_step, normalize_step),
            (normalize_step, validate_step),
            (validate_step, put_step),
        )
        for upstream, downstream in ordering:
            downstream.set_upstream(upstream)

    return cdc_flow
Example #4
0
def create_flow_for_scraper(ix: int, cls: Type[DatasetBase]):
    """Build the scheduled scraping flow for a single scraper class.

    The flow initializes Sentry from the ``SENTRY_DSN`` secret, creates the
    scraper, and chains the fetch -> normalize -> validate -> put tasks,
    with ``put`` receiving the ``COVID_DB_CONN_URI`` secret.

    Args:
        ix: Index used to pick the minute of the hour (``ix % 60``) at which
            the flow fires, every fourth hour.
        cls: Scraper class to instantiate; its ``__name__`` names the flow.

    Returns:
        The constructed ``Flow``.
    """
    minute_slot = ix % 60
    scrape_schedule = CronSchedule(f"{minute_slot} */4 * * *")

    with Flow(cls.__name__, scrape_schedule) as scraper_flow:
        db_uri = EnvVarSecret("COVID_DB_CONN_URI")
        dsn_secret = EnvVarSecret("SENTRY_DSN")
        sentry_ready = initialize_sentry(dsn_secret)

        scraper = create_scraper(cls)
        fetch_step = fetch(scraper)
        normalize_step = normalize(scraper)
        validate_step = validate(scraper)
        put_step = put(scraper, db_uri)

        # Every step only takes the scraper as data input, so the execution
        # order has to be declared explicitly as (upstream, downstream) pairs.
        ordering = (
            (sentry_ready, scraper),
            (fetch_step, normalize_step),
            (normalize_step, validate_step),
            (validate_step, put_step),
        )
        for upstream, downstream in ordering:
            downstream.set_upstream(upstream)

    return scraper_flow
Example #5
0
def test_run_secret_with_cast(monkeypatch):
    """A set variable is passed through ``cast`` before being returned."""
    monkeypatch.setenv("FOO", "1")
    secret = EnvVarSecret(env_var="FOO", cast=int)
    value = secret.run()
    assert value == 1
Example #6
0
def test_run_secret_without_env_var_set_returns_none_even_if_cast_set(
    monkeypatch,
):
    """An unset variable yields ``None`` even when a ``cast`` is given."""
    monkeypatch.delenv("FOO", raising=False)
    secret = EnvVarSecret(env_var="FOO", cast=int)
    value = secret.run()
    assert value is None
Example #7
0
def test_default_cast_is_none():
    """``cast`` defaults to ``None`` when it is not supplied."""
    secret = EnvVarSecret(env_var="FOO")
    assert secret.cast is None
Example #8
0
def test_run_secret(monkeypatch):
    """Without a cast, ``run`` returns the raw environment string."""
    monkeypatch.setenv("FOO", "1")
    secret = EnvVarSecret(env_var="FOO")
    value = secret.run()
    assert value == "1"
Example #9
0
def test_secret_name_set_at_runtime(monkeypatch):
    """The variable name may be supplied to ``run`` instead of the constructor."""
    monkeypatch.setenv("FOO", "1")
    secret = EnvVarSecret()
    value = secret.run("FOO")
    assert value == "1"
Example #10
0
def test_name_can_be_customized():
    """An explicit ``name`` is stored independently of ``env_var``."""
    secret = EnvVarSecret(env_var="FOO", name="BAR")
    observed = (secret.env_var, secret.name)
    assert observed[0] == "FOO"
    assert observed[1] == "BAR"
Example #11
0
def test_run_secret_without_name_set_returns_none(monkeypatch):
    """``run`` returns ``None`` when the named variable is not set."""
    monkeypatch.delenv("FOO", raising=False)
    secret = EnvVarSecret(name="FOO")
    value = secret.run()
    assert value is None
Example #12
0
def test_run_secret_with_cast_datetime(monkeypatch):
    """``cast`` accepts any callable, here ``pendulum.parse``."""
    monkeypatch.setenv("FOO", "2019-01-02 03:04:05")
    secret = EnvVarSecret(env_var="FOO", cast=pendulum.parse)
    parsed = secret.run()
    assert parsed == pendulum.datetime(2019, 1, 2, 3, 4, 5)
Example #13
0
def test_init_with_name():
    """The ``name`` given to the constructor is exposed as ``.name``."""
    secret = EnvVarSecret(name="FOO")
    assert secret.name == "FOO"
Example #14
0
    blob_client = client.get_blob_client(
        container=container, blob=f"{file_name}_{datetime.datetime.now()}")
    upload_file_path = os.path.join(path, file_name)
    image_content_setting = ContentSettings(content_type='image/jpeg')
    logger, add_utility = prefect.context.get("logger"), logger_helper()
    logger.info(f"Uploading file - {file_name}")

    with open(upload_file_path, "rb") as data:
        blob_client.upload_blob("hello",
                                overwrite=True,
                                content_settings=image_content_setting)


# FLOW DEFINITIONS
# Builds the "Upload to Azure" flow. `file_name`, `file_path` and
# `blob_container` are defined elsewhere in this file (presumably flow
# parameters -- verify against their definitions).
with Flow("Upload to Azure") as flow:
    # The secret task is called with explicit upstream tasks so that it is
    # ordered after the three inputs even though it takes no data from them.
    connection = EnvVarSecret("BLOB_STORAGE_KEY")(
        upstream_tasks=[file_name, file_path, blob_container])
    client = start_azure_client(connection=connection)

    # The check is ordered after the client and its result selects one of the
    # two branches below.
    single_or_multiple = file_count_check(upstream_tasks=[client])
    # Branch taken when the check returns True (by its name, a single file --
    # TODO confirm against file_count_check).
    with case(single_or_multiple, True):
        upload = upload_image(client=client,
                              file_name=file_name,
                              container=blob_container,
                              path=file_path)
    # Branch taken when the check returns False.
    with case(single_or_multiple, False):
        upload_all_images_in_folder(client=client,
                                    file_name=file_name,
                                    container=blob_container,
                                    path=file_path)

if __name__ == "__main__":
Example #15
0
def test_run_secret_without_name_set_raises(monkeypatch):
    """With ``raise_if_missing=True`` an unset variable raises ``ValueError``."""
    monkeypatch.delenv("FOO", raising=False)
    strict_secret = EnvVarSecret(name="FOO", raise_if_missing=True)

    with pytest.raises(ValueError, match="variable not set"):
        strict_secret.run()
Example #16
0
def test_run_secret_with_new_name_at_runtime_and_raise_missing(monkeypatch):
    """A runtime ``name`` overrides the constructor name before the lookup.

    ``FOO`` is set but ``BAR`` is requested at run time, so the strict
    secret is expected to raise.
    """
    monkeypatch.setenv("FOO", "1")
    strict_secret = EnvVarSecret(name="FOO", raise_if_missing=True)

    with pytest.raises(ValueError, match="variable not set"):
        strict_secret.run(name="BAR")
Example #17
0
def test_secret_raises_if_no_name_provided():
    """Running a secret that was never given a name raises ``ValueError``."""
    nameless_secret = EnvVarSecret()

    with pytest.raises(ValueError, match="secret name must be provided"):
        nameless_secret.run()
Example #18
0
def test_create_envvarsecret_requires_env_var():
    """Constructing the secret without ``env_var`` raises ``TypeError``."""
    expected_message = "required positional argument: 'env_var'"

    with pytest.raises(TypeError, match=expected_message):
        EnvVarSecret()
Example #19
0
    retry_delay=timedelta(minutes=1),
    nout=2,
    trigger=triggers.all_finished,
)
def create_parquet(_success):
    """Convert the exported CSV into two parquet files.

    One copy is a "vintage" file whose name embeds the flow's scheduled
    start time formatted as ``%Y-%m-%dT%H``; the other uses an empty
    timestamp slot in ``FN_STR``. The vintage file is written first.

    Args:
        _success: Unused in the body; accepting it makes this task depend on
            whichever task produced it.

    Returns:
        A ``(vintage_fn, fn)`` tuple with the two parquet file names.
    """
    scheduled = prefect.context.scheduled_start_time
    hour_stamp = pd.to_datetime(scheduled).strftime("%Y-%m-%dT%H")
    suffix = ".parquet"
    vintage_fn = FN_STR.format(hour_stamp) + suffix
    fn = FN_STR.format("") + suffix

    frame = pd.read_csv(CSV_FN, parse_dates=["dt"])
    for target_name in (vintage_fn, fn):
        frame.to_parquet(DATA_PATH / target_name, index=False)

    return vintage_fn, fn


@task
def get_gcs_cmd(fn):
    """Return the ``gsutil`` command granting AllUsers read access to ``fn``.

    Args:
        fn: Object name under ``gs://can-scrape-outputs/final/``.

    Returns:
        The shell command as a single string.
    """
    object_url = f"gs://can-scrape-outputs/final/{fn}"
    return f"gsutil acl ch -u AllUsers:R {object_url}"


# One ShellTask instance, called twice inside the flow below.
shell = ShellTask()
# Flow "UpdateParquetFiles": cron fires at minute 10 of every second hour.
with Flow("UpdateParquetFiles", CronSchedule("10 */2 * * *")) as f:
    connstr = EnvVarSecret("COVID_DB_CONN_URI")
    success = export_to_csv(connstr)
    # Passing `success` makes the parquet step depend on the CSV export.
    vintage_fn, fn = create_parquet(success)
    # Run the gsutil ACL command for each of the two parquet file names.
    shell(get_gcs_cmd(vintage_fn))
    shell(get_gcs_cmd(fn))

# NOTE(review): registration runs at import time, so importing this module
# contacts the Prefect backend.
f.register(project_name="can-scrape")
Example #20
0
def test_run_secret_without_env_var_set_raises_with_cast(monkeypatch):
    """A strict secret raises for an unset variable even when ``cast`` is set."""
    monkeypatch.delenv("FOO", raising=False)
    strict_secret = EnvVarSecret(
        env_var="FOO",
        raise_if_missing=True,
        cast=int,
    )

    with pytest.raises(ValueError, match="variable not set"):
        strict_secret.run()
Example #21
0
def test_name_defaults_to_env_var():
    """When no ``name`` is given, ``name`` mirrors ``env_var``."""
    secret = EnvVarSecret(env_var="FOO")
    observed = (secret.env_var, secret.name)
    assert observed[0] == "FOO"
    assert observed[1] == "FOO"
Example #22
0
# Module-level SQL fetch task, configured from the `sql_server` section of the
# Prefect config. The password is not set here; it is supplied when the task
# is called inside the flow.
# With fetch='many' and fetch_count=3 the task presumably returns up to three
# rows -- TODO confirm against the SqlServerFetch documentation.
sql_task = SqlServerFetch(db_name=prefect.config.sql_server.database,
                          user=prefect.config.sql_server.user,
                          host=prefect.config.sql_server.server,
                          query=get_manual_override_rows,
                          fetch='many',
                          fetch_count=3,
                          result=result_formatter,
                          name="SQL-stuff"
                          # commit: bool = False,
                          )
#--------------------------------------------------------------
# Flow context
#--------------------------------------------------------------
# Builds the "github_flow" flow: a parameterised `dog` task plus a SQL fetch
# whose result is passed to `view_sql`.
with Flow("github_flow") as f:

    # The name of the environment variable holding the password comes from
    # the Prefect config (sql_server.password_var).
    password = EnvVarSecret(prefect.config.sql_server.password_var)

    # NOTE(review): `logger` is fetched while the flow is being built and is
    # not used in the visible code -- confirm whether it is needed.
    logger = prefect.context.get("logger")
    thing = Parameter("thing", default=["Thing 1"])
    d = dog(thing)

    # The password secret is supplied when the task is called.
    s = sql_task(password=password)

    v = view_sql(s)

#--------------------------------------------------------------
# Closing Details
#--------------------------------------------------------------
f.run_config = LocalRun(env={
    "PREFECT__USER_CONFIG_PATH":
    '/Users/peytonrunyan/TRP/prefect/config.toml'
Example #23
0
def test_run_secret_with_new_name_at_runtime(monkeypatch):
    """A runtime ``name`` replaces the constructor name for the lookup.

    ``FOO`` is set but ``BAR`` is requested at run time, so ``run`` is
    expected to return ``None``.
    """
    monkeypatch.setenv("FOO", "1")
    secret = EnvVarSecret(name="FOO")
    value = secret.run(name="BAR")
    assert value is None