Esempio n. 1
0
def test_required_parameters_must_be_provided():
    """A required Parameter with no value supplied must fail the flow run."""
    flow = Flow(name="test")
    param = prefect.Parameter("y")
    flow.add_task(param)
    state = FlowRunner(flow=flow).run(return_tasks=[param])
    # Both the flow state and the parameter task's state report failure,
    # with a message naming the missing parameter.
    assert isinstance(state, Failed)
    param_state = state.result[param]
    assert isinstance(param_state, Failed)
    assert "required but not provided" in str(param_state).lower()
Esempio n. 2
0
 def test_init_errors_if_tasks_passed_to_parameters(self, cloud_api):
     """Passing a Task instance (here a Parameter) as a parameter *value*
     to StartFlowRun must raise a TypeError at construction time."""
     with pytest.raises(TypeError,
                        match="An instance of `Task` was passed"):
         StartFlowRun(name="testing",
                      parameters={
                          "a": 1,
                          "b": prefect.Parameter("b")
                      })
Esempio n. 3
0
def test_parameters_are_placed_into_context():
    """A value passed via ``parameters`` overrides the Parameter default."""
    flow = Flow(name="test")
    param = prefect.Parameter("y", default=99)
    flow.add_task(param)
    state = FlowRunner(flow=flow).run(
        return_tasks=[param], parameters=dict(y=42))
    assert isinstance(state, Success)
    # The explicit value (42) wins over the default (99).
    assert state.result[param].result == 42
Esempio n. 4
0
    async def test_create_run_with_missing_parameters_raises_error(self):
        """Creating a flow run without supplying a required parameter must
        raise a ValueError naming the missing parameters.

        (Cleaned the stray ``(self, )`` signature artifact and the dangling
        trailing comma in the ``create_flow`` call.)
        """
        # Flow with a single *required* (no default) parameter "x".
        flow_id = await flows.create_flow(
            serialized_flow=prefect.Flow(
                name="test", tasks=[prefect.Parameter("x")]).serialize())

        with pytest.raises(ValueError) as exc:
            await runs.create_flow_run(flow_id=flow_id)

        assert "Required parameters were not supplied" in str(exc.value)
Esempio n. 5
0
def test_parameters_can_be_set_in_context_if_none_passed():
    """A parameter value found in context is used when none is passed."""
    param = prefect.Parameter("x")
    runner = FlowRunner(Flow(name="test", tasks=[param]))
    state = runner.run(
        parameters={},
        context={"parameters": {"x": 5}},
        return_tasks=[param],
    )
    # With no explicit value, the context-supplied 5 resolves the parameter.
    assert state.result[param].result == 5
Esempio n. 6
0
def test_parameters_overwrite_context():
    """Explicitly passed parameters take precedence over context values."""
    param = prefect.Parameter("x")
    runner = FlowRunner(Flow(name="test", tasks=[param]))
    state = runner.run(
        parameters={"x": 2},
        context={"parameters": {"x": 5}},
        return_tasks=[param],
    )
    # The explicit value (2) wins over the context value (5).
    assert state.result[param].result == 2
Esempio n. 7
0
    async def test_set_schedule_active_handles_scheduled_param_defaults(
            self, project_id):
        """A flow whose required parameter is covered by the clock's
        ``parameter_defaults`` can have its schedule activated.

        (Removed the needless ``f`` prefix on a placeholder-free string,
        lint F541 — the value is unchanged.)
        """
        # "a" is required (no default); the clock supplies values for both.
        a, b = prefect.Parameter("a"), prefect.Parameter("b", default=1)
        clock = prefect.schedules.clocks.CronClock(cron="* * * * *",
                                                   parameter_defaults={
                                                       "a": 1,
                                                       "b": 2
                                                   })
        schedule = prefect.schedules.Schedule(clocks=[clock])

        flow = prefect.Flow("test-params", tasks=[a, b], schedule=schedule)

        flow_id = await api.flows.create_flow(
            project_id=project_id,
            serialized_flow=flow.serialize(),
            set_schedule_active=False,
        )
        assert flow_id
        # Activation must succeed because the clock covers parameter "a".
        assert await api.flows.set_schedule_active(flow_id=flow_id)
Esempio n. 8
0
def test_auto_manage_dataframe_incorrect_type(local_file, caplog):
    """A non-path parameter value (an int) must raise a ValueError when the
    task tries to load its input DataFrame."""
    task = TaskWithInputDataFrame()

    # NOTE(review): the flow name previously read "...default_string" — a
    # copy-paste from the sibling test; renamed to match this test.
    with Flow('test_auto_manage_dataframe_incorrect_type') as flow:
        file = prefect.Parameter('local_file', default=123456)
        task(input_one=file)

    # Silence prefect's error logging while asserting the raised error.
    with pytest.raises(ValueError), caplog.at_level('FATAL', logger='prefect'):
        with raise_on_exception(), prefect.context(caches={}):
            flow.run()
Esempio n. 9
0
    async def test_create_flow_registers_flow_even_when_required_params(
            self, project_id):
        """
        We allow Flow registration to proceed even when there are required
        params, but we don't set the schedule to active.

        (Removed the needless ``f`` prefix on a placeholder-free string,
        lint F541 — the value is unchanged.)
        """
        # "a" is required with no default and no clock-level default.
        a, b = prefect.Parameter("a"), prefect.Parameter("b", default=1)
        clock = prefect.schedules.clocks.CronClock(cron="* * * * *")
        schedule = prefect.schedules.Schedule(clocks=[clock])

        flow = prefect.Flow("test-params", tasks=[a, b], schedule=schedule)

        flow_id = await api.flows.create_flow(project_id=project_id,
                                              serialized_flow=flow.serialize())
        assert flow_id

        # Registration succeeded, but the schedule must remain inactive.
        db_flow = await models.Flow.where(id=flow_id
                                          ).first({"is_schedule_active"})
        assert db_flow.is_schedule_active is False
Esempio n. 10
0
def test_client_register_doesnt_raise_for_scheduled_params(
        patch_post, compressed, monkeypatch):
    """Registering succeeds when every clock's ``parameter_defaults``
    covers the flow's required parameter "x"."""
    # Canned GraphQL responses for the two registration code paths.
    if compressed:
        response = {
            "data": {
                "project": [{
                    "id": "proj-id"
                }],
                "create_flow_from_compressed_string": {
                    "id": "long-id"
                },
            }
        }
    else:
        response = {
            "data": {
                "project": [{
                    "id": "proj-id"
                }],
                "create_flow": {
                    "id": "long-id"
                }
            }
        }
    patch_post(response)

    # Avoid a real tenant lookup during register().
    monkeypatch.setattr("prefect.client.Client.get_default_tenant_slug",
                        MagicMock(return_value="tslug"))

    with set_temporary_config({
            "cloud.api": "http://my-cloud.foo",
            "cloud.auth_token": "secret_token"
    }):
        client = Client()

    # Both clocks default "x", so the required parameter is always covered.
    a = prefect.schedules.clocks.DatesClock(
        [pendulum.now("UTC").add(seconds=0.1)], parameter_defaults=dict(x=1))
    b = prefect.schedules.clocks.DatesClock(
        [pendulum.now("UTC").add(seconds=0.25)], parameter_defaults=dict(x=2))

    x = prefect.Parameter("x", required=True)

    flow = prefect.Flow("test",
                        schedule=prefect.schedules.Schedule(clocks=[a, b]),
                        tasks=[x])
    flow.storage = prefect.environments.storage.Memory()
    flow.result_handler = flow.storage.result_handler

    # Must not raise; the mocked backend returns the new flow id.
    flow_id = client.register(
        flow,
        project_name="my-default-project",
        compressed=compressed,
        version_group_id=str(uuid.uuid4()),
    )
    assert flow_id == "long-id"
Esempio n. 11
0
    async def test_create_run_uses_default_flow_parameters(self, project_id):
        """A new flow run inherits the flow's parameter defaults."""
        serialized = prefect.Flow(
            name="test",
            tasks=[prefect.Parameter("x", default=1)]).serialize()
        flow_id = await api.flows.create_flow(
            project_id=project_id, serialized_flow=serialized)

        run_id = await api.runs.create_flow_run(flow_id=flow_id)
        run = await models.FlowRun.where(id=run_id).first({"parameters"})
        # No parameters were passed, so the default x=1 is recorded.
        assert run.parameters == dict(x=1)
Esempio n. 12
0
def test_client_register_raises_if_required_param_isnt_scheduled(
        patch_post, monkeypatch, tmpdir):
    """Registration must fail when some clock does NOT default the flow's
    required parameter "x" (clock ``b`` only supplies "y")."""
    # Canned GraphQL response; register() should raise before using it.
    response = {
        "data": {
            "project": [{
                "id": "proj-id"
            }],
            "create_flow": {
                "id": "long-id"
            }
        }
    }
    patch_post(response)

    # Avoid a real tenant lookup during register().
    monkeypatch.setattr("prefect.client.Client.get_default_tenant_slug",
                        MagicMock(return_value="tslug"))

    with set_temporary_config({
            "cloud.api": "http://my-cloud.foo",
            "cloud.auth_token": "secret_token",
            "backend": "cloud",
    }):
        client = Client()

    # Clock "a" defaults x, but clock "b" only defaults y — so "x" is
    # uncovered for runs produced by "b".
    a = prefect.schedules.clocks.DatesClock(
        [pendulum.now("UTC").add(seconds=0.1)], parameter_defaults=dict(x=1))
    b = prefect.schedules.clocks.DatesClock(
        [pendulum.now("UTC").add(seconds=0.25)], parameter_defaults=dict(y=2))

    x = prefect.Parameter("x", required=True)

    flow = prefect.Flow("test",
                        schedule=prefect.schedules.Schedule(clocks=[a, b]),
                        tasks=[x])
    flow.storage = prefect.environments.storage.Local(tmpdir)
    flow.result = flow.storage.result

    with pytest.raises(
            ClientError,
            match=
            "Flows with required parameters can not be scheduled automatically",
    ):
        flow_id = client.register(
            flow,
            project_name="my-default-project",
            compressed=False,
            version_group_id=str(uuid.uuid4()),
            no_url=True,
        )
Esempio n. 13
0
 async def test_set_schedule_active_with_required_parameters(
         self, project_id):
     """Activating the schedule of a flow with an uncovered required
     parameter must raise a ValueError."""
     # "p" is required and nothing (no default, no clock) supplies it.
     flow = prefect.Flow(
         name="test",
         tasks=[prefect.Parameter("p", required=True)],
         schedule=prefect.schedules.IntervalSchedule(
             start_date=pendulum.now("EST"),
             interval=datetime.timedelta(minutes=1)),
     )
     flow_id = await api.flows.create_flow(
         serialized_flow=flow.serialize(),
         project_id=project_id,
         set_schedule_active=False,
     )
     with pytest.raises(ValueError, match="required parameters"):
         await api.flows.set_schedule_active(flow_id=flow_id)
Esempio n. 14
0
def test_auto_manage_dataframe_init_overwrite(local_file):
    """An ``input_one_key`` given at init overrides the default HDF key."""
    task = TaskWithInputDataFrame(input_one_key='/bar')

    with Flow('test_auto_manage_dataframe_init_overwrite') as flow:
        path_param = prefect.Parameter('local_file', default=local_file)
        task(input_one=path_param)

    with raise_on_exception(), prefect.context(caches={}):
        state = flow.run()

    task_result = list(state.result.values())[0].result

    # The task must have read the '/bar' key rather than the default.
    expected = pd.read_hdf(local_file.file, '/bar')
    assert isinstance(expected, pd.DataFrame)

    tm.assert_equal(task_result, expected.mean())
Esempio n. 15
0
async def flow_id():
    """Fixture-style helper: create a daily-scheduled flow with two tagged
    tasks and a defaulted parameter, returning the new flow's id."""
    flow = prefect.Flow(
        name="Test Flow",
        schedule=prefect.schedules.IntervalSchedule(
            start_date=pendulum.datetime(2018, 1, 1),
            interval=datetime.timedelta(days=1),
        ),
    )
    # Two tagged tasks joined by an edge, plus a defaulted parameter, give
    # downstream tests a small but non-trivial graph to query.
    flow.add_edge(
        prefect.Task("t1", tags={"red", "blue"}),
        prefect.Task("t2", tags={"red", "green"}),
    )
    flow.add_task(prefect.Parameter("x", default=1))

    flow_id = await api.flows.create_flow(serialized_flow=flow.serialize())

    return flow_id
def test_environment_execute_with_kwargs():
    """Keyword arguments to ``execute`` are forwarded as flow parameters."""
    captured = {}

    @prefect.task
    def add_to_dict(x):
        # Record the resolved parameter value via the enclosing dict.
        captured["result"] = x

    environment = LocalEnvironment()
    storage = Memory()
    with prefect.Flow("test") as flow:
        param = prefect.Parameter("x")
        add_to_dict(param)

    location = storage.add_flow(flow)

    environment.execute(storage, location, x=42)
    assert captured.get("result") == 42
Esempio n. 17
0
def test_auto_manage_dataframe_default_string(local_file):
    """A plain path value for the parameter loads the default '/foo' key."""
    task = TaskWithInputDataFrame()
    filename = local_file.file.resolve()

    with Flow('test_auto_manage_dataframe_default_string') as flow:
        path_param = prefect.Parameter('local_file', default=filename)
        task(input_one=path_param)

    with raise_on_exception(), prefect.context(caches={}):
        state = flow.run()

    task_result = list(state.result.values())[0].result

    # The task's output must equal the mean of the '/foo' DataFrame.
    expected = pd.read_hdf(local_file.file, '/foo')
    assert isinstance(expected, pd.DataFrame)

    tm.assert_equal(task_result, expected.mean())
Esempio n. 18
0
    async def test_schedule_creates_parametrized_flow_runs(self):
        """Runs scheduled from two clocks must carry each clock's
        ``parameter_defaults``, alternating when sorted by start time.

        (Cleaned the stray ``(self, )`` signature artifact.)
        """
        # clock1 supplies x="a", clock2 supplies x="b"; their start times
        # are offset by one minute so their runs interleave.
        clock1 = prefect.schedules.clocks.IntervalClock(
            start_date=pendulum.now("UTC").add(minutes=1),
            interval=datetime.timedelta(minutes=2),
            parameter_defaults=dict(x="a"),
        )
        clock2 = prefect.schedules.clocks.IntervalClock(
            start_date=pendulum.now("UTC"),
            interval=datetime.timedelta(minutes=2),
            parameter_defaults=dict(x="b"),
        )

        flow = prefect.Flow(
            name="Test Scheduled Flow",
            schedule=prefect.schedules.Schedule(clocks=[clock1, clock2]),
        )
        flow.add_task(prefect.Parameter("x", default=1))
        flow_id = await api.flows.create_flow(serialized_flow=flow.serialize())
        schedule = await m.Schedule.where({
            "flow_id": {
                "_eq": flow_id
            }
        }).first("id")
        # Reset scheduler bookkeeping and clear auto-created runs so the
        # call below schedules a fresh, predictable batch.
        await m.Schedule.where(id=schedule.id
                               ).update(set={
                                   "last_checked": None,
                                   "last_scheduled_run_time": None
                               })
        await m.FlowRun.where({"flow_id": {"_eq": flow_id}}).delete()
        assert len(await api.schedules.schedule_flow_runs(schedule.id)) == 10

        flow_runs = await m.FlowRun.where({
            "flow_id": {
                "_eq": flow_id
            }
        }).get(
            selection_set={
                "parameters": True,
                "scheduled_start_time": True
            },
            order_by={"scheduled_start_time": EnumValue("asc")},
        )

        # Sorted by start time, even/odd positions alternate between the
        # two clocks' parameter sets.
        assert all([fr.parameters == dict(x="a") for fr in flow_runs[::2]])
        assert all([fr.parameters == dict(x="b") for fr in flow_runs[1::2]])
Esempio n. 19
0
def test_auto_manage_dataframe_many_inputs(local_file):
    """Two DataFrame inputs plus a scalar kwarg are all wired through."""
    task = TaskWithTwoInputDataFrames()

    with Flow('test_auto_manage_dataframe_many_inputs') as flow:
        path_param = prefect.Parameter('local_file', default=local_file)
        task(input_one=path_param, input_two=path_param, offset=1.23)

    with raise_on_exception(), prefect.context(caches={}):
        state = flow.run()

    task_result = list(state.result.values())[0].result

    # Expected: mean('/foo') + mean('/bar') + the scalar offset.
    df_foo = pd.read_hdf(local_file.file, '/foo')
    df_bar = pd.read_hdf(local_file.file, '/bar')
    assert isinstance(df_foo, pd.DataFrame)
    assert isinstance(df_bar, pd.DataFrame)

    tm.assert_equal(task_result, df_foo.mean() + df_bar.mean() + 1.23)
Esempio n. 20
0
def test_task_failure_caches_inputs_automatically(client):
    """When a task fails and will retry, its inputs are cached on the
    Retrying state — both locally and in the state reported to Cloud."""
    @prefect.task(max_retries=2, retry_delay=timedelta(seconds=100))
    def is_p_three(p):
        # Fails only for p == 3, which triggers the retry path under test.
        if p == 3:
            raise ValueError("No thank you.")

    with prefect.Flow("test") as f:
        p = prefect.Parameter("p")
        res = is_p_three(p)

    state = CloudFlowRunner(flow=f).run(return_tasks=[res], parameters=dict(p=3))
    assert state.is_running()
    assert isinstance(state.result[res], Retrying)
    # The parameter value is cached via its serialized result location.
    assert state.result[res].cached_inputs["p"].location == "3"

    # The last state sent to the backend must carry the same cached input.
    last_state = client.set_task_run_state.call_args_list[-1][-1]["state"]
    assert isinstance(last_state, Retrying)
    assert last_state.cached_inputs["p"].location == "3"
Esempio n. 21
0
async def labeled_flow_id(project_id):
    """Create a labeled, daily-scheduled flow in *project_id*; return its id."""
    flow = prefect.Flow(
        name="Labeled Flow",
        run_config=prefect.run_configs.UniversalRun(labels=["foo", "bar"]),
        schedule=prefect.schedules.IntervalSchedule(
            start_date=pendulum.datetime(2018, 1, 1),
            interval=datetime.timedelta(days=1),
        ),
    )
    # A small non-trivial graph: two tagged tasks and a defaulted parameter.
    flow.add_edge(
        prefect.Task("t1", tags={"red", "blue"}),
        prefect.Task("t2", tags={"red", "green"}),
    )
    flow.add_task(prefect.Parameter("x", default=1))

    return await api.flows.create_flow(project_id=project_id,
                                       serialized_flow=flow.serialize())
Esempio n. 22
0
def test_run_workflow_ignores_schedule(test_logger):
    """
    Test that run_workflow ignores the workflow's schedule.
    """
    function_mock = create_autospec(lambda dummy_param: None)
    # Flow with no more scheduled runs: the cron schedule ended two days
    # ago, so running it must be a deliberate, schedule-ignoring act.
    with prefect.Flow(
            "Dummy_workflow",
            schedule=CronSchedule("0 0 * * *",
                                  end_date=pendulum.now().subtract(days=2)),
    ) as dummy_workflow:
        dummy_param = prefect.Parameter("dummy_param")
        FunctionTask(function_mock)(dummy_param=dummy_param)

    with prefect.context(logger=test_logger):
        run_workflow.run(
            parametrised_workflow=(dummy_workflow,
                                   dict(dummy_param="DUMMY_VALUE")))
    # The wrapped function ran exactly once despite the expired schedule.
    function_mock.assert_called_once_with(dummy_param="DUMMY_VALUE")
Esempio n. 23
0
async def labeled_flow_id():
    """Create a flow labeled via its RemoteEnvironment; return its id."""
    remote_env = prefect.environments.execution.remote.RemoteEnvironment(
        labels=["foo", "bar"])
    flow = prefect.Flow(
        name="Labeled Flow",
        environment=remote_env,
        schedule=prefect.schedules.IntervalSchedule(
            start_date=pendulum.datetime(2018, 1, 1),
            interval=datetime.timedelta(days=1),
        ),
    )
    # A small non-trivial graph: two tagged tasks and a defaulted parameter.
    flow.add_edge(
        prefect.Task("t1", tags={"red", "blue"}),
        prefect.Task("t2", tags={"red", "green"}),
    )
    flow.add_task(prefect.Parameter("x", default=1))

    return await api.flows.create_flow(serialized_flow=flow.serialize())
Esempio n. 24
0
def create_cdc_single_state_flow():
    """Build the CDC Covid Data Tracker flow for a single state.

    Pipeline: fetch -> normalize -> validate -> put, with Sentry
    initialized before the scraper runs.
    """
    with Flow(CDCCovidDataTracker.__name__) as flow:
        state = prefect.Parameter("state")
        connstr = EnvVarSecret("COVID_DB_CONN_URI")
        sentry_dsn = EnvVarSecret("SENTRY_DSN")
        sentry_task = initialize_sentry(sentry_dsn)

        scraper = create_scraper(CDCCovidDataTracker, state=state)
        fetched = fetch(scraper)
        normalized = normalize(scraper)
        validated = validate(scraper)
        stored = put(scraper, connstr)

        # Order the steps explicitly — their data deps alone don't chain
        # them, since each step takes the scraper object directly.
        scraper.set_upstream(sentry_task)
        normalized.set_upstream(fetched)
        validated.set_upstream(normalized)
        stored.set_upstream(validated)

    return flow
Esempio n. 25
0
def test_client_deploy_rejects_setting_active_schedules_for_flows_with_req_params(
        active, monkeypatch):
    """Deploying a flow that has a required parameter and a schedule must
    be rejected with a ClientError.

    (Dropped the unused ``result =`` binding, lint F841 — ``deploy`` must
    raise before returning.)
    """
    post = MagicMock()
    monkeypatch.setattr("requests.post", post)
    with set_temporary_config({
            "cloud.graphql": "http://my-cloud.foo",
            "cloud.auth_token": "secret_token"
    }):
        client = Client()

    # Required parameter "x" has no default and no clock to supply it.
    flow = prefect.Flow(name="test", schedule=prefect.schedules.Schedule())
    flow.add_task(prefect.Parameter("x", required=True))

    with pytest.raises(ClientError) as exc:
        client.deploy(flow,
                      project_name="my-default-project",
                      set_schedule_active=active)
    assert (str(
        exc.value
    ) == "Flows with required parameters can not be scheduled automatically.")
Esempio n. 26
0
    async def test_set_schedule_active_handles_flow_group_defaults(
            self, project_id):
        """A required parameter satisfied by a *flow group* default allows
        the schedule to be activated."""
        # "p" is required and has no flow-level default or clock default.
        flow = prefect.Flow(
            name="test",
            tasks=[prefect.Parameter("p", required=True)],
            schedule=prefect.schedules.IntervalSchedule(
                start_date=pendulum.now("EST"),
                interval=datetime.timedelta(minutes=1)),
        )
        flow_id = await api.flows.create_flow(
            serialized_flow=flow.serialize(),
            project_id=project_id,
            set_schedule_active=False,
        )

        # set a default for "p" at the flow group level
        flow_group = await models.Flow.where(id=flow_id
                                             ).first({"flow_group_id"})
        await models.FlowGroup.where(id=flow_group.flow_group_id).update(
            set=dict(default_parameters={"p": 1}))

        # With the group default in place, activation must now succeed.
        assert await api.flows.set_schedule_active(flow_id=flow_id) is True
Esempio n. 27
0
def test_run_workflow(test_logger):
    """
    Test that the run_workflow task runs a workflow with the given parameters.
    """
    function_mock = create_autospec(lambda dummy_param: None)

    with prefect.Flow("Dummy workflow") as dummy_workflow:
        dummy_param = prefect.Parameter("dummy_param")
        FunctionTask(function_mock)(dummy_param=dummy_param)

    # Feed the (workflow, parameters) tuple to run_workflow through an
    # upstream edge keyed "parametrised_workflow", as the task expects.
    runner = TaskRunner(task=run_workflow)
    upstream_edge = Edge(prefect.Task(),
                         run_workflow,
                         key="parametrised_workflow")
    task_state = runner.run(
        upstream_states={
            upstream_edge:
            Success(result=(dummy_workflow, dict(dummy_param="DUMMY_VALUE")))
        },
        context=dict(logger=test_logger),
    )
    assert task_state.is_successful()
    # The inner workflow ran exactly once with the supplied parameter.
    function_mock.assert_called_once_with(dummy_param="DUMMY_VALUE")
Esempio n. 28
0
def test_run_workflow_fails(test_logger):
    """
    Test that the run_workflow task fails if the workflow fails.
    """
    # The wrapped function raises, so the inner workflow run must fail and
    # that failure must propagate to run_workflow's own state.
    function_mock = create_autospec(lambda dummy_param: None,
                                    side_effect=Exception("Workflow failed"))

    with prefect.Flow("Dummy workflow") as dummy_workflow:
        dummy_param = prefect.Parameter("dummy_param")
        FunctionTask(function_mock)(dummy_param=dummy_param)

    # Same wiring as the success-path test: (workflow, params) tuple comes
    # in via the "parametrised_workflow" upstream edge.
    runner = TaskRunner(task=run_workflow)
    upstream_edge = Edge(prefect.Task(),
                         run_workflow,
                         key="parametrised_workflow")
    task_state = runner.run(
        upstream_states={
            upstream_edge:
            Success(result=(dummy_workflow, dict(dummy_param="DUMMY_VALUE")))
        },
        context=dict(logger=test_logger),
    )
    assert task_state.is_failed()
Esempio n. 29
0

@prefect.task
def processing1(fp: str):
    """First post-processing step — currently just logs the file path."""
    log = prefect.context.get("logger")
    log.info(f"Doing some processing1 on {fp} ...")


@prefect.task
def processing2(fp: str):
    """Second post-processing step — currently just logs the file path."""
    log = prefect.context.get("logger")
    log.info(f"Doing some processing2 on {fp} ...")


# Flow definition: run processing1 then processing2 on the same file path.
with prefect.Flow("gfs-post-processing", result=PrefectResult()) as flow:
    fp = prefect.Parameter("fp")

    p1 = processing1(fp)
    p2 = processing2(fp)
    # No data dependency between the two steps, so order them explicitly.
    p2.set_upstream(p1)

# Storage/run configuration: pull the flow source from GitHub at the ref
# named by the environment (defaults to "master") and run it in Docker.
repo_ref = os.getenv("DATAFETCH__STORAGE__REPO__REF", default="master")
print(f"Registering Using GitHub repo ref {repo_ref}")
flow.storage = GitHub(repo="steph-ben/datafetch-config",
                      ref=repo_ref,
                      path="projects/gfs/post_process.py",
                      secrets=["GITHUB_ACCESS_TOKEN"])
flow.run_config = DockerRun()

if __name__ == "__main__":
    from datafetch.utils import show_prefect_cli_helper
Esempio n. 30
0

@task
def transform(df):
    """Strip stray parentheses and surrounding spaces from column names."""
    cleaned = df.columns.str.strip('() ')
    df.columns = cleaned
    return df


@task
def load(df, extract_name):
    """Write *df* to the ``raw.<extract_name>`` Postgres table, replacing
    any existing contents."""

    # db_* are presumably prefect Secrets/config objects defined elsewhere
    # in this file; .get() resolves them at runtime — TODO confirm.
    connect = f"postgresql+psycopg2://%s:%s@%s:{db_port.get()}/%s" % (
        db_user.get(), db_pass.get(), db_host.get(), db_db.get())

    engine = create_engine(connect)
    df.to_sql(f'{extract_name}',
              con=engine,
              index=False,
              schema='raw',
              if_exists='replace')


# Extract -> transform -> load, parameterized by the extract name.
with Flow("ETL") as flow:
    extract_name = prefect.Parameter('extract_name', default='goodreads')
    extracted = extract(extract_name)
    transformed = transform(extracted)
    # NOTE(review): renamed ambiguous local `l` (lint E741) to `loaded`.
    loaded = load(transformed, extract_name)

flow.register(project_name="Quantified Self")
flow.run_agent()