def test_add_flow_raises_if_name_conflict():
    storage = Memory()
    f = Flow("test")
    res = storage.add_flow(f)
    g = Flow("test")
    with pytest.raises(ValueError, match='name "test"'):
        storage.add_flow(g)

def test_add_flow_to_storage():
    storage = Memory()
    f = Flow("test")
    assert f.name not in storage
    res = storage.add_flow(f)
    assert res == "test"
    assert f.name in storage

def test_get_runner_returns_flow_runner_that_responds_to_config():
    s = Memory()
    f = Flow("test")
    s.add_flow(f)
    with set_temporary_config(
        {"engine.flow_runner.default_class": CloudFlowRunner}
    ):
        runner = s.get_runner("test", return_flow=False)
        assert isinstance(runner, CloudFlowRunner)
        assert runner.flow is f

def test_containment():
    s = Memory()
    f = Flow("test")
    s.add_flow(f)
    assert True not in s
    assert f not in s
    assert "test" in s
    assert Flow("other") not in s
    assert "other" not in s

def test_get_parametrised_workflows(test_logger):
    """
    Test that get_parametrised_workflows correctly combines workflow
    parameters with dates.
    """
    workflow_storage = Memory()
    workflow_storage.add_flow(prefect.Flow(name="WORKFLOW_1"))
    workflow_storage.add_flow(prefect.Flow(name="WORKFLOW_2"))
    workflow_configs = [
        WorkflowConfig(workflow_name="WORKFLOW_1"),
        WorkflowConfig(
            workflow_name="WORKFLOW_2",
            parameters={"DUMMY_PARAM": "DUMMY_VALUE"},
            date_stencil=DateStencil([-1, 0]),
        ),
    ]
    lists_of_dates = [
        [
            pendulum.date(2016, 1, 1),
            pendulum.date(2016, 1, 2),
            pendulum.date(2016, 1, 3),
        ],
        [pendulum.date(2016, 1, 4), pendulum.date(2016, 1, 5)],
    ]

    with prefect.context(logger=test_logger):
        parametrised_workflows = get_parametrised_workflows.run(
            workflow_configs=workflow_configs,
            lists_of_dates=lists_of_dates,
            workflow_storage=workflow_storage,
        )

    assert len(parametrised_workflows) == 5
    for i, refdate in enumerate(lists_of_dates[0]):
        assert parametrised_workflows[i][0].name == "WORKFLOW_1"
        assert parametrised_workflows[i][1] == {
            "reference_date": refdate,
            "date_ranges": [(refdate, refdate.add(days=1))],
        }
    for i in [3, 4]:
        assert parametrised_workflows[i][0].name == "WORKFLOW_2"
        assert parametrised_workflows[i][1] == {
            "DUMMY_PARAM": "DUMMY_VALUE",
            "reference_date": pendulum.date(2016, 1, i + 1),
            "date_ranges": [
                (pendulum.date(2016, 1, i), pendulum.date(2016, 1, i + 1)),
                (pendulum.date(2016, 1, i + 1), pendulum.date(2016, 1, i + 2)),
            ],
        }

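# For reference, a hypothetical helper (not part of the code under test)
# illustrating how a stencil of integer day-offsets appears to expand into
# half-open date ranges around a reference date, matching the expectations
# asserted in test_get_parametrised_workflows above:
def _offsets_to_date_ranges(offsets, reference_date):
    # Each offset n yields the one-day range starting n days from the
    # reference date, e.g. [-1, 0] -> [(ref - 1 day, ref), (ref, ref + 1 day)].
    return [
        (reference_date.add(days=n), reference_date.add(days=n + 1))
        for n in offsets
    ]
# e.g. _offsets_to_date_ranges([-1, 0], pendulum.date(2016, 1, 4))
# == [(pendulum.date(2016, 1, 3), pendulum.date(2016, 1, 4)),
#     (pendulum.date(2016, 1, 4), pendulum.date(2016, 1, 5))]
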
def test_environment_execute():
    global_dict = {}

    @prefect.task
    def add_to_dict():
        global_dict["run"] = True

    environment = LocalEnvironment()
    storage = Memory()
    flow = prefect.Flow("test", tasks=[add_to_dict])
    flow_loc = storage.add_flow(flow)

    environment.execute(storage, flow_loc)
    assert global_dict.get("run") is True

def test_environment_execute_with_kwargs():
    global_dict = {}

    @prefect.task
    def add_to_dict(x):
        global_dict["result"] = x

    environment = LocalEnvironment()
    storage = Memory()
    with prefect.Flow("test") as flow:
        x = prefect.Parameter("x")
        add_to_dict(x)

    flow_loc = storage.add_flow(flow)
    environment.execute(storage, flow_loc, x=42)
    assert global_dict.get("result") == 42

def test_execute_improper_storage():
    with tempfile.TemporaryDirectory() as directory:
        with open(os.path.join(directory, "job.yaml"), "w+") as file:
            file.write("job")

        environment = KubernetesJobEnvironment(
            job_spec_file=os.path.join(directory, "job.yaml")
        )
        with pytest.raises(TypeError):
            environment.execute(storage=Memory(), flow_location="")

def test_environment_execute_calls_callbacks():
    start_func = MagicMock()
    exit_func = MagicMock()

    global_dict = {}

    @prefect.task
    def add_to_dict():
        global_dict["run"] = True

    environment = LocalEnvironment(on_start=start_func, on_exit=exit_func)
    storage = Memory()
    flow = prefect.Flow("test", tasks=[add_to_dict])
    flow_loc = storage.add_flow(flow)

    environment.execute(storage, flow_loc)
    assert global_dict.get("run") is True
    assert start_func.called
    assert exit_func.called

def test_build_returns_self():
    s = Memory()
    assert s.build() is s

    f = Flow("test")
    s.add_flow(f)
    assert s.build() is s

def test_multiple_flows_in_storage():
    s = Memory()
    f = Flow("test")
    g = Flow("other")
    z = Flow("not")
    s.add_flow(f)
    s.add_flow(g)

    assert "test" in s
    assert "other" in s
    assert "not" not in s

    assert s.get_flow("test") is f
    assert s.get_flow("other") is g

    assert s.flows["test"] is f
    assert s.flows["other"] is g

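# For orientation, a minimal sketch of the in-memory storage behaviour the
# tests above exercise. This is an assumption reconstructed from the tests in
# this file, not Prefect's actual Memory implementation: flows live in a dict
# keyed by name, add_flow returns the name and rejects duplicates, membership
# tests only match flow names, and build is a no-op returning self.
class _MemorySketch:
    def __init__(self):
        self.flows = {}  # maps flow name -> Flow object

    def add_flow(self, flow):
        # Reject a second flow with the same name, as asserted in
        # test_add_flow_raises_if_name_conflict.
        if flow.name in self.flows:
            raise ValueError(f'Name conflict: name "{flow.name}" is already present')
        self.flows[flow.name] = flow
        return flow.name

    def get_flow(self, name):
        # Raise ValueError for unknown names, as asserted in
        # test_get_flow_raises_if_flow_not_present.
        if name not in self.flows:
            raise ValueError(f'Flow "{name}" not found in storage')
        return self.flows[name]

    def __contains__(self, item):
        # Only string names are members, as asserted in test_containment.
        return isinstance(item, str) and item in self.flows

    def build(self):
        return self  # in-memory storage has nothing to build
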
def test_execute_improper_storage():
    environment = CloudEnvironment()
    with pytest.raises(TypeError):
        environment.execute(storage=Memory(), flow_location="")

def test_available_dates_sensor_retries(monkeypatch, postgres_test_db):
    """
    Test that the available_dates_sensor flow re-runs workflows that failed
    on the previous attempt, and does not re-run them again once they have
    succeeded.
    """
    # Mock flowclient
    flowclient_available_dates = {
        "dummy_cdr_type": ["2016-01-01", "2016-01-02", "2016-01-03"]
    }
    monkeypatch.setattr(
        "flowclient.get_available_dates",
        lambda connection: flowclient_available_dates,
    )
    monkeypatch.setattr("flowclient.connect", Mock())
    monkeypatch.setenv("FLOWAPI_TOKEN", "DUMMY_TOKEN")

    # Mock workflows: fail on the first date, succeed on the other two
    dummy_workflow = Mock()
    dummy_workflow.name = "DUMMY_WORKFLOW"
    dummy_workflow.run.side_effect = [Failed(), Success(), Success()]
    workflow_storage = Memory()
    workflow_storage.add_flow(dummy_workflow)
    workflow_configs = [WorkflowConfig(workflow_name="DUMMY_WORKFLOW")]

    # Run available dates sensor
    with set_temporary_config(
        {"flowapi_url": "DUMMY_URL", "db_uri": postgres_test_db.url()}
    ):
        flow_state = available_dates_sensor.run(
            workflow_configs=workflow_configs, workflow_storage=workflow_storage
        )

    # Check that the sensor flow ended in a 'failed' state, and
    # dummy_workflow.run() was called 3 times
    assert flow_state.is_failed()
    dummy_workflow.run.assert_has_calls(
        [
            call(
                parameters=dict(
                    reference_date=d, date_ranges=[(d, d.add(days=1))]
                ),
                run_on_schedule=False,
            )
            for d in pendulum.period(
                pendulum.date(2016, 1, 1), pendulum.date(2016, 1, 3)
            )
        ]
    )

    # Reset workflow mock
    dummy_workflow.reset_mock()
    dummy_workflow.run.side_effect = None
    dummy_workflow.run.return_value = Success()

    # Run available dates sensor again
    with set_temporary_config(
        {"flowapi_url": "DUMMY_URL", "db_uri": postgres_test_db.url()}
    ):
        flow_state = available_dates_sensor.run(
            workflow_configs=workflow_configs, workflow_storage=workflow_storage
        )

    # Check that the sensor flow was successful, and dummy_workflow only
    # re-ran for the date for which it previously failed
    assert flow_state.is_successful()
    dummy_workflow.run.assert_called_once_with(
        parameters=dict(
            reference_date=pendulum.date(2016, 1, 1),
            date_ranges=[(pendulum.date(2016, 1, 1), pendulum.date(2016, 1, 2))],
        ),
        run_on_schedule=False,
    )

    # Reset workflow mock again
    dummy_workflow.reset_mock()

    # Run available dates sensor once more
    with set_temporary_config(
        {"flowapi_url": "DUMMY_URL", "db_uri": postgres_test_db.url()}
    ):
        flow_state = available_dates_sensor.run(
            workflow_configs=workflow_configs, workflow_storage=workflow_storage
        )

    # Check that dummy_workflow did not run again, now that it has run successfully
    assert flow_state.is_successful()
    dummy_workflow.run.assert_not_called()

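# The retry behaviour checked above implies that the sensor persists per-date
# run state (presumably in the database at db_uri, provided here by the
# postgres_test_db fixture): dates whose workflow runs returned Failed() are
# retried on the next sensor run, while dates recorded as successful are
# skipped thereafter.
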
def test_get_flow_returns_flow():
    s = Memory()
    f = Flow("test")
    s.add_flow(f)
    flow = s.get_flow("test")
    assert flow is f

def test_get_flow_raises_if_flow_not_present():
    s = Memory()
    with pytest.raises(ValueError):
        s.get_flow("test")

def test_get_env_runner_raises():
    s = Memory()
    with pytest.raises(NotImplementedError):
        s.get_env_runner("")

def test_available_dates_sensor(monkeypatch, postgres_test_db):
    """
    Test that the available_dates_sensor flow runs the specified workflows
    with the correct parameters, and does not run successful workflow runs
    more than once for the same date.
    """
    # Mock flowclient
    flowclient_available_dates = {
        "cdr_type_1": ["2016-01-01", "2016-01-02", "2016-01-03", "2016-01-04"],
        "cdr_type_2": ["2016-01-04", "2016-01-05", "2016-01-06", "2016-01-07"],
        "cdr_type_3": ["2016-01-08"],
    }
    monkeypatch.setattr(
        "flowclient.get_available_dates",
        lambda connection: flowclient_available_dates,
    )
    monkeypatch.setattr("flowclient.connect", Mock())
    monkeypatch.setenv("FLOWAPI_TOKEN", "DUMMY_TOKEN")

    # Mock workflows
    workflow_1 = Mock()
    workflow_1.name = "WORKFLOW_1"
    workflow_1.run.return_value = Success()
    workflow_2 = Mock()
    workflow_2.name = "WORKFLOW_2"
    workflow_2.run.return_value = Success()
    workflow_storage = Memory()
    workflow_storage.add_flow(workflow_1)
    workflow_storage.add_flow(workflow_2)
    workflow_configs = [
        WorkflowConfig(
            workflow_name="WORKFLOW_1",
            parameters={"DUMMY_PARAM_1": "DUMMY_VALUE_1"},
            earliest_date=pendulum.date(2016, 1, 4),
        ),
        WorkflowConfig(
            workflow_name="WORKFLOW_2",
            parameters={"DUMMY_PARAM_2": "DUMMY_VALUE_2"},
            date_stencil=DateStencil([[pendulum.date(2016, 1, 3), -1], -1, 0]),
        ),
    ]

    # Run available dates sensor
    with set_temporary_config(
        {"flowapi_url": "DUMMY_URL", "db_uri": postgres_test_db.url()}
    ):
        flow_state = available_dates_sensor.run(
            cdr_types=["cdr_type_1", "cdr_type_2"],
            workflow_configs=workflow_configs,
            workflow_storage=workflow_storage,
        )

    # Check that the run was successful and workflows were run with the
    # correct parameters
    assert flow_state.is_successful()
    workflow_1.run.assert_has_calls(
        [
            call(
                parameters=dict(
                    reference_date=d,
                    date_ranges=[(d, d.add(days=1))],
                    DUMMY_PARAM_1="DUMMY_VALUE_1",
                ),
                run_on_schedule=False,
            )
            for d in pendulum.period(
                pendulum.date(2016, 1, 4), pendulum.date(2016, 1, 7)
            )
        ]
    )
    workflow_2.run.assert_has_calls(
        [
            call(
                parameters=dict(
                    reference_date=d,
                    date_ranges=[
                        (pendulum.date(2016, 1, 3), d.subtract(days=1)),
                        (d.subtract(days=1), d),
                        (d, d.add(days=1)),
                    ],
                    DUMMY_PARAM_2="DUMMY_VALUE_2",
                ),
                run_on_schedule=False,
            )
            for d in pendulum.period(
                pendulum.date(2016, 1, 5), pendulum.date(2016, 1, 7)
            )
        ]
    )

    # Reset workflow mocks
    workflow_1.reset_mock()
    workflow_2.reset_mock()

    # Add more available dates
    flowclient_available_dates = {
        "cdr_type_1": ["2016-01-01", "2016-01-02", "2016-01-03", "2016-01-04"],
        "cdr_type_2": [
            "2016-01-04",
            "2016-01-05",
            "2016-01-06",
            "2016-01-07",
            "2016-01-08",
        ],
        "cdr_type_3": ["2016-01-08"],
    }
    monkeypatch.setattr(
        "flowclient.get_available_dates",
        lambda connection: flowclient_available_dates,
    )

    # Run available dates sensor again
    with set_temporary_config(
        {"flowapi_url": "DUMMY_URL", "db_uri": postgres_test_db.url()}
    ):
        flow_state = available_dates_sensor.run(
            cdr_types=["cdr_type_1", "cdr_type_2"],
            workflow_configs=workflow_configs,
            workflow_storage=workflow_storage,
        )

    # Check that workflows only ran for the new date
    workflow_1.run.assert_called_once_with(
        parameters=dict(
            reference_date=pendulum.date(2016, 1, 8),
            date_ranges=[(pendulum.date(2016, 1, 8), pendulum.date(2016, 1, 9))],
            DUMMY_PARAM_1="DUMMY_VALUE_1",
        ),
        run_on_schedule=False,
    )
    workflow_2.run.assert_called_once_with(
        parameters=dict(
            reference_date=pendulum.date(2016, 1, 8),
            date_ranges=[
                (pendulum.date(2016, 1, 3), pendulum.date(2016, 1, 7)),
                (pendulum.date(2016, 1, 7), pendulum.date(2016, 1, 8)),
                (pendulum.date(2016, 1, 8), pendulum.date(2016, 1, 9)),
            ],
            DUMMY_PARAM_2="DUMMY_VALUE_2",
        ),
        run_on_schedule=False,
    )

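# Note on the stencil used above: in
# DateStencil([[pendulum.date(2016, 1, 3), -1], -1, 0]), the [start, offset]
# pair appears to expand to the half-open interval from the fixed start date
# to the date `offset` days from the reference date, while bare integer
# offsets expand to single-day ranges as in _offsets_to_date_ranges above.
# This is consistent with the three date_ranges asserted for workflow_2 in
# both sensor runs.
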
def test_create_memory_storage():
    storage = Memory()
    assert storage