# Imports assumed by the examples below; the prefect paths are as in
# Prefect 0.x (they were not shown in the original examples).
import os
import tempfile
from unittest.mock import MagicMock, Mock, call

import pendulum
import pytest

import prefect
from prefect import Flow
from prefect.engine.cloud import CloudFlowRunner
from prefect.engine.state import Failed, Success
from prefect.environments import (
    CloudEnvironment,
    KubernetesJobEnvironment,
    LocalEnvironment,
)
from prefect.environments.storage import Memory
from prefect.utilities.configuration import set_temporary_config

# WorkflowConfig, DateStencil, get_parametrised_workflows and
# available_dates_sensor come from FlowKit's autoflow package, which also
# provides its own set_temporary_config and the test_logger and
# postgres_test_db fixtures; their import paths are not shown in the
# original examples, so they are omitted here.


def test_add_flow_raises_if_name_conflict():
    storage = Memory()
    f = Flow("test")
    storage.add_flow(f)
    g = Flow("test")
    with pytest.raises(ValueError, match='name "test"'):
        storage.add_flow(g)

def test_add_flow_to_storage():
    storage = Memory()
    f = Flow("test")
    assert f.name not in storage
    res = storage.add_flow(f)
    assert res == "test"
    assert f.name in storage

def test_add_flow_raises_if_name_conflict_error_message():
    storage = Memory()
    f = Flow("test")
    storage.add_flow(f)
    g = Flow("test")
    with pytest.raises(ValueError) as exc:
        storage.add_flow(g)
    assert 'name "test"' in str(exc.value)

def test_get_runner_returns_flow_or_flow_runner_responds_to_config():
    s = Memory()
    f = Flow("test")
    s.add_flow(f)

    with set_temporary_config(
        {"engine.flow_runner.default_class": CloudFlowRunner}):
        runner = s.get_runner("test", return_flow=False)
        assert isinstance(runner, CloudFlowRunner)
        assert runner.flow is f

def test_containment():
    s = Memory()
    f = Flow("test")
    s.add_flow(f)

    assert True not in s
    assert f not in s
    assert "test" in s
    assert Flow("other") not in s
    assert "other" not in s

def test_get_parametrised_workflows(test_logger):
    """
    Test that get_parametrised_workflows correctly combines workflow parameters with dates.
    """
    workflow_storage = Memory()
    workflow_storage.add_flow(prefect.Flow(name="WORKFLOW_1"))
    workflow_storage.add_flow(prefect.Flow(name="WORKFLOW_2"))

    workflow_configs = [
        WorkflowConfig(workflow_name="WORKFLOW_1"),
        WorkflowConfig(
            workflow_name="WORKFLOW_2",
            parameters={"DUMMY_PARAM": "DUMMY_VALUE"},
            date_stencil=DateStencil([-1, 0]),
        ),
    ]
    lists_of_dates = [
        [
            pendulum.date(2016, 1, 1),
            pendulum.date(2016, 1, 2),
            pendulum.date(2016, 1, 3),
        ],
        [pendulum.date(2016, 1, 4),
         pendulum.date(2016, 1, 5)],
    ]

    with prefect.context(logger=test_logger):
        parametrised_workflows = get_parametrised_workflows.run(
            workflow_configs=workflow_configs,
            lists_of_dates=lists_of_dates,
            workflow_storage=workflow_storage,
        )

    assert len(parametrised_workflows) == 5
    for i, refdate in enumerate(lists_of_dates[0]):
        assert parametrised_workflows[i][0].name == "WORKFLOW_1"
        assert parametrised_workflows[i][1] == {
            "reference_date": refdate,
            "date_ranges": [(refdate, refdate.add(days=1))],
        }
    for i in [3, 4]:
        assert parametrised_workflows[i][0].name == "WORKFLOW_2"
        assert parametrised_workflows[i][1] == {
            "DUMMY_PARAM": "DUMMY_VALUE",
            "reference_date": pendulum.date(2016, 1, i + 1),
            "date_ranges": [
                (pendulum.date(2016, 1, i), pendulum.date(2016, 1, i + 1)),
                (pendulum.date(2016, 1, i + 1), pendulum.date(2016, 1, i + 2)),
            ],
        }
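
# In the assertions above, the default date stencil expands each reference
# date into a single (reference_date, reference_date + 1 day) range, while
# DateStencil([-1, 0]) yields two consecutive one-day ranges covering the
# reference date and the preceding day.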

def test_environment_execute():
    global_dict = {}

    @prefect.task
    def add_to_dict():
        global_dict["run"] = True

    environment = LocalEnvironment()
    storage = Memory()
    flow = prefect.Flow("test", tasks=[add_to_dict])
    flow_loc = storage.add_flow(flow)

    environment.execute(storage, flow_loc)
    assert global_dict.get("run") is True

def test_environment_execute_with_kwargs():
    global_dict = {}

    @prefect.task
    def add_to_dict(x):
        global_dict["result"] = x

    environment = LocalEnvironment()
    storage = Memory()
    with prefect.Flow("test") as flow:
        x = prefect.Parameter("x")
        add_to_dict(x)

    flow_loc = storage.add_flow(flow)

    environment.execute(storage, flow_loc, x=42)
    assert global_dict.get("result") == 42
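
# Both "improper storage" tests (this one and the CloudEnvironment variant
# further down) expect a TypeError, presumably because these environments
# accept only Docker-based storage; the exact check is not shown in these
# examples.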
def test_execute_improper_storage():
    with tempfile.TemporaryDirectory() as directory:

        with open(os.path.join(directory, "job.yaml"), "w+") as file:
            file.write("job")

        environment = KubernetesJobEnvironment(
            job_spec_file=os.path.join(directory, "job.yaml"))
        with pytest.raises(TypeError):
            environment.execute(storage=Memory(), flow_location="")

def test_environment_execute_calls_callbacks():
    start_func = MagicMock()
    exit_func = MagicMock()

    global_dict = {}

    @prefect.task
    def add_to_dict():
        global_dict["run"] = True

    environment = LocalEnvironment(on_start=start_func, on_exit=exit_func)
    storage = Memory()
    flow = prefect.Flow("test", tasks=[add_to_dict])
    flow_loc = storage.add_flow(flow)

    environment.execute(storage, flow_loc)
    assert global_dict.get("run") is True

    assert start_func.called
    assert exit_func.called

def test_build_returns_self():
    s = Memory()
    assert s.build() is s

    f = Flow("test")
    s.add_flow(f)
    assert s.build() is s

def test_multiple_flows_in_storage():
    s = Memory()
    f = Flow("test")
    g = Flow("other")
    z = Flow("not")
    s.add_flow(f)
    s.add_flow(g)

    assert "test" in s
    assert "other" in s
    assert "not" not in s

    assert s.get_flow("test") is f
    assert s.get_flow("other") is g

    assert s.flows["test"] is f
    assert s.flows["other"] is g

def test_cloud_environment_execute_improper_storage():
    environment = CloudEnvironment()
    with pytest.raises(TypeError):
        environment.execute(storage=Memory(), flow_location="")

def test_available_dates_sensor_retries(monkeypatch, postgres_test_db):
    """
    Test that the available_dates_sensor flow re-runs workflows that failed on
    the previous attempt, and does not re-run them again once they have succeeded.
    """
    # Mock flowclient
    flowclient_available_dates = {
        "dummy_cdr_type": ["2016-01-01", "2016-01-02", "2016-01-03"]
    }
    monkeypatch.setattr("flowclient.get_available_dates",
                        lambda connection: flowclient_available_dates)
    monkeypatch.setattr("flowclient.connect", Mock())
    monkeypatch.setenv("FLOWAPI_TOKEN", "DUMMY_TOKEN")

    # Mock workflows
    dummy_workflow = Mock()
    dummy_workflow.name = "DUMMY_WORKFLOW"
    dummy_workflow.run.side_effect = [Failed(), Success(), Success()]
    workflow_storage = Memory()
    workflow_storage.add_flow(dummy_workflow)

    workflow_configs = [WorkflowConfig(workflow_name="DUMMY_WORKFLOW")]

    # Run available dates sensor
    with set_temporary_config({
            "flowapi_url": "DUMMY_URL",
            "db_uri": postgres_test_db.url()
    }):
        flow_state = available_dates_sensor.run(
            workflow_configs=workflow_configs,
            workflow_storage=workflow_storage)

    # Check that sensor flow ended in a 'failed' state, and dummy_workflow.run() was called 3 times
    assert flow_state.is_failed()
    dummy_workflow.run.assert_has_calls([
        call(
            parameters=dict(reference_date=d,
                            date_ranges=[(d, d.add(days=1))]),
            run_on_schedule=False,
        ) for d in pendulum.period(pendulum.date(2016, 1, 1),
                                   pendulum.date(2016, 1, 3))
    ])

    # Reset workflow mock
    dummy_workflow.reset_mock()
    dummy_workflow.run.side_effect = None
    dummy_workflow.run.return_value = Success()

    # Run available dates sensor again
    with set_temporary_config({
            "flowapi_url": "DUMMY_URL",
            "db_uri": postgres_test_db.url()
    }):
        flow_state = available_dates_sensor.run(
            workflow_configs=workflow_configs,
            workflow_storage=workflow_storage)

    # Check that sensor flow was successful, and dummy_workflow only re-ran for the date for which it previously failed
    assert flow_state.is_successful()
    dummy_workflow.run.assert_called_once_with(
        parameters=dict(
            reference_date=pendulum.date(2016, 1, 1),
            date_ranges=[(pendulum.date(2016, 1, 1), pendulum.date(2016, 1, 2))],
        ),
        run_on_schedule=False,
    )

    # Reset workflow mock again
    dummy_workflow.reset_mock()

    # Run available dates sensor once more
    with set_temporary_config({
            "flowapi_url": "DUMMY_URL",
            "db_uri": postgres_test_db.url()
    }):
        flow_state = available_dates_sensor.run(
            workflow_configs=workflow_configs,
            workflow_storage=workflow_storage)

    # Check that dummy_workflow did not run again, now that it has run successfully
    assert flow_state.is_successful()
    dummy_workflow.run.assert_not_called()
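
# The sensor avoids duplicate runs by recording prior outcomes, presumably in
# the database configured via db_uri: (workflow, date) combinations that
# failed are retried on the next run, while successful ones are skipped.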

def test_get_flow_returns_flow():
    s = Memory()
    f = Flow("test")
    s.add_flow(f)
    runner = s.get_flow("test")
    assert runner is f

def test_get_flow_raises_if_flow_not_present():
    s = Memory()
    with pytest.raises(ValueError):
        s.get_flow("test")

def test_get_env_runner_raises():
    s = Memory()
    with pytest.raises(NotImplementedError):
        s.get_env_runner("")

def test_available_dates_sensor(monkeypatch, postgres_test_db):
    """
    Test that the available_dates_sensor flow runs the specified workflows with
    the correct parameters, and does not run successful workflow runs more than
    once for the same date.
    """
    # Mock flowclient
    flowclient_available_dates = {
        "cdr_type_1": ["2016-01-01", "2016-01-02", "2016-01-03", "2016-01-04"],
        "cdr_type_2": ["2016-01-04", "2016-01-05", "2016-01-06", "2016-01-07"],
        "cdr_type_3": ["2016-01-08"],
    }
    monkeypatch.setattr("flowclient.get_available_dates",
                        lambda connection: flowclient_available_dates)
    monkeypatch.setattr("flowclient.connect", Mock())
    monkeypatch.setenv("FLOWAPI_TOKEN", "DUMMY_TOKEN")

    # Mock workflows
    workflow_1 = Mock()
    workflow_1.name = "WORKFLOW_1"
    workflow_1.run.return_value = Success()
    workflow_2 = Mock()
    workflow_2.name = "WORKFLOW_2"
    workflow_2.run.return_value = Success()
    workflow_storage = Memory()
    workflow_storage.add_flow(workflow_1)
    workflow_storage.add_flow(workflow_2)

    workflow_configs = [
        WorkflowConfig(
            workflow_name="WORKFLOW_1",
            parameters={"DUMMY_PARAM_1": "DUMMY_VALUE_1"},
            earliest_date=pendulum.date(2016, 1, 4),
        ),
        WorkflowConfig(
            workflow_name="WORKFLOW_2",
            parameters={"DUMMY_PARAM_2": "DUMMY_VALUE_2"},
            date_stencil=DateStencil([[pendulum.date(2016, 1, 3), -1], -1, 0]),
        ),
    ]

    # Run available dates sensor
    with set_temporary_config({
            "flowapi_url": "DUMMY_URL",
            "db_uri": postgres_test_db.url()
    }):
        flow_state = available_dates_sensor.run(
            cdr_types=["cdr_type_1", "cdr_type_2"],
            workflow_configs=workflow_configs,
            workflow_storage=workflow_storage,
        )

    # Check that run was successful and workflows were run with the correct parameters
    assert flow_state.is_successful()
    workflow_1.run.assert_has_calls([
        call(
            parameters=dict(
                reference_date=d,
                date_ranges=[(d, d.add(days=1))],
                DUMMY_PARAM_1="DUMMY_VALUE_1",
            ),
            run_on_schedule=False,
        ) for d in pendulum.period(pendulum.date(2016, 1, 4),
                                   pendulum.date(2016, 1, 7))
    ])
    workflow_2.run.assert_has_calls([
        call(
            parameters=dict(
                reference_date=d,
                date_ranges=[
                    (pendulum.date(2016, 1, 3), d.subtract(days=1)),
                    (d.subtract(days=1), d),
                    (d, d.add(days=1)),
                ],
                DUMMY_PARAM_2="DUMMY_VALUE_2",
            ),
            run_on_schedule=False,
        ) for d in pendulum.period(pendulum.date(2016, 1, 5),
                                   pendulum.date(2016, 1, 7))
    ])

    # Reset workflow mocks
    workflow_1.reset_mock()
    workflow_2.reset_mock()

    # Add more available dates
    flowclient_available_dates = {
        "cdr_type_1": ["2016-01-01", "2016-01-02", "2016-01-03", "2016-01-04"],
        "cdr_type_2": [
            "2016-01-04",
            "2016-01-05",
            "2016-01-06",
            "2016-01-07",
            "2016-01-08",
        ],
        "cdr_type_3": ["2016-01-08"],
    }
    monkeypatch.setattr("flowclient.get_available_dates",
                        lambda connection: flowclient_available_dates)

    # Run available dates sensor again
    with set_temporary_config({
            "flowapi_url": "DUMMY_URL",
            "db_uri": postgres_test_db.url()
    }):
        flow_state = available_dates_sensor.run(
            cdr_types=["cdr_type_1", "cdr_type_2"],
            workflow_configs=workflow_configs,
            workflow_storage=workflow_storage,
        )

    # Check that workflows only ran for the new date
    workflow_1.run.assert_called_once_with(
        parameters=dict(
            reference_date=pendulum.date(2016, 1, 8),
            date_ranges=[(pendulum.date(2016, 1, 8), pendulum.date(2016, 1, 9))],
            DUMMY_PARAM_1="DUMMY_VALUE_1",
        ),
        run_on_schedule=False,
    )
    workflow_2.run.assert_called_once_with(
        parameters=dict(
            reference_date=pendulum.date(2016, 1, 8),
            date_ranges=[
                (pendulum.date(2016, 1, 3), pendulum.date(2016, 1, 7)),
                (pendulum.date(2016, 1, 7), pendulum.date(2016, 1, 8)),
                (pendulum.date(2016, 1, 8), pendulum.date(2016, 1, 9)),
            ],
            DUMMY_PARAM_2="DUMMY_VALUE_2",
        ),
        run_on_schedule=False,
    )

def test_create_memory_storage():
    storage = Memory()
    assert storage
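
# Taken together, the tests above pin down a small contract for Memory
# storage. The following is a minimal sketch of just that contract, assuming
# nothing beyond the behaviour exercised here; the real
# prefect.environments.storage.Memory inherits from prefect's Storage base
# class and carries additional plumbing not shown.


class MemorySketch:
    """Dict-backed flow storage keyed by flow name (illustration only)."""

    def __init__(self):
        self.flows = {}

    def add_flow(self, flow):
        # Duplicate names are rejected, per test_add_flow_raises_if_name_conflict.
        if flow.name in self.flows:
            raise ValueError(
                'Name conflict: flow with the name "{}" is already present.'.format(
                    flow.name
                )
            )
        self.flows[flow.name] = flow
        # The returned "location" is simply the flow's name.
        return flow.name

    def get_flow(self, flow_location):
        # Unknown names raise ValueError, per test_get_flow_raises_if_flow_not_present.
        if flow_location not in self.flows:
            raise ValueError("Flow is not contained in this storage.")
        return self.flows[flow_location]

    def build(self):
        # Building in-memory storage is a no-op that returns the storage itself.
        return self

    def __contains__(self, obj):
        # Containment matches name strings only (see test_containment).
        return isinstance(obj, str) and obj in self.flows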