Example #1
def test_scheduled_jobs():
    from datetime import datetime

    from dagster import (
        Field,
        ScheduleDefinition,
        String,
        build_schedule_context,
        job,
        op,
        validate_run_config,
    )

    @op(config_schema={"foo": Field(String)})
    def foo_op(context):
        pass

    DEFAULT_FOO_CONFIG = {"ops": {"foo_op": {"config": {"foo": "bar"}}}}

    @job(config=DEFAULT_FOO_CONFIG)
    def foo_job():
        foo_op()

    my_schedule = ScheduleDefinition(name="my_schedule",
                                     cron_schedule="* * * * *",
                                     job=foo_job)

    # schedule contexts can be built with or without a fixed scheduled execution time
    context_without_time = build_schedule_context()
    execution_time = datetime(year=2019, month=2, day=27)
    context_with_time = build_schedule_context(
        scheduled_execution_time=execution_time)
    execution_data = my_schedule.evaluate_tick(context_without_time)
    assert execution_data.run_requests
    assert len(execution_data.run_requests) == 1

    # the run config produced by the schedule should be valid for foo_job
    validate_run_config(foo_job, execution_data.run_requests[0].run_config)
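Note that the test builds `context_with_time` but never evaluates it. A natural extension (not part of the original snippet) is to evaluate the tick against the time-bound context as well, for example by appending inside the test:

    execution_data_with_time = my_schedule.evaluate_tick(context_with_time)
    assert len(execution_data_with_time.run_requests) == 1
    validate_run_config(foo_job, execution_data_with_time.run_requests[0].run_config)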
Example #2
def test_partition_based_schedule_example():
    @solid(config_schema={"date": str})
    def process_data_for_date(context):
        return context.solid_config["date"]

    @pipeline
    def pipeline_for_test():
        process_data_for_date()

    run_config = partition_schedule_example(datetime(2021, 1, 1))

    assert validate_run_config(pipeline_for_test, run_config)

    run_config = my_timezone_schedule(datetime(2021, 1, 1))

    assert validate_run_config(pipeline_for_test, run_config)
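`partition_schedule_example` and `my_timezone_schedule` are defined in the module under test and are not shown here. A minimal sketch of what such legacy partition-based schedules could look like (the cron execution times, start date, and timezone are assumptions):

from datetime import datetime, time

from dagster import daily_schedule

@daily_schedule(
    pipeline_name="pipeline_for_test",
    start_date=datetime(2021, 1, 1),
    execution_time=time(11, 0),
)
def partition_schedule_example(date):
    # run config for the partition date the schedule is invoked with
    return {"solids": {"process_data_for_date": {"config": {"date": date.strftime("%Y-%m-%d")}}}}

@daily_schedule(
    pipeline_name="pipeline_for_test",
    start_date=datetime(2021, 1, 1),
    execution_time=time(6, 45),
    execution_timezone="US/Central",
)
def my_timezone_schedule(date):
    return {"solids": {"process_data_for_date": {"config": {"date": date.strftime("%Y-%m-%d")}}}}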
Example #3
def test_my_partitioned_config():
    # assert that the decorated function returns the expected output
    run_config = my_partitioned_config(datetime(2020, 1, 3), datetime(2020, 1, 4))
    assert run_config == {"ops": {"process_data_for_date": {"config": {"date": "2020-01-03"}}}}

    # assert that the output of the decorated function is valid configuration for the
    # do_stuff_partitioned job
    assert validate_run_config(do_stuff_partitioned, run_config)
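This test follows the Dagster documentation pattern for testing a partitioned config. The definitions it relies on are not shown; they presumably look roughly like the following sketch:

from datetime import datetime

from dagster import daily_partitioned_config, job, op

@op(config_schema={"date": str})
def process_data_for_date(context):
    ...

@daily_partitioned_config(start_date=datetime(2020, 1, 1))
def my_partitioned_config(start: datetime, _end: datetime):
    # each daily partition's run config carries that partition's date string
    return {"ops": {"process_data_for_date": {"config": {"date": start.strftime("%Y-%m-%d")}}}}

@job(config=my_partitioned_config)
def do_stuff_partitioned():
    process_data_for_date()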
Example #4
def test_non_partition_schedule_example():
    @solid(config_schema={"dataset_name": str})
    def process_data(_):
        pass

    @pipeline
    def pipeline_for_test():
        process_data()

    run_config = non_partition_schedule_example(None)
    assert validate_run_config(pipeline_for_test, run_config)
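`non_partition_schedule_example` takes a schedule context (which is why the test can pass None) and returns static run config. A possible sketch, with the cron expression and dataset name as placeholders:

from dagster import schedule

@schedule(cron_schedule="0 0 * * *", pipeline_name="pipeline_for_test")
def non_partition_schedule_example(_context):
    # the context is unused, so the schedule can be exercised with None in tests
    return {"solids": {"process_data": {"config": {"dataset_name": "my_dataset"}}}}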
Example #5
def test_schedule_examples(schedule_to_test):
    @solid(config_schema={"date": str})
    def process_data_for_date(_):
        pass

    @pipeline(mode_defs=[ModeDefinition("basic")])
    def pipeline_for_test():
        process_data_for_date()

    run_config = schedule_to_test(datetime(2021, 1, 1))

    assert validate_run_config(pipeline_for_test, run_config)
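`schedule_to_test` is supplied as a test argument, so this test is presumably parametrized over several schedule definitions, for example with pytest:

import pytest

# hypothetical parametrization; the actual list of schedules is not shown in the snippet
@pytest.mark.parametrize("schedule_to_test", [partition_schedule_example, my_timezone_schedule])
def test_schedule_examples(schedule_to_test):
    ...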
Example #6
def test_my_execution_time_schedule():
    @solid(config_schema={"dataset_name": str, "execution_date": str})
    def process_data(_):
        pass

    @pipeline
    def pipeline_for_test():
        process_data()

    run_config = my_execution_time_schedule(
        build_schedule_context(scheduled_execution_time=datetime(2021, 1, 1)))
    assert validate_run_config(pipeline_for_test, run_config)
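`my_execution_time_schedule` reads the scheduled execution time off the context it is given, which is why the test builds a context with `scheduled_execution_time`. A rough sketch (cron expression and dataset name assumed):

from dagster import schedule

@schedule(cron_schedule="0 0 * * *", pipeline_name="pipeline_for_test")
def my_execution_time_schedule(context):
    # derive the execution date from the tick's scheduled time
    execution_date = context.scheduled_execution_time.strftime("%Y-%m-%d")
    return {
        "solids": {
            "process_data": {
                "config": {"dataset_name": "my_dataset", "execution_date": execution_date}
            }
        }
    }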
Example #7
def test_my_offset_partitioned_config():
    # test that the partition keys are what you expect
    keys = my_offset_partitioned_config.get_partition_keys()
    assert keys[0] == "2020-01-01"
    assert keys[1] == "2020-01-02"

    # test that the run_config for a partition is valid for do_more_stuff_partitioned
    run_config = my_offset_partitioned_config.get_run_config_for_partition_key(
        keys[0])
    assert validate_run_config(do_more_stuff_partitioned, run_config)

    # test that the contents of run_config are what you expect
    assert run_config == {
        "ops": {
            "process_data": {
                "config": {
                    "start": "2020-01-01-00:15",
                    "end": "2020-01-02-00:15"
                }
            }
        }
    }
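`my_offset_partitioned_config` and `do_more_stuff_partitioned` are not shown. Given the 00:15 timestamps in the expected run config, they plausibly look like a daily partitioned config with a 15-minute offset, for example:

from datetime import datetime, timedelta

from dagster import daily_partitioned_config, job, op

@op(config_schema={"start": str, "end": str})
def process_data(context):
    ...

@daily_partitioned_config(start_date=datetime(2020, 1, 1), minute_offset=15)
def my_offset_partitioned_config(start: datetime, _end: datetime):
    # each partition spans one day, starting 15 minutes past midnight
    return {
        "ops": {
            "process_data": {
                "config": {
                    "start": start.strftime("%Y-%m-%d-%H:%M"),
                    "end": (start + timedelta(days=1)).strftime("%Y-%m-%d-%H:%M"),
                }
            }
        }
    }

@job(config=my_offset_partitioned_config)
def do_more_stuff_partitioned():
    process_data()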
Example #8
def test_multiple_runs_for_successful_runs():
    # builds a run that matches the sensor's criteria: a successful
    # "prod"-mode run of "download_pipeline"
    def get_should_launch_run():
        return PipelineRun(
            run_id=str(uuid.uuid4()),
            status=PipelineRunStatus.SUCCESS,
            mode="prod",
            pipeline_name="download_pipeline",
            run_config={"resources": DEFAULT_PARTITION_RESOURCE_CONFIG},
        )

    with tempfile.TemporaryDirectory() as temp_dir:
        instance = DagsterInstance.local_temp(temp_dir)
        for run in [
                get_should_launch_run(),
                PipelineRun(status=PipelineRunStatus.FAILURE,
                            mode="prod",
                            pipeline_name="download_pipeline"),
                PipelineRun(status=PipelineRunStatus.SUCCESS,
                            mode="dev",
                            pipeline_name="weird_pipeline"),
                PipelineRun(status=PipelineRunStatus.SUCCESS,
                            mode="test",
                            pipeline_name="download_pipeline"),
                PipelineRun(status=PipelineRunStatus.SUCCESS,
                            mode="prod",
                            pipeline_name="other"),
                get_should_launch_run(),
                get_should_launch_run(),
                get_should_launch_run(),
        ]:
            instance.add_run(run)
        run_requests = list(
            dbt_on_hn_download_finished(
                build_sensor_context(instance=instance)))
        # only the four runs created by get_should_launch_run match the sensor's criteria
        assert len(run_requests) == 4
        for run_request in run_requests:
            assert validate_run_config(dbt_pipeline, run_request.run_config)
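This test comes from the hacker_news demo project. `dbt_on_hn_download_finished` is a sensor that inspects the instance's run history and yields one run request per successful prod-mode run of `download_pipeline`, which is why exactly four of the eight inserted runs produce run requests. A rough sketch of that kind of sensor (not the project's actual implementation; the run config it emits is elided):

from dagster import RunRequest, sensor
from dagster.core.storage.pipeline_run import PipelineRunStatus, PipelineRunsFilter

@sensor(pipeline_name="dbt_pipeline")
def dbt_on_hn_download_finished(context):
    # query the instance for finished runs of the upstream pipeline
    runs = context.instance.get_runs(
        filters=PipelineRunsFilter(
            pipeline_name="download_pipeline",
            statuses=[PipelineRunStatus.SUCCESS],
        )
    )
    for run in runs:
        if run.mode != "prod":
            continue
        # one run request per matching run, keyed by the upstream run id for idempotence
        yield RunRequest(run_key=run.run_id, run_config={})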
Example #9
def test_validate_run_config():
    @solid
    def basic():
        pass

    @pipeline
    def basic_pipeline():
        basic()

    # a pipeline with no required config validates without any run_config supplied
    validate_run_config(basic_pipeline)

    @solid(config_schema={"foo": str})
    def requires_config(_):
        pass

    @pipeline
    def pipeline_requires_config():
        requires_config()

    result = validate_run_config(
        pipeline_requires_config, {"solids": {"requires_config": {"config": {"foo": "bar"}}}}
    )

    # validate_run_config returns the processed run config with defaults filled in
    assert result == {
        "solids": {"requires_config": {"config": {"foo": "bar"}, "inputs": {}, "outputs": None}},
        "execution": {"in_process": {"retries": {"enabled": {}}}},
        "resources": {"io_manager": {"config": None}},
        "loggers": {},
    }

    result_with_storage = validate_run_config(
        pipeline_requires_config,
        {"solids": {"requires_config": {"config": {"foo": "bar"}}}},
    )

    assert result_with_storage == {
        "solids": {"requires_config": {"config": {"foo": "bar"}, "inputs": {}, "outputs": None}},
        "execution": {"in_process": {"retries": {"enabled": {}}}},
        "resources": {"io_manager": {"config": None}},
        "loggers": {},
    }

    # omitting required solid config should raise
    with pytest.raises(DagsterInvalidConfigError):
        validate_run_config(pipeline_requires_config)
Example #10
def test_my_cron_schedule_with_context():
    context = build_schedule_context(
        scheduled_execution_time=datetime.datetime(2020, 1, 1))
    run_config = my_schedule_uses_context(context)
    assert validate_run_config(pipeline_for_test, run_config)
Example #11
def test_sensor():
    for run_request in trigger_apis(None):
        assert dg.validate_run_config(dg.log_file_pipeline, run_request.run_config)
Example #12
def test_sensor():
    for run_request in sensor_to_test():
        assert validate_run_config(log_file_pipeline, run_request.run_config)
Example #13
def test_my_directory_sensor_cursor():
    context = build_sensor_context(cursor="0")
    for run_request in my_directory_sensor_cursor(context):
        assert validate_run_config(log_file_pipeline, run_request.run_config)
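`my_directory_sensor_cursor` is the cursor-based directory sensor from the Dagster docs: it keys new run requests off file modification times and stores the high-water mark in the sensor cursor. Roughly (the watched directory and the solid name are assumptions):

import os

from dagster import RunRequest, sensor

MY_DIRECTORY = "/path/to/watch"  # assumed location

@sensor(pipeline_name="log_file_pipeline")
def my_directory_sensor_cursor(context):
    last_mtime = float(context.cursor) if context.cursor else 0
    max_mtime = last_mtime
    for filename in os.listdir(MY_DIRECTORY):
        filepath = os.path.join(MY_DIRECTORY, filename)
        if not os.path.isfile(filepath):
            continue
        file_mtime = os.stat(filepath).st_mtime
        if file_mtime <= last_mtime:
            continue
        # one run per new or updated file, keyed by name and mtime so repeats are deduplicated
        yield RunRequest(
            run_key=f"{filename}:{file_mtime}",
            run_config={"solids": {"process_file": {"config": {"filename": filename}}}},
        )
        max_mtime = max(max_mtime, file_mtime)
    # persist the high-water mark so the next tick only considers newer files
    context.update_cursor(str(max_mtime))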
Example #14
def test_partition_schedule():
    schedule_data = my_schedule.evaluate_tick(build_schedule_context())
    for run_request in schedule_data.run_requests:
        validate_run_config(my_data_pipeline, run_request.run_config)
Example #15
def test_hourly_schedule():
    run_config = hourly_schedule_to_test(datetime.datetime(2020, 1, 1))
    assert validate_run_config(pipeline_for_test, run_config)
Example #16
def test_my_cron_schedule():
    run_config = my_cron_schedule()
    assert validate_run_config(my_pipeline_on_cron, run_config)
Example #17
def test_configurable_job_schedule():
    context = build_schedule_context(
        scheduled_execution_time=datetime.datetime(2020, 1, 1))
    run_request = configurable_job_schedule(context)
    assert validate_run_config(configurable_job, run_request.run_config)
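`configurable_job_schedule` builds a RunRequest from the tick's scheduled execution time; per the Dagster docs it looks approximately like the following (the op name `configurable_op` is assumed):

from dagster import RunRequest, schedule

@schedule(job=configurable_job, cron_schedule="0 0 * * *")
def configurable_job_schedule(context):
    scheduled_date = context.scheduled_execution_time.strftime("%Y-%m-%d")
    return RunRequest(
        run_key=None,
        run_config={"ops": {"configurable_op": {"config": {"scheduled_date": scheduled_date}}}},
        tags={"date": scheduled_date},
    )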