Example #1
def test_int_pickle_schema_roundtrip():
    with get_temp_file_name() as tmp_file:
        mat_result = _execute_pipeline_with_subset(
            define_test_all_scalars_pipeline(),
            environment_dict=single_output_env('produce_int',
                                               {'pickle': {
                                                   'path': tmp_file
                                               }}),
            solid_subset=['produce_int'],
        )

        assert mat_result.success

        source_result = _execute_pipeline_with_subset(
            define_test_all_scalars_pipeline(),
            environment_dict=single_input_env('take_int', 'num',
                                              {'pickle': {
                                                  'path': tmp_file
                                              }}),
            solid_subset=['take_int'],
        )

        assert source_result.result_for_solid('take_int').output_value() == 2
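The single_output_env and single_input_env helpers are defined elsewhere in the test module; judging from the inline config in Example #2 below, they most likely just build the output-materialization and input-source config dicts. A minimal sketch under that assumption:

def single_output_env(solid_name, materialization_spec):
    # e.g. {'solids': {'produce_int': {'outputs': [{'result': {'pickle': {'path': ...}}}]}}}
    return {'solids': {solid_name: {'outputs': [{'result': materialization_spec}]}}}

def single_input_env(solid_name, input_name, input_spec):
    # Loads the named input from the given source spec.
    return {'solids': {solid_name: {'inputs': {input_name: input_spec}}}}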
Example #2
def test_basic_materialization_event():
    with get_temp_file_name() as filename:
        result = execute_pipeline(
            single_int_output_pipeline(),
            {"solids": {"return_one": {"outputs": [{"result": {"json": {"path": filename}}}]}}},
        )

        assert result.success
        solid_result = result.result_for_solid("return_one")
        step_events = solid_result.step_events_by_kind[StepKind.COMPUTE]
        mat_event = list(
            filter(lambda de: de.event_type == DagsterEventType.ASSET_MATERIALIZATION, step_events)
        )[0]

        mat = mat_event.event_specific_data.materialization

        assert len(mat.metadata_entries) == 1
        assert mat.metadata_entries[0].path
        path = mat.metadata_entries[0].entry_data.path

        with open(path, "r") as ff:
            value = json.loads(ff.read())
            assert value == {"value": 1}
Example #3
def test_basic_materialization_event():
    with get_temp_file_name() as filename:
        result = execute_pipeline(
            single_int_output_pipeline(),
            {'solids': {'return_one': {'outputs': [{'result': {'json': {'path': filename}}}]}}},
        )

        assert result.success
        solid_result = result.result_for_solid('return_one')
        step_events = solid_result.step_events_by_kind[StepKind.COMPUTE]
        mat_event = list(
            filter(lambda de: de.event_type == DagsterEventType.STEP_MATERIALIZATION, step_events)
        )[0]

        mat = mat_event.event_specific_data.materialization

        assert len(mat.metadata_entries) == 1
        assert mat.metadata_entries[0].path
        path = mat.metadata_entries[0].entry_data.path

        with open(path, 'r') as ff:
            value = json.loads(ff.read())
            assert value == {'value': 1}
Example #4
def test_int_pickle_schema_roundtrip():
    with get_temp_file_name() as tmp_file:
        mat_result = _execute_pipeline_with_subset(
            define_test_all_scalars_pipeline(),
            run_config=single_output_env("produce_int",
                                         {"pickle": {
                                             "path": tmp_file
                                         }}),
            solid_selection={"produce_int"},
        )

        assert mat_result.success

        source_result = _execute_pipeline_with_subset(
            define_test_all_scalars_pipeline(),
            run_config=single_input_env("take_int", "num",
                                        {"pickle": {
                                            "path": tmp_file
                                        }}),
            solid_selection={"take_int"},
        )

        assert source_result.result_for_solid("take_int").output_value() == 2
Example #5
    def test_basic_start_pipeline_execution_with_materialization(self, graphql_context):
        selector = infer_pipeline_selector(graphql_context, "csv_hello_world")

        with get_temp_file_name() as out_csv_path:

            run_config = {
                "solids": {
                    "sum_solid": {
                        "inputs": {"num": file_relative_path(__file__, "../data/num.csv")},
                        "outputs": [{"result": out_csv_path}],
                    }
                }
            }

            run_logs = sync_execute_get_run_log_data(
                context=graphql_context,
                variables={
                    "executionParams": {
                        "selector": selector,
                        "runConfigData": run_config,
                        "mode": "default",
                    }
                },
            )

            step_mat_event = None

            for message in run_logs["messages"]:
                if message["__typename"] == "StepMaterializationEvent":
                    # ensure only one event
                    assert step_mat_event is None
                    step_mat_event = message

            # ensure only one event
            assert step_mat_event
            assert len(step_mat_event["materialization"]["metadataEntries"]) == 1
            assert step_mat_event["materialization"]["metadataEntries"][0]["path"] == out_csv_path
Example #6
    def test_basic_start_pipeline_execution_with_materialization(self, graphql_context):
        selector = infer_pipeline_selector(graphql_context, 'csv_hello_world')

        with get_temp_file_name() as out_csv_path:

            run_config = {
                'solids': {
                    'sum_solid': {
                        'inputs': {'num': file_relative_path(__file__, '../data/num.csv')},
                        'outputs': [{'result': out_csv_path}],
                    }
                }
            }

            run_logs = sync_execute_get_run_log_data(
                context=graphql_context,
                variables={
                    'executionParams': {
                        'selector': selector,
                        'runConfigData': run_config,
                        'mode': 'default',
                    }
                },
            )

            step_mat_event = None

            for message in run_logs['messages']:
                if message['__typename'] == 'StepMaterializationEvent':
                    # ensure only one event
                    assert step_mat_event is None
                    step_mat_event = message

            # ensure only one event
            assert step_mat_event
            assert len(step_mat_event['materialization']['metadataEntries']) == 1
            assert step_mat_event['materialization']['metadataEntries'][0]['path'] == out_csv_path
Example #7
def test_basic_type_materialization():
    pipeline = single_string_output_pipeline()

    with get_temp_file_name() as filename:
        result = execute_pipeline(
            pipeline,
            {
                'solids': {
                    'return_foo': {
                        'outputs': [{
                            'result': {
                                'json': {
                                    'path': filename
                                }
                            }
                        }]
                    }
                }
            },
        )

        assert result.success
        for event in result.event_list:
            if event.event_type == DagsterEventType.STEP_MATERIALIZATION:
                event = event.event_specific_data.materialization
                assert len(event.metadata_entries) == 3
                assert event.metadata_entries[1] == EventMetadataEntry(
                    label='system-type-name',
                    description=None,
                    entry_data=TextMetadataEntryData(text='String'),
                )
                assert event.metadata_entries[2] == EventMetadataEntry(
                    label='system-type-description',
                    description=None,
                    entry_data=TextMetadataEntryData(text='Any'),
                )
Example #8
def test_basic_int_json_materialization():
    with get_temp_file_name() as filename:
        result = execute_pipeline(
            single_int_output_pipeline(),
            {
                'solids': {
                    'return_one': {
                        'outputs': [{
                            'result': {
                                'json': {
                                    'path': filename
                                }
                            }
                        }]
                    }
                }
            },
        )

        assert result.success

        with open(filename, 'r') as ff:
            value = json.loads(ff.read())
            assert value == {'value': 1}
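Note that the 'json' materialization option wraps the scalar in a {'value': ...} envelope, which is why the file reads back as {'value': 1} rather than 1. A sketch of that write step (hypothetical helper name, not a dagster API):

import json

def write_json_materialization(path, value):
    # Wrap the output value the way the assertion above expects.
    with open(path, 'w') as f:
        json.dump({'value': value}, f)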
Example #9
def test_string_json_schema_roundtrip():
    with get_temp_file_name() as tmp_file:
        mat_result = _execute_pipeline_with_subset(
            define_test_all_scalars_pipeline(),
            environment_dict=single_output_env('produce_string',
                                               {'json': {
                                                   'path': tmp_file
                                               }}),
            solid_subset=['produce_string'],
        )

        assert mat_result.success

        source_result = _execute_pipeline_with_subset(
            define_test_all_scalars_pipeline(),
            environment_dict=single_input_env('take_string', 'string',
                                              {'json': {
                                                  'path': tmp_file
                                              }}),
            solid_subset=['take_string'],
        )

        assert source_result.result_for_solid('take_string').transformed_value() == 'foo'
Example #10
def test_basic_int_json_materialization():
    with get_temp_file_name() as filename:
        result = execute_pipeline(
            single_int_output_pipeline(),
            {
                "solids": {
                    "return_one": {
                        "outputs": [{
                            "result": {
                                "json": {
                                    "path": filename
                                }
                            }
                        }]
                    }
                }
            },
        )

        assert result.success

        with open(filename, "r") as ff:
            value = json.loads(ff.read())
            assert value == {"value": 1}
Example #11
def test_string_pickle_schema_roundtrip():
    with get_temp_file_name() as tmp_file:
        mat_result = _execute_pipeline_with_subset(
            define_test_all_scalars_pipeline(),
            run_config=single_output_env('produce_string',
                                         {'pickle': {
                                             'path': tmp_file
                                         }}),
            solid_selection={'produce_string'},
        )

        assert mat_result.success

        source_result = _execute_pipeline_with_subset(
            define_test_all_scalars_pipeline(),
            run_config=single_input_env('take_string', 'string',
                                        {'pickle': {
                                            'path': tmp_file
                                        }}),
            solid_selection={'take_string'},
        )

        assert source_result.result_for_solid('take_string').output_value() == 'foo'
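The 'pickle' option, by contrast, appears to round-trip the raw value unchanged: output_value() returns exactly the 'foo' that was produced. A sketch of the write/read pair this implies (hypothetical helper names, not dagster APIs):

import pickle

def write_pickle(path, value):
    with open(path, 'wb') as f:
        pickle.dump(value, f)

def read_pickle(path):
    with open(path, 'rb') as f:
        return pickle.load(f)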
Example #12
def test_basic_execute_plan_with_materialization():
    with get_temp_file_name() as out_csv_path:

        environment_dict = {
            'solids': {
                'sum_solid': {
                    'inputs': {'num': file_relative_path(__file__, '../data/num.csv')},
                    'outputs': [{'result': out_csv_path}],
                }
            }
        }

        instance = DagsterInstance.ephemeral()

        result = execute_dagster_graphql(
            define_test_context(instance=instance),
            EXECUTION_PLAN_QUERY,
            variables={
                'pipeline': {'name': 'csv_hello_world'},
                'environmentConfigData': environment_dict,
                'mode': 'default',
            },
        )

        steps_data = result.data['executionPlan']['steps']

        assert [step_data['key'] for step_data in steps_data] == [
            'sum_solid.compute',
            'sum_sq_solid.compute',
        ]

        pipeline_run = instance.create_run_for_pipeline(
            pipeline=csv_hello_world, environment_dict=environment_dict
        )

        result = execute_dagster_graphql(
            define_test_context(instance=instance),
            EXECUTE_PLAN_QUERY,
            variables={
                'executionParams': {
                    'selector': {'name': 'csv_hello_world'},
                    'environmentConfigData': environment_dict,
                    'stepKeys': ['sum_solid.compute', 'sum_sq_solid.compute'],
                    'executionMetadata': {'runId': pipeline_run.run_id},
                    'mode': 'default',
                }
            },
        )

        assert result.data

        step_mat_event = None

        for message in result.data['executePlan']['stepEvents']:
            if message['__typename'] == 'StepMaterializationEvent':
                # ensure only one event
                assert step_mat_event is None
                step_mat_event = message

        # ensure only one event
        assert step_mat_event
        assert step_mat_event['materialization']
        assert len(step_mat_event['materialization']['metadataEntries']) == 1
        metadata_entry = step_mat_event['materialization']['metadataEntries'][0]
        assert metadata_entry['path'] == out_csv_path
Example #13
@contextmanager
def get_temp_file_location():
    with get_temp_file_name() as path:
        # Remove the file so only the reserved path is handed back.
        os.unlink(path)
        yield path
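Example #13 builds get_temp_file_location on top of get_temp_file_name, which suggests the latter yields the path of an already-created temp file and deletes it on exit. A minimal self-contained sketch under that assumption:

import os
import tempfile
from contextlib import contextmanager

@contextmanager
def get_temp_file_name():
    # Reserve a unique temp file; close the OS-level handle so the
    # caller can reopen the file by path on any platform.
    fd, path = tempfile.mkstemp()
    os.close(fd)
    try:
        yield path
    finally:
        # Best-effort cleanup; the caller may have unlinked it already.
        if os.path.exists(path):
            os.unlink(path)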
Example #14
def write_zipped_file_to_s3_store(context):
    with get_temp_file_name() as zip_file_name:
        write_zip_file_to_disk(zip_file_name, "an_archive_member", foo_bytes)
        with open(zip_file_name, "rb") as ff:
            s3_file_handle = context.resources.file_manager.write_data(ff.read())
            return s3_file_handle
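Example #14 depends on a write_zip_file_to_disk helper and a foo_bytes payload defined elsewhere in its test module. A sketch consistent with how they are used here:

import zipfile

foo_bytes = b"foo"  # illustrative payload

def write_zip_file_to_disk(zip_file_name, archive_member, data):
    # Create an archive at zip_file_name containing a single member.
    with zipfile.ZipFile(zip_file_name, "w") as zf:
        zf.writestr(archive_member, data)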
Example #15
def run_hello_world(hello_world):
    assert len(hello_world.input_defs) == 1

    pipeline = PipelineDefinition(
        solids=[
            dagster_pd.load_csv_solid('load_csv'),
            hello_world,
        ],
        dependencies={
            'hello_world': {
                'num_csv': DependencyDefinition('load_csv'),
            },
        },
    )

    pipeline_result = execute_pipeline(
        pipeline,
        environment=create_num_csv_environment(),
    )

    result = pipeline_result.result_for_solid('hello_world')

    assert result.success

    assert result.transformed_value().to_dict('list') == {
        'num1': [1, 3],
        'num2': [2, 4],
        'sum': [3, 7],
    }

    pipeline_two = PipelineDefinition(
        solids=[
            dagster_pd.load_csv_solid('load_csv'),
            hello_world,
            dagster_pd.to_csv_solid('to_csv'),
        ],
        dependencies={
            'hello_world': {
                'num_csv': DependencyDefinition('load_csv'),
            },
            'to_csv': {
                'df': DependencyDefinition('hello_world'),
            }
        })

    with get_temp_file_name() as temp_file_name:
        environment = config.Environment(
            solids={
                'load_csv': config.Solid({
                    'path': script_relative_path('num.csv'),
                }),
                'to_csv': config.Solid({
                    'path': temp_file_name,
                }),
            },
        )
        pipeline_result = execute_pipeline(
            pipeline_two,
            environment,
        )

        output_result = pipeline_result.result_for_solid('hello_world')

        assert output_result.success

        assert pd.read_csv(temp_file_name).to_dict('list') == {
            'num1': [1, 3],
            'num2': [2, 4],
            'sum': [3, 7],
        }
Example #16
def test_basic_execute_plan_with_materialization(graphql_context):
    selector = infer_pipeline_selector(graphql_context, 'csv_hello_world')
    with get_temp_file_name() as out_csv_path:

        run_config = {
            'solids': {
                'sum_solid': {
                    'inputs': {
                        'num': file_relative_path(__file__, '../data/num.csv')
                    },
                    'outputs': [{
                        'result': out_csv_path
                    }],
                }
            }
        }

        result = execute_dagster_graphql(
            graphql_context,
            EXECUTION_PLAN_QUERY,
            variables={
                'pipeline': selector,
                'runConfigData': run_config,
                'mode': 'default',
            },
        )

        steps_data = result.data['executionPlanOrError']['steps']

        assert set([step_data['key'] for step_data in steps_data]) == set([
            'sum_solid.compute',
            'sum_sq_solid.compute',
        ])

        instance = graphql_context.instance

        pipeline_run = instance.create_run_for_pipeline(
            pipeline_def=csv_hello_world, run_config=run_config)

        result = execute_dagster_graphql(
            graphql_context,
            EXECUTE_PLAN_QUERY,
            variables={
                'executionParams': {
                    'selector': selector,
                    'runConfigData': run_config,
                    'stepKeys': ['sum_solid.compute', 'sum_sq_solid.compute'],
                    'executionMetadata': {
                        'runId': pipeline_run.run_id
                    },
                    'mode': 'default',
                }
            },
        )

        assert result.data

        step_mat_event = None

        for message in result.data['executePlan']['stepEvents']:
            if message['__typename'] == 'StepMaterializationEvent':
                # ensure only one event
                assert step_mat_event is None
                step_mat_event = message

        # ensure only one event
        assert step_mat_event
        assert step_mat_event['materialization']
        assert len(step_mat_event['materialization']['metadataEntries']) == 1
        metadata_entry = step_mat_event['materialization']['metadataEntries'][0]
        assert metadata_entry['path'] == out_csv_path
Example #17
def test_basic_execute_plan_with_materialization(graphql_context):
    selector = infer_pipeline_selector(graphql_context, "csv_hello_world")
    with get_temp_file_name() as out_csv_path:

        run_config = {
            "solids": {
                "sum_solid": {
                    "inputs": {
                        "num": file_relative_path(__file__, "../data/num.csv")
                    },
                    "outputs": [{
                        "result": out_csv_path
                    }],
                }
            }
        }

        result = execute_dagster_graphql(
            graphql_context,
            EXECUTION_PLAN_QUERY,
            variables={
                "pipeline": selector,
                "runConfigData": run_config,
                "mode": "default",
            },
        )

        steps_data = result.data["executionPlanOrError"]["steps"]

        assert set([step_data["key"] for step_data in steps_data]) == set([
            "sum_solid.compute",
            "sum_sq_solid.compute",
        ])

        instance = graphql_context.instance

        pipeline_run = instance.create_run_for_pipeline(
            pipeline_def=csv_hello_world, run_config=run_config)

        result = execute_dagster_graphql(
            graphql_context,
            EXECUTE_PLAN_QUERY,
            variables={
                "executionParams": {
                    "selector": selector,
                    "runConfigData": run_config,
                    "stepKeys": ["sum_solid.compute", "sum_sq_solid.compute"],
                    "executionMetadata": {
                        "runId": pipeline_run.run_id
                    },
                    "mode": "default",
                },
            },
        )

        assert result.data

        step_mat_event = None

        for message in result.data["executePlan"]["stepEvents"]:
            if message["__typename"] == "StepMaterializationEvent":
                # ensure only one event
                assert step_mat_event is None
                step_mat_event = message

        # ensure only one event
        assert step_mat_event
        assert step_mat_event["materialization"]
        assert len(step_mat_event["materialization"]["metadataEntries"]) == 1
        metadata_entry = step_mat_event["materialization"]["metadataEntries"][
            0]
        assert metadata_entry["path"] == out_csv_path