def test_basic_materialization_event():
    with get_temp_file_name() as filename:
        result = execute_pipeline(
            single_int_output_pipeline(),
            {"solids": {"return_one": {"outputs": [{"result": {"json": {"path": filename}}}]}}},
        )

        assert result.success

        solid_result = result.result_for_solid("return_one")
        step_events = solid_result.step_events_by_kind[StepKind.COMPUTE]
        mat_event = list(
            filter(lambda de: de.event_type == DagsterEventType.ASSET_MATERIALIZATION, step_events)
        )[0]

        mat = mat_event.event_specific_data.materialization
        assert len(mat.metadata_entries) == 1
        assert mat.metadata_entries[0].path

        path = mat.metadata_entries[0].entry_data.path
        with open(path, "r") as ff:
            value = json.loads(ff.read())

        assert value == {"value": 1}
def test_int_pickle_schema_roundtrip():
    with get_temp_file_name() as tmp_file:
        mat_result = _execute_pipeline_with_subset(
            define_test_all_scalars_pipeline(),
            run_config=single_output_env("produce_int", {"pickle": {"path": tmp_file}}),
            solid_selection={"produce_int"},
        )

        assert mat_result.success

        source_result = _execute_pipeline_with_subset(
            define_test_all_scalars_pipeline(),
            run_config=single_input_env("take_int", "num", {"pickle": {"path": tmp_file}}),
            solid_selection={"take_int"},
        )

        assert source_result.result_for_solid("take_int").output_value() == 2
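# single_output_env and single_input_env are helpers defined elsewhere in the test
# module. A minimal sketch of what they plausibly return, inferred from how the
# roundtrip tests above consume them and how the materialization tests below shape
# their run config (the exact helpers are an assumption, not the module's code):
def single_output_env(solid_name, materialization_config):
    # materialize the solid's default "result" output with the given schema config
    return {"solids": {solid_name: {"outputs": [{"result": materialization_config}]}}}


def single_input_env(solid_name, input_name, source_config):
    # source the named input using the same (json/pickle) schema config
    return {"solids": {solid_name: {"inputs": {input_name: source_config}}}}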
def test_basic_start_pipeline_execution_with_materialization(self, graphql_context):
    selector = infer_pipeline_selector(graphql_context, "csv_hello_world")
    with get_temp_file_name() as out_csv_path:
        run_config = {
            "solids": {
                "sum_solid": {
                    "inputs": {"num": file_relative_path(__file__, "../data/num.csv")},
                    "outputs": [{"result": out_csv_path}],
                }
            }
        }

        run_logs = sync_execute_get_run_log_data(
            context=graphql_context,
            variables={
                "executionParams": {
                    "selector": selector,
                    "runConfigData": run_config,
                    "mode": "default",
                }
            },
        )

        step_mat_event = None

        for message in run_logs["messages"]:
            if message["__typename"] == "StepMaterializationEvent":
                # ensure only one materialization event is emitted
                assert step_mat_event is None
                step_mat_event = message

        # ensure we saw the materialization event
        assert step_mat_event
        assert len(step_mat_event["materialization"]["metadataEntries"]) == 1
        assert step_mat_event["materialization"]["metadataEntries"][0]["path"] == out_csv_path
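# infer_pipeline_selector comes from the dagster-graphql test helpers: it resolves a
# pipeline name against the test repository and returns the selector dict that
# executionParams expects. A rough sketch of the resulting shape (field names follow
# the GraphQL PipelineSelector input type; the helper's internals are an assumption):
def infer_pipeline_selector_sketch(repository_location_name, repository_name, pipeline_name):
    return {
        "repositoryLocationName": repository_location_name,
        "repositoryName": repository_name,
        "pipelineName": pipeline_name,
    }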
def test_basic_type_materialization():
    pipeline = single_string_output_pipeline()
    with get_temp_file_name() as filename:
        result = execute_pipeline(
            pipeline,
            {'solids': {'return_foo': {'outputs': [{'result': {'json': {'path': filename}}}]}}},
        )

        assert result.success

        for event in result.event_list:
            if event.event_type == DagsterEventType.STEP_MATERIALIZATION:
                materialization = event.event_specific_data.materialization
                assert len(materialization.metadata_entries) == 3
                assert materialization.metadata_entries[1] == EventMetadataEntry(
                    label='system-type-name',
                    description=None,
                    entry_data=TextMetadataEntryData(text='String'),
                )
                assert materialization.metadata_entries[2] == EventMetadataEntry(
                    label='system-type-description',
                    description=None,
                    entry_data=TextMetadataEntryData(text='Any'),
                )
def test_string_json_schema_roundtrip():
    with get_temp_file_name() as tmp_file:
        mat_result = _execute_pipeline_with_subset(
            define_test_all_scalars_pipeline(),
            environment_dict=single_output_env('produce_string', {'json': {'path': tmp_file}}),
            solid_subset=['produce_string'],
        )

        assert mat_result.success

        source_result = _execute_pipeline_with_subset(
            define_test_all_scalars_pipeline(),
            environment_dict=single_input_env('take_string', 'string', {'json': {'path': tmp_file}}),
            solid_subset=['take_string'],
        )

        assert source_result.result_for_solid('take_string').transformed_value() == 'foo'
def test_basic_int_json_materialization():
    with get_temp_file_name() as filename:
        result = execute_pipeline(
            single_int_output_pipeline(),
            {"solids": {"return_one": {"outputs": [{"result": {"json": {"path": filename}}}]}}},
        )

        assert result.success

        with open(filename, "r") as ff:
            value = json.loads(ff.read())
            assert value == {"value": 1}
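# single_int_output_pipeline (and its sibling single_string_output_pipeline) are
# defined elsewhere in the test suite. Assuming the @solid / @pipeline API of this
# era, a sketch that would satisfy the tests above -- an illustration, not the
# suite's actual definition:
from dagster import Int, OutputDefinition, pipeline, solid


@solid(output_defs=[OutputDefinition(Int, name="result")])
def return_one(_context):
    return 1


def single_int_output_pipeline():
    @pipeline
    def single_int_output():
        return_one()

    return single_int_output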
def test_string_pickle_schema_roundtrip():
    with get_temp_file_name() as tmp_file:
        mat_result = _execute_pipeline_with_subset(
            define_test_all_scalars_pipeline(),
            run_config=single_output_env('produce_string', {'pickle': {'path': tmp_file}}),
            solid_selection={'produce_string'},
        )

        assert mat_result.success

        source_result = _execute_pipeline_with_subset(
            define_test_all_scalars_pipeline(),
            run_config=single_input_env('take_string', 'string', {'pickle': {'path': tmp_file}}),
            solid_selection={'take_string'},
        )

        assert source_result.result_for_solid('take_string').output_value() == 'foo'
def test_basic_execute_plan_with_materialization():
    with get_temp_file_name() as out_csv_path:
        environment_dict = {
            'solids': {
                'sum_solid': {
                    'inputs': {'num': file_relative_path(__file__, '../data/num.csv')},
                    'outputs': [{'result': out_csv_path}],
                }
            }
        }

        instance = DagsterInstance.ephemeral()

        result = execute_dagster_graphql(
            define_test_context(instance=instance),
            EXECUTION_PLAN_QUERY,
            variables={
                'pipeline': {'name': 'csv_hello_world'},
                'environmentConfigData': environment_dict,
                'mode': 'default',
            },
        )

        steps_data = result.data['executionPlan']['steps']
        assert [step_data['key'] for step_data in steps_data] == [
            'sum_solid.compute',
            'sum_sq_solid.compute',
        ]

        pipeline_run = instance.create_run_for_pipeline(
            pipeline=csv_hello_world, environment_dict=environment_dict
        )

        result = execute_dagster_graphql(
            define_test_context(instance=instance),
            EXECUTE_PLAN_QUERY,
            variables={
                'executionParams': {
                    'selector': {'name': 'csv_hello_world'},
                    'environmentConfigData': environment_dict,
                    'stepKeys': ['sum_solid.compute', 'sum_sq_solid.compute'],
                    'executionMetadata': {'runId': pipeline_run.run_id},
                    'mode': 'default',
                }
            },
        )

        assert result.data

        step_mat_event = None

        for message in result.data['executePlan']['stepEvents']:
            if message['__typename'] == 'StepMaterializationEvent':
                # ensure only one materialization event is emitted
                assert step_mat_event is None
                step_mat_event = message

        # ensure we saw the materialization event
        assert step_mat_event
        assert step_mat_event['materialization']
        assert len(step_mat_event['materialization']['metadataEntries']) == 1
        metadata_entry = step_mat_event['materialization']['metadataEntries'][0]
        assert metadata_entry['path'] == out_csv_path
@contextmanager
def get_temp_file_location():
    # yield a path that does not exist yet: the temp file is deleted up front so
    # callers can create a file at that location themselves
    with get_temp_file_name() as path:
        os.unlink(path)
        yield path
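# get_temp_file_name, which every snippet here leans on, is itself a context manager
# from dagster's test utilities. A minimal stdlib sketch of the behavior the snippets
# rely on (a usable temp path, removed on exit) -- an approximation, not the
# library's actual implementation:
import os
import tempfile
from contextlib import contextmanager


@contextmanager
def get_temp_file_name():
    # reserve a name and close the OS-level handle so callers can reopen the file
    fd, path = tempfile.mkstemp()
    os.close(fd)
    try:
        yield path
    finally:
        if os.path.exists(path):
            os.unlink(path)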
def write_zipped_file_to_s3_store(context):
    with get_temp_file_name() as zip_file_name:
        write_zip_file_to_disk(zip_file_name, "an_archive_member", foo_bytes)
        with open(zip_file_name, "rb") as ff:
            s3_file_handle = context.resources.file_manager.write_data(ff.read())
        return s3_file_handle
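# write_zip_file_to_disk and foo_bytes are fixtures defined alongside the test above.
# A plausible stand-in built on the stdlib zipfile module (the names and payload are
# assumptions for illustration):
import zipfile

foo_bytes = b"foo"  # hypothetical payload used by the fixture


def write_zip_file_to_disk(zip_file_path, archive_member_name, data):
    # write `data` as a single member of a freshly created zip archive
    with zipfile.ZipFile(zip_file_path, "w") as zip_file:
        zip_file.writestr(archive_member_name, data)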
def run_hello_world(hello_world):
    assert len(hello_world.input_defs) == 1

    pipeline = PipelineDefinition(
        solids=[dagster_pd.load_csv_solid('load_csv'), hello_world],
        dependencies={
            'hello_world': {'num_csv': DependencyDefinition('load_csv')},
        },
    )

    pipeline_result = execute_pipeline(pipeline, environment=create_num_csv_environment())

    result = pipeline_result.result_for_solid('hello_world')

    assert result.success
    assert result.transformed_value().to_dict('list') == {
        'num1': [1, 3],
        'num2': [2, 4],
        'sum': [3, 7],
    }

    pipeline_two = PipelineDefinition(
        solids=[
            dagster_pd.load_csv_solid('load_csv'),
            hello_world,
            dagster_pd.to_csv_solid('to_csv'),
        ],
        dependencies={
            'hello_world': {'num_csv': DependencyDefinition('load_csv')},
            'to_csv': {'df': DependencyDefinition('hello_world')},
        },
    )

    with get_temp_file_name() as temp_file_name:
        environment = config.Environment(
            solids={
                'load_csv': config.Solid({'path': script_relative_path('num.csv')}),
                'to_csv': config.Solid({'path': temp_file_name}),
            },
        )

        pipeline_result = execute_pipeline(pipeline_two, environment)

        output_result = pipeline_result.result_for_solid('hello_world')

        assert output_result.success
        assert pd.read_csv(temp_file_name).to_dict('list') == {
            'num1': [1, 3],
            'num2': [2, 4],
            'sum': [3, 7],
        }
def test_basic_execute_plan_with_materialization(graphql_context):
    selector = infer_pipeline_selector(graphql_context, "csv_hello_world")
    with get_temp_file_name() as out_csv_path:
        run_config = {
            "solids": {
                "sum_solid": {
                    "inputs": {"num": file_relative_path(__file__, "../data/num.csv")},
                    "outputs": [{"result": out_csv_path}],
                }
            }
        }

        result = execute_dagster_graphql(
            graphql_context,
            EXECUTION_PLAN_QUERY,
            variables={
                "pipeline": selector,
                "runConfigData": run_config,
                "mode": "default",
            },
        )

        steps_data = result.data["executionPlanOrError"]["steps"]
        assert {step_data["key"] for step_data in steps_data} == {
            "sum_solid.compute",
            "sum_sq_solid.compute",
        }

        instance = graphql_context.instance
        pipeline_run = instance.create_run_for_pipeline(
            pipeline_def=csv_hello_world, run_config=run_config
        )

        result = execute_dagster_graphql(
            graphql_context,
            EXECUTE_PLAN_QUERY,
            variables={
                "executionParams": {
                    "selector": selector,
                    "runConfigData": run_config,
                    "stepKeys": ["sum_solid.compute", "sum_sq_solid.compute"],
                    "executionMetadata": {"runId": pipeline_run.run_id},
                    "mode": "default",
                }
            },
        )

        assert result.data

        step_mat_event = None

        for message in result.data["executePlan"]["stepEvents"]:
            if message["__typename"] == "StepMaterializationEvent":
                # ensure only one materialization event is emitted
                assert step_mat_event is None
                step_mat_event = message

        # ensure we saw the materialization event
        assert step_mat_event
        assert step_mat_event["materialization"]
        assert len(step_mat_event["materialization"]["metadataEntries"]) == 1
        metadata_entry = step_mat_event["materialization"]["metadataEntries"][0]
        assert metadata_entry["path"] == out_csv_path