def test_custom_dagster_dataframe_hydration_ok():
    """Round-trip a custom dagster-pandas dataframe type through CSV run config.

    A frame with a ``foo`` column is written to a path obtained from
    ``safe_tempfile_path()``. The run config points the solid's ``test_df``
    input at that CSV and its ``result`` output at a second path. The solid
    adds a ``bar`` column, and the test reads the output CSV back to check it.
    """
    source_frame = DataFrame({'foo': [1, 2, 3]})
    with safe_tempfile_path() as input_csv_fp, safe_tempfile_path() as output_csv_fp:
        source_frame.to_csv(input_csv_fp)

        TestDataFrame = create_dagster_pandas_dataframe_type(
            name='TestDataFrame', columns=[PandasColumn.exists('foo')]
        )

        # The solid and input names below are also used as run-config keys.
        @solid(
            input_defs=[InputDefinition('test_df', TestDataFrame)],
            output_defs=[OutputDefinition(TestDataFrame)],
        )
        def use_test_dataframe(_, test_df):
            test_df['bar'] = [2, 4, 6]
            return test_df

        csv_inputs = {'test_df': {'csv': {'path': input_csv_fp}}}
        csv_outputs = [{'result': {'csv': {'path': output_csv_fp}}}]
        run_config = {
            'solids': {
                'use_test_dataframe': {'inputs': csv_inputs, 'outputs': csv_outputs}
            }
        }

        solid_result = execute_solid(use_test_dataframe, run_config=run_config)
        assert solid_result.success

        # Read back while the output path is still inside the `with` block.
        written_frame = read_csv(output_csv_fp)
        bar_matches = written_frame['bar'] == [2, 4, 6]
        assert all(bar_matches)
def test_custom_dagster_dataframe_parametrizable_input():
    """Exercise a dataframe type with selector-based hydration and materialization.

    The input config selects one of three "doors", each mapped to a one-row
    frame with a ``foo`` column. The output config selects a materializer
    option that yields a single ``Materialization`` labelled ``did nothing``.

    NOTE(review): this module defines other functions with this same name
    further down; a later definition replaces this one at import time, so
    confirm this variant is actually collected.
    """
    prize_behind_door = {'door_a': 'goat', 'door_b': 'car', 'door_c': 'goat'}

    @input_selector_schema(
        Selector({'door_a': Field(str), 'door_b': Field(str), 'door_c': Field(str)})
    )
    def silly_hydrator(_, which_door, _field):
        # Guard clause: anything that is not a known door is an invariant violation.
        if which_door not in prize_behind_door:
            raise DagsterInvariantViolationError(
                'You did not pick a door. You chose: {which_door}'.format(which_door=which_door)
            )
        return DataFrame({'foo': [prize_behind_door[which_door]]})

    @output_selector_schema(Selector({'devnull': Field(str), 'nothing': Field(str)}))
    def silly_materializer(_, _location, _field, _value):
        return Materialization(label='did nothing', description='just one of those days')

    TestDataFrame = create_dagster_pandas_dataframe_type(
        name='TestDataFrame',
        columns=[PandasColumn.exists('foo')],
        input_hydration_config=silly_hydrator,
        output_materialization_config=silly_materializer,
    )

    # The solid and input names below are also used as run-config keys.
    @solid(
        input_defs=[InputDefinition('df', TestDataFrame)],
        output_defs=[OutputDefinition(TestDataFrame)],
    )
    def did_i_win(_, df):
        return df

    solid_config = {
        'inputs': {'df': {'door_a': 'bar'}},
        'outputs': [{'result': {'devnull': 'baz'}}],
    }
    solid_result = execute_solid(
        did_i_win, run_config={'solids': {'did_i_win': solid_config}}
    )
    assert solid_result.success

    loaded_frame = solid_result.output_value()
    assert isinstance(loaded_frame, DataFrame)
    assert loaded_frame['foo'].tolist() == ['goat']

    emitted = solid_result.materialization_events_during_compute
    assert len(emitted) == 1
    first_materialization = emitted[0].event_specific_data.materialization
    assert first_materialization.label == 'did nothing'
def test_custom_dagster_dataframe_loading_ok():
    """Round-trip a custom dagster-pandas dataframe type through an op in a graph.

    A frame with a ``foo`` column is written to a path obtained from
    ``safe_tempfile_path()``. The run config points the op's ``test_df`` input
    at that CSV and its ``result`` output at a second path. The op adds a
    ``bar`` column, and the test reads the output CSV back to check it.

    NOTE(review): this module defines another function with this same name
    further down; the later definition replaces this one at import time, so
    confirm this variant is actually collected.
    """
    source_frame = DataFrame({"foo": [1, 2, 3]})
    with safe_tempfile_path() as input_csv_fp, safe_tempfile_path() as output_csv_fp:
        source_frame.to_csv(input_csv_fp)

        TestDataFrame = create_dagster_pandas_dataframe_type(
            name="TestDataFrame",
            columns=[PandasColumn.exists("foo")],
        )

        # The op and input names below are also used as run-config keys.
        @op(
            ins={"test_df": In(TestDataFrame)},
            out=Out(TestDataFrame),
        )
        def use_test_dataframe(_, test_df):
            test_df["bar"] = [2, 4, 6]
            return test_df

        @graph
        def basic_graph():
            use_test_dataframe()

        op_config = {
            "inputs": {"test_df": {"csv": {"path": input_csv_fp}}},
            "outputs": [{"result": {"csv": {"path": output_csv_fp}}}],
        }
        result = basic_graph.execute_in_process(
            run_config={"ops": {"use_test_dataframe": op_config}}
        )
        assert result.success

        # Read back while the output path is still inside the `with` block.
        written_frame = read_csv(output_csv_fp)
        bar_matches = written_frame["bar"] == [2, 4, 6]
        assert all(bar_matches)
def test_custom_dagster_dataframe_loading_ok():
    """Round-trip a custom dagster-pandas dataframe type through a solid.

    A frame with a ``foo`` column is written to a path obtained from
    ``safe_tempfile_path()``. The run config points the solid's ``test_df``
    input at that CSV and its ``result`` output at a second path. The solid
    adds a ``bar`` column, and the test reads the output CSV back to check it.
    """
    source_frame = DataFrame({"foo": [1, 2, 3]})
    with safe_tempfile_path() as input_csv_fp, safe_tempfile_path() as output_csv_fp:
        source_frame.to_csv(input_csv_fp)

        TestDataFrame = create_dagster_pandas_dataframe_type(
            name="TestDataFrame",
            columns=[PandasColumn.exists("foo")],
        )

        # The solid and input names below are also used as run-config keys.
        @solid(
            input_defs=[InputDefinition("test_df", TestDataFrame)],
            output_defs=[OutputDefinition(TestDataFrame)],
        )
        def use_test_dataframe(_, test_df):
            test_df["bar"] = [2, 4, 6]
            return test_df

        solid_config = {
            "inputs": {"test_df": {"csv": {"path": input_csv_fp}}},
            "outputs": [{"result": {"csv": {"path": output_csv_fp}}}],
        }
        solid_result = execute_solid(
            use_test_dataframe,
            run_config={"solids": {"use_test_dataframe": solid_config}},
        )
        assert solid_result.success

        # Read back while the output path is still inside the `with` block.
        written_frame = read_csv(output_csv_fp)
        bar_matches = written_frame["bar"] == [2, 4, 6]
        assert all(bar_matches)
def test_custom_dagster_dataframe_parametrizable_input():
    """Exercise a dataframe type with a custom loader and materializer on a solid.

    The loader takes the first key of its selector config as the chosen
    "door" and returns a one-row frame with a ``foo`` column. The materializer
    returns an ``AssetMaterialization`` with asset key ``nothing``, and the
    test expects exactly one materialization event whose label is ``nothing``.

    NOTE(review): this module defines another function with this same name
    further down; the later definition replaces this one at import time, so
    confirm this variant is actually collected.
    """
    prize_behind_door = {"door_a": "goat", "door_b": "car", "door_c": "goat"}

    @dagster_type_loader(
        Selector({"door_a": Field(str), "door_b": Field(str), "door_c": Field(str)})
    )
    def silly_loader(_, config):
        selected_keys = list(config.keys())
        which_door = selected_keys[0]
        # Guard clause: anything that is not a known door is an invariant violation.
        if which_door not in prize_behind_door:
            raise DagsterInvariantViolationError(
                "You did not pick a door. You chose: {which_door}".format(which_door=which_door)
            )
        return DataFrame({"foo": [prize_behind_door[which_door]]})

    @dagster_type_materializer(Selector({"devnull": Field(str), "nothing": Field(str)}))
    def silly_materializer(_, _config, _value):
        return AssetMaterialization(asset_key="nothing", description="just one of those days")

    TestDataFrame = create_dagster_pandas_dataframe_type(
        name="TestDataFrame",
        columns=[PandasColumn.exists("foo")],
        loader=silly_loader,
        materializer=silly_materializer,
    )

    # The solid and input names below are also used as run-config keys.
    @solid(
        input_defs=[InputDefinition("df", TestDataFrame)],
        output_defs=[OutputDefinition(TestDataFrame)],
    )
    def did_i_win(_, df):
        return df

    solid_config = {
        "inputs": {"df": {"door_a": "bar"}},
        "outputs": [{"result": {"devnull": "baz"}}],
    }
    solid_result = execute_solid(
        did_i_win, run_config={"solids": {"did_i_win": solid_config}}
    )
    assert solid_result.success

    loaded_frame = solid_result.output_value()
    assert isinstance(loaded_frame, DataFrame)
    assert loaded_frame["foo"].tolist() == ["goat"]

    emitted = solid_result.materialization_events_during_compute
    assert len(emitted) == 1
    first_materialization = emitted[0].event_specific_data.materialization
    assert first_materialization.label == "nothing"
def test_custom_dagster_dataframe_parametrizable_input():
    """Exercise a dataframe type with a custom loader and materializer on an op.

    The loader takes the first key of its selector config as the chosen
    "door" and returns a one-row frame with a ``foo`` column. The materializer
    returns an ``AssetMaterialization`` with asset key ``nothing``, and the
    test expects exactly one step-materialization event whose label is
    ``nothing``.
    """
    prize_behind_door = {"door_a": "goat", "door_b": "car", "door_c": "goat"}

    @dagster_type_loader(
        Selector({"door_a": Field(str), "door_b": Field(str), "door_c": Field(str)})
    )
    def silly_loader(_, config):
        selected_keys = list(config.keys())
        which_door = selected_keys[0]
        # Guard clause: anything that is not a known door is an invariant violation.
        if which_door not in prize_behind_door:
            raise DagsterInvariantViolationError(
                "You did not pick a door. You chose: {which_door}".format(which_door=which_door)
            )
        return DataFrame({"foo": [prize_behind_door[which_door]]})

    @dagster_type_materializer(Selector({"devnull": Field(str), "nothing": Field(str)}))
    def silly_materializer(_, _config, _value):
        return AssetMaterialization(asset_key="nothing", description="just one of those days")

    TestDataFrame = create_dagster_pandas_dataframe_type(
        name="TestDataFrame",
        columns=[PandasColumn.exists("foo")],
        loader=silly_loader,
        materializer=silly_materializer,
    )

    # The op and input names below are also used as run-config keys.
    @op(
        ins={"df": In(TestDataFrame)},
        out=Out(TestDataFrame),
    )
    def did_i_win(_, df):
        return df

    @graph
    def basic_graph():
        did_i_win()

    op_config = {
        "inputs": {"df": {"door_a": "bar"}},
        "outputs": [{"result": {"devnull": "baz"}}],
    }
    result = basic_graph.execute_in_process(run_config={"ops": {"did_i_win": op_config}})
    assert result.success

    loaded_frame = result.output_for_node("did_i_win")
    assert isinstance(loaded_frame, DataFrame)
    assert loaded_frame["foo"].tolist() == ["goat"]

    # Keep only the materialization events out of everything the run emitted.
    emitted = [
        node_event
        for node_event in result.all_node_events
        if node_event.is_step_materialization
    ]
    assert len(emitted) == 1
    first_materialization = emitted[0].event_specific_data.materialization
    assert first_materialization.label == "nothing"
def test_exists_column_composition():
    """Check that ``PandasColumn.exists`` builds a column with one existence constraint."""
    column = PandasColumn.exists('foo')
    assert isinstance(column, PandasColumn)

    constraints = column.constraints
    assert len(constraints) == 1
    assert isinstance(constraints[0], ColumnExistsConstraint)