def RF_solid():
    """Return the dagstermill solid named ``random_forest_regression``.

    The solid is backed by the notebook path that
    ``nb_test_path("tutorial_RF")`` resolves to and declares a single
    input, ``df``, typed as ``DataFrame``.
    """
    notebook = nb_test_path("tutorial_RF")
    frame_input = InputDefinition(name="df", dagster_type=DataFrame)
    return dagstermill.define_dagstermill_solid(
        "random_forest_regression",
        notebook,
        input_defs=[frame_input],
    )
def define_hello_world_with_output():
    """Return the dagstermill solid named ``hello_world_output``.

    The notebook path comes from ``nb_test_path('hello_world_output')``.
    The third and fourth arguments are passed positionally: an empty list
    and a list holding one default-constructed ``OutputDefinition``.
    """
    notebook = nb_test_path('hello_world_output')
    # Presumably the input and output definitions, in that order -- the
    # positional order is kept exactly as the call site had it.
    third_arg = []
    fourth_arg = [OutputDefinition()]
    return dagstermill.define_dagstermill_solid(
        'hello_world_output', notebook, third_arg, fourth_arg
    )
def clean_data_solid():
    """Return the dagstermill solid named ``clean_data``.

    The solid is backed by the notebook path from
    ``nb_test_path('clean_data')`` and declares one output built as
    ``OutputDefinition(DataFrame)``.
    """
    notebook = nb_test_path('clean_data')
    frame_output = OutputDefinition(DataFrame)
    return dagstermill.define_dagstermill_solid(
        'clean_data',
        notebook,
        output_defs=[frame_output],
    )
def define_hello_world_with_output_notebook_solid():
    """Return the dagstermill solid named ``hello_world_with_output_notebook``.

    It reuses the ``hello_world`` notebook (via ``nb_test_path``) and passes
    ``output_notebook='notebook'`` through to ``define_dagstermill_solid``.
    """
    notebook = nb_test_path('hello_world')
    # NOTE(review): 'notebook' is presumably the name of the output that
    # carries the executed notebook -- confirm against the pinned
    # dagstermill version.
    solid_def = dagstermill.define_dagstermill_solid(
        'hello_world_with_output_notebook',
        notebook,
        output_notebook='notebook',
    )
    return solid_def
svd.fit(user_story_matrix.matrix) total_explained_variance = svd.explained_variance_ratio_.sum() yield Output( svd, metadata={ "Total explained variance ratio": total_explained_variance, "Number of components": n_components, }, ) model_perf_notebook = define_dagstermill_solid( "recommender_model_perf", notebook_path=file_relative_path(__file__, "../notebooks/recommender_model_perf.ipynb"), input_defs=[InputDefinition(dagster_type=TruncatedSVD, name="recommender_model")], output_notebook_name="perf_notebook", ) @op( ins={ "story_titles": In( root_manager_key="warehouse_loader", metadata={ "table": "hackernews.stories", "columns": ["id", "title"], }, ), }, out=Out(
@solid_definition def bad_kernel_solid(): return dagstermill.define_dagstermill_solid('bad_kernel_solid', nb_test_path('bad_kernel')) def define_bad_kernel_pipeline(): return PipelineDefinition(name='bad_kernel_pipeline', solid_defs=[bad_kernel_solid]) reimport_solid = dagstermill.define_dagstermill_solid( 'reimport', nb_test_path('reimport'), input_defs=[InputDefinition('l', List[int])], output_defs=[OutputDefinition()], ) @solid def lister(_): return [1, 2, 3] @pipeline def reimport_pipeline(): reimport_solid(lister()) @solid_definition
def bad_kernel_solid():
    """Return the dagstermill solid named ``bad_kernel_solid``.

    The solid is backed by the notebook path that
    ``nb_test_path('bad_kernel')`` resolves to; no inputs, outputs or
    config are declared here.
    """
    notebook = nb_test_path('bad_kernel')
    return dagstermill.define_dagstermill_solid('bad_kernel_solid', notebook)
def define_hello_world_with_output_notebook_solid():
    """Return the dagstermill solid named ``hello_world_with_output_notebook``.

    It reuses the ``hello_world`` notebook (via ``nb_test_path``) and passes
    ``output_notebook="notebook"`` through to ``define_dagstermill_solid``.
    """
    notebook = nb_test_path("hello_world")
    # NOTE(review): "notebook" is presumably the name of the output that
    # carries the executed notebook -- confirm against the pinned
    # dagstermill version.
    solid_def = dagstermill.define_dagstermill_solid(
        "hello_world_with_output_notebook",
        notebook,
        output_notebook="notebook",
    )
    return solid_def
description= 'An inventory of the right sizing operations that are recommended and validated.', metadata_entries=[ EventMetadataEntry.path(output_path, 'operation_inventory_path') ], ) yield Output(None) right_size_report = dm.define_dagstermill_solid( 'right_size_report', script_relative_path('rightsizereport.ipynb'), input_defs=[ InputDefinition('advisor_analysis', Dict[str, RightSizeAnalysis]), InputDefinition('local_analysis', Dict[str, RightSizeAnalysis]), InputDefinition('cpu_utilization', UtilizationDataFrame), InputDefinition('mem_utilization', UtilizationDataFrame), InputDefinition('disk_utilization', UtilizationDataFrame), InputDefinition('compute_specs', AzureComputeSpecifications), InputDefinition('resources', ResourcesDataFrame), ], output_notebook='output_notebook') @solid(input_defs=[InputDefinition('report_notebook', FileHandle)]) def write_html_report(context: SolidExecutionContext, report_notebook: FileHandle) -> Nothing: with context.file_manager.read(report_notebook) as node_file: node = nbformat.read(node_file, nbformat.NO_CONVERT) html = convert_nodebook_node_to_html(node, full_width=True) handle = context.file_manager.write_data(html.encode(), ext='html')
def define_hello_world_solid():
    """Return the dagstermill solid named ``hello_world``.

    The solid is backed by the notebook path that
    ``nb_test_path("hello_world")`` resolves to; nothing else is configured.
    """
    notebook = nb_test_path("hello_world")
    return dagstermill.define_dagstermill_solid("hello_world", notebook)
def define_hello_world_config_solid():
    """Return the dagstermill solid named ``hello_world_config``.

    The solid is backed by the ``hello_world_config`` notebook (via
    ``nb_test_path``) and takes a config schema with one optional string
    field, ``greeting``, whose default value is ``"hello"``.
    """
    notebook = nb_test_path("hello_world_config")
    greeting_field = Field(String, is_required=False, default_value="hello")
    schema = {"greeting": greeting_field}
    return dagstermill.define_dagstermill_solid(
        "hello_world_config",
        notebook,
        config_schema=schema,
    )
def yield_obj_solid():
    """Return the dagstermill solid named ``yield_obj``.

    The notebook path comes from ``nb_test_path("yield_obj")``. The third
    and fourth arguments are passed positionally: an empty list and a list
    holding ``OutputDefinition(Any)``.
    """
    notebook = nb_test_path("yield_obj")
    # Presumably the input and output definitions, in that order; the
    # positional form of the call is kept unchanged.
    third_arg = []
    fourth_arg = [OutputDefinition(Any)]
    return dagstermill.define_dagstermill_solid(
        "yield_obj", notebook, third_arg, fourth_arg
    )
def yield_3_solid():
    """Return the dagstermill solid named ``yield_3``.

    The notebook path comes from ``nb_test_path("yield_3")``. The third and
    fourth arguments are passed positionally: an empty list and a list
    holding ``OutputDefinition(Int)``.
    """
    notebook = nb_test_path("yield_3")
    # Presumably the input and output definitions, in that order; the
    # positional form of the call is kept unchanged.
    third_arg = []
    fourth_arg = [OutputDefinition(Int)]
    return dagstermill.define_dagstermill_solid(
        "yield_3", notebook, third_arg, fourth_arg
    )
def bad_kernel_solid():
    """Return the dagstermill solid named ``bad_kernel_solid``.

    The solid is backed by the notebook path that
    ``nb_test_path("bad_kernel")`` resolves to; no inputs, outputs or
    config are declared here.
    """
    notebook = nb_test_path("bad_kernel")
    return dagstermill.define_dagstermill_solid("bad_kernel_solid", notebook)
def test_default_description():
    """Check the description a solid gets when none is supplied.

    A solid defined from only a name and a notebook path must have a
    description that begins with the fixed "backed by the notebook" prefix.
    """
    expected_prefix = "This solid is backed by the notebook at "
    solid_def = define_dagstermill_solid(BACKING_NB_NAME, BACKING_NB_PATH)
    assert solid_def.description.startswith(expected_prefix)
pipeline, repository, ) from ..data_frame import DataFrame from .pandas_hello_world.pipeline import pandas_hello_world def nb_test_path(name): return file_relative_path(__file__, "notebooks/{name}.ipynb".format(name=name)) hello_world = dagstermill.define_dagstermill_solid( name="papermill_pandas_hello_world", notebook_path=nb_test_path("papermill_pandas_hello_world"), input_defs=[InputDefinition(name="df", dagster_type=DataFrame)], output_defs=[OutputDefinition(DataFrame)], ) @pipeline( mode_defs=[ModeDefinition(resource_defs={"io_manager": fs_io_manager})], preset_defs=[ PresetDefinition.from_files( "test", config_files=[ file_relative_path( __file__, "pandas_hello_world/environments/papermill_pandas_hello_world_test.yaml", ) ],
def test_custom_description():
    """Check that an explicit ``description`` is stored on the solid as-is."""
    expected = "custom description"
    solid_def = define_dagstermill_solid(
        BACKING_NB_NAME,
        BACKING_NB_PATH,
        description=expected,
    )
    assert solid_def.description == expected
def build_hello_world_job(): @job(resource_defs={ "output_notebook_io_manager": local_output_notebook_io_manager, }) def hello_world_job(): hello_world_op() return hello_world_job hello_world_with_custom_tags_and_description = dagstermill.define_dagstermill_solid( name="hello_world_custom", notebook_path=nb_test_path("hello_world"), output_notebook_name="notebook", tags={"foo": "bar"}, description="custom description", ) @pipeline(mode_defs=default_mode_defs) def hello_world_with_custom_tags_and_description_pipeline(): hello_world_with_custom_tags_and_description() hello_world_config = test_nb_solid( "hello_world_config", config_schema={ "greeting": Field(String, is_required=False, default_value="hello") },
def RF_solid():
    """Return the dagstermill solid named ``random_forest_regression``.

    The solid is backed by the notebook path that
    ``nb_test_path('tutorial_RF')`` resolves to and declares a single
    input, ``df``, typed as ``DataFrame``.
    """
    notebook = nb_test_path('tutorial_RF')
    frame_input = InputDefinition(name='df', dagster_type=DataFrame)
    return dagstermill.define_dagstermill_solid(
        'random_forest_regression',
        notebook,
        input_defs=[frame_input],
    )
import dagstermill as dm
from docs_snippets.legacy.data_science.download_file import download_file
from dagster import InputDefinition, pipeline
from dagster.utils import script_relative_path

# Notebook-backed solid named 'k_means_iris'. The notebook file name is
# resolved through script_relative_path, and the solid declares one input,
# 'path', typed as str.
k_means_iris = dm.define_dagstermill_solid(
    'k_means_iris',
    script_relative_path('iris-kmeans_2.ipynb'),
    input_defs=[InputDefinition('path', str, description='Local path to the Iris dataset')],
)


# Pipeline wiring: the result of download_file() is passed as the single
# argument of the k_means_iris solid.
@pipeline
def iris_pipeline():
    k_means_iris(download_file())
def define_hello_world_solid():
    """Return the dagstermill solid named ``hello_world``.

    The solid is backed by the notebook path that
    ``nb_test_path('hello_world')`` resolves to; nothing else is configured.
    """
    notebook = nb_test_path('hello_world')
    return dagstermill.define_dagstermill_solid('hello_world', notebook)
from dagster import pipeline
from dagster.utils import file_relative_path
from dagstermill import define_dagstermill_solid

# Notebook-backed solid named "hello_world_notebook_solid"; the notebook
# path is built by file_relative_path from this module's __file__ and
# "hello_world.ipynb".
hello_world_notebook_solid = define_dagstermill_solid(
    "hello_world_notebook_solid",
    file_relative_path(__file__, "hello_world.ipynb"),
)


# Pipeline whose body invokes only the notebook solid, with no arguments.
@pipeline
def hello_world_notebook_pipeline():
    hello_world_notebook_solid()
@pipeline(mode_defs=default_mode_defs) def hello_world_with_custom_tags_and_description_pipeline(): hello_world_with_custom_tags_and_description() hello_world_config = test_nb_solid( "hello_world_config", config_schema={ "greeting": Field(String, is_required=False, default_value="hello") }, ) goodbye_config = dagstermill.define_dagstermill_solid( name="goodbye_config", notebook_path=nb_test_path("print_dagstermill_context_solid_config"), output_notebook="notebook", config_schema={ "farewell": Field(String, is_required=False, default_value="goodbye") }, ) @pipeline(mode_defs=default_mode_defs) def hello_world_config_pipeline(): hello_world_config() goodbye_config() @pipeline(mode_defs=default_mode_defs) def alias_config_pipeline(): hello_world_config.alias("aliased_greeting")() goodbye_config.alias("aliased_goodbye")()
def define_error_pipeline():
    """Return a ``PipelineDefinition`` named ``error_pipeline``.

    The pipeline contains one dagstermill solid, ``error_solid``, backed by
    the notebook path from ``nb_test_path('error_notebook')``.
    """
    notebook = nb_test_path('error_notebook')
    error_solid = dm.define_dagstermill_solid('error_solid', notebook)
    # NOTE(review): the solid list is passed as `solids=`, exactly as the
    # call site had it -- confirm this keyword matches the pinned dagster
    # version.
    return PipelineDefinition(name='error_pipeline', solids=[error_solid])
def notebook_solid(name, notebook_path, input_defs, output_defs):
    """Define a dagstermill solid after resolving its notebook path.

    Args:
        name: Forwarded unchanged as the first positional argument.
        notebook_path: Passed through ``_notebook_path`` before being
            forwarded.
        input_defs: Forwarded unchanged as the third positional argument.
        output_defs: Forwarded unchanged as the fourth positional argument.

    Returns:
        Whatever ``define_dagstermill_solid`` returns.
    """
    resolved_path = _notebook_path(notebook_path)
    solid_def = define_dagstermill_solid(name, resolved_path, input_defs, output_defs)
    return solid_def
def no_repo_reg_solid():
    """Return the dagstermill solid named ``no_repo_reg``.

    The solid is backed by the notebook path from
    ``nb_test_path('no_repo_reg_error')`` and declares one output, ``df``,
    typed as ``ComplexDagsterType``.
    """
    notebook = nb_test_path('no_repo_reg_error')
    df_output = OutputDefinition(name='df', dagster_type=ComplexDagsterType)
    # NOTE(review): the outputs are passed as `outputs=`, exactly as the
    # call site had it -- confirm this keyword matches the pinned
    # dagstermill version.
    return dm.define_dagstermill_solid(
        'no_repo_reg',
        notebook,
        outputs=[df_output],
    )
def define_hello_logging_solid():
    """Return the dagstermill solid named ``hello_logging``.

    The solid is backed by the notebook path that
    ``nb_test_path('hello_logging')`` resolves to; nothing else is
    configured.
    """
    notebook = nb_test_path('hello_logging')
    return dagstermill.define_dagstermill_solid('hello_logging', notebook)
def test_reserved_tags_not_overridden():
    """Check that the ``notebook_path`` and ``kind`` tag keys are rejected.

    Defining a solid with either key in ``tags`` must raise ``CheckError``
    with a message matching "key is reserved for use by Dagster".
    """
    # Checked in the same order as before: notebook_path first, then kind.
    reserved_tag_sets = (
        {"notebook_path": "~"},
        {"kind": "py"},
    )
    for reserved_tags in reserved_tag_sets:
        with pytest.raises(CheckError, match="key is reserved for use by Dagster"):
            define_dagstermill_solid(BACKING_NB_NAME, BACKING_NB_PATH, tags=reserved_tags)
def LR_solid():
    """Return the dagstermill solid named ``linear_regression``.

    The solid is backed by the notebook path that
    ``nb_test_path('tutorial_LR')`` resolves to and declares a single
    input, ``df``, typed as ``DataFrame``.
    """
    notebook = nb_test_path('tutorial_LR')
    frame_input = InputDefinition(name='df', dagster_type=DataFrame)
    return dagstermill.define_dagstermill_solid(
        'linear_regression',
        notebook,
        input_defs=[frame_input],
    )
def LR_solid():
    """Return the dagstermill solid named ``linear_regression``.

    The solid is backed by the notebook path that
    ``nb_test_path("tutorial_LR")`` resolves to and declares a single
    input, ``df``, typed as ``DataFrame``.
    """
    notebook = nb_test_path("tutorial_LR")
    frame_input = InputDefinition(name="df", dagster_type=DataFrame)
    return dagstermill.define_dagstermill_solid(
        "linear_regression",
        notebook,
        input_defs=[frame_input],
    )