Ejemplo n.º 1
0
 def materialize(_):
     """Emit a single materialization carrying several metadata entry kinds.

     The positional argument is ignored. Yields a ``Materialization`` labelled
     ``all_types`` with text, url, fspath and json entries, followed by an
     ``Output`` whose value is ``None``.
     """
     entries = [
         EventMetadataEntry.text('text is cool', 'text'),
         EventMetadataEntry.url('https://bigty.pe/neato', 'url'),
         EventMetadataEntry.fspath('/tmp/awesome', 'path'),
         EventMetadataEntry.json({'is_dope': True}, 'json'),
     ]
     event = Materialization(
         label='all_types',
         description='a materialization with all metadata types',
         metadata_entries=entries,
     )
     yield event
     yield Output(None)
Ejemplo n.º 2
0
def my_failure_metadata_solid(_):
    """Run a calculation over the files found under a fixed path.

    The positional argument is ignored.

    Returns:
        Whatever ``some_calculation`` returns for the files found.

    Raises:
        Failure: when ``get_files`` yields nothing to process. The failure
            carries the searched path and a dashboard url as metadata entries.
    """
    path = "/path/to/files"
    my_files = get_files(path)
    # A truthiness check treats an empty collection and a ``None`` result the
    # same way, so both surface as the descriptive Failure below instead of a
    # TypeError from ``len(None)``.
    if not my_files:
        raise Failure(
            description="No files to process",
            metadata_entries=[
                EventMetadataEntry.fspath(path, label="filepath"),
                EventMetadataEntry.url("http://mycoolsite.com/failures", label="dashboard_url"),
            ],
        )
    return some_calculation(my_files)
Ejemplo n.º 3
0
def my_metadata_materialization_solid(context, df):
    """Transform and persist ``df``, then report where it went.

    Calls ``do_some_transform`` and ``persist_to_storage`` on ``df``, yields an
    ``AssetMaterialization`` for the ``my_dataset`` asset with text, fspath,
    url and float metadata entries, and finally yields ``df`` as the output.
    ``context`` is not used.
    """
    do_some_transform(df)
    persist_to_storage(df)

    entries = [
        EventMetadataEntry.text("Text-based metadata for this event", label="text_metadata"),
        EventMetadataEntry.fspath("/path/to/data/on/filesystem"),
        EventMetadataEntry.url("http://mycoolsite.com/url_for_my_data", label="dashboard_url"),
        EventMetadataEntry.float(calculate_bytes(df), "size (bytes)"),
    ]
    materialization = AssetMaterialization(
        asset_key="my_dataset",
        description="Persisted result to storage",
        metadata_entries=entries,
    )
    yield materialization
    yield Output(df)
Ejemplo n.º 4
0
def many_table_materializations(_context):
    """Yield one ``table_info`` materialization per entry of ``raw_tables``.

    Every materialization carries four metadata entries derived from the table
    name: the name as text, a filesystem path, a json payload and a url.
    ``_context`` is not used.
    """

    def _entries_for(name):
        # Build the four entries for a single table, in a fixed order.
        return [
            EventMetadataEntry.text(text=name, label='table_name'),
            EventMetadataEntry.fspath(path='/path/to/{}'.format(name), label='table_path'),
            EventMetadataEntry.json(data={'name': name}, label='table_data'),
            EventMetadataEntry.url(url='https://bigty.pe/{}'.format(name), label='table_name_big'),
        ]

    for table in raw_tables:
        yield Materialization(label='table_info', metadata_entries=_entries_for(table))
Ejemplo n.º 5
0
def my_metadata_materialization_solid(context):
    """Read a dataframe, persist it, and report the storage location.

    Yields an ``AssetMaterialization`` for the ``my_dataset`` asset whose
    metadata includes the path returned by ``persist_to_storage``, then yields
    that same path as the output. ``context`` is not used.
    """
    df = read_df()
    remote_storage_path = persist_to_storage(df)

    entries = [
        EventMetadataEntry.text("Text-based metadata for this event", label="text_metadata"),
        EventMetadataEntry.fspath(remote_storage_path),
        EventMetadataEntry.url("http://mycoolsite.com/url_for_my_data", label="dashboard_url"),
        EventMetadataEntry.float(calculate_bytes(df), "size (bytes)"),
    ]
    materialization = AssetMaterialization(
        asset_key="my_dataset",
        description="Persisted result to storage",
        metadata_entries=entries,
    )
    yield materialization
    yield Output(remote_storage_path)
Ejemplo n.º 6
0
 def materialization_and_expectation(_context):
     """Emit a materialization, two expectation results, and an output.

     The events are yielded in this order: a ``Materialization`` labelled
     ``all_types``, a labelled successful ``ExpectationResult``, a bare
     successful ``ExpectationResult``, and ``Output(True)``. ``_context`` is
     not used.
     """
     entries = [
         EventMetadataEntry.text('text is cool', 'text'),
         EventMetadataEntry.url('https://bigty.pe/neato', 'url'),
         EventMetadataEntry.fspath('/tmp/awesome', 'path'),
         EventMetadataEntry.json({'is_dope': True}, 'json'),
     ]
     materialization = Materialization(
         label='all_types',
         description='a materialization with all metadata types',
         metadata_entries=entries,
     )
     yield materialization

     # Each expectation is constructed only when the consumer asks for it.
     yield ExpectationResult(success=True, label='row_count', description='passed')
     yield ExpectationResult(True)
     yield Output(True)
Ejemplo n.º 7
0
 def materialization_and_expectation(_context):
     """Emit an asset materialization, two expectation results, and an output.

     The events are yielded in this order: an ``AssetMaterialization`` for the
     ``all_types`` asset, a labelled successful ``ExpectationResult``, a bare
     successful ``ExpectationResult``, and ``Output(True)``. ``_context`` is
     not used.
     """
     entries = [
         EventMetadataEntry.text("text is cool", "text"),
         EventMetadataEntry.url("https://bigty.pe/neato", "url"),
         EventMetadataEntry.fspath("/tmp/awesome", "path"),
         EventMetadataEntry.json({"is_dope": True}, "json"),
     ]
     materialization = AssetMaterialization(
         asset_key="all_types",
         description="a materialization with all metadata types",
         metadata_entries=entries,
     )
     yield materialization

     # Each expectation is constructed only when the consumer asks for it.
     yield ExpectationResult(success=True, label="row_count", description="passed")
     yield ExpectationResult(True)
     yield Output(True)
Ejemplo n.º 8
0
 def materialize(_):
     """Emit a materialization covering seven metadata entry kinds.

     The positional argument is ignored. The ``Materialization`` labelled
     ``all_types`` carries text, url, fspath, json, two python-artifact and
     one float entry; it is followed by ``Output(None)``.
     """
     entries = [
         EventMetadataEntry.text('text is cool', 'text'),
         EventMetadataEntry.url('https://bigty.pe/neato', 'url'),
         EventMetadataEntry.fspath('/tmp/awesome', 'path'),
         EventMetadataEntry.json({'is_dope': True}, 'json'),
     ]
     # The artifact entries reference a class and a function respectively.
     entries.append(EventMetadataEntry.python_artifact(EventMetadataEntry, 'python class'))
     entries.append(EventMetadataEntry.python_artifact(file_relative_path, 'python function'))
     entries.append(EventMetadataEntry.float(1.2, 'float'))

     yield Materialization(
         label='all_types',
         description='a materialization with all metadata types',
         metadata_entries=entries,
     )
     yield Output(None)
Ejemplo n.º 9
0
def many_table_materializations(_context):
    """Yield one ``table_info`` materialization per entry of ``raw_tables``.

    The markdown blurb attached to every materialization is read once from the
    file located via ``file_relative_path(__file__, MARKDOWN_EXAMPLE)``.
    ``_context`` is not used.
    """
    with open(file_relative_path(__file__, MARKDOWN_EXAMPLE), 'r') as f:
        md_str = f.read()

    # The loop lives outside the ``with`` block on purpose: a generator is
    # suspended at every ``yield``, so yielding inside the block would keep the
    # file handle open for as long as the consumer holds the generator.
    for table in raw_tables:
        yield Materialization(
            label='table_info',
            metadata_entries=[
                EventMetadataEntry.text(text=table, label='table_name'),
                EventMetadataEntry.fspath(path='/path/to/{}'.format(table), label='table_path'),
                EventMetadataEntry.json(data={'name': table}, label='table_data'),
                EventMetadataEntry.url(
                    url='https://bigty.pe/{}'.format(table), label='table_name_big'
                ),
                EventMetadataEntry.md(md_str=md_str, label='table_blurb'),
            ],
        )
Ejemplo n.º 10
0
 def materialize(_):
     """Emit an asset materialization covering eight metadata entry kinds.

     The positional argument is ignored. The ``AssetMaterialization`` for the
     ``all_types`` asset carries text, url, fspath, json, two python-artifact,
     one float and one int entry; it is followed by ``Output(None)``.
     """
     entries = [
         EventMetadataEntry.text("text is cool", "text"),
         EventMetadataEntry.url("https://bigty.pe/neato", "url"),
         EventMetadataEntry.fspath("/tmp/awesome", "path"),
         EventMetadataEntry.json({"is_dope": True}, "json"),
     ]
     # The artifact entries reference a class and a function respectively.
     entries.append(EventMetadataEntry.python_artifact(EventMetadataEntry, "python class"))
     entries.append(EventMetadataEntry.python_artifact(file_relative_path, "python function"))
     # Numeric entries come last.
     entries.append(EventMetadataEntry.float(1.2, "float"))
     entries.append(EventMetadataEntry.int(1, "int"))

     yield AssetMaterialization(
         asset_key="all_types",
         description="a materialization with all metadata types",
         metadata_entries=entries,
     )
     yield Output(None)
Ejemplo n.º 11
0
def my_metadata_materialization_solid(context, df):
    """Transform and persist ``df``, then report where it went.

    Calls ``do_some_transform`` and ``persist_to_storage`` on ``df``, yields a
    ``Materialization`` labelled ``my_dataset`` with text, fspath, url and
    float metadata entries, and finally yields ``df`` as the output.
    ``context`` is not used.
    """
    do_some_transform(df)
    persist_to_storage(df)

    entries = [
        EventMetadataEntry.text('Text-based metadata for this event', label='text_metadata'),
        EventMetadataEntry.fspath('/path/to/data/on/filesystem'),
        EventMetadataEntry.url('http://mycoolsite.com/url_for_my_data', label='dashboard_url'),
        EventMetadataEntry.float(calculate_bytes(df), 'size (bytes)'),
    ]
    materialization = Materialization(
        label='my_dataset',
        description='Persisted result to storage',
        metadata_entries=entries,
    )
    yield materialization
    yield Output(df)
Ejemplo n.º 12
0
 def materialize(_):
     """Emit an asset materialization exercising many metadata entry kinds.

     The positional argument is ignored. The ``AssetMaterialization`` for the
     ``all_types`` asset carries, in order: text, url, fspath, json, two
     python-artifact, float, int, NaN float, long int, pipeline-run, asset,
     table and table-schema entries. It is followed by ``Output(None)``.
     """
     entries = [
         EventMetadataEntry.text("text is cool", "text"),
         EventMetadataEntry.url("https://bigty.pe/neato", "url"),
         EventMetadataEntry.fspath("/tmp/awesome", "path"),
         EventMetadataEntry.json({"is_dope": True}, "json"),
         EventMetadataEntry.python_artifact(EventMetadataEntry, "python class"),
         EventMetadataEntry.python_artifact(file_relative_path, "python function"),
         EventMetadataEntry.float(1.2, "float"),
         EventMetadataEntry.int(1, "int"),
         EventMetadataEntry.float(float("nan"), "float NaN"),
         EventMetadataEntry.int(LONG_INT, "long int"),
         EventMetadataEntry.pipeline_run("fake_run_id", "pipeline run"),
         EventMetadataEntry.asset(AssetKey("my_asset"), "my asset"),
     ]

     # Tabular data: two records sharing the ``foo``/``bar`` fields.
     records = [TableRecord(foo=1, bar=2), TableRecord(foo=3, bar=4)]
     entries.append(EventMetadataEntry.table(label="table", records=records))

     # Schema describing those two columns plus one table-level constraint.
     foo_column = TableColumn(
         name="foo",
         type="integer",
         constraints=TableColumnConstraints(unique=True),
     )
     bar_column = TableColumn(name="bar", type="string")
     schema = TableSchema(
         columns=[foo_column, bar_column],
         constraints=TableConstraints(other=["some constraint"]),
     )
     entries.append(EventMetadataEntry.table_schema(label="table_schema", schema=schema))

     yield AssetMaterialization(
         asset_key="all_types",
         description="a materialization with all metadata types",
         metadata_entries=entries,
     )
     yield Output(None)
Ejemplo n.º 13
0
def many_table_materializations(_context):
    """Yield one ``table_info`` asset materialization per entry of ``raw_tables``.

    The markdown blurb attached to every materialization is read once from the
    file located via ``file_relative_path(__file__, MARKDOWN_EXAMPLE)``.
    ``_context`` is not used.
    """
    with open(file_relative_path(__file__, MARKDOWN_EXAMPLE), "r") as f:
        md_str = f.read()

    # The loop lives outside the ``with`` block on purpose: a generator is
    # suspended at every ``yield``, so yielding inside the block would keep the
    # file handle open for as long as the consumer holds the generator.
    for table in raw_tables:
        yield AssetMaterialization(
            asset_key="table_info",
            metadata_entries=[
                EventMetadataEntry.text(text=table, label="table_name"),
                EventMetadataEntry.fspath(path="/path/to/{}".format(table),
                                          label="table_path"),
                EventMetadataEntry.json(data={"name": table},
                                        label="table_data"),
                EventMetadataEntry.url(
                    url="https://bigty.pe/{}".format(table),
                    label="table_name_big"),
                EventMetadataEntry.md(md_str=md_str, label="table_blurb"),
            ],
        )
Ejemplo n.º 14
0
 def backcompat_materialize(_):
     """Emit a ``Materialization`` built with an ``asset_key`` argument.

     The positional argument is ignored. The event carries text, url, fspath,
     json, two python-artifact, float, int, NaN float, long int, pipeline-run
     and asset entries, and is followed by ``Output(None)``.
     """
     entries = [
         EventMetadataEntry.text("text is cool", "text"),
         EventMetadataEntry.url("https://bigty.pe/neato", "url"),
         EventMetadataEntry.fspath("/tmp/awesome", "path"),
         EventMetadataEntry.json({"is_dope": True}, "json"),
     ]
     # The artifact entries reference a class and a function respectively.
     entries.append(EventMetadataEntry.python_artifact(EventMetadataEntry, "python class"))
     entries.append(EventMetadataEntry.python_artifact(file_relative_path, "python function"))
     # Numeric entries, including the NaN and large-integer cases.
     entries.append(EventMetadataEntry.float(1.2, "float"))
     entries.append(EventMetadataEntry.int(1, "int"))
     entries.append(EventMetadataEntry.float(float("nan"), "float NaN"))
     entries.append(EventMetadataEntry.int(LONG_INT, "long int"))
     # Entries that reference a run and another asset.
     entries.append(EventMetadataEntry.pipeline_run("fake_run_id", "pipeline run"))
     entries.append(EventMetadataEntry.asset(AssetKey("my_asset"), "my asset"))

     yield Materialization(
         asset_key="all_types",
         description="a materialization with all metadata types",
         metadata_entries=entries,
     )
     yield Output(None)
Ejemplo n.º 15
0
def read_file(context):
    """Report a file's stats as an asset materialization and output its name.

    Reads ``filename`` and ``directory`` from ``context.solid_config`` and
    stats the joined path.

    Yields:
        An ``AssetMaterialization`` keyed by ``["log_file", <filename>]`` with
        the full path and a json entry holding size, ctime and mtime, followed
        by an ``Output`` carrying the relative filename. When the file does not
        exist an error is logged and nothing is yielded.
    """
    relative_filename = context.solid_config["filename"]
    directory = context.solid_config["directory"]
    filename = os.path.join(directory, relative_filename)

    # Only the stat call is guarded. Keeping the yields and logging outside the
    # ``try`` means a FileNotFoundError raised elsewhere (for example thrown
    # into the generator by its consumer) is not misreported as a missing file.
    try:
        fstats = os.stat(filename)
    except FileNotFoundError:
        context.log.error("No file found: {}".format(relative_filename))
        return

    context.log.info("Found file {}".format(relative_filename))
    yield AssetMaterialization(
        asset_key=AssetKey(["log_file", relative_filename]),
        metadata_entries=[
            EventMetadataEntry.fspath(filename),
            EventMetadataEntry.json(
                {
                    "size": fstats.st_size,
                    "ctime": fstats.st_ctime,
                    "mtime": fstats.st_mtime,
                },
                "File stats",
            ),
        ],
    )
    yield Output(relative_filename)
Ejemplo n.º 16
0
    def _t_fn(compute_context, inputs):
        """Execute a notebook through papermill and relay its results as events.

        Args:
            compute_context: must be a ``ComputeExecutionContext`` whose
                ``environment_dict`` is a dict.
            inputs: forwarded to ``get_papermill_parameters``.

        Yields:
            A ``Materialization`` pointing at the executed notebook, an
            ``Output`` for every declared output found in the notebook's
            scraps, and every event stored under a scrap key starting with
            ``event-``.

        Raises:
            Any exception raised while executing the notebook is re-raised
            after the output-notebook materialization has been yielded.

        ``notebook_path`` and ``name`` are not defined here; presumably they
        are closed over from the enclosing scope.
        """
        check.inst_param(compute_context, 'compute_context',
                         ComputeExecutionContext)
        check.param_invariant(
            isinstance(compute_context.environment_dict, dict),
            'context',
            'SystemComputeExecutionContext must have valid environment_dict',
        )

        system_compute_context = compute_context.get_system_context()

        # Executed and intermediate notebooks are kept under a per-run directory.
        base_dir = '/tmp/dagstermill/{run_id}/'.format(
            run_id=compute_context.run_id)
        output_notebook_dir = os.path.join(base_dir, 'output_notebooks/')
        mkdir_p(output_notebook_dir)

        temp_path = os.path.join(
            output_notebook_dir,
            '{prefix}-out.ipynb'.format(prefix=str(uuid.uuid4())))

        with tempfile.NamedTemporaryFile() as output_log_file:
            output_log_path = output_log_file.name
            init_db(output_log_path)

            nb = load_notebook_node(notebook_path)
            nb_no_parameters = replace_parameters(
                system_compute_context,
                nb,
                get_papermill_parameters(system_compute_context, inputs,
                                         output_log_path),
            )
            intermediate_path = os.path.join(
                output_notebook_dir,
                '{prefix}-inter.ipynb'.format(prefix=str(uuid.uuid4())))
            write_ipynb(nb_no_parameters, intermediate_path)

            # Although the type of is_done is threading._Event in py2, not threading.Event,
            # it is still constructed using the threading.Event() factory
            is_done = threading.Event()

            def log_watcher_thread_target():
                log_watcher = JsonSqlite3LogWatcher(output_log_path,
                                                    system_compute_context.log,
                                                    is_done)
                log_watcher.watch()

            log_watcher_thread = threading.Thread(
                target=log_watcher_thread_target)

            log_watcher_thread.start()

            with user_code_error_boundary(
                    DagstermillExecutionError,
                    lambda:
                    'Error occurred during the execution of Dagstermill solid '
                    '{solid_name}: {notebook_path}'.format(
                        solid_name=name, notebook_path=notebook_path),
            ):
                try:
                    papermill_engines.register('dagstermill',
                                               DagstermillNBConvertEngine)
                    papermill.execute_notebook(intermediate_path,
                                               temp_path,
                                               engine_name='dagstermill',
                                               log_output=True)
                except Exception as exc:
                    # Surface the notebook location before failing so it can
                    # still be inspected.
                    yield Materialization(
                        label='output_notebook',
                        description=
                        'Location of output notebook on the filesystem',
                        metadata_entries=[
                            EventMetadataEntry.fspath(temp_path)
                        ],
                    )
                    # Re-raise the bound exception explicitly: this handler has
                    # yielded, and the comments above indicate py2 support.
                    raise exc
                finally:
                    # Always signal the watcher and wait for it to finish.
                    is_done.set()
                    log_watcher_thread.join()

            output_nb = scrapbook.read_notebook(temp_path)

            system_compute_context.log.debug(
                'Notebook execution complete for {name}. Data is {data}'.
                format(name=name, data=output_nb.scraps))

            yield Materialization(
                label='output_notebook',
                description='Location of output notebook on the filesystem',
                metadata_entries=[EventMetadataEntry.fspath(temp_path)],
            )

            # The scraps are not modified below, so look the data up once
            # rather than on every loop iteration.
            data_dict = output_nb.scraps.data_dict
            for (output_name, output_def
                 ) in system_compute_context.solid_def.output_dict.items():
                if output_name in data_dict:
                    value = read_value(output_def.runtime_type,
                                       data_dict[output_name])

                    yield Output(value, output_name)

            for key, value in output_nb.scraps.items():
                if key.startswith('event-'):
                    # NOTE(review): pickle.loads executes arbitrary code from
                    # the file; only safe while these files are written by the
                    # notebook run itself.
                    with open(value.data, 'rb') as fd:
                        yield pickle.loads(fd.read())
Ejemplo n.º 17
0
    def _t_fn(compute_context, inputs):
        """Execute a notebook through papermill and relay its results as events.

        Args:
            compute_context: must be a ``SolidExecutionContext`` whose
                ``run_config`` is a dict.
            inputs: forwarded to ``get_papermill_parameters``.

        Yields:
            An ``AssetMaterialization`` pointing at the executed notebook, an
            optional ``Output`` holding the notebook's file handle (when
            ``output_notebook`` is not None), an ``Output`` for every declared
            output found in the notebook's scraps, and every event stored
            under a scrap key starting with ``event-``.

        Raises:
            Any exception raised while executing the notebook is re-raised
            after the executed-notebook materialization has been yielded.

        ``notebook_path``, ``name``, ``asset_key_prefix`` and
        ``output_notebook`` are not defined here; presumably they are closed
        over from the enclosing scope.
        """
        check.inst_param(compute_context, "compute_context",
                         SolidExecutionContext)
        check.param_invariant(
            isinstance(compute_context.run_config, dict),
            "context",
            "SystemComputeExecutionContext must have valid run_config",
        )

        system_compute_context = compute_context.get_system_context()

        # Both notebooks live in a temporary directory that is removed when
        # this block exits, so everything that reads them stays inside it.
        with seven.TemporaryDirectory() as output_notebook_dir:
            with safe_tempfile_path() as output_log_path:

                parameterized_notebook_path = os.path.join(
                    output_notebook_dir,
                    "{prefix}-inter.ipynb".format(prefix=str(uuid.uuid4())))

                executed_notebook_path = os.path.join(
                    output_notebook_dir,
                    "{prefix}-out.ipynb".format(prefix=str(uuid.uuid4())))

                # Scaffold the registration here
                nb = load_notebook_node(notebook_path)
                nb_no_parameters = replace_parameters(
                    system_compute_context,
                    nb,
                    get_papermill_parameters(system_compute_context, inputs,
                                             output_log_path),
                )
                write_ipynb(nb_no_parameters, parameterized_notebook_path)

                with user_code_error_boundary(
                        DagstermillExecutionError,
                        lambda:
                    ("Error occurred during the execution of Dagstermill solid "
                     "{solid_name}: {notebook_path}".format(
                         solid_name=name, notebook_path=notebook_path)),
                ):
                    try:
                        papermill_engines.register("dagstermill",
                                                   DagstermillNBConvertEngine)
                        papermill.execute_notebook(
                            input_path=parameterized_notebook_path,
                            output_path=executed_notebook_path,
                            engine_name="dagstermill",
                            log_output=True,
                        )

                    except Exception as exc:  # pylint: disable=broad-except
                        # Best effort: hand the notebook to the file manager so
                        # it can still be inspected after the failure.
                        try:
                            with open(executed_notebook_path, "rb") as fd:
                                executed_notebook_file_handle = compute_context.resources.file_manager.write(
                                    fd, mode="wb", ext="ipynb")
                                executed_notebook_materialization_path = (
                                    executed_notebook_file_handle.path_desc)
                        except Exception as exc_inner:  # pylint: disable=broad-except
                            compute_context.log.warning(
                                "Error when attempting to materialize executed notebook using file manager (falling back to local): {exc}"
                                .format(exc=exc_inner))
                            # Fall back to the local path inside the temporary directory.
                            executed_notebook_materialization_path = executed_notebook_path

                        yield AssetMaterialization(
                            asset_key=(asset_key_prefix +
                                       [f"{name}_output_notebook"]),
                            description=
                            "Location of output notebook in file manager",
                            metadata_entries=[
                                EventMetadataEntry.fspath(
                                    executed_notebook_materialization_path,
                                    label="executed_notebook_path",
                                )
                            ],
                        )
                        # Propagate the original execution error once the
                        # materialization has been emitted.
                        raise exc

            system_compute_context.log.debug(
                "Notebook execution complete for {name} at {executed_notebook_path}."
                .format(
                    name=name,
                    executed_notebook_path=executed_notebook_path,
                ))

            # Stays None when the file manager write below fails, so the
            # optional notebook output further down still has a value to yield.
            executed_notebook_file_handle = None
            try:
                # use binary mode when moving the file since certain file_managers such as S3
                # may try to hash the contents
                with open(executed_notebook_path, "rb") as fd:
                    executed_notebook_file_handle = compute_context.resources.file_manager.write(
                        fd, mode="wb", ext="ipynb")
                    executed_notebook_materialization_path = executed_notebook_file_handle.path_desc
            except Exception as exc:  # pylint: disable=broad-except
                compute_context.log.warning(
                    "Error when attempting to materialize executed notebook using file manager (falling back to local): {exc}"
                    .format(exc=str(exc)))
                executed_notebook_materialization_path = executed_notebook_path

            yield AssetMaterialization(
                asset_key=(asset_key_prefix + [f"{name}_output_notebook"]),
                description="Location of output notebook in file manager",
                metadata_entries=[
                    EventMetadataEntry.fspath(
                        executed_notebook_materialization_path)
                ],
            )

            if output_notebook is not None:
                yield Output(executed_notebook_file_handle, output_notebook)

            # deferred import for perf
            import scrapbook

            output_nb = scrapbook.read_notebook(executed_notebook_path)

            # Emit an Output for each declared output that the notebook
            # actually recorded; outputs missing from the scraps are skipped.
            for (output_name, output_def
                 ) in system_compute_context.solid_def.output_dict.items():
                data_dict = output_nb.scraps.data_dict
                if output_name in data_dict:
                    value = read_value(output_def.dagster_type,
                                       data_dict[output_name])

                    yield Output(value, output_name)

            # Scraps whose key starts with "event-" hold the path of a pickled
            # event, which is loaded and re-yielded here.
            # NOTE(review): pickle.loads executes arbitrary code from the file;
            # confirm these files are only ever written by the notebook run.
            for key, value in output_nb.scraps.items():
                if key.startswith("event-"):
                    with open(value.data, "rb") as fd:
                        yield pickle.loads(fd.read())
Ejemplo n.º 18
0
    def _t_fn(compute_context, inputs):
        """Execute a notebook through papermill and relay its results as events.

        Args:
            compute_context: must be a ``SolidExecutionContext`` whose
                ``environment_dict`` is a dict.
            inputs: forwarded to ``get_papermill_parameters``.

        Yields:
            A ``Materialization`` pointing at the executed notebook, an
            optional ``Output`` holding the notebook's file handle (when
            ``output_notebook`` is not None; the handle is ``None`` if the
            file manager write failed), an ``Output`` for every declared
            output found in the notebook's scraps, and every event stored
            under a scrap key starting with ``event-``.

        Raises:
            Any exception raised while executing the notebook is re-raised
            after the output-notebook materialization has been yielded.

        ``notebook_path``, ``name`` and ``output_notebook`` are not defined
        here; presumably they are closed over from the enclosing scope.
        """
        check.inst_param(compute_context, 'compute_context',
                         SolidExecutionContext)
        check.param_invariant(
            isinstance(compute_context.environment_dict, dict),
            'context',
            'SystemComputeExecutionContext must have valid environment_dict',
        )

        system_compute_context = compute_context.get_system_context()

        # Both notebooks live in a temporary directory that is removed when
        # this block exits, so everything that reads them stays inside it.
        with seven.TemporaryDirectory() as output_notebook_dir:
            with safe_tempfile_path() as output_log_path:

                parameterized_notebook_path = os.path.join(
                    output_notebook_dir,
                    '{prefix}-inter.ipynb'.format(prefix=str(uuid.uuid4())))

                executed_notebook_path = os.path.join(
                    output_notebook_dir,
                    '{prefix}-out.ipynb'.format(prefix=str(uuid.uuid4())))

                # Scaffold the registration here
                nb = load_notebook_node(notebook_path)
                nb_no_parameters = replace_parameters(
                    system_compute_context,
                    nb,
                    get_papermill_parameters(system_compute_context, inputs,
                                             output_log_path),
                )
                write_ipynb(nb_no_parameters, parameterized_notebook_path)

                with user_code_error_boundary(
                        DagstermillExecutionError,
                        lambda:
                    ('Error occurred during the execution of Dagstermill solid '
                     '{solid_name}: {notebook_path}'.format(
                         solid_name=name, notebook_path=notebook_path)),
                ):
                    try:
                        papermill_engines.register('dagstermill',
                                                   DagstermillNBConvertEngine)
                        papermill.execute_notebook(
                            input_path=parameterized_notebook_path,
                            output_path=executed_notebook_path,
                            engine_name='dagstermill',
                            log_output=True,
                        )

                    except Exception as exc:  # pylint: disable=broad-except
                        # Best effort: hand the notebook to the file manager so
                        # it can still be inspected after the failure.
                        try:
                            with open(executed_notebook_path, 'r') as fd:
                                executed_notebook_file_handle = compute_context.file_manager.write(
                                    fd, mode='w', ext='ipynb')
                                executed_notebook_materialization_path = (
                                    executed_notebook_file_handle.path_desc)
                        except Exception as exc_inner:  # pylint: disable=broad-except
                            compute_context.log.warning(
                                'Error when attempting to materialize executed notebook using file manager (falling back to local): {exc}'
                                .format(exc=exc_inner))
                            executed_notebook_materialization_path = executed_notebook_path

                        yield Materialization(
                            label='output_notebook',
                            description=
                            'Location of output notebook in file manager',
                            metadata_entries=[
                                EventMetadataEntry.fspath(
                                    executed_notebook_materialization_path)
                            ],
                        )
                        raise exc

            system_compute_context.log.debug(
                'Notebook execution complete for {name} at {executed_notebook_path}.'
                .format(
                    name=name,
                    executed_notebook_path=executed_notebook_path,
                ))

            # Bind the handle up front: if the file manager write below fails,
            # the optional notebook output would otherwise hit an unbound local
            # and mask the warning with an UnboundLocalError.
            executed_notebook_file_handle = None
            try:
                with open(executed_notebook_path, 'r') as fd:
                    executed_notebook_file_handle = compute_context.file_manager.write(
                        fd, mode='w', ext='ipynb')
                    executed_notebook_materialization_path = executed_notebook_file_handle.path_desc
            except Exception as exc:  # pylint: disable=broad-except
                compute_context.log.warning(
                    'Error when attempting to materialize executed notebook using file manager (falling back to local): {exc}'
                    .format(exc=str(exc)))
                executed_notebook_materialization_path = executed_notebook_path

            yield Materialization(
                label='output_notebook',
                description='Location of output notebook in file manager',
                metadata_entries=[
                    EventMetadataEntry.fspath(
                        executed_notebook_materialization_path)
                ],
            )

            if output_notebook is not None:
                yield Output(executed_notebook_file_handle, output_notebook)

            # deferred import for perf
            import scrapbook

            output_nb = scrapbook.read_notebook(executed_notebook_path)

            # Emit an Output for each declared output that the notebook
            # actually recorded; outputs missing from the scraps are skipped.
            for (output_name, output_def
                 ) in system_compute_context.solid_def.output_dict.items():
                data_dict = output_nb.scraps.data_dict
                if output_name in data_dict:
                    value = read_value(output_def.dagster_type,
                                       data_dict[output_name])

                    yield Output(value, output_name)

            # Scraps whose key starts with 'event-' hold the path of a pickled
            # event, which is loaded and re-yielded here.
            # NOTE(review): pickle.loads executes arbitrary code from the file;
            # confirm these files are only ever written by the notebook run.
            for key, value in output_nb.scraps.items():
                if key.startswith('event-'):
                    with open(value.data, 'rb') as fd:
                        yield pickle.loads(fd.read())
Ejemplo n.º 19
0
    def _t_fn(step_context, inputs):
        check.inst_param(step_context, "step_context", SolidExecutionContext)
        check.param_invariant(
            isinstance(step_context.run_config, dict),
            "context",
            "StepExecutionContext must have valid run_config",
        )

        step_execution_context = step_context.get_step_execution_context()

        with tempfile.TemporaryDirectory() as output_notebook_dir:
            with safe_tempfile_path() as output_log_path:

                parameterized_notebook_path = os.path.join(
                    output_notebook_dir,
                    "{prefix}-inter.ipynb".format(prefix=str(uuid.uuid4())))

                executed_notebook_path = os.path.join(
                    output_notebook_dir,
                    "{prefix}-out.ipynb".format(prefix=str(uuid.uuid4())))

                # Scaffold the registration here
                nb = load_notebook_node(notebook_path)
                nb_no_parameters = replace_parameters(
                    step_execution_context,
                    nb,
                    get_papermill_parameters(step_execution_context, inputs,
                                             output_log_path),
                )
                write_ipynb(nb_no_parameters, parameterized_notebook_path)

                try:
                    papermill_engines.register("dagstermill",
                                               DagstermillNBConvertEngine)
                    papermill.execute_notebook(
                        input_path=parameterized_notebook_path,
                        output_path=executed_notebook_path,
                        engine_name="dagstermill",
                        log_output=True,
                    )

                except Exception as ex:  # pylint: disable=broad-except
                    try:
                        with open(executed_notebook_path, "rb") as fd:
                            executed_notebook_file_handle = (
                                step_context.resources.file_manager.write(
                                    fd, mode="wb", ext="ipynb"))
                            executed_notebook_materialization_path = (
                                executed_notebook_file_handle.path_desc)
                    except Exception:  # pylint: disable=broad-except
                        step_context.log.warning(
                            "Error when attempting to materialize executed notebook using file manager (falling back to local): {exc}"
                            .format(exc=str(
                                serializable_error_info_from_exc_info(
                                    sys.exc_info()))))
                        executed_notebook_materialization_path = executed_notebook_path

                    yield AssetMaterialization(
                        asset_key=(asset_key_prefix +
                                   [f"{name}_output_notebook"]),
                        description=
                        "Location of output notebook in file manager",
                        metadata_entries=[
                            EventMetadataEntry.fspath(
                                executed_notebook_materialization_path,
                                label="executed_notebook_path",
                            )
                        ],
                    )

                    # pylint: disable=no-member
                    if isinstance(ex, PapermillExecutionError) and (
                            ex.ename == "RetryRequested"
                            or ex.ename == "Failure"):
                        step_execution_context.log.warn(
                            f"Encountered raised {ex.ename} in notebook. Use dagstermill.yield_event "
                            "with RetryRequested or Failure to trigger their behavior."
                        )

                    raise

            step_execution_context.log.debug(
                "Notebook execution complete for {name} at {executed_notebook_path}."
                .format(
                    name=name,
                    executed_notebook_path=executed_notebook_path,
                ))

            executed_notebook_file_handle = None
            try:
                # use binary mode when when moving the file since certain file_managers such as S3
                # may try to hash the contents
                with open(executed_notebook_path, "rb") as fd:
                    executed_notebook_file_handle = step_context.resources.file_manager.write(
                        fd, mode="wb", ext="ipynb")
                    executed_notebook_materialization_path = executed_notebook_file_handle.path_desc
            except Exception:  # pylint: disable=broad-except
                step_context.log.warning(
                    "Error when attempting to materialize executed notebook using file manager (falling back to local): {exc}"
                    .format(exc=str(
                        serializable_error_info_from_exc_info(
                            sys.exc_info()))))
                executed_notebook_materialization_path = executed_notebook_path

            yield AssetMaterialization(
                asset_key=(asset_key_prefix + [f"{name}_output_notebook"]),
                description="Location of output notebook in file manager",
                metadata_entries=[
                    EventMetadataEntry.fspath(
                        executed_notebook_materialization_path)
                ],
            )

            if output_notebook is not None:
                yield Output(executed_notebook_file_handle, output_notebook)

            # deferred import for perf
            import scrapbook

            output_nb = scrapbook.read_notebook(executed_notebook_path)

            for (output_name, output_def
                 ) in step_execution_context.solid_def.output_dict.items():
                data_dict = output_nb.scraps.data_dict
                if output_name in data_dict:
                    value = read_value(output_def.dagster_type,
                                       data_dict[output_name])

                    yield Output(value, output_name)

            for key, value in output_nb.scraps.items():
                if key.startswith("event-"):
                    with open(value.data, "rb") as fd:
                        event = pickle.loads(fd.read())
                        if isinstance(event, (Failure, RetryRequested)):
                            raise event
                        else:
                            yield event
Ejemplo n.º 20
0
    def _t_fn(compute_context, inputs):
        """Run the notebook through papermill and yield the events it produced.

        The notebook is loaded, its parameters are replaced, and the result is
        written to an intermediate ``.ipynb`` which papermill then executes
        with the ``dagstermill`` engine. Afterwards the executed notebook is
        read back with scrapbook to recover outputs and pickled events.

        ``name`` and ``notebook_path`` are not defined in this function; they
        are presumably provided by the enclosing scope -- confirm at the
        definition site.

        Args:
            compute_context: Must be a ``ComputeExecutionContext`` whose
                ``environment_dict`` is a ``dict``.
            inputs: Solid inputs, forwarded to ``get_papermill_parameters``.

        Yields:
            A ``Materialization`` labelled ``output_notebook`` pointing at the
            executed notebook, then one ``Output`` per output definition whose
            name is present in the notebook's scrapbook data, then every
            unpickled object stored under a scrap key starting with
            ``event-``.

        Raises:
            Any exception raised while executing the notebook; it is re-raised
            after the ``output_notebook`` materialization has been yielded,
            and passes through ``user_code_error_boundary`` on the way out.
        """
        check.inst_param(compute_context, 'compute_context',
                         ComputeExecutionContext)
        check.param_invariant(
            isinstance(compute_context.environment_dict, dict),
            'context',
            'SystemComputeExecutionContext must have valid environment_dict',
        )

        system_compute_context = compute_context.get_system_context()

        # Notebooks for a run are grouped under a per-run directory.
        base_dir = '/tmp/dagstermill/{run_id}/'.format(
            run_id=compute_context.run_id)
        output_notebook_dir = os.path.join(base_dir, 'output_notebooks/')
        mkdir_p(output_notebook_dir)

        # Random prefix so that separate invocations never share a file name.
        temp_path = os.path.join(
            output_notebook_dir,
            '{prefix}-out.ipynb'.format(prefix=str(uuid.uuid4())))

        with safe_tempfile_path() as output_log_path:
            # Scaffold the registration here
            nb = load_notebook_node(notebook_path)
            nb_no_parameters = replace_parameters(
                system_compute_context,
                nb,
                get_papermill_parameters(system_compute_context, inputs,
                                         output_log_path),
            )
            intermediate_path = os.path.join(
                output_notebook_dir,
                '{prefix}-inter.ipynb'.format(prefix=str(uuid.uuid4())))
            write_ipynb(nb_no_parameters, intermediate_path)

            with user_code_error_boundary(
                    DagstermillExecutionError,
                    lambda: (
                        'Error occurred during the execution of Dagstermill solid '
                        '{solid_name}: {notebook_path}'.format(
                            solid_name=name, notebook_path=notebook_path)),
            ):
                try:
                    papermill_engines.register('dagstermill',
                                               DagstermillNBConvertEngine)
                    papermill.execute_notebook(intermediate_path,
                                               temp_path,
                                               engine_name='dagstermill',
                                               log_output=True)
                except Exception as exc:
                    # Surface the output notebook location even on failure so
                    # it can be inspected, then propagate the original error.
                    yield Materialization(
                        label='output_notebook',
                        description=
                        'Location of output notebook on the filesystem',
                        metadata_entries=[
                            EventMetadataEntry.fspath(temp_path)
                        ],
                    )
                    # Re-raise by name: this statement runs after a yield, so
                    # the exception object is referenced explicitly.
                    raise exc

            # deferred import for perf
            import scrapbook

            output_nb = scrapbook.read_notebook(temp_path)

            system_compute_context.log.debug(
                'Notebook execution complete for {name}. Data is {data}'.
                format(name=name, data=output_nb.scraps))

            yield Materialization(
                label='output_notebook',
                description='Location of output notebook on the filesystem',
                metadata_entries=[EventMetadataEntry.fspath(temp_path)],
            )

            # Loop-invariant: look the scrap data up once, not per output.
            data_dict = output_nb.scraps.data_dict
            for (output_name, output_def
                 ) in system_compute_context.solid_def.output_dict.items():
                if output_name in data_dict:
                    value = read_value(output_def.dagster_type,
                                       data_dict[output_name])

                    yield Output(value, output_name)

            for key, value in output_nb.scraps.items():
                if key.startswith('event-'):
                    # value.data is used as the path of a pickled event file.
                    # NOTE(review): unpickling runs arbitrary code if that
                    # file can be tampered with; presumably it is written by
                    # the notebook run itself -- confirm.
                    with open(value.data, 'rb') as fd:
                        event = pickle.load(fd)
                    # Yield only after the file is closed so the handle is
                    # not held open while the consumer processes the event.
                    yield event