def handle_output(self, context: OutputContext, obj: bytes):
    """Write the output notebook's bytes to disk and report where they landed.

    Args:
        context: Output context; handed to ``self._get_path`` to resolve the
            destination file.
        obj: Raw bytes of the output notebook.

    Yields:
        A filesystem-path ``MetadataEntry`` labelled ``"path"`` that points at
        the file that was written.
    """
    check.inst_param(context, "context", OutputContext)

    # Destination of the output notebook, as resolved by ``_get_path``.
    notebook_path = self._get_path(context)
    parent_dir = os.path.dirname(notebook_path)
    mkdir_p(parent_dir)

    with open(notebook_path, self.write_mode) as notebook_file:
        notebook_file.write(obj)

    yield MetadataEntry.fspath(path=notebook_path, label="path")
def handle_output(self, context, obj):
    """Pickle ``obj`` to the file named by the output's ``path`` metadata.

    The ``"path"`` metadata value must be a string; it is passed through
    ``self._get_path`` to obtain the file that is actually written.

    Returns:
        An ``AssetMaterialization`` keyed by pipeline name, step key and
        output name, carrying the absolute path of the written file, so the
        asset can be tracked by the Asset Catalog.
    """
    check.inst_param(context, "context", OutputContext)

    requested_path = check.str_param(context.metadata.get("path"), "metadata.path")
    target_path = self._get_path(requested_path)

    # The parent directory has to be in place before the file is opened.
    mkdir_p(os.path.dirname(target_path))

    context.log.debug(f"Writing file at: {target_path}")
    with open(target_path, self.write_mode) as sink:
        pickle.dump(obj, sink, PICKLE_PROTOCOL)

    asset_key = AssetKey([context.pipeline_name, context.step_key, context.name])
    path_entry = MetadataEntry.fspath(os.path.abspath(target_path))
    return AssetMaterialization(asset_key=asset_key, metadata_entries=[path_entry])
def _t_fn(step_context, inputs):
    """Execute the notebook with papermill and yield the resulting events.

    The notebook at ``notebook_path`` is loaded, its parameters are replaced,
    and the result is executed with the ``dagstermill`` papermill engine
    inside a temporary directory. The executed notebook is then emitted and
    its scraps are turned into outputs and events.

    ``notebook_path``, ``name``, ``dagster_factory_name``,
    ``output_notebook_name``, ``output_notebook`` and ``asset_key_prefix`` are
    free variables resolved from the enclosing scope, which is not part of
    this block.

    Args:
        step_context: The ``SolidExecutionContext`` of the executing step; its
            ``run_config`` must be a dict.
        inputs: Input values forwarded to ``get_papermill_parameters``.

    Yields:
        In order:

        * When ``output_notebook_name`` is set, an ``Output`` holding the
          executed notebook's bytes. Otherwise (backcompat) an
          ``AssetMaterialization`` for the notebook written through the
          ``file_manager`` resource, followed by an ``Output`` with the file
          handle (``None`` if that write failed) when ``output_notebook`` is
          set.
        * One ``Output`` per solid output whose name appears in the notebook's
          scrap data, loaded through that output's IO manager.
        * Every unpickled ``event-*`` scrap that is not a ``Failure`` or
          ``RetryRequested``.

    Raises:
        Exception: Any error raised while executing the notebook is logged
            and re-raised unchanged.
        Failure, RetryRequested: When such an event was recorded in an
            ``event-*`` scrap.
    """
    check.inst_param(step_context, "step_context", SolidExecutionContext)
    check.param_invariant(
        isinstance(step_context.run_config, dict),
        "context",
        "StepExecutionContext must have valid run_config",
    )

    step_execution_context = step_context.get_step_execution_context()

    with tempfile.TemporaryDirectory() as output_notebook_dir:
        with safe_tempfile_path() as output_log_path:
            prefix = str(uuid.uuid4())
            parameterized_notebook_path = os.path.join(
                output_notebook_dir, f"{prefix}-inter.ipynb"
            )
            executed_notebook_path = os.path.join(
                output_notebook_dir, f"{prefix}-out.ipynb"
            )

            # Scaffold the registration here
            nb = load_notebook_node(notebook_path)
            compute_descriptor = (
                "solid" if dagster_factory_name == "define_dagstermill_solid" else "op"
            )
            nb_no_parameters = replace_parameters(
                step_execution_context,
                nb,
                get_papermill_parameters(
                    step_execution_context, inputs, output_log_path, compute_descriptor
                ),
            )
            write_ipynb(nb_no_parameters, parameterized_notebook_path)

            try:
                papermill_engines.register("dagstermill", DagstermillEngine)
                papermill.execute_notebook(
                    input_path=parameterized_notebook_path,
                    output_path=executed_notebook_path,
                    engine_name="dagstermill",
                    log_output=True,
                )
            except Exception as ex:
                error_info = str(serializable_error_info_from_exc_info(sys.exc_info()))
                # ``warning`` rather than the deprecated ``warn`` alias, matching
                # the file-manager fallback further down.
                step_execution_context.log.warning(
                    f"Error when attempting to materialize executed notebook: {error_info}"
                )
                # compat: a RetryRequested/Failure raised directly inside the
                # notebook only surfaces as a papermill ExecutionError, so point
                # the user at dagstermill.yield_event instead.
                # pylint: disable=no-member
                if isinstance(ex, ExecutionError) and ex.ename in (
                    "RetryRequested",
                    "Failure",
                ):
                    step_execution_context.log.warning(
                        f"Encountered raised {ex.ename} in notebook. Use dagstermill.yield_event "
                        "with RetryRequested or Failure to trigger their behavior."
                    )
                raise

        step_execution_context.log.debug(
            f"Notebook execution complete for {name} at {executed_notebook_path}."
        )

        if output_notebook_name is not None:
            # yield output notebook binary stream as a solid output; the file is
            # read and closed first so no handle stays open while the generator
            # is suspended at the yield.
            with open(executed_notebook_path, "rb") as fd:
                executed_notebook_bytes = fd.read()
            yield Output(executed_notebook_bytes, output_notebook_name)
        else:
            # backcompat
            executed_notebook_file_handle = None
            try:
                # use binary mode when moving the file since certain file_managers such as S3
                # may try to hash the contents
                with open(executed_notebook_path, "rb") as fd:
                    executed_notebook_file_handle = step_context.resources.file_manager.write(
                        fd, mode="wb", ext="ipynb"
                    )
                    executed_notebook_materialization_path = (
                        executed_notebook_file_handle.path_desc
                    )

                yield AssetMaterialization(
                    asset_key=(asset_key_prefix + [f"{name}_output_notebook"]),
                    description="Location of output notebook in file manager",
                    metadata_entries=[
                        MetadataEntry.fspath(executed_notebook_materialization_path)
                    ],
                )
            except Exception:
                # if file manager writing errors, e.g. file manager is not provided, we throw a warning
                # and fall back to the previously stored temp executed notebook.
                step_context.log.warning(
                    "Error when attempting to materialize executed notebook using file manager: "
                    f"{str(serializable_error_info_from_exc_info(sys.exc_info()))}"
                    f"\nNow falling back to local: notebook execution was temporarily materialized at {executed_notebook_path}"
                    "\nIf you have supplied a file manager and expect to use it for materializing the "
                    'notebook, please include "file_manager" in the `required_resource_keys` argument '
                    f"to `{dagster_factory_name}`"
                )

            if output_notebook is not None:
                # The handle is still None here when the file manager write failed.
                yield Output(executed_notebook_file_handle, output_notebook)

        # deferred import for perf
        import scrapbook

        output_nb = scrapbook.read_notebook(executed_notebook_path)

        # Looked up once: it does not change between iterations.
        data_dict = output_nb.scraps.data_dict
        for output_name in step_execution_context.solid_def.output_dict:
            if output_name not in data_dict:
                continue
            # read outputs that were passed out of process via io manager from `yield_result`
            step_output_handle = StepOutputHandle(
                step_key=step_execution_context.step.key, output_name=output_name
            )
            output_context = step_execution_context.get_output_context(step_output_handle)
            io_manager = step_execution_context.get_io_manager(step_output_handle)
            value = io_manager.load_input(
                build_input_context(upstream_output=output_context)
            )
            yield Output(value, output_name)

        for key, scrap in output_nb.scraps.items():
            if not key.startswith("event-"):
                continue
            # NOTE(review): this unpickles a file whose path comes from the
            # executed notebook's scraps; it assumes that file was written by the
            # notebook run itself and is trusted - confirm before exposing this
            # to notebooks from untrusted sources.
            with open(scrap.data, "rb") as fd:
                event = pickle.load(fd)
            # The file is closed before raising or yielding.
            if isinstance(event, (Failure, RetryRequested)):
                raise event
            yield event