def _t_fn(compute_context, inputs):
    """Compute function for a dagstermill solid.

    Parameterizes the notebook with the step inputs, executes it with
    papermill, materializes the executed notebook (via the file manager when
    available, falling back to the local temp path), and re-yields outputs
    and pickled events recorded by the notebook.
    """
    check.inst_param(compute_context, "compute_context", SolidExecutionContext)
    check.param_invariant(
        isinstance(compute_context.run_config, dict),
        "context",
        "SystemComputeExecutionContext must have valid run_config",
    )
    system_compute_context = compute_context.get_system_context()

    def _write_executed_notebook():
        """Upload the executed notebook via the file manager.

        Returns (file_handle, path_desc); on any failure logs a warning and
        returns (None, local_executed_notebook_path). Shared by the failure
        and success paths (previously duplicated inline).
        """
        try:
            # use binary mode when moving the file since certain file_managers
            # such as S3 may try to hash the contents
            with open(executed_notebook_path, "rb") as fd:
                handle = compute_context.resources.file_manager.write(
                    fd, mode="wb", ext="ipynb"
                )
            return handle, handle.path_desc
        except Exception as exc:  # pylint: disable=broad-except
            compute_context.log.warning(
                "Error when attempting to materialize executed notebook using file manager (falling back to local): {exc}".format(
                    exc=str(exc)
                )
            )
            return None, executed_notebook_path

    with seven.TemporaryDirectory() as output_notebook_dir:
        with safe_tempfile_path() as output_log_path:
            # Unique scratch paths for the parameterized ("-inter") and
            # executed ("-out") copies of the notebook.
            parameterized_notebook_path = os.path.join(
                output_notebook_dir,
                "{prefix}-inter.ipynb".format(prefix=str(uuid.uuid4())),
            )
            executed_notebook_path = os.path.join(
                output_notebook_dir,
                "{prefix}-out.ipynb".format(prefix=str(uuid.uuid4())),
            )

            # Scaffold the registration here
            nb = load_notebook_node(notebook_path)
            nb_no_parameters = replace_parameters(
                system_compute_context,
                nb,
                get_papermill_parameters(system_compute_context, inputs, output_log_path),
            )
            write_ipynb(nb_no_parameters, parameterized_notebook_path)

            with user_code_error_boundary(
                DagstermillExecutionError,
                lambda: (
                    "Error occurred during the execution of Dagstermill solid "
                    "{solid_name}: {notebook_path}".format(
                        solid_name=name, notebook_path=notebook_path
                    )
                ),
            ):
                try:
                    papermill_engines.register("dagstermill", DagstermillNBConvertEngine)
                    papermill.execute_notebook(
                        input_path=parameterized_notebook_path,
                        output_path=executed_notebook_path,
                        engine_name="dagstermill",
                        log_output=True,
                    )
                except Exception:  # pylint: disable=broad-except
                    # Materialize whatever papermill managed to write before
                    # the failure so it can be inspected, then re-raise.
                    _, executed_notebook_materialization_path = _write_executed_notebook()
                    yield AssetMaterialization(
                        asset_key=(asset_key_prefix + [f"{name}_output_notebook"]),
                        description="Location of output notebook in file manager",
                        metadata_entries=[
                            EventMetadataEntry.fspath(
                                executed_notebook_materialization_path,
                                label="executed_notebook_path",
                            )
                        ],
                    )
                    # bare raise preserves the original traceback (the prior
                    # `raise exc` re-raised from the handler frame)
                    raise

            system_compute_context.log.debug(
                "Notebook execution complete for {name} at {executed_notebook_path}.".format(
                    name=name,
                    executed_notebook_path=executed_notebook_path,
                )
            )

            (
                executed_notebook_file_handle,
                executed_notebook_materialization_path,
            ) = _write_executed_notebook()
            yield AssetMaterialization(
                asset_key=(asset_key_prefix + [f"{name}_output_notebook"]),
                description="Location of output notebook in file manager",
                metadata_entries=[
                    EventMetadataEntry.fspath(executed_notebook_materialization_path)
                ],
            )

            if output_notebook is not None:
                # NOTE(review): if the file manager write failed above, the
                # handle is None here — preserved from the original behavior.
                yield Output(executed_notebook_file_handle, output_notebook)

            # deferred import for perf
            import scrapbook

            output_nb = scrapbook.read_notebook(executed_notebook_path)
            # Yield any solid outputs recorded in the notebook's scraps.
            data_dict = output_nb.scraps.data_dict
            for output_name, output_def in system_compute_context.solid_def.output_dict.items():
                if output_name in data_dict:
                    value = read_value(output_def.dagster_type, data_dict[output_name])
                    yield Output(value, output_name)
            # Replay pickled payloads recorded under "event-" scrap keys.
            for key, value in output_nb.scraps.items():
                if key.startswith("event-"):
                    with open(value.data, "rb") as fd:
                        yield pickle.loads(fd.read())
def _t_fn(step_context, inputs):
    """Compute function for a dagstermill solid.

    Parameterizes the notebook with the step inputs, executes it with
    papermill, materializes the executed notebook (file manager with local
    fallback), and re-yields outputs and pickled events recorded by the
    notebook. Failure/RetryRequested events raised inside the notebook are
    re-raised here so they take effect in the host process.
    """
    check.inst_param(step_context, "step_context", SolidExecutionContext)
    check.param_invariant(
        isinstance(step_context.run_config, dict),
        "context",
        "StepExecutionContext must have valid run_config",
    )
    step_execution_context = step_context.get_step_execution_context()
    with tempfile.TemporaryDirectory() as output_notebook_dir:
        with safe_tempfile_path() as output_log_path:
            # Scratch paths for the parameterized ("-inter") and executed
            # ("-out") copies of the notebook.
            parameterized_notebook_path = os.path.join(
                output_notebook_dir,
                "{prefix}-inter.ipynb".format(prefix=str(uuid.uuid4())),
            )
            executed_notebook_path = os.path.join(
                output_notebook_dir,
                "{prefix}-out.ipynb".format(prefix=str(uuid.uuid4())),
            )

            # Scaffold the registration here
            nb = load_notebook_node(notebook_path)
            nb_no_parameters = replace_parameters(
                step_execution_context,
                nb,
                get_papermill_parameters(step_execution_context, inputs, output_log_path),
            )
            write_ipynb(nb_no_parameters, parameterized_notebook_path)

            try:
                papermill_engines.register("dagstermill", DagstermillNBConvertEngine)
                papermill.execute_notebook(
                    input_path=parameterized_notebook_path,
                    output_path=executed_notebook_path,
                    engine_name="dagstermill",
                    log_output=True,
                )
            except Exception as ex:  # pylint: disable=broad-except
                # Best-effort: materialize whatever papermill wrote before the
                # failure so it can be inspected, then re-raise.
                try:
                    with open(executed_notebook_path, "rb") as fd:
                        executed_notebook_file_handle = (
                            step_context.resources.file_manager.write(
                                fd, mode="wb", ext="ipynb"
                            )
                        )
                        executed_notebook_materialization_path = (
                            executed_notebook_file_handle.path_desc
                        )
                except Exception:  # pylint: disable=broad-except
                    # File manager unavailable or failed: fall back to the
                    # local temp path for the materialization.
                    step_context.log.warning(
                        "Error when attempting to materialize executed notebook using file manager (falling back to local): {exc}".format(
                            exc=str(serializable_error_info_from_exc_info(sys.exc_info()))
                        )
                    )
                    executed_notebook_materialization_path = executed_notebook_path
                yield AssetMaterialization(
                    asset_key=(asset_key_prefix + [f"{name}_output_notebook"]),
                    description="Location of output notebook in file manager",
                    metadata_entries=[
                        EventMetadataEntry.fspath(
                            executed_notebook_materialization_path,
                            label="executed_notebook_path",
                        )
                    ],
                )
                # pylint: disable=no-member
                # A RetryRequested/Failure raised inside the notebook surfaces
                # as a PapermillExecutionError with that ename; warn the user
                # about the supported mechanism before re-raising.
                if isinstance(ex, PapermillExecutionError) and (
                    ex.ename == "RetryRequested" or ex.ename == "Failure"
                ):
                    step_execution_context.log.warn(
                        f"Encountered raised {ex.ename} in notebook. Use dagstermill.yield_event "
                        "with RetryRequested or Failure to trigger their behavior."
                    )
                raise

            step_execution_context.log.debug(
                "Notebook execution complete for {name} at {executed_notebook_path}.".format(
                    name=name,
                    executed_notebook_path=executed_notebook_path,
                )
            )

            executed_notebook_file_handle = None
            try:
                # use binary mode when moving the file since certain file_managers such as S3
                # may try to hash the contents
                with open(executed_notebook_path, "rb") as fd:
                    executed_notebook_file_handle = step_context.resources.file_manager.write(
                        fd, mode="wb", ext="ipynb"
                    )
                executed_notebook_materialization_path = executed_notebook_file_handle.path_desc
            except Exception:  # pylint: disable=broad-except
                # Same local fallback as the failure path above.
                step_context.log.warning(
                    "Error when attempting to materialize executed notebook using file manager (falling back to local): {exc}".format(
                        exc=str(serializable_error_info_from_exc_info(sys.exc_info()))
                    )
                )
                executed_notebook_materialization_path = executed_notebook_path
            yield AssetMaterialization(
                asset_key=(asset_key_prefix + [f"{name}_output_notebook"]),
                description="Location of output notebook in file manager",
                metadata_entries=[
                    EventMetadataEntry.fspath(executed_notebook_materialization_path)
                ],
            )

            if output_notebook is not None:
                # Optionally expose the executed notebook's file handle as a
                # named solid output (None if the file manager write failed).
                yield Output(executed_notebook_file_handle, output_notebook)

            # deferred import for perf
            import scrapbook

            output_nb = scrapbook.read_notebook(executed_notebook_path)
            # Re-yield any solid outputs recorded in the notebook's scraps.
            for (output_name, output_def) in step_execution_context.solid_def.output_dict.items():
                data_dict = output_nb.scraps.data_dict
                if output_name in data_dict:
                    value = read_value(output_def.dagster_type, data_dict[output_name])
                    yield Output(value, output_name)
            # Unpickle payloads recorded under "event-" scrap keys: Failure
            # and RetryRequested are raised so they take effect; all other
            # events are yielded through.
            for key, value in output_nb.scraps.items():
                if key.startswith("event-"):
                    with open(value.data, "rb") as fd:
                        event = pickle.loads(fd.read())
                        if isinstance(event, (Failure, RetryRequested)):
                            raise event
                        else:
                            yield event
def _t_fn(compute_context, inputs):
    """Compute function for a dagstermill solid.

    Parameterizes the notebook with the step inputs, executes it with
    papermill under a user-code error boundary, materializes the output
    notebook's filesystem location, and re-yields outputs and pickled events
    recorded by the notebook.
    """
    check.inst_param(compute_context, 'compute_context', ComputeExecutionContext)
    check.param_invariant(
        isinstance(compute_context.environment_dict, dict),
        'context',
        'SystemComputeExecutionContext must have valid environment_dict',
    )
    system_compute_context = compute_context.get_system_context()

    # NOTE(review): hard-coded scratch location; executed notebooks under
    # /tmp/dagstermill/<run_id>/ are never cleaned up by this function.
    base_dir = '/tmp/dagstermill/{run_id}/'.format(run_id=compute_context.run_id)
    output_notebook_dir = os.path.join(base_dir, 'output_notebooks/')
    mkdir_p(output_notebook_dir)

    temp_path = os.path.join(
        output_notebook_dir, '{prefix}-out.ipynb'.format(prefix=str(uuid.uuid4()))
    )

    with safe_tempfile_path() as output_log_path:
        # Scaffold the registration here
        nb = load_notebook_node(notebook_path)
        nb_no_parameters = replace_parameters(
            system_compute_context,
            nb,
            get_papermill_parameters(system_compute_context, inputs, output_log_path),
        )
        intermediate_path = os.path.join(
            output_notebook_dir, '{prefix}-inter.ipynb'.format(prefix=str(uuid.uuid4()))
        )
        write_ipynb(nb_no_parameters, intermediate_path)

        with user_code_error_boundary(
            DagstermillExecutionError,
            lambda: (
                'Error occurred during the execution of Dagstermill solid '
                '{solid_name}: {notebook_path}'.format(
                    solid_name=name, notebook_path=notebook_path
                )
            ),
        ):
            try:
                papermill_engines.register('dagstermill', DagstermillNBConvertEngine)
                papermill.execute_notebook(
                    intermediate_path, temp_path, engine_name='dagstermill', log_output=True
                )
            except Exception:  # pylint: disable=broad-except
                # Surface the (partially) executed notebook before propagating
                # the failure so users can inspect it.
                yield Materialization(
                    label='output_notebook',
                    description='Location of output notebook on the filesystem',
                    metadata_entries=[EventMetadataEntry.fspath(temp_path)],
                )
                # bare raise preserves the original traceback (the prior
                # `raise exc` re-raised from the handler frame)
                raise

        # deferred import for perf
        import scrapbook

        output_nb = scrapbook.read_notebook(temp_path)
        system_compute_context.log.debug(
            'Notebook execution complete for {name}. Data is {data}'.format(
                name=name, data=output_nb.scraps
            )
        )
        yield Materialization(
            label='output_notebook',
            description='Location of output notebook on the filesystem',
            metadata_entries=[EventMetadataEntry.fspath(temp_path)],
        )

        # Yield any solid outputs recorded in the notebook's scraps
        # (data_dict hoisted: it is invariant across iterations).
        data_dict = output_nb.scraps.data_dict
        for output_name, output_def in system_compute_context.solid_def.output_dict.items():
            if output_name in data_dict:
                value = read_value(output_def.dagster_type, data_dict[output_name])
                yield Output(value, output_name)
        # Replay pickled payloads recorded under 'event-' scrap keys.
        for key, value in output_nb.scraps.items():
            if key.startswith('event-'):
                with open(value.data, 'rb') as fd:
                    yield pickle.loads(fd.read())
def _t_fn(compute_context, inputs):
    """Compute function for a dagstermill solid.

    Parameterizes the notebook with the step inputs, executes it with
    papermill under a user-code error boundary, materializes the executed
    notebook (file manager with local fallback), and re-yields outputs and
    pickled events recorded by the notebook.
    """
    check.inst_param(compute_context, 'compute_context', SolidExecutionContext)
    check.param_invariant(
        isinstance(compute_context.environment_dict, dict),
        'context',
        'SystemComputeExecutionContext must have valid environment_dict',
    )
    system_compute_context = compute_context.get_system_context()
    with seven.TemporaryDirectory() as output_notebook_dir:
        with safe_tempfile_path() as output_log_path:
            # Scratch paths for the parameterized ("-inter") and executed
            # ("-out") copies of the notebook.
            parameterized_notebook_path = os.path.join(
                output_notebook_dir,
                '{prefix}-inter.ipynb'.format(prefix=str(uuid.uuid4())),
            )
            executed_notebook_path = os.path.join(
                output_notebook_dir,
                '{prefix}-out.ipynb'.format(prefix=str(uuid.uuid4())),
            )

            # Scaffold the registration here
            nb = load_notebook_node(notebook_path)
            nb_no_parameters = replace_parameters(
                system_compute_context,
                nb,
                get_papermill_parameters(system_compute_context, inputs, output_log_path),
            )
            write_ipynb(nb_no_parameters, parameterized_notebook_path)

            with user_code_error_boundary(
                DagstermillExecutionError,
                lambda: (
                    'Error occurred during the execution of Dagstermill solid '
                    '{solid_name}: {notebook_path}'.format(
                        solid_name=name, notebook_path=notebook_path
                    )
                ),
            ):
                try:
                    papermill_engines.register('dagstermill', DagstermillNBConvertEngine)
                    papermill.execute_notebook(
                        input_path=parameterized_notebook_path,
                        output_path=executed_notebook_path,
                        engine_name='dagstermill',
                        log_output=True,
                    )
                except Exception:  # pylint: disable=broad-except
                    # Best-effort: materialize whatever papermill wrote before
                    # the failure so it can be inspected, then re-raise.
                    try:
                        # Binary mode (was text 'r'/'w'): notebooks are JSON
                        # that may contain non-ASCII, and some file managers
                        # (e.g. S3) may hash the contents.
                        with open(executed_notebook_path, 'rb') as fd:
                            executed_notebook_file_handle = compute_context.file_manager.write(
                                fd, mode='wb', ext='ipynb'
                            )
                            executed_notebook_materialization_path = (
                                executed_notebook_file_handle.path_desc
                            )
                    except Exception as exc_inner:  # pylint: disable=broad-except
                        compute_context.log.warning(
                            'Error when attempting to materialize executed notebook using file manager (falling back to local): {exc}'.format(
                                exc=exc_inner
                            )
                        )
                        executed_notebook_materialization_path = executed_notebook_path
                    yield Materialization(
                        label='output_notebook',
                        description='Location of output notebook in file manager',
                        metadata_entries=[
                            EventMetadataEntry.fspath(executed_notebook_materialization_path)
                        ],
                    )
                    # bare raise preserves the original traceback (the prior
                    # `raise exc` re-raised from the handler frame)
                    raise

            system_compute_context.log.debug(
                'Notebook execution complete for {name} at {executed_notebook_path}.'.format(
                    name=name,
                    executed_notebook_path=executed_notebook_path,
                )
            )

            # Initialize so a file-manager failure below cannot leave the name
            # unbound when it is yielded as the output_notebook output.
            executed_notebook_file_handle = None
            try:
                # Binary mode here as well so the file manager sees the exact bytes.
                with open(executed_notebook_path, 'rb') as fd:
                    executed_notebook_file_handle = compute_context.file_manager.write(
                        fd, mode='wb', ext='ipynb'
                    )
                executed_notebook_materialization_path = executed_notebook_file_handle.path_desc
            except Exception as exc:  # pylint: disable=broad-except
                compute_context.log.warning(
                    'Error when attempting to materialize executed notebook using file manager (falling back to local): {exc}'.format(
                        exc=str(exc)
                    )
                )
                executed_notebook_materialization_path = executed_notebook_path
            yield Materialization(
                label='output_notebook',
                description='Location of output notebook in file manager',
                metadata_entries=[
                    EventMetadataEntry.fspath(executed_notebook_materialization_path)
                ],
            )

            if output_notebook is not None:
                yield Output(executed_notebook_file_handle, output_notebook)

            # deferred import for perf
            import scrapbook

            output_nb = scrapbook.read_notebook(executed_notebook_path)
            # Yield any solid outputs recorded in the notebook's scraps
            # (data_dict hoisted: invariant across iterations).
            data_dict = output_nb.scraps.data_dict
            for output_name, output_def in system_compute_context.solid_def.output_dict.items():
                if output_name in data_dict:
                    value = read_value(output_def.dagster_type, data_dict[output_name])
                    yield Output(value, output_name)
            # Replay pickled payloads recorded under 'event-' scrap keys.
            for key, value in output_nb.scraps.items():
                if key.startswith('event-'):
                    with open(value.data, 'rb') as fd:
                        yield pickle.loads(fd.read())
def _t_fn(step_context, inputs):
    """Compute function for a dagstermill solid/op.

    Parameterizes the notebook with the step inputs, executes it with
    papermill, surfaces the executed notebook (either as a binary output or,
    for back-compat, via the file manager), and re-yields outputs (through
    the io manager) and pickled events recorded by the notebook.
    """
    check.inst_param(step_context, "step_context", SolidExecutionContext)
    check.param_invariant(
        isinstance(step_context.run_config, dict),
        "context",
        "StepExecutionContext must have valid run_config",
    )
    step_execution_context = step_context.get_step_execution_context()
    with tempfile.TemporaryDirectory() as output_notebook_dir:
        with safe_tempfile_path() as output_log_path:
            prefix = str(uuid.uuid4())
            parameterized_notebook_path = os.path.join(
                output_notebook_dir, f"{prefix}-inter.ipynb"
            )
            executed_notebook_path = os.path.join(output_notebook_dir, f"{prefix}-out.ipynb")

            # Scaffold the registration here
            nb = load_notebook_node(notebook_path)
            compute_descriptor = (
                "solid" if dagster_factory_name == "define_dagstermill_solid" else "op"
            )
            nb_no_parameters = replace_parameters(
                step_execution_context,
                nb,
                get_papermill_parameters(
                    step_execution_context, inputs, output_log_path, compute_descriptor
                ),
            )
            write_ipynb(nb_no_parameters, parameterized_notebook_path)

            try:
                papermill_engines.register("dagstermill", DagstermillEngine)
                papermill.execute_notebook(
                    input_path=parameterized_notebook_path,
                    output_path=executed_notebook_path,
                    engine_name="dagstermill",
                    log_output=True,
                )
            except Exception as ex:  # pylint: disable=broad-except
                step_execution_context.log.warn(
                    "Error when attempting to materialize executed notebook: {exc}".format(
                        exc=str(serializable_error_info_from_exc_info(sys.exc_info()))
                    )
                )
                # pylint: disable=no-member
                # compat: a RetryRequested/Failure raised inside the notebook
                # surfaces as an ExecutionError with that ename.
                if isinstance(ex, ExecutionError) and (
                    ex.ename == "RetryRequested" or ex.ename == "Failure"
                ):
                    step_execution_context.log.warn(
                        f"Encountered raised {ex.ename} in notebook. Use dagstermill.yield_event "
                        "with RetryRequested or Failure to trigger their behavior."
                    )
                raise

            step_execution_context.log.debug(
                "Notebook execution complete for {name} at {executed_notebook_path}.".format(
                    name=name,
                    executed_notebook_path=executed_notebook_path,
                )
            )

            if output_notebook_name is not None:
                # yield output notebook binary stream as a solid output
                with open(executed_notebook_path, "rb") as fd:
                    yield Output(fd.read(), output_notebook_name)
            else:
                # backcompat
                executed_notebook_file_handle = None
                try:
                    # use binary mode when moving the file since certain
                    # file_managers such as S3 may try to hash the contents
                    with open(executed_notebook_path, "rb") as fd:
                        executed_notebook_file_handle = step_context.resources.file_manager.write(
                            fd, mode="wb", ext="ipynb"
                        )
                        executed_notebook_materialization_path = (
                            executed_notebook_file_handle.path_desc
                        )
                    yield AssetMaterialization(
                        asset_key=(asset_key_prefix + [f"{name}_output_notebook"]),
                        description="Location of output notebook in file manager",
                        metadata_entries=[
                            MetadataEntry.fspath(executed_notebook_materialization_path)
                        ],
                    )
                except Exception:  # pylint: disable=broad-except
                    # if file manager writing errors, e.g. file manager is not provided, we throw a warning
                    # and fall back to the previously stored temp executed notebook.
                    step_context.log.warning(
                        "Error when attempting to materialize executed notebook using file manager: "
                        f"{str(serializable_error_info_from_exc_info(sys.exc_info()))}"
                        f"\nNow falling back to local: notebook execution was temporarily materialized at {executed_notebook_path}"
                        "\nIf you have supplied a file manager and expect to use it for materializing the "
                        'notebook, please include "file_manager" in the `required_resource_keys` argument '
                        f"to `{dagster_factory_name}`"
                    )
                if output_notebook is not None:
                    yield Output(executed_notebook_file_handle, output_notebook)

            # deferred import for perf
            import scrapbook

            output_nb = scrapbook.read_notebook(executed_notebook_path)
            # Iterate keys only (the output defs themselves are unused here);
            # data_dict hoisted out of the loop since it is invariant.
            data_dict = output_nb.scraps.data_dict
            for output_name in step_execution_context.solid_def.output_dict:
                if output_name in data_dict:
                    # read outputs that were passed out of process via io manager from `yield_result`
                    step_output_handle = StepOutputHandle(
                        step_key=step_execution_context.step.key,
                        output_name=output_name,
                    )
                    output_context = step_execution_context.get_output_context(
                        step_output_handle
                    )
                    io_manager = step_execution_context.get_io_manager(step_output_handle)
                    value = io_manager.load_input(
                        build_input_context(upstream_output=output_context)
                    )
                    yield Output(value, output_name)
            # Unpickle payloads recorded under "event-" scrap keys: Failure
            # and RetryRequested are raised so they take effect; all other
            # events are yielded through.
            for key, value in output_nb.scraps.items():
                if key.startswith("event-"):
                    with open(value.data, "rb") as fd:
                        event = pickle.loads(fd.read())
                        if isinstance(event, (Failure, RetryRequested)):
                            raise event
                        else:
                            yield event
def _t_fn(compute_context, inputs):
    """Compute function for a dagstermill solid.

    Parameterizes the notebook with the step inputs, executes it with
    papermill while a watcher thread streams the notebook's sqlite-backed log
    into the compute context's logger, materializes the output notebook's
    filesystem location, and re-yields outputs and pickled events recorded by
    the notebook.
    """
    check.inst_param(compute_context, 'compute_context', ComputeExecutionContext)
    check.param_invariant(
        isinstance(compute_context.environment_dict, dict),
        'context',
        'SystemComputeExecutionContext must have valid environment_dict',
    )
    system_compute_context = compute_context.get_system_context()

    # NOTE(review): hard-coded scratch location; executed notebooks under
    # /tmp/dagstermill/<run_id>/ are never cleaned up by this function.
    base_dir = '/tmp/dagstermill/{run_id}/'.format(run_id=compute_context.run_id)
    output_notebook_dir = os.path.join(base_dir, 'output_notebooks/')
    mkdir_p(output_notebook_dir)

    temp_path = os.path.join(
        output_notebook_dir, '{prefix}-out.ipynb'.format(prefix=str(uuid.uuid4()))
    )

    with tempfile.NamedTemporaryFile() as output_log_file:
        output_log_path = output_log_file.name
        init_db(output_log_path)

        nb = load_notebook_node(notebook_path)
        nb_no_parameters = replace_parameters(
            system_compute_context,
            nb,
            get_papermill_parameters(system_compute_context, inputs, output_log_path),
        )
        intermediate_path = os.path.join(
            output_notebook_dir, '{prefix}-inter.ipynb'.format(prefix=str(uuid.uuid4()))
        )
        write_ipynb(nb_no_parameters, intermediate_path)

        # Although the type of is_done is threading._Event in py2, not threading.Event,
        # it is still constructed using the threading.Event() factory
        is_done = threading.Event()

        def log_watcher_thread_target():
            # Stream log records from the sqlite db into the compute
            # context's logger until is_done is set.
            log_watcher = JsonSqlite3LogWatcher(
                output_log_path, system_compute_context.log, is_done
            )
            log_watcher.watch()

        log_watcher_thread = threading.Thread(target=log_watcher_thread_target)
        log_watcher_thread.start()

        with user_code_error_boundary(
            DagstermillExecutionError,
            lambda: 'Error occurred during the execution of Dagstermill solid '
            '{solid_name}: {notebook_path}'.format(
                solid_name=name, notebook_path=notebook_path
            ),
        ):
            try:
                papermill_engines.register('dagstermill', DagstermillNBConvertEngine)
                papermill.execute_notebook(
                    intermediate_path, temp_path, engine_name='dagstermill', log_output=True
                )
            except Exception:  # pylint: disable=broad-except
                # Surface the (partially) executed notebook before propagating
                # the failure so users can inspect it.
                yield Materialization(
                    label='output_notebook',
                    description='Location of output notebook on the filesystem',
                    metadata_entries=[EventMetadataEntry.fspath(temp_path)],
                )
                # bare raise preserves the original traceback (the prior
                # `raise exc` re-raised from the handler frame)
                raise
            finally:
                # Always stop and join the watcher thread, success or failure.
                is_done.set()
                log_watcher_thread.join()

        output_nb = scrapbook.read_notebook(temp_path)
        system_compute_context.log.debug(
            'Notebook execution complete for {name}. Data is {data}'.format(
                name=name, data=output_nb.scraps
            )
        )
        yield Materialization(
            label='output_notebook',
            description='Location of output notebook on the filesystem',
            metadata_entries=[EventMetadataEntry.fspath(temp_path)],
        )

        # Yield any solid outputs recorded in the notebook's scraps
        # (data_dict hoisted: invariant across iterations).
        data_dict = output_nb.scraps.data_dict
        for output_name, output_def in system_compute_context.solid_def.output_dict.items():
            if output_name in data_dict:
                value = read_value(output_def.runtime_type, data_dict[output_name])
                yield Output(value, output_name)
        # Replay pickled payloads recorded under 'event-' scrap keys.
        # (Removed a leftover debug `print(output_nb.scraps)` that fired once
        # per scrap in this loop.)
        for key, value in output_nb.scraps.items():
            if key.startswith('event-'):
                with open(value.data, 'rb') as fd:
                    yield pickle.loads(fd.read())