class Manager:
    def __init__(self):
        self.repository_def = None
        self.populated_by_papermill = False
        self.pipeline_def = None
        self.solid_def = None
        self.marshal_dir = None
        self.context = None
        self.input_name_type_dict = None
        self.output_name_type_dict = None
        self.solid_def_name = None

    def register_repository(self, repository_def):
        self.repository_def = repository_def

    def define_out_of_pipeline_context(self, context_config):
        pipeline_def = PipelineDefinition([], name='Ephemeral Notebook Pipeline')

        # BUG: If the context cleans up after itself (e.g. closes a db connection or similar)
        # This will instigate that process *before* return. We are going to have to
        # manage this manually (without an if block) in order to make this work.
        # See https://github.com/dagster-io/dagster/issues/796
        with yield_pipeline_execution_context(
            pipeline_def,
            {} if context_config is None else {'context': context_config},
            RunConfig(run_id=''),
        ) as pipeline_context:
            self.context = DagstermillInNotebookExecutionContext(pipeline_context)
        return self.context

    def yield_result(self, value, output_name):
        if not self.populated_by_papermill:
            return value

        if self.solid_def is None:
            if output_name not in self.output_name_type_dict:
                raise DagstermillError(
                    'Solid {solid_name} does not have output named {output_name}'.format(
                        solid_name=self.solid_def_name, output_name=output_name
                    )
                )
            runtime_type_enum = self.output_name_type_dict[output_name]
            if runtime_type_enum == SerializableRuntimeType.SCALAR:
                pm.record(output_name, value)
            elif runtime_type_enum == SerializableRuntimeType.ANY and is_json_serializable(value):
                pm.record(output_name, value)
            elif runtime_type_enum == SerializableRuntimeType.PICKLE_SERIALIZABLE:
                out_file = os.path.join(self.marshal_dir, 'output-{}'.format(output_name))
                serialize_to_file(
                    MANAGER_FOR_NOTEBOOK_INSTANCE.context,
                    PickleSerializationStrategy(),
                    value,
                    out_file,
                )
                pm.record(output_name, out_file)
            else:
                raise DagstermillError(
                    'Output Definition for output {output_name} requires repo registration '
                    'since it has a complex serialization format'.format(output_name=output_name)
                )
        else:
            if not self.solid_def.has_output(output_name):
                raise DagstermillError(
                    'Solid {solid_name} does not have output named {output_name}'.format(
                        solid_name=self.solid_def.name, output_name=output_name
                    )
                )
            runtime_type = self.solid_def.output_def_named(output_name).runtime_type
            out_file = os.path.join(self.marshal_dir, 'output-{}'.format(output_name))
            pm.record(output_name, write_value(runtime_type, value, out_file))

    def populate_context(
        self,
        run_id,
        solid_def_name,
        pipeline_def_name,
        marshal_dir,
        environment_dict,
        output_log_path,
        input_name_type_dict,
        output_name_type_dict,
    ):
        check.dict_param(environment_dict, 'environment_dict')
        self.populated_by_papermill = True
        self.solid_def_name = solid_def_name
        self.marshal_dir = marshal_dir

        if self.repository_def is None:
            self.pipeline_def = PipelineDefinition(
                [], name='Dummy Pipeline (No Repo Registration)'
            )
            self.input_name_type_dict = dict_to_enum(input_name_type_dict)
            self.output_name_type_dict = dict_to_enum(output_name_type_dict)
            for _, runtime_type_enum in self.input_name_type_dict.items():
                if runtime_type_enum == SerializableRuntimeType.NONE:
                    raise DagstermillError(
                        'If Dagstermill solids have inputs that require serialization strategies '
                        'that are not pickling, then you must register a repository within '
                        'notebook by calling dm.register_repository(repository_def)'
                    )
            for _, runtime_type_enum in self.output_name_type_dict.items():
                if runtime_type_enum == SerializableRuntimeType.NONE:
                    raise DagstermillError(
                        'If Dagstermill solids have outputs that require serialization strategies '
                        'that are not pickling, then you must register a repository within '
                        'notebook by calling dm.register_repository(repository_def).'
                    )
            with yield_pipeline_execution_context(
                self.pipeline_def, {}, RunConfig(run_id=run_id)
            ) as pipeline_context:
                self.context = DagstermillInNotebookExecutionContext(pipeline_context)
        else:
            self.pipeline_def = self.repository_def.get_pipeline(pipeline_def_name)
            check.invariant(self.pipeline_def.has_solid_def(solid_def_name))
            self.solid_def = self.pipeline_def.solid_def_named(solid_def_name)

            loggers = None
            if output_log_path != 0:  # there is no output log
                event_logger = construct_json_event_logger(output_log_path)
                loggers = [event_logger]
            # do not include event_callback in ExecutionMetadata,
            # since that'll be taken care of by side-channel established by event_logger
            execution_metadata = RunConfig(run_id, loggers=loggers)

            # See block comment above referencing this issue
            # See https://github.com/dagster-io/dagster/issues/796
            with yield_pipeline_execution_context(
                self.pipeline_def, environment_dict, execution_metadata
            ) as pipeline_context:
                self.context = DagstermillInNotebookExecutionContext(pipeline_context)

        return self.context
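# A minimal usage sketch for the class above, assuming (as its own error
# strings suggest) that the dagstermill package is imported as `dm` and
# re-exports this Manager's methods via a module-level instance
# (MANAGER_FOR_NOTEBOOK_INSTANCE); `define_repository` is a hypothetical
# user-defined function returning a RepositoryDefinition. In a notebook cell:
#
#     import dagstermill as dm
#
#     # Registering a repository lets solids use serialization strategies
#     # beyond pickling:
#     dm.register_repository(define_repository())
#
#     # Outside of pipeline execution, build an ephemeral context for
#     # interactive work:
#     context = dm.define_out_of_pipeline_context(None)
#     context.log.info('Running interactively')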
class Manager:
    def __init__(self):
        self.repository_def = None
        self.populated_by_papermill = False
        self.pipeline_def = None
        self.solid_def = None
        self.marshal_dir = None
        self.context = None
        self.input_name_type_dict = None
        self.output_name_type_dict = None
        self.solid_def_name = None
        self.resources_stack = None

    def register_repository(self, repository_def):
        self.repository_def = repository_def

    def deregister_repository(self):
        # This function is intended to support test cases, and should not be invoked
        # from user notebooks.
        self.repository_def = None

    @contextmanager
    def setup_resources(self, pipeline_def, environment_config, run_config, log_manager):
        '''This context manager is a drop-in replacement for
        dagster.core.execution.context_creation_pipeline.create_resources. It uses the Manager's
        instance of ResourcesStack to create resources, but does not tear them down when the
        context manager returns -- teardown must be managed manually using
        Manager.teardown_resources().
        '''
        # pylint: disable=protected-access
        self.resources_stack = ResourcesStack(
            pipeline_def, environment_config, run_config, log_manager
        )
        yield self.resources_stack.create()

    def define_out_of_pipeline_context(self, config=None):
        '''Defines a context to be used in a notebook (i.e., not in pipeline execution).'''
        config = check.opt_dict_param(config, 'config')

        pipeline_def = PipelineDefinition([], name='Ephemeral Notebook Pipeline')

        if config.keys():
            warnings.warn(
                'Config keys will not be respected for in-notebook '
                'execution: [{keys}]'.format(
                    keys=', '.join(['\'{key}\''.format(key=key) for key in config.keys()])
                )
            )

        config = {}

        run_config = RunConfig()

        with scoped_pipeline_context(
            pipeline_def, config, run_config, scoped_resources_builder_cm=self.setup_resources
        ) as pipeline_context:
            self.context = DagstermillInNotebookExecutionContext(
                pipeline_context, out_of_pipeline=True
            )

        if self.context.resources:  # pylint: disable=protected-access
            warnings.warn(
                'Call dagstermill.teardown() to finalize resources attached to the context.'
            )

        return self.context

    def yield_result(self, value, output_name='result'):
        if not self.populated_by_papermill:
            return value

        if self.solid_def is None:
            if output_name not in self.output_name_type_dict:
                raise DagstermillError(
                    'Solid {solid_name} does not have output named {output_name}'.format(
                        solid_name=self.solid_def_name, output_name=output_name
                    )
                )
            runtime_type_enum = self.output_name_type_dict[output_name]
            if runtime_type_enum == SerializableRuntimeType.SCALAR:
                scrapbook.glue(output_name, value)
            elif runtime_type_enum == SerializableRuntimeType.ANY and is_json_serializable(value):
                scrapbook.glue(output_name, value)
            elif runtime_type_enum == SerializableRuntimeType.PICKLE_SERIALIZABLE:
                out_file = os.path.join(self.marshal_dir, 'output-{}'.format(output_name))
                PickleSerializationStrategy().serialize_to_file(value, out_file)
                scrapbook.glue(output_name, out_file)
            else:
                raise DagstermillError(
                    # Discuss this in the docs and improve error message
                    # https://github.com/dagster-io/dagster/issues/1275
                    # https://github.com/dagster-io/dagster/issues/1276
                    'Output Definition for output {output_name} requires repo registration '
                    'since it has a complex serialization format'.format(output_name=output_name)
                )
        else:
            if not self.solid_def.has_output(output_name):
                raise DagstermillError(
                    'Solid {solid_name} does not have output named {output_name}'.format(
                        solid_name=self.solid_def.name, output_name=output_name
                    )
                )
            runtime_type = self.solid_def.output_def_named(output_name).runtime_type
            out_file = os.path.join(self.marshal_dir, 'output-{}'.format(output_name))
            scrapbook.glue(output_name, write_value(runtime_type, value, out_file))

    def yield_event(self, dagster_event):
        if not self.populated_by_papermill:
            return dagster_event

        event_id = 'event-{event_uuid}'.format(event_uuid=str(uuid.uuid4()))
        out_file_path = os.path.join(self.marshal_dir, event_id)
        with open(out_file_path, 'wb') as fd:
            fd.write(pickle.dumps(dagster_event, PICKLE_PROTOCOL))

        scrapbook.glue(event_id, out_file_path)

    def populate_context(
        self,
        run_id=None,
        mode=None,
        solid_def_name=None,
        pipeline_name=None,
        marshal_dir=None,
        environment_config=None,
        input_name_type_dict=None,
        output_name_type_dict=None,
        output_log_path=None,
        **_kwargs
    ):
        check.str_param(run_id, 'run_id')
        check.str_param(mode, 'mode')
        check.str_param(solid_def_name, 'solid_def_name')
        check.str_param(pipeline_name, 'pipeline_name')
        check.str_param(marshal_dir, 'marshal_dir')
        check.dict_param(environment_config, 'environment_config')
        check.dict_param(input_name_type_dict, 'input_name_type_dict')
        check.dict_param(output_name_type_dict, 'output_name_type_dict')
        check.str_param(output_log_path, 'output_log_path')

        self.populated_by_papermill = True
        self.solid_def_name = solid_def_name
        self.marshal_dir = marshal_dir

        logger_def = construct_logger(output_log_path)
        loggers = {'dagstermill': logger_def}

        if self.repository_def is None:
            self.solid_def = None
            self.pipeline_def = PipelineDefinition(
                [],
                mode_definitions=[ModeDefinition(loggers=loggers)],
                name='Dummy Pipeline (No Repo Registration)',
            )
            self.input_name_type_dict = dict_to_enum(input_name_type_dict)
            self.output_name_type_dict = dict_to_enum(output_name_type_dict)
            for _, runtime_type_enum in self.input_name_type_dict.items():
                if runtime_type_enum == SerializableRuntimeType.NONE:
                    raise DagstermillError(
                        'If Dagstermill solids have inputs that require serialization strategies '
                        'that are not pickling, then you must register a repository within '
                        'notebook by calling dagstermill.register_repository(repository_def)'
                    )
            for _, runtime_type_enum in self.output_name_type_dict.items():
                if runtime_type_enum == SerializableRuntimeType.NONE:
                    raise DagstermillError(
                        'If Dagstermill solids have outputs that require serialization strategies '
                        'that are not pickling, then you must register a repository within '
                        'notebook by calling dagstermill.register_repository(repository_def).'
                    )
            environment_config = {'loggers': {'dagstermill': {}}}
            run_config = RunConfig(run_id=run_id, mode=mode)
        else:
            self.pipeline_def = self.repository_def.get_pipeline(pipeline_name)
            check.invariant(
                self.pipeline_def.has_solid_def(solid_def_name),
                'solid {} not found'.format(solid_def_name),
            )
            self.solid_def = self.pipeline_def.solid_def_named(solid_def_name)

            logger = logger_def.logger_fn(
                InitLoggerContext({}, self.pipeline_def, logger_def, run_id)
            )
            run_config = RunConfig(run_id, loggers=[logger], mode=mode)

        with scoped_pipeline_context(
            self.pipeline_def,
            environment_config,
            run_config,
            scoped_resources_builder_cm=self.setup_resources,
        ) as pipeline_context:
            self.context = DagstermillInNotebookExecutionContext(pipeline_context)

        return self.context

    def teardown_resources(self):
        if self.resources_stack is not None:
            self.resources_stack.teardown()
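# A minimal sketch of the resource lifecycle implied by setup_resources and
# teardown_resources above, assuming the package re-exports the Manager's
# methods at module level (the warning in define_out_of_pipeline_context
# itself points at dagstermill.teardown()):
#
#     import dagstermill as dm
#
#     context = dm.define_out_of_pipeline_context()
#     try:
#         # Resources built via setup_resources stay live across notebook
#         # cells, because the ResourcesStack is deliberately not unwound
#         # when scoped_pipeline_context exits.
#         context.log.info('resources remain attached to the context')
#     finally:
#         dm.teardown()  # expected to delegate to Manager.teardown_resources()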
class Manager:
    def __init__(self):
        self.repository_def = None
        self.populated_by_papermill = False
        self.pipeline_def = None
        self.solid_def = None
        self.marshal_dir = None
        self.context = None
        self.input_name_type_dict = None
        self.output_name_type_dict = None
        self.solid_def_name = None

    def register_repository(self, repository_def):
        self.repository_def = repository_def

    def deregister_repository(self):
        self.repository_def = None

    def define_out_of_pipeline_context(self, config=None):
        '''Defines a context to be used in a notebook (i.e., not in pipeline execution).'''
        config = check.opt_dict_param(config, 'config')

        pipeline_def = PipelineDefinition([], name='Ephemeral Notebook Pipeline')

        # BUG: If the context cleans up after itself (e.g. closes a db connection or similar)
        # This will instigate that process *before* return. We are going to have to
        # manage this manually (without an if block) in order to make this work.
        # See https://github.com/dagster-io/dagster/issues/796
        if config.keys():
            warnings.warn(
                'Config keys will not be respected for in-notebook '
                'execution: [{keys}]'.format(
                    keys=', '.join(['\'{key}\''.format(key=key) for key in config.keys()])
                )
            )

        config = {}

        with scoped_pipeline_context(
            pipeline_def, config, RunConfig(run_id='')
        ) as pipeline_context:
            self.context = DagstermillInNotebookExecutionContext(
                pipeline_context, out_of_pipeline=True
            )
        return self.context

    def yield_result(self, value, output_name='result'):
        if not self.populated_by_papermill:
            return value

        if self.solid_def is None:
            if output_name not in self.output_name_type_dict:
                raise DagstermillError(
                    'Solid {solid_name} does not have output named {output_name}'.format(
                        solid_name=self.solid_def_name, output_name=output_name
                    )
                )
            runtime_type_enum = self.output_name_type_dict[output_name]
            if runtime_type_enum == SerializableRuntimeType.SCALAR:
                scrapbook.glue(output_name, value)
            elif runtime_type_enum == SerializableRuntimeType.ANY and is_json_serializable(value):
                scrapbook.glue(output_name, value)
            elif runtime_type_enum == SerializableRuntimeType.PICKLE_SERIALIZABLE:
                out_file = os.path.join(self.marshal_dir, 'output-{}'.format(output_name))
                PickleSerializationStrategy().serialize_to_file(value, out_file)
                scrapbook.glue(output_name, out_file)
            else:
                raise DagstermillError(
                    # Discuss this in the docs and improve error message
                    # https://github.com/dagster-io/dagster/issues/1275
                    # https://github.com/dagster-io/dagster/issues/1276
                    'Output Definition for output {output_name} requires repo registration '
                    'since it has a complex serialization format'.format(output_name=output_name)
                )
        else:
            if not self.solid_def.has_output(output_name):
                raise DagstermillError(
                    'Solid {solid_name} does not have output named {output_name}'.format(
                        solid_name=self.solid_def.name, output_name=output_name
                    )
                )
            runtime_type = self.solid_def.output_def_named(output_name).runtime_type
            out_file = os.path.join(self.marshal_dir, 'output-{}'.format(output_name))
            scrapbook.glue(output_name, write_value(runtime_type, value, out_file))

    def yield_materialization(self, path, description):
        if not self.populated_by_papermill:
            return Materialization(path, description)

        materialization_id = 'materialization-{materialization_uuid}'.format(
            materialization_uuid=str(uuid.uuid4())
        )
        out_file_path = os.path.join(self.marshal_dir, materialization_id)
        with open(out_file_path, 'wb') as fd:
            fd.write(pickle.dumps(Materialization(path, description), PICKLE_PROTOCOL))

        scrapbook.glue(materialization_id, out_file_path)

    def populate_context(
        self,
        run_id,
        mode,
        solid_def_name,
        pipeline_def_name,
        marshal_dir,
        environment_dict,
        input_name_type_dict,
        output_name_type_dict,
        output_log_path,
    ):
        check.dict_param(environment_dict, 'environment_dict')
        self.populated_by_papermill = True
        self.solid_def_name = solid_def_name
        self.marshal_dir = marshal_dir

        logger_def = construct_logger(output_log_path)
        loggers = {'dagstermill': logger_def}

        if self.repository_def is None:
            self.solid_def = None
            self.pipeline_def = PipelineDefinition(
                [],
                mode_definitions=[ModeDefinition(loggers=loggers)],
                name='Dummy Pipeline (No Repo Registration)',
            )
            self.input_name_type_dict = dict_to_enum(input_name_type_dict)
            self.output_name_type_dict = dict_to_enum(output_name_type_dict)
            for _, runtime_type_enum in self.input_name_type_dict.items():
                if runtime_type_enum == SerializableRuntimeType.NONE:
                    raise DagstermillError(
                        'If Dagstermill solids have inputs that require serialization strategies '
                        'that are not pickling, then you must register a repository within '
                        'notebook by calling dm.register_repository(repository_def)'
                    )
            for _, runtime_type_enum in self.output_name_type_dict.items():
                if runtime_type_enum == SerializableRuntimeType.NONE:
                    raise DagstermillError(
                        'If Dagstermill solids have outputs that require serialization strategies '
                        'that are not pickling, then you must register a repository within '
                        'notebook by calling dm.register_repository(repository_def).'
                    )
            with scoped_pipeline_context(
                self.pipeline_def,
                {'loggers': {'dagstermill': {}}},
                RunConfig(run_id=run_id, mode=mode),
            ) as pipeline_context:
                self.context = DagstermillInNotebookExecutionContext(pipeline_context)
        else:
            self.pipeline_def = self.repository_def.get_pipeline(pipeline_def_name)
            check.invariant(self.pipeline_def.has_solid_def(solid_def_name))
            self.solid_def = self.pipeline_def.solid_def_named(solid_def_name)

            logger = logger_def.logger_fn(
                InitLoggerContext({}, self.pipeline_def, logger_def, run_id)
            )
            run_config = RunConfig(run_id, loggers=[logger], mode=mode)

            # See block comment above referencing this issue
            # See https://github.com/dagster-io/dagster/issues/796
            with scoped_pipeline_context(
                self.pipeline_def, environment_dict, run_config
            ) as pipeline_context:
                self.context = DagstermillInNotebookExecutionContext(pipeline_context)

        return self.context
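# A minimal sketch of yielding outputs from a notebook against this version,
# assuming hypothetical module-level wrappers around the Manager's
# yield_result and yield_materialization methods:
#
#     import dagstermill as dm
#
#     # Scalar and JSON-serializable values are glued straight into the
#     # notebook via scrapbook; pickle-serializable values are written to
#     # marshal_dir and only the file path is glued.
#     dm.yield_result(42, output_name='answer')
#
#     # Record a materialization for the execution harness to replay as a
#     # Dagster event:
#     dm.yield_materialization('/tmp/model.pkl', 'serialized model')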