def test_sink_logger():
    run_id = str(uuid.uuid4())

    with safe_tempfile_path() as sqlite3_db_path:
        sink = SqliteEventSink(sqlite3_db_path)
        sqlite3_log_manager = DagsterLogManager(run_id, {}, [sink.get_logger()])

        for i in range(1000):
            sqlite3_log_manager.info('Testing ' + str(i))

        with sqlite3.connect(sqlite3_db_path) as conn:
            cursor = conn.cursor()
            count = cursor.execute('select count(1) from logs').fetchall()
            assert count[0][0] == 1000

        conn.close()

        sink.on_pipeline_teardown()
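# The assertions above and below depend on the `logs` table that SqliteEventSink
# creates. A minimal sketch of a compatible schema, inferred from the queries in
# these tests (column index 1 must hold the JSON-serialized LogRecord.__dict__);
# the sink's actual DDL may differ:
LOGS_DDL_SKETCH = '''
create table if not exists logs (
    row_id integer primary key autoincrement,
    json_str text
)
'''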
def execute_step_out_of_process(step_context, step):
    with safe_tempfile_path() as sqlite_file:
        event_sink = SqliteEventSink(sqlite_file, raise_on_error=True)
        child_run_config = RunConfig(
            run_id=step_context.run_config.run_id,
            tags=step_context.run_config.tags,
            event_sink=event_sink,
            step_keys_to_execute=step_context.run_config.step_keys_to_execute,
            mode=step_context.run_config.mode,
        )
        command = InProcessExecutorChildProcessCommand(
            step_context.environment_dict, child_run_config, step_context.executor_config, step.key
        )

        with event_sink.log_forwarding(step_context.log):
            for event_or_none in execute_child_process_command(command):
                yield event_or_none
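# `event_sink.log_forwarding(...)` is relied on here and in the tests below as a
# context manager that replays records captured in the sink's sqlite database
# through another log target. A minimal sketch of that contract, not the actual
# SqliteEventSink implementation: the real method hangs off the sink and takes a
# DagsterLogManager, while this standalone sketch takes a db path and a plain
# stdlib logger, and assumes the `logs` table layout the tests query
# (JSON-dumped LogRecord.__dict__ in column 1).
import json
import logging
import sqlite3
from contextlib import contextmanager

@contextmanager
def log_forwarding_sketch(sqlite_db_path, target_logger):
    try:
        yield
    finally:
        with sqlite3.connect(sqlite_db_path) as conn:
            for row in conn.cursor().execute('select * from logs').fetchall():
                # Rebuild the stdlib LogRecord from its serialized __dict__ and
                # hand it to the target logger's handlers.
                target_logger.handle(logging.makeLogRecord(json.loads(row[1])))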
def test_concurrent_multithreaded_logging():
    test_log_records = []
    run_id = str(uuid.uuid4())

    with safe_tempfile_path() as sqlite3_db_path:
        wrap_it_up = threading.Event()
        event_sink = SqliteEventSink(sqlite3_db_path)
        sqlite3_thread = threading.Thread(target=write_logs, args=(event_sink, run_id))
        test_thread = threading.Thread(
            target=forward_logs, args=(event_sink, wrap_it_up, run_id, test_log_records)
        )

        sqlite3_thread.start()
        test_thread.start()

        try:
            sqlite3_thread.join()
        finally:
            wrap_it_up.set()

        assert wrap_it_up.is_set()

        test_thread.join()

        assert len(test_log_records) == 1000

        with sqlite3.connect(sqlite3_db_path) as conn:
            cursor = conn.cursor()
            count = cursor.execute('select count(1) from logs').fetchall()
            assert count[0][0] == 1000

            records = cursor.execute('select * from logs').fetchall()
            for i, record in enumerate(records):
                json_record = record[1]
                assert json_record == seven.json.dumps(test_log_records[i].__dict__)

        conn.close()
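# `write_logs` and `forward_logs` are not shown in this section. A plausible
# sketch of the two thread targets, inferred from how the test drives them and
# reusing the helper names from the forwarding test below; the bodies here are
# assumptions, not the actual test helpers. `write_logs` emits 1000 records
# through the sink's logger; `forward_logs` forwards captured records into a
# log manager whose handler appends to `test_log_records`, holding the
# forwarding context open until signaled to stop.
def write_logs(event_sink, run_id):
    log_manager = DagsterLogManager(run_id, {}, [event_sink.get_logger()])
    for i in range(1000):
        log_manager.info('Testing ' + str(i))

def forward_logs(event_sink, wrap_it_up, run_id, test_log_records):
    test_handler = LogTestHandler(test_log_records)
    test_logger_def = construct_single_handler_logger('test', 'debug', test_handler)
    test_logger = test_logger_def.logger_fn(dummy_init_logger_context(test_logger_def, run_id))
    log_manager = DagsterLogManager(run_id, {}, [test_logger])
    with event_sink.log_forwarding(log_manager):
        wrap_it_up.wait()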
def test_error_during_logging(caplog):
    run_id = str(uuid.uuid4())

    with safe_tempfile_path() as sqlite3_db_path:
        event_sink = SqliteEventSink(sqlite3_db_path)

        def err_conn(*args, **kwargs):
            raise Exception('Bailing!')

        # Patch the sink so that any attempt to connect to the database raises
        event_sink.connect = err_conn

        sqlite3_log_manager = DagsterLogManager(run_id, {}, [event_sink.get_logger()])

        sqlite3_log_manager.info('Testing error handling')

        assert caplog.record_tuples == [
            ('root', 50, 'Error during logging!'),
            ('root', 40, 'Bailing!'),
        ]

        event_sink.on_pipeline_teardown()
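# The assertion above pins down the sink logger's error boundary: failures
# inside the sink must not propagate to the caller, but are reported on the
# root logger at CRITICAL (50) and ERROR (40). A minimal sketch of that
# pattern, not the actual SqliteEventSink code:
import logging

def emit_with_error_boundary(emit_fn, record):
    try:
        emit_fn(record)  # e.g. write the record to sqlite
    except Exception as exc:
        # Swallow the error so user logging never crashes the run, but leave a
        # loud trail on the root logger.
        logging.critical('Error during logging!')
        logging.error(str(exc))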
def test_sink_log_forwarding():
    test_log_records = []
    run_id = str(uuid.uuid4())

    with safe_tempfile_path() as sqlite3_db_path:
        sink = SqliteEventSink(sqlite3_db_path)
        sqlite3_log_manager = DagsterLogManager(run_id, {}, [sink.get_logger()])

        for i in range(1000):
            sqlite3_log_manager.info('Testing ' + str(i))

        with sqlite3.connect(sqlite3_db_path) as conn:
            cursor = conn.cursor()
            count = cursor.execute('select count(1) from logs').fetchall()
            assert count[0][0] == 1000

            test_handler = LogTestHandler(test_log_records)
            test_logger_def = construct_single_handler_logger('test', 'debug', test_handler)
            test_logger = test_logger_def.logger_fn(
                dummy_init_logger_context(test_logger_def, run_id)
            )
            sqlite3_watcher_log_manager = DagsterLogManager(run_id, {}, [test_logger])

            with sink.log_forwarding(sqlite3_watcher_log_manager):
                pass

            assert len(test_log_records) == 1000

            records = cursor.execute('select * from logs').fetchall()
            for i, record in enumerate(records):
                json_record = record[1]
                assert json_record == seven.json.dumps(test_log_records[i].__dict__)

        conn.close()

        sink.on_pipeline_teardown()
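# `LogTestHandler` is a small test utility whose definition is not shown here.
# A likely sketch (assumption): a stdlib handler that collects every record it
# sees into the provided list, so the test can compare forwarded records
# against the sqlite rows.
import logging

class LogTestHandler(logging.Handler):
    def __init__(self, records):
        super(LogTestHandler, self).__init__()
        self.records = records

    def emit(self, record):
        self.records.append(record)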
def reconstitute_pipeline_context(
    self,
    output_log_path=None,
    marshal_dir=None,
    environment_dict=None,
    handle_kwargs=None,
    run_config_kwargs=None,
    solid_subset=None,
    solid_handle_kwargs=None,
):
    '''Reconstitutes a context for dagstermill-managed execution.

    You'll see this function called to reconstruct a pipeline context within the
    ``injected parameters`` cell of a dagstermill output notebook. Users should not call this
    function interactively except when debugging output notebooks.

    Use :func:`dagstermill.get_context` in the ``parameters`` cell of your notebook to define a
    context for interactive exploration and development. This call will be replaced by one to
    :func:`dagstermill.reconstitute_pipeline_context` when the notebook is executed by
    dagstermill.
    '''
    check.opt_str_param(output_log_path, 'output_log_path')
    check.opt_str_param(marshal_dir, 'marshal_dir')
    environment_dict = check.opt_dict_param(environment_dict, 'environment_dict', key_type=str)
    check.dict_param(run_config_kwargs, 'run_config_kwargs')
    check.dict_param(handle_kwargs, 'handle_kwargs')
    check.opt_list_param(solid_subset, 'solid_subset', of_type=str)
    check.dict_param(solid_handle_kwargs, 'solid_handle_kwargs')

    try:
        handle = load_handle.handle_for_pipeline_cli_args(
            handle_kwargs, use_default_repository_yaml=False
        )
    except (check.CheckError, load_handle.CliUsageError) as err:
        six.raise_from(
            DagstermillError(
                'Cannot invoke a dagstermill solid from an in-memory pipeline that was not loaded '
                'from an ExecutionTargetHandle. Run this pipeline using dagit, the dagster CLI, '
                'through dagster-graphql, or in-memory after loading it through an '
                'ExecutionTargetHandle.'
            ),
            err,
        )

    pipeline_def = check.inst_param(
        handle.build_pipeline_definition(),
        'pipeline_def (from handle {handle_dict})'.format(handle_dict=handle.data._asdict()),
        PipelineDefinition,
    ).build_sub_pipeline(solid_subset)

    solid_handle = SolidHandle.from_dict(solid_handle_kwargs)
    solid_def = pipeline_def.get_solid(solid_handle)

    run_config = RunConfig(**run_config_kwargs)

    # since we are rehydrating the SqliteEventSink we will skip the db init
    run_config = run_config.with_event_sink(SqliteEventSink(output_log_path, skip_db_init=True))

    self.marshal_dir = marshal_dir
    self.in_pipeline = True
    self.solid_def = solid_def
    self.pipeline_def = pipeline_def

    with scoped_pipeline_context(
        self.pipeline_def,
        environment_dict,
        run_config,
        scoped_resources_builder_cm=self._setup_resources,
    ) as pipeline_context:
        self.context = DagstermillExecutionContext(pipeline_context)

    return self.context
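# The `skip_db_init=True` flag above matters because the parent process already
# created the sqlite database and its `logs` table; the notebook process only
# re-attaches to it. A minimal sketch of that distinction under the assumed
# `logs` schema (the sink's actual constructor behavior may differ):
import sqlite3

def open_sink_db_sketch(path, skip_db_init=False):
    conn = sqlite3.connect(path)
    if not skip_db_init:
        # Only the process that owns the sink creates the schema; rehydrating
        # processes must not re-run DDL against a live database.
        conn.execute(
            'create table if not exists logs (row_id integer primary key autoincrement, json_str text)'
        )
    return conn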
def _t_fn(compute_context, inputs):
    # `name` and `notebook_path` are closed over from the enclosing solid factory
    check.inst_param(compute_context, 'compute_context', ComputeExecutionContext)
    check.param_invariant(
        isinstance(compute_context.environment_dict, dict),
        'context',
        'SystemComputeExecutionContext must have valid environment_dict',
    )

    system_compute_context = compute_context.get_system_context()

    base_dir = '/tmp/dagstermill/{run_id}/'.format(run_id=compute_context.run_id)
    output_notebook_dir = os.path.join(base_dir, 'output_notebooks/')
    mkdir_p(output_notebook_dir)

    temp_path = os.path.join(
        output_notebook_dir, '{prefix}-out.ipynb'.format(prefix=str(uuid.uuid4()))
    )

    with safe_tempfile_path() as output_log_path:
        event_sink = SqliteEventSink(output_log_path)

        # Scaffold the registration here
        nb = load_notebook_node(notebook_path)
        nb_no_parameters = replace_parameters(
            system_compute_context,
            nb,
            get_papermill_parameters(system_compute_context, inputs, output_log_path),
        )
        intermediate_path = os.path.join(
            output_notebook_dir, '{prefix}-inter.ipynb'.format(prefix=str(uuid.uuid4()))
        )
        write_ipynb(nb_no_parameters, intermediate_path)

        with user_code_error_boundary(
            DagstermillExecutionError,
            lambda: (
                'Error occurred during the execution of Dagstermill solid '
                '{solid_name}: {notebook_path}'.format(
                    solid_name=name, notebook_path=notebook_path
                )
            ),
        ):
            with event_sink.log_forwarding(system_compute_context.log):
                try:
                    papermill_engines.register('dagstermill', DagstermillNBConvertEngine)
                    papermill.execute_notebook(
                        intermediate_path, temp_path, engine_name='dagstermill', log_output=True
                    )
                except Exception as exc:
                    yield Materialization(
                        label='output_notebook',
                        description='Location of output notebook on the filesystem',
                        metadata_entries=[EventMetadataEntry.fspath(temp_path)],
                    )
                    raise exc

        # deferred import for perf
        import scrapbook

        output_nb = scrapbook.read_notebook(temp_path)

        system_compute_context.log.debug(
            'Notebook execution complete for {name}. Data is {data}'.format(
                name=name, data=output_nb.scraps
            )
        )

        yield Materialization(
            label='output_notebook',
            description='Location of output notebook on the filesystem',
            metadata_entries=[EventMetadataEntry.fspath(temp_path)],
        )

        for (output_name, output_def) in system_compute_context.solid_def.output_dict.items():
            data_dict = output_nb.scraps.data_dict
            if output_name in data_dict:
                value = read_value(output_def.runtime_type, data_dict[output_name])
                yield Output(value, output_name)

        for key, value in output_nb.scraps.items():
            if key.startswith('event-'):
                with open(value.data, 'rb') as fd:
                    yield pickle.loads(fd.read())
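# The final loop above consumes scraps whose keys start with 'event-': each
# scrap's data is read as a filesystem path to a pickled dagster event written
# from inside the notebook. A sketch of what the producing side would look like
# under that assumption; the function name is hypothetical and dagstermill's
# actual event plumbing may differ:
import os
import pickle
import tempfile
import uuid

import scrapbook

def glue_event_sketch(event):
    # Persist the event where the parent process can read it back...
    fd, path = tempfile.mkstemp(suffix='.pkl')
    with os.fdopen(fd, 'wb') as f:
        f.write(pickle.dumps(event))
    # ...and record the path in a scrap keyed by the 'event-' prefix that the
    # compute function scans for.
    scrapbook.glue('event-{id}'.format(id=str(uuid.uuid4())), path)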