def test_error_during_logging(caplog):
    run_id = str(uuid.uuid4())
    with tempfile.NamedTemporaryFile() as sqlite3_db:
        sqlite3_db_path = sqlite3_db.name
        init_db(sqlite3_db_path)
        sqlite3_handler = JsonSqlite3Handler(sqlite3_db_path)

        def err_conn(*args, **kwargs):
            raise Exception('Bailing!')

        sqlite3_handler.connect = err_conn

        sqlite3_logger_def = construct_single_handler_logger('sqlite3', 'debug', sqlite3_handler)
        sqlite3_logger = sqlite3_logger_def.logger_fn(
            dummy_init_logger_context(sqlite3_logger_def, run_id)
        )
        sqlite3_log_manager = DagsterLogManager(run_id, {}, [sqlite3_logger])

        sqlite3_log_manager.info('Testing error handling')

        assert caplog.record_tuples == [
            ('root', 50, 'Error during logging!'),
            ('root', 40, 'Bailing!'),
        ]
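# init_db is called throughout this section but is not defined here. A minimal
# sketch, assuming a single `logs` table whose second column holds the JSON
# payload (the tests below compare record[1] against seven.json.dumps(...)).
# This is a hypothetical reconstruction, not the actual Dagster helper.
import sqlite3


def init_db(sqlite3_db_path):
    with sqlite3.connect(sqlite3_db_path) as conn:
        conn.execute(
            'create table if not exists logs '
            '(row_id integer primary key autoincrement, json_record text)'
        )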
def test_json_sqlite3_watcher():
    test_log_records = []
    run_id = str(uuid.uuid4())
    with tempfile.NamedTemporaryFile() as sqlite3_db:
        sqlite3_db_path = sqlite3_db.name
        init_db(sqlite3_db_path)
        sqlite3_handler = JsonSqlite3Handler(sqlite3_db_path)
        sqlite3_logger_def = construct_single_handler_logger('sqlite3', 'debug', sqlite3_handler)
        sqlite3_logger = sqlite3_logger_def.logger_fn(
            dummy_init_logger_context(sqlite3_logger_def, run_id)
        )
        sqlite3_log_manager = DagsterLogManager(run_id, {}, [sqlite3_logger])

        for i in range(1000):
            sqlite3_log_manager.info('Testing ' + str(i))

        conn = sqlite3.connect(sqlite3_db_path)
        cursor = conn.cursor()
        count = cursor.execute('select count(1) from logs').fetchall()
        assert count[0][0] == 1000

        is_done = threading.Event()
        is_done.set()

        test_handler = LogTestHandler(test_log_records)
        test_logger_def = construct_single_handler_logger('test', 'debug', test_handler)
        test_logger = test_logger_def.logger_fn(
            dummy_init_logger_context(test_logger_def, run_id)
        )
        sqlite3_watcher_log_manager = DagsterLogManager(run_id, {}, [test_logger])
        sqlite3_watcher = JsonSqlite3LogWatcher(
            sqlite3_db_path, sqlite3_watcher_log_manager, is_done
        )

        sqlite3_watcher.watch()

        assert len(test_log_records) == 1000

        records = cursor.execute('select * from logs').fetchall()
        for i, record in enumerate(records):
            json_record = record[1]
            assert json_record == seven.json.dumps(test_log_records[i].__dict__)
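# LogTestHandler and dummy_init_logger_context are used by the tests here but
# are not defined in this section. A sketch of LogTestHandler, assuming it
# simply collects emitted records into the list it is given (hypothetical):
import logging


class LogTestHandler(logging.Handler):
    def __init__(self, records):
        super(LogTestHandler, self).__init__()
        self.records = records

    def emit(self, record):
        # Append the raw LogRecord so tests can inspect record.__dict__
        self.records.append(record)


# dummy_init_logger_context is assumed to build an InitLoggerContext with empty
# logger config for the given logger definition and run_id; its exact
# constructor arguments depend on the Dagster version in use.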
def execute_step_out_of_process(step_context, step):
    child_run_config = RunConfig(
        run_id=step_context.run_config.run_id,
        tags=step_context.run_config.tags,
        log_sink=None,
        event_callback=None,
        reexecution_config=None,
        step_keys_to_execute=step_context.run_config.step_keys_to_execute,
        mode=step_context.run_config.mode,
    )

    with safe_tempfile_path() as log_sink_file:
        init_db(log_sink_file)

        # Although the type of is_done is threading._Event in py2, not threading.Event,
        # it is still constructed using the threading.Event() factory
        is_done = threading.Event()

        def log_watcher_thread_target():
            log_watcher = JsonSqlite3LogWatcher(log_sink_file, step_context.log, is_done)
            log_watcher.watch()

        log_watcher_thread = threading.Thread(target=log_watcher_thread_target)
        log_watcher_thread.start()

        command = InProcessExecutorChildProcessCommand(
            step_context.environment_dict,
            child_run_config,
            step_context.executor_config,
            step.key,
            log_sink_file,
        )
        try:
            for step_event in execute_child_process_command(command):
                if step_context.run_config.event_callback and isinstance(step_event, DagsterEvent):
                    log_step_event(step_context, step_event)
                yield step_event
        finally:
            is_done.set()
            log_watcher_thread.join()
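# safe_tempfile_path is used here and below but not defined in this section. It
# is assumed to be a context manager yielding the path of a closed temporary
# file that other processes can reopen (NamedTemporaryFile keeps the file open,
# which prevents reopening on Windows), deleting it on exit. A minimal sketch:
import os
import tempfile
from contextlib import contextmanager


@contextmanager
def safe_tempfile_path():
    fd, path = tempfile.mkstemp()
    try:
        os.close(fd)  # close our handle so the path can be freely reopened
        yield path
    finally:
        if os.path.exists(path):
            os.unlink(path)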
def test_json_sqlite3_handler():
    run_id = str(uuid.uuid4())
    with tempfile.NamedTemporaryFile() as sqlite3_db:
        sqlite3_db_path = sqlite3_db.name
        init_db(sqlite3_db_path)
        sqlite3_handler = JsonSqlite3Handler(sqlite3_db_path)
        sqlite3_logger_def = construct_single_handler_logger('sqlite3', 'debug', sqlite3_handler)
        sqlite3_logger = sqlite3_logger_def.logger_fn(
            dummy_init_logger_context(sqlite3_logger_def, run_id)
        )
        sqlite3_log_manager = DagsterLogManager(run_id, {}, [sqlite3_logger])

        for i in range(1000):
            sqlite3_log_manager.info('Testing ' + str(i))

        conn = sqlite3.connect(sqlite3_db_path)
        cursor = conn.cursor()
        count = cursor.execute('select count(1) from logs').fetchall()
        assert count[0][0] == 1000
def test_concurrent_multiprocessing_logging():
    test_log_records = []
    run_id = str(uuid.uuid4())

    with safe_tempfile_path() as sqlite3_db_path:
        is_done = threading.Event()

        sqlite3_process = multiprocessing.Process(
            target=sqlite3_process_target, args=(sqlite3_db_path, run_id)
        )
        test_thread = threading.Thread(
            target=check_thread_target, args=(sqlite3_db_path, is_done, run_id, test_log_records)
        )

        init_db(sqlite3_db_path)

        sqlite3_process.start()
        test_thread.start()

        try:
            sqlite3_process.join()
        finally:
            is_done.set()

        assert is_done.is_set()

        test_thread.join()

        assert len(test_log_records) == 1000

        with sqlite3.connect(sqlite3_db_path) as conn:
            cursor = conn.cursor()
            count = cursor.execute('select count(1) from logs').fetchall()
            assert count[0][0] == 1000

            records = cursor.execute('select * from logs').fetchall()
            for i, record in enumerate(records):
                json_record = record[1]
                assert json_record == seven.json.dumps(test_log_records[i].__dict__)

        conn.close()
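# sqlite3_process_target and check_thread_target are referenced above but not
# defined in this section. By analogy with the inline variant of this test
# below, they are assumed to look roughly like the following (hypothetical
# sketch; signatures inferred from the args tuples above):
def sqlite3_process_target(sqlite3_db_path, run_id):
    # Writer side: log 1000 records into the sqlite3 sink from a child process
    sqlite3_handler = JsonSqlite3Handler(sqlite3_db_path)
    sqlite3_logger_def = construct_single_handler_logger('sqlite3', 'debug', sqlite3_handler)
    sqlite3_logger = sqlite3_logger_def.logger_fn(
        dummy_init_logger_context(sqlite3_logger_def, run_id)
    )
    sqlite3_log_manager = DagsterLogManager(run_id, {}, [sqlite3_logger])
    for i in range(1000):
        sqlite3_log_manager.info('Testing ' + str(i))


def check_thread_target(sqlite3_db_path, is_done, run_id, test_log_records):
    # Reader side: tail the sqlite3 sink until is_done is set, re-emitting
    # records into test_log_records via LogTestHandler
    test_handler = LogTestHandler(test_log_records)
    test_logger_def = construct_single_handler_logger('test', 'debug', test_handler)
    test_logger = test_logger_def.logger_fn(
        dummy_init_logger_context(test_logger_def, run_id)
    )
    test_log_manager = DagsterLogManager(run_id, {}, [test_logger])
    test_log_watcher = JsonSqlite3LogWatcher(sqlite3_db_path, test_log_manager, is_done)
    test_log_watcher.watch()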
def test_concurrent_multiprocessing_logging():
    test_log_records = []
    run_id = str(uuid.uuid4())
    with tempfile.NamedTemporaryFile() as sqlite3_db:
        sqlite3_db_path = sqlite3_db.name
        is_done = threading.Event()

        def sqlite3_process_target(sqlite3_db_path):
            sqlite3_handler = JsonSqlite3Handler(sqlite3_db_path)
            sqlite3_logger_def = construct_single_handler_logger(
                'sqlite3', 'debug', sqlite3_handler
            )
            sqlite3_logger = sqlite3_logger_def.logger_fn(
                dummy_init_logger_context(sqlite3_logger_def, run_id)
            )
            sqlite3_log_manager = DagsterLogManager(run_id, {}, [sqlite3_logger])
            for i in range(1000):
                sqlite3_log_manager.info('Testing ' + str(i))

        def test_thread_target(sqlite3_db_path, is_done):
            test_handler = LogTestHandler(test_log_records)
            test_logger_def = construct_single_handler_logger('test', 'debug', test_handler)
            test_logger = test_logger_def.logger_fn(
                dummy_init_logger_context(test_logger_def, run_id)
            )
            test_log_manager = DagsterLogManager(run_id, {}, [test_logger])
            test_log_watcher = JsonSqlite3LogWatcher(sqlite3_db_path, test_log_manager, is_done)
            test_log_watcher.watch()

        sqlite3_process = multiprocessing.Process(
            target=sqlite3_process_target, args=(sqlite3_db_path,)
        )
        test_thread = threading.Thread(target=test_thread_target, args=(sqlite3_db_path, is_done))

        init_db(sqlite3_db_path)

        sqlite3_process.start()
        test_thread.start()

        try:
            sqlite3_process.join()
        finally:
            is_done.set()

        assert is_done.is_set()

        test_thread.join()

        assert len(test_log_records) == 1000

        conn = sqlite3.connect(sqlite3_db_path)
        cursor = conn.cursor()
        count = cursor.execute('select count(1) from logs').fetchall()
        assert count[0][0] == 1000
        records = cursor.execute('select * from logs').fetchall()
        for i, record in enumerate(records):
            json_record = record[1]
            assert json_record == seven.json.dumps(test_log_records[i].__dict__)
def _t_fn(compute_context, inputs):
    check.inst_param(compute_context, 'compute_context', ComputeExecutionContext)
    check.param_invariant(
        isinstance(compute_context.environment_dict, dict),
        'context',
        'SystemComputeExecutionContext must have valid environment_dict',
    )

    system_compute_context = compute_context.get_system_context()

    base_dir = '/tmp/dagstermill/{run_id}/'.format(run_id=compute_context.run_id)
    output_notebook_dir = os.path.join(base_dir, 'output_notebooks/')
    mkdir_p(output_notebook_dir)

    temp_path = os.path.join(
        output_notebook_dir, '{prefix}-out.ipynb'.format(prefix=str(uuid.uuid4()))
    )

    with safe_tempfile_path() as output_log_path:
        init_db(output_log_path)

        # Scaffold the registration here
        nb = load_notebook_node(notebook_path)
        nb_no_parameters = replace_parameters(
            system_compute_context,
            nb,
            get_papermill_parameters(system_compute_context, inputs, output_log_path),
        )
        intermediate_path = os.path.join(
            output_notebook_dir, '{prefix}-inter.ipynb'.format(prefix=str(uuid.uuid4()))
        )
        write_ipynb(nb_no_parameters, intermediate_path)

        # Although the type of is_done is threading._Event in py2, not threading.Event,
        # it is still constructed using the threading.Event() factory
        is_done = threading.Event()

        def log_watcher_thread_target():
            log_watcher = JsonSqlite3LogWatcher(
                output_log_path, system_compute_context.log, is_done
            )
            log_watcher.watch()

        log_watcher_thread = threading.Thread(target=log_watcher_thread_target)
        log_watcher_thread.start()

        with user_code_error_boundary(
            DagstermillExecutionError,
            lambda: (
                'Error occurred during the execution of Dagstermill solid '
                '{solid_name}: {notebook_path}'.format(
                    solid_name=name, notebook_path=notebook_path
                )
            ),
        ):
            try:
                papermill_engines.register('dagstermill', DagstermillNBConvertEngine)
                papermill.execute_notebook(
                    intermediate_path, temp_path, engine_name='dagstermill', log_output=True
                )
            except Exception as exc:
                yield Materialization(
                    label='output_notebook',
                    description='Location of output notebook on the filesystem',
                    metadata_entries=[EventMetadataEntry.fspath(temp_path)],
                )
                raise exc
            finally:
                is_done.set()
                log_watcher_thread.join()

        # deferred import for perf
        import scrapbook

        output_nb = scrapbook.read_notebook(temp_path)

        system_compute_context.log.debug(
            'Notebook execution complete for {name}. Data is {data}'.format(
                name=name, data=output_nb.scraps
            )
        )

        yield Materialization(
            label='output_notebook',
            description='Location of output notebook on the filesystem',
            metadata_entries=[EventMetadataEntry.fspath(temp_path)],
        )

        for (output_name, output_def) in system_compute_context.solid_def.output_dict.items():
            data_dict = output_nb.scraps.data_dict
            if output_name in data_dict:
                value = read_value(output_def.runtime_type, data_dict[output_name])
                yield Output(value, output_name)

        for key, value in output_nb.scraps.items():
            if key.startswith('event-'):
                with open(value.data, 'rb') as fd:
                    yield pickle.loads(fd.read())