Example #1
def test_error_during_logging(caplog):
    run_id = str(uuid.uuid4())
    with tempfile.NamedTemporaryFile() as sqlite3_db:
        sqlite3_db_path = sqlite3_db.name
        init_db(sqlite3_db_path)

        sqlite3_handler = JsonSqlite3Handler(sqlite3_db_path)

        def err_conn(*args, **kwargs):
            raise Exception('Bailing!')

        sqlite3_handler.connect = err_conn

        sqlite3_logger_def = construct_single_handler_logger(
            'sqlite3', 'debug', sqlite3_handler)
        sqlite3_logger = sqlite3_logger_def.logger_fn(
            dummy_init_logger_context(sqlite3_logger_def, run_id))
        sqlite3_log_manager = DagsterLogManager(run_id, {}, [sqlite3_logger])

        sqlite3_log_manager.info('Testing error handling')

        assert caplog.record_tuples == [
            ('root', 50, 'Error during logging!'),
            ('root', 40, 'Bailing!'),
        ]
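The examples on this page lean on a few test helpers (init_db, LogTestHandler, dummy_init_logger_context) that are not shown. Below is a minimal sketch of the first two, inferred from how the tests use them; the 'logs' schema and column names are assumptions, not dagster's actual definitions:

import logging
import sqlite3


def init_db(sqlite3_db_path):
    # Assumed schema: the tests only need a 'logs' table whose second
    # column holds the JSON-serialized log record.
    with sqlite3.connect(sqlite3_db_path) as conn:
        conn.execute(
            'create table if not exists logs '
            '(row_id integer primary key autoincrement, json_str text)')


class LogTestHandler(logging.Handler):
    def __init__(self, records):
        # Collects emitted LogRecord objects in a caller-supplied list so
        # the tests can compare them against rows read back from sqlite.
        self.records = records
        super(LogTestHandler, self).__init__()

    def emit(self, record):
        self.records.append(record)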
Example #2
def test_json_sqlite3_watcher():
    test_log_records = []
    run_id = str(uuid.uuid4())
    with tempfile.NamedTemporaryFile() as sqlite3_db:
        sqlite3_db_path = sqlite3_db.name
        init_db(sqlite3_db_path)

        sqlite3_handler = JsonSqlite3Handler(sqlite3_db_path)
        sqlite3_logger_def = construct_single_handler_logger(
            'sqlite3', 'debug', sqlite3_handler)
        sqlite3_logger = sqlite3_logger_def.logger_fn(
            dummy_init_logger_context(sqlite3_logger_def, run_id))
        sqlite3_log_manager = DagsterLogManager(run_id, {}, [sqlite3_logger])

        for i in range(1000):
            sqlite3_log_manager.info('Testing ' + str(i))

        conn = sqlite3.connect(sqlite3_db_path)
        cursor = conn.cursor()
        count = cursor.execute('select count(1) from logs').fetchall()
        assert count[0][0] == 1000

        is_done = threading.Event()
        is_done.set()

        test_handler = LogTestHandler(test_log_records)
        test_logger_def = construct_single_handler_logger(
            'test', 'debug', test_handler)
        test_logger = test_logger_def.logger_fn(
            dummy_init_logger_context(test_logger_def, run_id))
        sqlite3_watcher_log_manager = DagsterLogManager(
            run_id, {}, [test_logger])
        sqlite3_watcher = JsonSqlite3LogWatcher(sqlite3_db_path,
                                                sqlite3_watcher_log_manager,
                                                is_done)

        sqlite3_watcher.watch()

        assert len(test_log_records) == 1000

        records = cursor.execute('select * from logs').fetchall()
        for i, record in enumerate(records):
            json_record = record[1]
            assert json_record == seven.json.dumps(
                test_log_records[i].__dict__)
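Note that is_done is set before watch() is called, so the watcher makes a single pass over the rows already in the database and returns instead of blocking. A rough sketch of what that drain pass amounts to (the function name and two-column row shape are assumptions; this is not dagster's implementation):

import json
import logging
import sqlite3


def replay_logs_once(sqlite3_db_path, logger):
    # Deserialize each stored record and re-emit it through the given
    # stdlib logger's handlers.
    with sqlite3.connect(sqlite3_db_path) as conn:
        for _row_id, json_str in conn.execute('select * from logs'):
            logger.handle(logging.makeLogRecord(json.loads(json_str)))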
Example #3
def execute_step_out_of_process(step_context, step):
    child_run_config = RunConfig(
        run_id=step_context.run_config.run_id,
        tags=step_context.run_config.tags,
        log_sink=None,
        event_callback=None,
        reexecution_config=None,
        step_keys_to_execute=step_context.run_config.step_keys_to_execute,
        mode=step_context.run_config.mode,
    )

    with safe_tempfile_path() as log_sink_file:
        init_db(log_sink_file)
        # Although the type of is_done is threading._Event in py2, not threading.Event,
        # it is still constructed using the threading.Event() factory
        is_done = threading.Event()

        def log_watcher_thread_target():
            log_watcher = JsonSqlite3LogWatcher(log_sink_file,
                                                step_context.log, is_done)
            log_watcher.watch()

        log_watcher_thread = threading.Thread(target=log_watcher_thread_target)

        log_watcher_thread.start()

        command = InProcessExecutorChildProcessCommand(
            step_context.environment_dict,
            child_run_config,
            step_context.executor_config,
            step.key,
            log_sink_file,
        )
        try:
            for step_event in execute_child_process_command(command):
                if step_context.run_config.event_callback and isinstance(
                        step_event, DagsterEvent):
                    log_step_event(step_context, step_event)
                yield step_event

        finally:
            is_done.set()
            log_watcher_thread.join()
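The start/try/finally/join choreography around the watcher thread recurs in Example #7 below. One way to factor it out as a context manager (hypothetical, not part of dagster's API), using the same JsonSqlite3LogWatcher class as above:

import contextlib
import threading


@contextlib.contextmanager
def watching_log_sink(log_sink_file, log_manager):
    # Tail the sqlite log sink on a background thread for the duration
    # of the block, then signal completion and drain the final pass.
    is_done = threading.Event()
    log_watcher = JsonSqlite3LogWatcher(log_sink_file, log_manager, is_done)
    log_watcher_thread = threading.Thread(target=log_watcher.watch)
    log_watcher_thread.start()
    try:
        yield
    finally:
        is_done.set()
        log_watcher_thread.join()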
Example #4
def test_json_sqlite3_handler():
    run_id = str(uuid.uuid4())
    with tempfile.NamedTemporaryFile() as sqlite3_db:
        sqlite3_db_path = sqlite3_db.name
        init_db(sqlite3_db_path)

        sqlite3_handler = JsonSqlite3Handler(sqlite3_db_path)
        sqlite3_logger_def = construct_single_handler_logger(
            'sqlite3', 'debug', sqlite3_handler)
        sqlite3_logger = sqlite3_logger_def.logger_fn(
            dummy_init_logger_context(sqlite3_logger_def, run_id))
        sqlite3_log_manager = DagsterLogManager(run_id, {}, [sqlite3_logger])

        for i in range(1000):
            sqlite3_log_manager.info('Testing ' + str(i))

        conn = sqlite3.connect(sqlite3_db_path)
        cursor = conn.cursor()
        count = cursor.execute('select count(1) from logs').fetchall()
        assert count[0][0] == 1000
Example #5
def test_concurrent_multiprocessing_logging():
    test_log_records = []
    run_id = str(uuid.uuid4())

    with safe_tempfile_path() as sqlite3_db_path:
        is_done = threading.Event()

        sqlite3_process = multiprocessing.Process(
            target=sqlite3_process_target, args=(sqlite3_db_path, run_id)
        )

        test_thread = threading.Thread(
            target=check_thread_target, args=(sqlite3_db_path, is_done, run_id, test_log_records)
        )

        init_db(sqlite3_db_path)

        sqlite3_process.start()
        test_thread.start()

        try:
            sqlite3_process.join()
        finally:
            is_done.set()

        assert is_done.is_set()
        test_thread.join()
        assert len(test_log_records) == 1000

        with sqlite3.connect(sqlite3_db_path) as conn:
            cursor = conn.cursor()
            count = cursor.execute('select count(1) from logs').fetchall()
            assert count[0][0] == 1000

            records = cursor.execute('select * from logs').fetchall()
            for i, record in enumerate(records):
                json_record = record[1]
                assert json_record == seven.json.dumps(test_log_records[i].__dict__)
        conn.close()
Example #6
def test_concurrent_multiprocessing_logging():
    test_log_records = []
    run_id = str(uuid.uuid4())

    with tempfile.NamedTemporaryFile() as sqlite3_db:

        sqlite3_db_path = sqlite3_db.name
        is_done = threading.Event()

        def sqlite3_process_target(sqlite3_db_path):
            sqlite3_handler = JsonSqlite3Handler(sqlite3_db_path)
            sqlite3_logger_def = construct_single_handler_logger(
                'sqlite3', 'debug', sqlite3_handler)
            sqlite3_logger = sqlite3_logger_def.logger_fn(
                dummy_init_logger_context(sqlite3_logger_def, run_id))
            sqlite3_log_manager = DagsterLogManager(run_id, {},
                                                    [sqlite3_logger])

            for i in range(1000):
                sqlite3_log_manager.info('Testing ' + str(i))

        def test_thread_target(sqlite3_db_path, is_done):
            test_handler = LogTestHandler(test_log_records)
            test_logger_def = construct_single_handler_logger(
                'test', 'debug', test_handler)
            test_logger = test_logger_def.logger_fn(
                dummy_init_logger_context(test_logger_def, run_id))
            test_log_manager = DagsterLogManager(run_id, {}, [test_logger])
            test_log_watcher = JsonSqlite3LogWatcher(sqlite3_db_path,
                                                     test_log_manager, is_done)
            test_log_watcher.watch()

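        # NB: a nested function as a Process target only works under the
        # 'fork' start method; closures cannot be pickled for 'spawn'.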
        sqlite3_process = multiprocessing.Process(
            target=sqlite3_process_target, args=(sqlite3_db_path, ))

        test_thread = threading.Thread(target=test_thread_target,
                                       args=(sqlite3_db_path, is_done))

        init_db(sqlite3_db_path)

        sqlite3_process.start()
        test_thread.start()

        try:
            sqlite3_process.join()
        finally:
            is_done.set()

        assert is_done.is_set()
        test_thread.join()
        assert len(test_log_records) == 1000

        conn = sqlite3.connect(sqlite3_db_path)
        cursor = conn.cursor()
        count = cursor.execute('select count(1) from logs').fetchall()
        assert count[0][0] == 1000

        records = cursor.execute('select * from logs').fetchall()
        for i, record in enumerate(records):
            json_record = record[1]
            assert json_record == seven.json.dumps(
                test_log_records[i].__dict__)
Example #7
File: solids.py Project: estatic/dagster
    def _t_fn(compute_context, inputs):
        check.inst_param(compute_context, 'compute_context', ComputeExecutionContext)
        check.param_invariant(
            isinstance(compute_context.environment_dict, dict),
            'context',
            'SystemComputeExecutionContext must have valid environment_dict',
        )

        system_compute_context = compute_context.get_system_context()

        base_dir = '/tmp/dagstermill/{run_id}/'.format(run_id=compute_context.run_id)
        output_notebook_dir = os.path.join(base_dir, 'output_notebooks/')
        mkdir_p(output_notebook_dir)

        temp_path = os.path.join(
            output_notebook_dir, '{prefix}-out.ipynb'.format(prefix=str(uuid.uuid4()))
        )

        with safe_tempfile_path() as output_log_path:
            init_db(output_log_path)

            # Scaffold the registration here
            nb = load_notebook_node(notebook_path)
            nb_no_parameters = replace_parameters(
                system_compute_context,
                nb,
                get_papermill_parameters(system_compute_context, inputs, output_log_path),
            )
            intermediate_path = os.path.join(
                output_notebook_dir, '{prefix}-inter.ipynb'.format(prefix=str(uuid.uuid4()))
            )
            write_ipynb(nb_no_parameters, intermediate_path)

            # Although the type of is_done is threading._Event in py2, not threading.Event,
            # it is still constructed using the threading.Event() factory
            is_done = threading.Event()

            def log_watcher_thread_target():
                log_watcher = JsonSqlite3LogWatcher(
                    output_log_path, system_compute_context.log, is_done
                )
                log_watcher.watch()

            log_watcher_thread = threading.Thread(target=log_watcher_thread_target)

            log_watcher_thread.start()

            with user_code_error_boundary(
                DagstermillExecutionError,
                lambda: (
                    'Error occurred during the execution of Dagstermill solid '
                    '{solid_name}: {notebook_path}'.format(
                        solid_name=name, notebook_path=notebook_path
                    )
                ),
            ):
                try:
                    papermill_engines.register('dagstermill', DagstermillNBConvertEngine)
                    papermill.execute_notebook(
                        intermediate_path, temp_path, engine_name='dagstermill', log_output=True
                    )
                except Exception as exc:
                    yield Materialization(
                        label='output_notebook',
                        description='Location of output notebook on the filesystem',
                        metadata_entries=[EventMetadataEntry.fspath(temp_path)],
                    )
                    raise exc
                finally:
                    is_done.set()
                    log_watcher_thread.join()

            # deferred import for perf
            import scrapbook

            output_nb = scrapbook.read_notebook(temp_path)

            system_compute_context.log.debug(
                'Notebook execution complete for {name}. Data is {data}'.format(
                    name=name, data=output_nb.scraps
                )
            )

            yield Materialization(
                label='output_notebook',
                description='Location of output notebook on the filesystem',
                metadata_entries=[EventMetadataEntry.fspath(temp_path)],
            )

            for (output_name, output_def) in system_compute_context.solid_def.output_dict.items():
                data_dict = output_nb.scraps.data_dict
                if output_name in data_dict:
                    value = read_value(output_def.runtime_type, data_dict[output_name])

                    yield Output(value, output_name)

            for key, value in output_nb.scraps.items():
                if key.startswith('event-'):
                    with open(value.data, 'rb') as fd:
                        yield pickle.loads(fd.read())
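For context, the output scraps read back above are written from inside the executed notebook. A hypothetical notebook-side cell showing the underlying scrapbook call (the 'result' name is illustrative; in dagstermill proper these scraps are written by its own helpers rather than by user code calling scrapbook directly):

import scrapbook as sb

# Glue a value under an output name so the solid above can recover it
# via output_nb.scraps.data_dict['result'].
sb.glue('result', 42)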