def execute_step_out_of_process(step_context, step):
    if step_context.run_config.loggers:
        step_context.log.debug(
            'Loggers cannot be injected via RunConfig using the multiprocess executor. Define '
            'loggers on the mode instead. Ignoring loggers: [{logger_names}]'.format(
                logger_names=', '.join(
                    [
                        '\'{name}\''.format(name=logger.name)
                        for logger in step_context.run_config.loggers
                    ]
                )
            )
        )

    run_config = RunConfig(
        run_id=step_context.run_config.run_id,
        tags=step_context.run_config.tags,
        loggers=None,
        event_callback=None,
        reexecution_config=None,
        step_keys_to_execute=step_context.run_config.step_keys_to_execute,
        mode=step_context.run_config.mode,
    )

    command = InProcessExecutorChildProcessCommand(
        step_context.environment_dict, run_config, step_context.executor_config, step.key
    )

    for step_event in execute_child_process_command(command):
        if step_context.run_config.event_callback and isinstance(step_event, DagsterEvent):
            log_step_event(step_context, step_event)
        yield step_event
def composite_descent(pipeline_def, solids_config, run_config=None):
    '''This function is responsible for constructing the dictionary of SolidConfig
    (indexed by handle) that will be passed into the EnvironmentConfig. Critically,
    this is the codepath that manages config mapping, where the runtime calls into
    user-defined config mapping functions to produce config for the child solids
    of composites.

    Args:
        pipeline_def (PipelineDefinition): PipelineDefinition
        solids_config (dict): Configuration for the solids in the pipeline. The "solids"
            entry of the environment_dict. Assumed to have already been validated.
    '''
    check.inst_param(pipeline_def, 'pipeline_def', PipelineDefinition)
    check.dict_param(solids_config, 'solids_config')
    run_config = (
        RunConfig()
        if run_config is None
        else check.inst_param(run_config, 'run_config', IRunConfig)
    )

    return {
        handle.to_string(): solid_config
        for handle, solid_config in _composite_descent(
            parent_stack=DescentStack(pipeline_def, None),
            solids_config_dict=solids_config,
        )
    }
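# A minimal usage sketch for composite_descent, assuming the era's top-level
# dagster exports; `noop_solid` and `descent_example` are hypothetical names
# introduced here for illustration only.
from dagster import PipelineDefinition, solid

@solid
def noop_solid(context):
    context.log.info('no-op')

descent_example = PipelineDefinition(name='descent_example', solid_defs=[noop_solid])

# With no per-solid config supplied, the result is a dict keyed by stringified
# solid handles (e.g. 'noop_solid'), with any composite config mapping already
# resolved by the user-defined config functions.
solid_configs = composite_descent(descent_example, solids_config={})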
def test_papertrail_logger():
    with mock.patch('logging.handlers.SysLogHandler.emit') as emit:
        execute_pipeline(
            hello_pipeline,
            {
                'loggers': {
                    'console': {'config': {'log_level': 'INFO'}},
                    'papertrail': {
                        'config': {
                            'log_level': 'INFO',
                            'name': 'hello_pipeline',
                            'papertrail_address': '127.0.0.1',
                            'papertrail_port': 12345,
                        }
                    },
                }
            },
            run_config=RunConfig(run_id='123'),
        )

    log_record = emit.call_args_list[0][0][0]

    assert isinstance(log_record, logging.LogRecord)
    assert log_record.name == 'hello_pipeline'
    assert log_record.levelname == 'INFO'
    assert (
        log_record.msg
        == '''system - 123 - Hello, world!
               solid = "hello_logs"
    solid_definition = "hello_logs"
            step_key = "hello_logs.compute"'''
    )
def test_single_step_resource_event_logs():
    # Test to attribute logs for single-step plans which are often the representation of
    # sub-plans in a multiprocessing execution environment. Most likely will need to be rewritten
    # with the refactor detailed in https://github.com/dagster-io/dagster/issues/2239
    USER_SOLID_MESSAGE = 'I AM A SOLID'
    USER_RESOURCE_MESSAGE = 'I AM A RESOURCE'
    events = []

    def event_callback(record):
        assert isinstance(record, EventRecord)
        events.append(record)

    @solid(required_resource_keys={'a'})
    def resource_solid(context):
        context.log.info(USER_SOLID_MESSAGE)

    @resource
    def resource_a(context):
        context.log.info(USER_RESOURCE_MESSAGE)
        return 'A'

    pipeline = PipelineDefinition(
        name='resource_logging_pipeline',
        solid_defs=[resource_solid],
        mode_defs=[
            ModeDefinition(
                resource_defs={'a': resource_a},
                logger_defs={'callback': construct_event_logger(event_callback)},
            )
        ],
    )

    result = execute_pipeline(
        pipeline,
        environment_dict={'loggers': {'callback': {}}},
        instance=DagsterInstance.local_temp(),
        run_config=RunConfig(step_keys_to_execute=['resource_solid.compute']),
    )
    assert result.success

    log_messages = [event for event in events if isinstance(event, LogMessageRecord)]
    assert len(log_messages) == 2

    resource_log_message = next(
        iter(
            [
                message
                for message in log_messages
                if message.user_message == USER_RESOURCE_MESSAGE
            ]
        )
    )
    assert resource_log_message.step_key == 'resource_solid.compute'
def test_execute_on_dask():
    '''This test is flaky on py27, I believe because of
    https://github.com/dask/distributed/issues/2446. For now, we just retry a couple times...
    '''
    result = execute_on_dask(
        ExecutionTargetHandle.for_pipeline_fn(define_dask_test_pipeline),
        env_config={'storage': {'filesystem': {}}},
        run_config=RunConfig(storage_mode=RunStorageMode.FILESYSTEM),
        dask_config=DaskConfig(timeout=30),
    )
    assert result.result_for_solid('simple').transformed_value() == 1
def execute_step_out_of_process(step_context, step):
    child_run_id = step_context.run_config.run_id

    child_run_config = RunConfig(
        run_id=child_run_id,
        tags=step_context.run_config.tags,
        step_keys_to_execute=step_context.run_config.step_keys_to_execute,
        mode=step_context.run_config.mode,
    )
    command = InProcessExecutorChildProcessCommand(
        step_context.environment_dict,
        child_run_config,
        step_context.executor_config,
        step.key,
        step_context.instance.get_ref(),
    )

    for event_or_none in execute_child_process_command(command):
        yield event_or_none
def evaluate_config(config_type, config_value, pipeline=None, run_config=None, seen_handles=None):
    return _evaluate_config(
        TraversalContext(
            config_type=check.inst_param(config_type, 'config_type', ConfigType),
            config_value=config_value,
            stack=EvaluationStack(config_type=config_type, entries=[]),
            pipeline=check.opt_inst_param(pipeline, 'pipeline', PipelineDefinition),
            run_config=check.opt_inst_param(
                run_config, 'run_config', IRunConfig, default=RunConfig()
            ),
            seen_handles=check.opt_list_param(seen_handles, 'seen_handles'),
        )
    )
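# A hedged sketch of driving evaluate_config directly, mirroring how build
# (below) uses it; assumes the `descent_example` pipeline from the sketch
# above and the create_environment_type helper referenced in build.
environment_type = create_environment_type(
    descent_example, descent_example.get_default_mode_name()
)
result = evaluate_config(environment_type, {}, descent_example, RunConfig())
if not result.success:
    # result.errors carries the stack-annotated evaluation errors that
    # DagsterInvalidConfigError surfaces in build below.
    for error in result.errors:
        print(error)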
def execute_step_out_of_process(step_context, step):
    child_run_config = RunConfig(
        run_id=step_context.run_config.run_id,
        tags=step_context.run_config.tags,
        log_sink=None,
        event_callback=None,
        reexecution_config=None,
        step_keys_to_execute=step_context.run_config.step_keys_to_execute,
        mode=step_context.run_config.mode,
    )

    with safe_tempfile_path() as log_sink_file:
        init_db(log_sink_file)
        # Although the type of is_done is threading._Event in py2, not threading.Event,
        # it is still constructed using the threading.Event() factory
        is_done = threading.Event()

        def log_watcher_thread_target():
            log_watcher = JsonSqlite3LogWatcher(log_sink_file, step_context.log, is_done)
            log_watcher.watch()

        log_watcher_thread = threading.Thread(target=log_watcher_thread_target)
        log_watcher_thread.start()

        command = InProcessExecutorChildProcessCommand(
            step_context.environment_dict,
            child_run_config,
            step_context.executor_config,
            step.key,
            log_sink_file,
        )
        try:
            for step_event in execute_child_process_command(command):
                if step_context.run_config.event_callback and isinstance(
                    step_event, DagsterEvent
                ):
                    log_step_event(step_context, step_event)
                yield step_event
        finally:
            is_done.set()
            log_watcher_thread.join()
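# A generic, stdlib-only sketch of the watcher-thread pattern used above: a
# watcher tails a file until the producer signals completion through a
# threading.Event, and the producer sets the event and joins the watcher in a
# finally block. 'events.log' and tail_until_done are illustrative names, not
# dagster APIs.
import threading
import time

def tail_until_done(path, is_done, handle_line):
    with open(path) as f:
        while True:
            line = f.readline()
            if line:
                handle_line(line.rstrip())
            elif is_done.is_set():
                # No more data and the producer is finished: stop tailing.
                break
            else:
                time.sleep(0.1)

open('events.log', 'w').close()  # ensure the file exists before tailing
done = threading.Event()
watcher = threading.Thread(target=tail_until_done, args=('events.log', done, print))
watcher.start()
try:
    with open('events.log', 'a') as f:
        f.write('hello from the producer\n')
finally:
    done.set()
    watcher.join()  # drains remaining lines, mirroring is_done.set(); join() above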
def build(pipeline, environment_dict=None, run_config=None):
    check.inst_param(pipeline, 'pipeline', PipelineDefinition)
    check.opt_dict_param(environment_dict, 'environment')
    run_config = check.opt_inst_param(run_config, 'run_config', IRunConfig, default=RunConfig())

    mode = run_config.mode or pipeline.get_default_mode_name()
    environment_type = create_environment_type(pipeline, mode)

    result = evaluate_config(environment_type, environment_dict, pipeline, run_config)

    if not result.success:
        raise DagsterInvalidConfigError(pipeline, result.errors, environment_dict)

    return EnvironmentConfig.from_config_value(result.value, environment_dict)
def execute_step_out_of_process(step_context, step):
    with safe_tempfile_path() as sqlite_file:
        event_sink = SqliteEventSink(sqlite_file, raise_on_error=True)

        child_run_config = RunConfig(
            run_id=step_context.run_config.run_id,
            tags=step_context.run_config.tags,
            event_sink=event_sink,
            step_keys_to_execute=step_context.run_config.step_keys_to_execute,
            mode=step_context.run_config.mode,
        )
        command = InProcessExecutorChildProcessCommand(
            step_context.environment_dict, child_run_config, step_context.executor_config, step.key
        )

        with event_sink.log_forwarding(step_context.log):
            for event_or_none in execute_child_process_command(command):
                yield event_or_none
def get_preset(self, name):
    check.str_param(name, 'name')

    if name not in self._preset_dict:
        raise DagsterInvariantViolationError(
            (
                'Could not find preset for "{name}". Available presets '
                'for pipeline "{pipeline_name}" are {preset_names}.'
            ).format(
                name=name,
                preset_names=list(self._preset_dict.keys()),
                pipeline_name=self._name,
            )
        )
    preset = self._preset_dict[name]

    pipeline = self
    if preset.solid_subset is not None:
        pipeline = pipeline.build_sub_pipeline(preset.solid_subset)

    return {
        'pipeline': pipeline,
        'environment_dict': preset.get_environment_dict(self._name),
        'run_config': RunConfig(mode=preset.mode),
    }
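# Hedged usage sketch: get_preset returns a dict whose entries feed straight
# into an execution call; `my_pipeline` and its 'dev' preset are hypothetical
# names introduced here for illustration.
preset_run = my_pipeline.get_preset('dev')
execute_pipeline(
    preset_run['pipeline'],
    environment_dict=preset_run['environment_dict'],
    run_config=preset_run['run_config'],
)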
def build(pipeline, environment_dict=None, run_config=None):
    from dagster.config.validate import process_config

    from .composite_descent import composite_descent

    check.inst_param(pipeline, 'pipeline', PipelineDefinition)
    environment_dict = check.opt_dict_param(environment_dict, 'environment_dict')
    run_config = check.opt_inst_param(run_config, 'run_config', IRunConfig, default=RunConfig())

    mode = run_config.mode or pipeline.get_default_mode_name()
    environment_type = create_environment_type(pipeline, mode)

    config_evr = process_config(environment_type, environment_dict)
    if not config_evr.success:
        raise DagsterInvalidConfigError(
            'Error in config for pipeline {}'.format(pipeline.name),
            config_evr.errors,
            environment_dict,
        )

    config_value = config_evr.value

    solid_config_dict = composite_descent(pipeline, config_value.get('solids', {}), run_config)

    return EnvironmentConfig(
        solids=solid_config_dict,
        execution=ExecutionConfig.from_dict(config_value.get('execution')),
        storage=StorageConfig.from_dict(config_value.get('storage')),
        loggers=config_value.get('loggers'),
        original_config_dict=environment_dict,
        resources=config_value.get('resources'),
    )
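# A minimal sketch of building an EnvironmentConfig with this later variant of
# build, assuming the `descent_example` pipeline from the composite_descent
# sketch above and that an empty environment dict validates for it.
env_config = build(descent_example, environment_dict={})
# env_config.solids comes from composite_descent; execution, storage, loggers,
# and resources come from the validated config value.
print(env_config.original_config_dict)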