def report_engine_event(
    self,
    message,
    pipeline_run,
    engine_event_data=None,
    cls=None,
    step_key=None,
):
    """Report an EngineEvent that occurred outside of a pipeline execution context."""
    from dagster.core.events import EngineEventData, DagsterEvent, DagsterEventType
    from dagster.core.events.log import DagsterEventRecord

    check.class_param(cls, "cls")
    check.str_param(message, "message")
    check.inst_param(pipeline_run, "pipeline_run", PipelineRun)
    engine_event_data = check.opt_inst_param(
        engine_event_data,
        "engine_event_data",
        EngineEventData,
        EngineEventData([]),
    )

    if cls:
        message = "[{}] {}".format(cls.__name__, message)

    log_level = logging.INFO
    if engine_event_data and engine_event_data.error:
        log_level = logging.ERROR

    dagster_event = DagsterEvent(
        event_type_value=DagsterEventType.ENGINE_EVENT.value,
        pipeline_name=pipeline_run.pipeline_name,
        message=message,
        event_specific_data=engine_event_data,
    )
    event_record = DagsterEventRecord(
        message=message,
        user_message=message,
        level=log_level,
        pipeline_name=pipeline_run.pipeline_name,
        run_id=pipeline_run.run_id,
        error_info=None,
        timestamp=time.time(),
        step_key=step_key,
        dagster_event=dagster_event,
    )

    self.handle_new_event(event_record)
    return dagster_event
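# Illustrative usage sketch (not from the original sources): how a caller such as a
# run launcher might invoke report_engine_event above to record a failure at ERROR
# level. The names `instance`, `run`, `MyRunLauncher`, and `report_launch_failure`
# are hypothetical stand-ins; the call shape mirrors the launcher and executor call
# sites later in this collection.
import sys

from dagster.core.events import EngineEventData
from dagster.utils.error import serializable_error_info_from_exc_info


class MyRunLauncher:  # hypothetical launcher class, used only for the `cls` tag
    pass


def report_launch_failure(instance, run):
    try:
        raise RuntimeError("simulated launch failure")
    except Exception:  # pylint: disable=broad-except
        # engine_event_data.error is set, so the record is logged at ERROR level
        instance.report_engine_event(
            message="Run worker failed to launch",
            pipeline_run=run,
            engine_event_data=EngineEventData.engine_error(
                serializable_error_info_from_exc_info(sys.exc_info())
            ),
            cls=MyRunLauncher,
        )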
def terminate(self, run_id):
    check.str_param(run_id, "run_id")
    run = self._instance.get_run_by_id(run_id)

    if not run:
        return False

    can_terminate = self.can_terminate(run_id)
    if not can_terminate:
        self._instance.report_engine_event(
            message="Unable to terminate pipeline: can_terminate returned {}.".format(
                can_terminate
            ),
            pipeline_run=run,
            cls=self.__class__,
        )
        return False

    job_name = get_job_name_from_run_id(run_id)
    job_namespace = self.get_namespace_from_run_config(run_id)

    self._instance.report_run_canceling(run)

    try:
        termination_result = delete_job(job_name=job_name, namespace=job_namespace)
        if termination_result:
            self._instance.report_engine_event(
                message="Pipeline was terminated successfully.",
                pipeline_run=run,
                cls=self.__class__,
            )
        else:
            self._instance.report_engine_event(
                message="Pipeline was not terminated successfully; delete_job returned {}".format(
                    termination_result
                ),
                pipeline_run=run,
                cls=self.__class__,
            )
        return termination_result
    except Exception:  # pylint: disable=broad-except
        self._instance.report_engine_event(
            message="Pipeline was not terminated successfully; encountered error in delete_job",
            pipeline_run=run,
            engine_event_data=EngineEventData.engine_error(
                serializable_error_info_from_exc_info(sys.exc_info())
            ),
            cls=self.__class__,
        )
        # terminate() returns a boolean everywhere else; report failure explicitly
        return False
def core_execute_run(recon_pipeline, pipeline_run, instance):
    check.inst_param(recon_pipeline, "recon_pipeline", ReconstructablePipeline)
    check.inst_param(pipeline_run, "pipeline_run", PipelineRun)
    check.inst_param(instance, "instance", DagsterInstance)

    # try to load the pipeline definition early
    try:
        recon_pipeline.get_definition()
    except Exception:  # pylint: disable=broad-except
        yield instance.report_engine_event(
            "Could not load pipeline definition.",
            pipeline_run,
            EngineEventData.engine_error(
                serializable_error_info_from_exc_info(sys.exc_info())
            ),
        )
        yield from _report_run_failed_if_not_finished(instance, pipeline_run.run_id)
        return

    try:
        yield from execute_run_iterator(recon_pipeline, pipeline_run, instance)
    except (KeyboardInterrupt, DagsterExecutionInterruptedError):
        yield from _report_run_failed_if_not_finished(instance, pipeline_run.run_id)
        yield instance.report_engine_event(
            message="Pipeline execution terminated by interrupt",
            pipeline_run=pipeline_run,
        )
    except Exception:  # pylint: disable=broad-except
        yield instance.report_engine_event(
            "An exception was thrown during execution that is likely a framework error, "
            "rather than an error in user code.",
            pipeline_run,
            EngineEventData.engine_error(
                serializable_error_info_from_exc_info(sys.exc_info())
            ),
        )
        yield from _report_run_failed_if_not_finished(instance, pipeline_run.run_id)
def launch_step(self, step_handler_context: StepHandlerContext) -> List[DagsterEvent]:
    container_context = self._get_docker_container_context(step_handler_context)

    client = self._get_client(container_context)

    step_image = self._get_image(step_handler_context)
    validate_docker_image(step_image)

    try:
        step_container = self._create_step_container(
            client, container_context, step_image, step_handler_context.execute_step_args
        )
    except docker.errors.ImageNotFound:
        client.images.pull(step_image)
        step_container = self._create_step_container(
            client, container_context, step_image, step_handler_context.execute_step_args
        )

    if len(container_context.networks) > 1:
        for network_name in container_context.networks[1:]:
            network = client.networks.get(network_name)
            network.connect(step_container)

    assert (
        len(step_handler_context.execute_step_args.step_keys_to_execute) == 1
    ), "Launching multiple steps is not currently supported"
    step_key = step_handler_context.execute_step_args.step_keys_to_execute[0]

    events = [
        DagsterEvent(
            event_type_value=DagsterEventType.ENGINE_EVENT.value,
            pipeline_name=step_handler_context.execute_step_args.pipeline_origin.pipeline_name,
            step_key=step_key,
            message="Launching step in Docker container",
            event_specific_data=EngineEventData(
                [
                    MetadataEntry("Step key", value=step_key),
                    MetadataEntry("Docker container id", value=step_container.id),
                ],
            ),
        )
    ]

    step_container.start()

    return events
def _launch_k8s_job_with_args(self, job_name, args, run):
    container_context = self.get_container_context_for_run(run)

    pod_name = job_name

    pipeline_origin = run.pipeline_code_origin
    user_defined_k8s_config = get_user_defined_k8s_config(frozentags(run.tags))
    repository_origin = pipeline_origin.repository_origin

    job_config = container_context.get_k8s_job_config(
        job_image=repository_origin.container_image, run_launcher=self
    )

    self._instance.add_run_tags(
        run.run_id,
        {DOCKER_IMAGE_TAG: job_config.job_image},
    )

    job = construct_dagster_k8s_job(
        job_config=job_config,
        args=args,
        job_name=job_name,
        pod_name=pod_name,
        component="run_worker",
        user_defined_k8s_config=user_defined_k8s_config,
        labels={
            "dagster/job": pipeline_origin.pipeline_name,
            "dagster/run-id": run.run_id,
        },
    )

    self._instance.report_engine_event(
        "Creating Kubernetes run worker job",
        run,
        EngineEventData(
            [
                MetadataEntry("Kubernetes Job name", value=job_name),
                MetadataEntry("Kubernetes Namespace", value=container_context.namespace),
                MetadataEntry("Run ID", value=run.run_id),
            ]
        ),
        cls=self.__class__,
    )

    self._batch_api.create_namespaced_job(body=job, namespace=container_context.namespace)
    self._instance.report_engine_event(
        "Kubernetes run worker job created",
        run,
        cls=self.__class__,
    )
def evt(name):
    return EventRecord(
        None,
        name,
        "debug",
        "",
        "foo",
        time.time(),
        dagster_event=DagsterEvent(
            DagsterEventType.ENGINE_EVENT.value,
            "nonce",
            event_specific_data=EngineEventData.in_process(999),
        ),
    )
def create_test_event_log_record(message: str, run_id: str = DEFAULT_RUN_ID):
    return EventRecord(
        None,
        message,
        "debug",
        "",
        run_id,
        time.time(),
        dagster_event=DagsterEvent(
            DagsterEventType.ENGINE_EVENT.value,
            "nonce",
            event_specific_data=EngineEventData.in_process(999),
        ),
    )
def _execute_plan(_self, instance_ref_dict, handle_dict, run_id, step_keys, retries_dict):
    check.dict_param(instance_ref_dict, 'instance_ref_dict')
    check.dict_param(handle_dict, 'handle_dict')
    check.str_param(run_id, 'run_id')
    check.list_param(step_keys, 'step_keys', of_type=str)
    check.dict_param(retries_dict, 'retries_dict')

    instance_ref = InstanceRef.from_dict(instance_ref_dict)
    instance = DagsterInstance.from_ref(instance_ref)
    handle = ExecutionTargetHandle.from_dict(handle_dict)
    retries = Retries.from_config(retries_dict)

    pipeline_run = instance.get_run_by_id(run_id)
    check.invariant(pipeline_run, 'Could not load run {}'.format(run_id))

    pipeline_def = handle.build_pipeline_definition().build_sub_pipeline(
        pipeline_run.selector.solid_subset
    )

    step_keys_str = ", ".join(step_keys)

    execution_plan = create_execution_plan(
        pipeline_def,
        pipeline_run.environment_dict,
        mode=pipeline_run.mode,
        step_keys_to_execute=pipeline_run.step_keys_to_execute,
    ).build_subset_plan(step_keys)

    engine_event = instance.report_engine_event(
        'Executing steps {} in celery worker'.format(step_keys_str),
        pipeline_run,
        EngineEventData(
            [EventMetadataEntry.text(step_keys_str, 'step_keys')],
            marker_end=DELEGATE_MARKER,
        ),
        CeleryEngine,
        step_key=execution_plan.step_key_for_single_step_plans(),
    )

    events = [engine_event]

    for step_event in execute_plan_iterator(
        execution_plan,
        pipeline_run=pipeline_run,
        environment_dict=pipeline_run.environment_dict,
        instance=instance,
        retries=retries,
    ):
        events.append(step_event)

    serialized_events = [serialize_dagster_namedtuple(event) for event in events]
    return serialized_events
def create_event(count: int, run_id: str = RUN_ID):
    return EventLogEntry(
        None,
        str(count),
        "debug",
        "",
        run_id,
        time.time(),
        dagster_event=DagsterEvent(
            DagsterEventType.ENGINE_EVENT.value,
            "nonce",
            event_specific_data=EngineEventData.in_process(999),
        ),
    )
def evt(name):
    return DagsterEventRecord(
        None,
        name,
        'debug',
        '',
        'foo',
        time.time(),
        dagster_event=DagsterEvent(
            DagsterEventType.ENGINE_EVENT.value,
            'nonce',
            event_specific_data=EngineEventData.in_process(999),
        ),
    )
def execute(pipeline_context, execution_plan):
    check.inst_param(pipeline_context, 'pipeline_context', SystemPipelineExecutionContext)
    check.inst_param(execution_plan, 'execution_plan', ExecutionPlan)

    step_keys_to_execute = execution_plan.step_keys_to_execute

    yield DagsterEvent.engine_event(
        pipeline_context,
        'Executing steps in process (pid: {pid})'.format(pid=os.getpid()),
        event_specific_data=EngineEventData.in_process(os.getpid(), step_keys_to_execute),
    )

    with time_execution_scope() as timer_result:
        check.param_invariant(
            isinstance(pipeline_context.executor_config, ExecutorConfig),
            'pipeline_context',
            'Expected executor_config to be ExecutorConfig got {}'.format(
                pipeline_context.executor_config
            ),
        )

        for event in inner_plan_execution_iterator(
            pipeline_context, execution_plan, pipeline_context.executor_config.retries
        ):
            yield event

    yield DagsterEvent.engine_event(
        pipeline_context,
        'Finished steps in process (pid: {pid}) in {duration_ms}'.format(
            pid=os.getpid(), duration_ms=format_duration(timer_result.millis)
        ),
        event_specific_data=EngineEventData.in_process(os.getpid(), step_keys_to_execute),
    )
def execute_step_out_of_process(self, step_context, step, errors, term_events):
    command = InProcessExecutorChildProcessCommand(
        run_config=step_context.run_config,
        pipeline_run=step_context.pipeline_run,
        step_key=step.key,
        instance_ref=step_context.instance.get_ref(),
        term_event=term_events[step.key],
        recon_pipeline=self.pipeline,
        retries=self.retries,
    )

    yield DagsterEvent.engine_event(
        step_context,
        "Launching subprocess for {}".format(step.key),
        EngineEventData(marker_start=DELEGATE_MARKER),
        step_key=step.key,
    )

    for ret in execute_child_process_command(command):
        if ret is None or isinstance(ret, DagsterEvent):
            yield ret
        elif isinstance(ret, ChildProcessEvent):
            if isinstance(ret, ChildProcessSystemErrorEvent):
                errors[ret.pid] = ret.error_info
        elif isinstance(ret, KeyboardInterrupt):
            yield DagsterEvent.engine_event(
                step_context,
                "Multiprocess engine: received KeyboardInterrupt - forwarding to active child processes",
                EngineEventData.interrupted(list(term_events.keys())),
            )
            for term_event in term_events.values():
                term_event.set()
        else:
            check.failed("Unexpected return value from child process {}".format(type(ret)))
def execute(self, plan_context, execution_plan):
    check.inst_param(plan_context, "plan_context", PlanOrchestrationContext)
    check.inst_param(execution_plan, "execution_plan", ExecutionPlan)

    step_keys_to_execute = execution_plan.step_keys_to_execute

    yield DagsterEvent.engine_event(
        plan_context,
        "Executing steps in process (pid: {pid})".format(pid=os.getpid()),
        event_specific_data=EngineEventData.in_process(os.getpid(), step_keys_to_execute),
    )

    with time_execution_scope() as timer_result:
        yield from iter(
            ExecuteRunWithPlanIterable(
                execution_plan=plan_context.execution_plan,
                iterator=inner_plan_execution_iterator,
                execution_context_manager=PlanExecutionContextManager(
                    pipeline=plan_context.pipeline,
                    retry_mode=plan_context.retry_mode,
                    execution_plan=plan_context.execution_plan,
                    run_config=plan_context.run_config,
                    pipeline_run=plan_context.pipeline_run,
                    instance=plan_context.instance,
                    raise_on_error=plan_context.raise_on_error,
                    output_capture=plan_context.output_capture,
                ),
            )
        )

    yield DagsterEvent.engine_event(
        plan_context,
        "Finished steps in process (pid: {pid}) in {duration_ms}".format(
            pid=os.getpid(), duration_ms=format_duration(timer_result.millis)
        ),
        event_specific_data=EngineEventData.in_process(os.getpid(), step_keys_to_execute),
    )
def launch_run(self, instance, run, external_pipeline):
    check.inst_param(run, 'run', PipelineRun)
    check.inst_param(external_pipeline, 'external_pipeline', ExternalPipeline)

    job_name = 'dagster-run-{}'.format(run.run_id)
    pod_name = job_name

    resources = get_k8s_resource_requirements(frozentags(external_pipeline.tags))

    job = construct_dagster_k8s_job(
        job_config=self.job_config,
        command=['dagster-graphql'],
        args=[
            '-p',
            'executeRunInProcess',
            '-v',
            seven.json.dumps(
                {
                    'runId': run.run_id,
                    'repositoryName': external_pipeline.handle.repository_name,
                    'repositoryLocationName': external_pipeline.handle.location_name,
                }
            ),
            '--remap-sigterm',
        ],
        job_name=job_name,
        pod_name=pod_name,
        component='runmaster',
        resources=resources,
    )

    self._batch_api.create_namespaced_job(body=job, namespace=self.job_namespace)
    self._instance.report_engine_event(
        'Kubernetes runmaster job launched',
        run,
        EngineEventData(
            [
                EventMetadataEntry.text(job_name, 'Kubernetes Job name'),
                EventMetadataEntry.text(pod_name, 'Kubernetes Pod name'),
                EventMetadataEntry.text(self.job_namespace, 'Kubernetes Namespace'),
                EventMetadataEntry.text(run.run_id, 'Run ID'),
            ]
        ),
        cls=K8sRunLauncher,
    )
    return run
def launch_run(self, instance, run, external_pipeline):
    check.inst_param(run, "run", PipelineRun)
    check.inst_param(external_pipeline, "external_pipeline", ExternalPipeline)

    job_name = "dagster-run-{}".format(run.run_id)
    pod_name = job_name

    user_defined_k8s_config = get_user_defined_k8s_config(frozentags(run.tags))

    pipeline_origin = external_pipeline.get_python_origin()
    repository_origin = pipeline_origin.repository_origin

    job_config = (
        self._get_grpc_job_config(repository_origin.container_image)
        if repository_origin.container_image
        else self.get_static_job_config()
    )

    input_json = serialize_dagster_namedtuple(
        ExecuteRunArgs(
            pipeline_origin=pipeline_origin,
            pipeline_run_id=run.run_id,
            instance_ref=None,
        )
    )

    job = construct_dagster_k8s_job(
        job_config=job_config,
        args=["dagster", "api", "execute_run", input_json],
        job_name=job_name,
        pod_name=pod_name,
        component="run_coordinator",
        user_defined_k8s_config=user_defined_k8s_config,
    )

    self._batch_api.create_namespaced_job(body=job, namespace=self.job_namespace)
    self._instance.report_engine_event(
        "Kubernetes run worker job launched",
        run,
        EngineEventData(
            [
                EventMetadataEntry.text(job_name, "Kubernetes Job name"),
                EventMetadataEntry.text(self.job_namespace, "Kubernetes Namespace"),
                EventMetadataEntry.text(run.run_id, "Run ID"),
            ]
        ),
        cls=self.__class__,
    )
    return run
def launch_run(self, instance, run, external_pipeline):
    if isinstance(
        external_pipeline.get_external_origin().external_repository_origin.repository_location_origin,
        GrpcServerRepositoryLocationOrigin,
    ):
        repository_location_handle = (
            external_pipeline.repository_handle.repository_location_handle
        )

        if not isinstance(repository_location_handle, GrpcServerRepositoryLocationHandle):
            raise DagsterInvariantViolationError(
                "Expected RepositoryLocationHandle to be of type "
                "GrpcServerRepositoryLocationHandle but found type {}".format(
                    type(repository_location_handle)
                )
            )

        repository_name = external_pipeline.repository_handle.repository_name
        location_name = external_pipeline.repository_handle.repository_location_handle.location_name
        pipeline_origin = PipelinePythonOrigin(
            pipeline_name=external_pipeline.name,
            repository_origin=repository_location_handle.get_repository_python_origin(
                repository_name
            ),
        )
    else:
        location_name = 'local'
        pipeline_origin = external_pipeline.get_python_origin()

    input_json = serialize_dagster_namedtuple(
        ExecuteRunArgs(
            pipeline_origin=pipeline_origin,
            pipeline_run_id=run.run_id,
            instance_ref=None,
        )
    )

    app = self._get_app(location_name)

    sig = app.signature('launch_run', args=(input_json,), queue=f"{location_name}-pipelines")
    result = sig.delay()

    instance.report_engine_event(
        "Started Celery task for pipeline (task id: {result.id}).".format(result=result),
        run,
        EngineEventData(
            metadata_entries=[
                EventMetadataEntry.text(result.id, "task_id"),
            ]
        ),
    )

    return run
def _execute_run_command_body(
    recon_pipeline, pipeline_run_id, instance, write_stream_fn, set_exit_code_on_failure
):
    if instance.should_start_background_run_thread:
        cancellation_thread, cancellation_thread_shutdown_event = start_run_cancellation_thread(
            instance, pipeline_run_id
        )

    pipeline_run = instance.get_run_by_id(pipeline_run_id)

    pid = os.getpid()
    instance.report_engine_event(
        "Started process for run (pid: {pid}).".format(pid=pid),
        pipeline_run,
        EngineEventData.in_process(pid, marker_end="cli_api_subprocess_init"),
    )

    run_worker_failed = False

    try:
        for event in core_execute_run(
            recon_pipeline,
            pipeline_run,
            instance,
        ):
            write_stream_fn(event)
            if event.event_type == DagsterEventType.PIPELINE_FAILURE:
                run_worker_failed = True
    except:
        # relies on core_execute_run writing failures to the event log before raising
        run_worker_failed = True
    finally:
        if instance.should_start_background_run_thread:
            cancellation_thread_shutdown_event.set()
            if cancellation_thread.is_alive():
                cancellation_thread.join(timeout=15)
                if cancellation_thread.is_alive():
                    instance.report_engine_event(
                        "Cancellation thread did not shutdown gracefully",
                        pipeline_run,
                    )

        instance.report_engine_event(
            "Process for run exited (pid: {pid}).".format(pid=pid),
            pipeline_run,
        )

    return 1 if (run_worker_failed and set_exit_code_on_failure) else 0
def _execute_schedule(graphene_info, external_pipeline, execution_params, errors):
    check.inst_param(external_pipeline, 'external_pipeline', ExternalPipeline)

    instance = graphene_info.context.instance
    mode, environment_dict = execution_params.mode, execution_params.environment_dict

    validation_result = validate_config_from_snap(
        external_pipeline.config_schema_snapshot,
        external_pipeline.root_config_key_for_mode(mode),
        environment_dict,
    )
    if validation_result.success:
        external_execution_plan = graphene_info.context.get_external_execution_plan(
            external_pipeline, environment_dict, mode, execution_params.step_keys
        )

    pipeline_run = instance.create_run(
        pipeline_name=external_pipeline.name,
        environment_dict=environment_dict,
        mode=mode,
        solid_subset=(
            execution_params.selector.solid_subset
            if execution_params.selector is not None
            else None
        ),
        tags=execution_params.execution_metadata.tags,
        pipeline_snapshot=external_pipeline.pipeline_snapshot,
        execution_plan_snapshot=external_execution_plan.execution_plan_snapshot,
    )

    # Inject errors into event log at this point
    if len(errors) > 0:
        for error in errors:
            instance.report_engine_event(
                error.message, pipeline_run, EngineEventData.engine_error(error)
            )

    # Launch run if run launcher is defined
    run_launcher = graphene_info.context.instance.run_launcher
    if run_launcher:
        result = _launch_pipeline_execution_for_created_run(graphene_info, pipeline_run.run_id)
    else:
        result = _start_pipeline_execution_for_created_run(graphene_info, pipeline_run.run_id)

    return pipeline_run, result
def cli_api_execute_run(output_file, instance, repository_handle, pipeline_run):
    check.str_param(output_file, 'output_file')
    check.inst_param(instance, 'instance', DagsterInstance)
    check.inst_param(repository_handle, 'repository_handle', RepositoryHandle)
    check.inst_param(pipeline_run, 'pipeline_run', PipelineRun)

    pointer = repository_handle.get_pointer()
    location_handle = repository_handle.repository_location_handle

    if isinstance(location_handle, PythonEnvRepositoryLocationHandle):
        executable_path = location_handle.executable_path
    elif isinstance(location_handle, InProcessRepositoryLocationHandle):
        # [legacy] default to using sys.executable for the in process
        # location handle
        executable_path = sys.executable
    else:
        raise DagsterInvariantViolationError(
            "Unable to resolve executable_path for repository location handle of type {}".format(
                location_handle.__class__
            )
        )

    parts = (
        [executable_path, '-m', 'dagster', 'api', 'execute_run', output_file]
        + xplat_shlex_split(pointer.get_cli_args())
        + [
            '--instance-ref',
            '{instance_ref}'.format(
                instance_ref=serialize_dagster_namedtuple(instance.get_ref())
            ),
            '--pipeline-run-id',
            '{pipeline_run_id}'.format(pipeline_run_id=pipeline_run.run_id),
        ]
    )

    instance.report_engine_event(
        'About to start process for pipeline "{pipeline_name}" (run_id: {run_id}).'.format(
            pipeline_name=pipeline_run.pipeline_name, run_id=pipeline_run.run_id
        ),
        pipeline_run,
        engine_event_data=EngineEventData(marker_start='cli_api_subprocess_init'),
    )

    return open_ipc_subprocess(parts)
def cli_api_execute_run(output_file, instance, pipeline_origin, pipeline_run):
    check.str_param(output_file, 'output_file')
    check.inst_param(instance, 'instance', DagsterInstance)
    check.inst_param(pipeline_origin, 'pipeline_origin', PipelinePythonOrigin)
    check.inst_param(pipeline_run, 'pipeline_run', PipelineRun)

    from dagster.cli.api import ExecuteRunArgsLoadComplete

    with safe_tempfile_path() as input_file:
        write_unary_input(
            input_file,
            ExecuteRunArgs(
                pipeline_origin=pipeline_origin,
                pipeline_run_id=pipeline_run.run_id,
                instance_ref=instance.get_ref(),
            ),
        )

        parts = [
            pipeline_origin.executable_path,
            '-m',
            'dagster',
            'api',
            'execute_run',
            input_file,
            output_file,
        ]

        instance.report_engine_event(
            'About to start process for pipeline "{pipeline_name}" (run_id: {run_id}).'.format(
                pipeline_name=pipeline_run.pipeline_name, run_id=pipeline_run.run_id
            ),
            pipeline_run,
            engine_event_data=EngineEventData(marker_start='cli_api_subprocess_init'),
        )

        process = open_ipc_subprocess(parts)

        # we need to process this event in order to ensure that the called process loads the input
        event = next(ipc_read_event_stream(output_file))

        check.inst(event, ExecuteRunArgsLoadComplete)

        return process
def _execute_schedule(graphene_info, pipeline_def, execution_params, errors):
    instance = graphene_info.context.instance

    execution_plan = None
    if is_config_valid(pipeline_def, execution_params.environment_dict, execution_params.mode):
        execution_plan = create_execution_plan(
            pipeline_def,
            execution_params.environment_dict,
            mode=execution_params.mode,
        )

    execution_plan_snapshot = None
    if execution_plan:
        execution_plan_snapshot = snapshot_from_execution_plan(
            execution_plan, pipeline_def.get_pipeline_snapshot_id()
        )

    pipeline_run = instance.create_run(
        pipeline_name=pipeline_def.name,
        environment_dict=execution_params.environment_dict,
        mode=execution_params.mode,
        solid_subset=(
            execution_params.selector.solid_subset
            if execution_params.selector is not None
            else None
        ),
        tags=execution_params.execution_metadata.tags,
        pipeline_snapshot=pipeline_def.get_pipeline_snapshot(),
        execution_plan_snapshot=execution_plan_snapshot,
    )

    # Inject errors into event log at this point
    if len(errors) > 0:
        for error in errors:
            instance.report_engine_event(
                error.message, pipeline_run, EngineEventData.engine_error(error)
            )

    # Launch run if run launcher is defined
    run_launcher = graphene_info.context.instance.run_launcher
    if run_launcher:
        result = _launch_pipeline_execution_for_created_run(graphene_info, pipeline_run.run_id)
    else:
        result = _start_pipeline_execution_for_created_run(graphene_info, pipeline_run.run_id)

    return pipeline_run, result
def launch_run(self, instance, run, external_pipeline):
    check.inst_param(instance, 'instance', DagsterInstance)
    check.inst_param(run, 'run', PipelineRun)
    check.inst_param(external_pipeline, 'external_pipeline', ExternalPipeline)

    job_name = 'dagster-run-{}'.format(run.run_id)
    pod_name = job_name

    job = construct_dagster_graphql_k8s_job(
        self.job_config,
        args=[
            '-p',
            'executeRunInProcess',
            '-v',
            seven.json.dumps(
                {
                    'runId': run.run_id,
                    'repositoryName': external_pipeline.handle.repository_name,
                    'repositoryLocationName': external_pipeline.handle.location_name,
                }
            ),
        ],
        job_name=job_name,
        pod_name=pod_name,
        component='runmaster',
    )

    api = kubernetes.client.BatchV1Api()
    api.create_namespaced_job(body=job, namespace=self.job_namespace)

    instance.report_engine_event(
        'Kubernetes runmaster job launched',
        run,
        EngineEventData(
            [
                EventMetadataEntry.text(job_name, 'Kubernetes Job name'),
                EventMetadataEntry.text(pod_name, 'Kubernetes Pod name'),
                EventMetadataEntry.text(self.job_namespace, 'Kubernetes Namespace'),
                EventMetadataEntry.text(run.run_id, 'Run ID'),
            ]
        ),
        cls=K8sRunLauncher,
    )
    return run
def report_engine_event(self, cls, message, pipeline_run, engine_event_data=None):
    '''Report an EngineEvent that occurred outside of a pipeline execution context.'''
    from dagster.core.events import EngineEventData, DagsterEvent, DagsterEventType
    from dagster.core.events.log import DagsterEventRecord

    check.class_param(cls, 'cls')
    check.str_param(message, 'message')
    check.inst_param(pipeline_run, 'pipeline_run', PipelineRun)
    engine_event_data = check.opt_inst_param(
        engine_event_data,
        'engine_event_data',
        EngineEventData,
        EngineEventData([]),
    )

    message = "[{}] {}".format(cls.__name__, message)

    log_level = logging.INFO
    if engine_event_data and engine_event_data.error:
        log_level = logging.ERROR

    event_record = DagsterEventRecord(
        message=message,
        user_message=message,
        level=log_level,
        pipeline_name=pipeline_run.pipeline_name,
        run_id=pipeline_run.run_id,
        error_info=None,
        timestamp=time.time(),
        dagster_event=DagsterEvent(
            event_type_value=DagsterEventType.ENGINE_EVENT.value,
            pipeline_name=pipeline_run.pipeline_name,
            message=message,
            event_specific_data=engine_event_data,
        ),
    )

    self.handle_new_event(event_record)
def _execute_run(request):
    try:
        execute_run_args = deserialize_json_to_dagster_namedtuple(
            request.serialized_execute_run_args
        )
        check.inst_param(execute_run_args, 'execute_run_args', ExecuteRunArgs)

        recon_pipeline = recon_pipeline_from_origin(execute_run_args.pipeline_origin)

        instance = DagsterInstance.from_ref(execute_run_args.instance_ref)
        pipeline_run = instance.get_run_by_id(execute_run_args.pipeline_run_id)
        pid = os.getpid()
    except:  # pylint: disable=bare-except
        yield IPCErrorMessage(
            serializable_error_info=serializable_error_info_from_exc_info(sys.exc_info()),
            message='Error during RPC setup for ExecuteRun',
        )
        return

    yield instance.report_engine_event(
        'Started process for pipeline (pid: {pid}).'.format(pid=pid),
        pipeline_run,
        EngineEventData.in_process(pid, marker_end='cli_api_subprocess_init'),
    )

    # This is so nasty but seemingly unavoidable
    # https://amir.rachum.com/blog/2017/03/03/generator-cleanup/
    closed = False
    try:
        for event in _core_execute_run(recon_pipeline, pipeline_run, instance):
            yield event
    except GeneratorExit:
        closed = True
        raise
    finally:
        if not closed:
            yield instance.report_engine_event(
                'Process for pipeline exited (pid: {pid}).'.format(pid=pid),
                pipeline_run,
            )
def test_event_log_storage_store_events_and_wipe(self, storage):
    assert len(storage.get_logs_for_run(DEFAULT_RUN_ID)) == 0
    storage.store_event(
        EventRecord(
            None,
            "Message2",
            "debug",
            "",
            DEFAULT_RUN_ID,
            time.time(),
            dagster_event=DagsterEvent(
                DagsterEventType.ENGINE_EVENT.value,
                "nonce",
                event_specific_data=EngineEventData.in_process(999),
            ),
        )
    )
    assert len(storage.get_logs_for_run(DEFAULT_RUN_ID)) == 1
    assert storage.get_stats_for_run(DEFAULT_RUN_ID)
    storage.wipe()
    assert len(storage.get_logs_for_run(DEFAULT_RUN_ID)) == 0
def test_in_memory_event_log_storage_store_events_and_wipe():
    storage = InMemoryEventLogStorage()
    assert len(storage.get_logs_for_run('foo')) == 0
    storage.store_event(
        DagsterEventRecord(
            None,
            'Message2',
            'debug',
            '',
            'foo',
            time.time(),
            dagster_event=DagsterEvent(
                DagsterEventType.ENGINE_EVENT.value,
                'nonce',
                event_specific_data=EngineEventData.in_process(999),
            ),
        )
    )
    assert len(storage.get_logs_for_run('foo')) == 1
    storage.wipe()
    assert len(storage.get_logs_for_run('foo')) == 0
def test_event_log_storage_store_events_and_wipe(event_storage_factory_cm_fn):
    with event_storage_factory_cm_fn() as storage:
        assert len(storage.get_logs_for_run('foo')) == 0
        storage.store_event(
            DagsterEventRecord(
                None,
                'Message2',
                'debug',
                '',
                'foo',
                time.time(),
                dagster_event=DagsterEvent(
                    DagsterEventType.ENGINE_EVENT.value,
                    'nonce',
                    event_specific_data=EngineEventData.in_process(999),
                ),
            )
        )
        assert len(storage.get_logs_for_run('foo')) == 1
        assert storage.get_stats_for_run('foo')
        storage.wipe()
        assert len(storage.get_logs_for_run('foo')) == 0
def test_filesystem_event_log_storage_store_events_and_wipe():
    with seven.TemporaryDirectory() as tmpdir_path:
        storage = SqliteEventLogStorage(tmpdir_path)
        assert len(storage.get_logs_for_run('foo')) == 0
        storage.store_event(
            DagsterEventRecord(
                None,
                'Message2',
                'debug',
                '',
                'foo',
                time.time(),
                dagster_event=DagsterEvent(
                    DagsterEventType.ENGINE_EVENT.value,
                    'nonce',
                    event_specific_data=EngineEventData.in_process(999),
                ),
            )
        )
        assert len(storage.get_logs_for_run('foo')) == 1
        storage.wipe()
        assert len(storage.get_logs_for_run('foo')) == 0
def test_event_log_delete(event_storage_factory_cm_fn):
    with event_storage_factory_cm_fn() as storage:
        assert len(storage.get_logs_for_run("foo")) == 0
        storage.store_event(
            EventRecord(
                None,
                "Message2",
                "debug",
                "",
                "foo",
                time.time(),
                dagster_event=DagsterEvent(
                    DagsterEventType.ENGINE_EVENT.value,
                    "nonce",
                    event_specific_data=EngineEventData.in_process(999),
                ),
            )
        )
        assert len(storage.get_logs_for_run("foo")) == 1
        assert storage.get_stats_for_run("foo")
        storage.delete_events("foo")
        assert len(storage.get_logs_for_run("foo")) == 0
def start_run(self, execute_run_args):
    check.inst_param(execute_run_args, "execute_run_args", ExecuteExternalPipelineArgs)

    with DagsterInstance.from_ref(execute_run_args.instance_ref) as instance:
        try:
            res = self._query(
                "StartRun",
                api_pb2.StartRunRequest,
                serialized_execute_run_args=serialize_dagster_namedtuple(execute_run_args),
            )
            return deserialize_json_to_dagster_namedtuple(res.serialized_start_run_result)
        except Exception:  # pylint: disable=broad-except
            pipeline_run = instance.get_run_by_id(execute_run_args.pipeline_run_id)
            instance.report_engine_event(
                message="Unexpected error in IPC client",
                pipeline_run=pipeline_run,
                engine_event_data=EngineEventData.engine_error(
                    serializable_error_info_from_exc_info(sys.exc_info())
                ),
            )
            raise