def launch_step(self, step_handler_context: StepHandlerContext):
    events = []

    assert (
        len(step_handler_context.execute_step_args.step_keys_to_execute) == 1
    ), "Launching multiple steps is not currently supported"
    step_key = step_handler_context.execute_step_args.step_keys_to_execute[0]

    k8s_name_key = get_k8s_job_name(
        step_handler_context.execute_step_args.pipeline_run_id,
        step_key,
    )
    job_name = "dagster-job-%s" % (k8s_name_key)
    pod_name = "dagster-job-%s" % (k8s_name_key)

    # The serialized args are passed on the step worker's command line via
    # `dagster api execute_step`.
    input_json = serialize_dagster_namedtuple(step_handler_context.execute_step_args)
    args = ["dagster", "api", "execute_step", input_json]

    # Fall back to the image recorded on the pipeline's code origin if the
    # executor config does not specify one.
    job_config = self._job_config
    if not job_config.job_image:
        job_config = job_config.with_image(
            step_handler_context.execute_step_args.pipeline_origin.repository_origin.container_image
        )

    if not job_config.job_image:
        raise Exception("No image included in either executor config or the pipeline")

    user_defined_k8s_config = get_user_defined_k8s_config(
        frozentags(step_handler_context.step_tags[step_key])
    )

    job = construct_dagster_k8s_job(
        job_config=job_config,
        args=args,
        job_name=job_name,
        pod_name=pod_name,
        component="step_worker",
        user_defined_k8s_config=user_defined_k8s_config,
    )

    # Emit an engine event so the run log records which Kubernetes Job executes this step.
    events.append(
        DagsterEvent(
            event_type_value=DagsterEventType.ENGINE_EVENT.value,
            pipeline_name=step_handler_context.execute_step_args.pipeline_origin.pipeline_name,
            step_key=step_key,
            message=f"Executing step {step_key} in Kubernetes job {job_name}",
            event_specific_data=EngineEventData(
                [
                    EventMetadataEntry.text(step_key, "Step key"),
                    EventMetadataEntry.text(job_name, "Kubernetes Job name"),
                ],
            ),
        )
    )

    self._batch_api.create_namespaced_job(body=job, namespace=self._job_namespace)

    return events
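# A minimal illustrative sketch (not part of the handler above) of the worker side of
# this hand-off: the Job's container runs `dagster api execute_step <json>`, and the CLI
# rehydrates the serialized ExecuteStepArgs with the generic serdes entry point used in
# the tests below. The helper name here is hypothetical.
def _rehydrate_execute_step_args(input_json):
    args = deserialize_json_to_dagster_namedtuple(input_json)
    assert isinstance(args, ExecuteStepArgs)
    return args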
def test_pipeline_run_dagster_run():
    # serialize in current code
    test_run = DagsterRun(pipeline_name="test")
    test_str = serialize_dagster_namedtuple(test_run)

    # deserialize in "legacy" code
    legacy_env = WhitelistMap.create()

    @_whitelist_for_serdes(legacy_env)
    class PipelineRun(
        namedtuple(
            "_PipelineRun",
            "pipeline_name run_id run_config mode solid_selection solids_to_execute "
            "step_keys_to_execute status tags root_run_id parent_run_id "
            "pipeline_snapshot_id execution_plan_snapshot_id external_pipeline_origin "
            "pipeline_code_origin",
        )
    ):
        pass

    @_whitelist_for_serdes(legacy_env)  # pylint: disable=unused-variable
    class PipelineRunStatus(Enum):
        QUEUED = "QUEUED"
        NOT_STARTED = "NOT_STARTED"

    result = _deserialize_json(test_str, legacy_env)
    assert isinstance(result, PipelineRun)
    assert result.pipeline_name == test_run.pipeline_name
def test_skip_when_empty():
    test_map = WhitelistMap.create()

    @_whitelist_for_serdes(whitelist_map=test_map)
    class SameSnapshotTuple(namedtuple("_Tuple", "foo")):
        def __new__(cls, foo):
            return super(SameSnapshotTuple, cls).__new__(cls, foo)  # pylint: disable=bad-super-call

    old_tuple = SameSnapshotTuple(foo="A")
    old_serialized = serialize_dagster_namedtuple(old_tuple)
    old_snapshot = create_snapshot_id(old_tuple)

    # Without setting skip_when_empty, the ID changes
    @_whitelist_for_serdes(whitelist_map=test_map)
    class SameSnapshotTuple(namedtuple("_Tuple", "foo bar")):  # pylint: disable=function-redefined
        def __new__(cls, foo, bar=None):
            return super(SameSnapshotTuple, cls).__new__(  # pylint: disable=bad-super-call
                cls, foo, bar
            )

    new_tuple_without_serializer = SameSnapshotTuple(foo="A")
    new_snapshot_without_serializer = create_snapshot_id(new_tuple_without_serializer)

    assert new_snapshot_without_serializer != old_snapshot

    # By setting a custom serializer and skip_when_empty, the snapshot stays the same
    # as long as the new field is None
    class SkipWhenEmptySerializer(DefaultNamedTupleSerializer):
        @classmethod
        def skip_when_empty(cls) -> Set[str]:
            return {"bar"}

    @_whitelist_for_serdes(whitelist_map=test_map, serializer=SkipWhenEmptySerializer)
    class SameSnapshotTuple(namedtuple("_Tuple", "foo bar")):  # pylint: disable=function-redefined
        def __new__(cls, foo, bar=None):
            return super(SameSnapshotTuple, cls).__new__(  # pylint: disable=bad-super-call
                cls, foo, bar
            )

    new_tuple = SameSnapshotTuple(foo="A")
    new_snapshot = create_snapshot_id(new_tuple)

    assert old_snapshot == new_snapshot

    rehydrated_tuple = deserialize_json_to_dagster_namedtuple(old_serialized)
    assert rehydrated_tuple.foo == "A"
    assert rehydrated_tuple.bar is None

    new_tuple_with_bar = SameSnapshotTuple(foo="A", bar="B")
    assert new_tuple_with_bar.foo == "A"
    assert new_tuple_with_bar.bar == "B"
def launch_steps(
    self,
    step_contexts: List[IStepContext],
    known_state: KnownExecutionState,
):
    assert len(step_contexts) == 1, "Launching multiple steps is not currently supported"
    step_context = step_contexts[0]

    k8s_name_key = get_k8s_job_name(
        self.pipeline_context.plan_data.pipeline_run.run_id,
        step_context.step.key,
    )
    job_name = "dagster-job-%s" % (k8s_name_key)
    pod_name = "dagster-job-%s" % (k8s_name_key)
    pipeline_origin = self.pipeline_context.reconstructable_pipeline.get_python_origin()

    # Bundle everything the step worker needs to execute this single step.
    execute_step_args = ExecuteStepArgs(
        pipeline_origin=pipeline_origin,
        pipeline_run_id=self.pipeline_context.pipeline_run.run_id,
        step_keys_to_execute=[step_context.step.key],
        instance_ref=self.pipeline_context.instance.get_ref(),
        retry_mode=self.retries.for_inner_plan(),
        known_state=known_state,
        should_verify_step=True,
    )

    # The serialized args are passed on the step worker's command line.
    input_json = serialize_dagster_namedtuple(execute_step_args)
    args = ["dagster", "api", "execute_step", input_json]

    # Fall back to the image recorded on the pipeline's code origin if the
    # executor config does not specify one.
    job_config = self._job_config
    if not job_config.job_image:
        job_config = job_config.with_image(pipeline_origin.repository_origin.container_image)

    if not job_config.job_image:
        raise Exception("No image included in either executor config or the pipeline")

    job = construct_dagster_k8s_job(
        job_config,
        args,
        job_name,
        get_user_defined_k8s_config(frozentags()),
        pod_name,
    )

    # This executor runs inside the cluster, so load the in-cluster config before
    # creating the step worker Job in the target namespace.
    kubernetes.config.load_incluster_config()
    kubernetes.client.BatchV1Api().create_namespaced_job(body=job, namespace=self._job_namespace)
def test_solid_handle_node_handle():
    # serialize in current code
    test_handle = NodeHandle("test", None)
    test_str = serialize_dagster_namedtuple(test_handle)

    # deserialize in "legacy" code
    legacy_env = WhitelistMap.create()

    @_whitelist_for_serdes(legacy_env)
    class SolidHandle(namedtuple("_SolidHandle", "name parent")):
        pass

    result = _deserialize_json(test_str, legacy_env)
    assert isinstance(result, SolidHandle)
    assert result.name == test_handle.name
def to_json(self):
    return serialize_dagster_namedtuple(self)
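# A minimal sketch of the complementary read path, assuming
# `deserialize_json_to_dagster_namedtuple` (used in the tests here) is the inverse of
# `serialize_dagster_namedtuple`. The `from_json` name is a hypothetical convenience,
# not necessarily an existing method on the class above.
@staticmethod
def from_json(json_str):
    return deserialize_json_to_dagster_namedtuple(json_str)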
def test_external_job_origin_instigator_origin():
    def build_legacy_whitelist_map():
        legacy_env = WhitelistMap.create()

        @_whitelist_for_serdes(legacy_env)
        class ExternalJobOrigin(
            namedtuple("_ExternalJobOrigin", "external_repository_origin job_name")
        ):
            def get_id(self):
                return create_snapshot_id(self)

        @_whitelist_for_serdes(legacy_env)
        class ExternalRepositoryOrigin(
            namedtuple("_ExternalRepositoryOrigin", "repository_location_origin repository_name")
        ):
            def get_id(self):
                return create_snapshot_id(self)

        class GrpcServerOriginSerializer(DefaultNamedTupleSerializer):
            @classmethod
            def skip_when_empty(cls):
                return {"use_ssl"}

        @_whitelist_for_serdes(whitelist_map=legacy_env, serializer=GrpcServerOriginSerializer)
        class GrpcServerRepositoryLocationOrigin(
            namedtuple(
                "_GrpcServerRepositoryLocationOrigin",
                "host port socket location_name use_ssl",
            ),
        ):
            def __new__(cls, host, port=None, socket=None, location_name=None, use_ssl=None):
                return super(GrpcServerRepositoryLocationOrigin, cls).__new__(
                    cls, host, port, socket, location_name, use_ssl
                )

        return (
            legacy_env,
            ExternalJobOrigin,
            ExternalRepositoryOrigin,
            GrpcServerRepositoryLocationOrigin,
        )

    legacy_env, klass, repo_klass, location_klass = build_legacy_whitelist_map()

    from dagster.core.host_representation.origin import (
        ExternalInstigatorOrigin,
        ExternalRepositoryOrigin,
        GrpcServerRepositoryLocationOrigin,
    )

    # serialize from current code, compare against old code
    instigator_origin = ExternalInstigatorOrigin(
        external_repository_origin=ExternalRepositoryOrigin(
            repository_location_origin=GrpcServerRepositoryLocationOrigin(
                host="localhost", port=1234, location_name="test_location"
            ),
            repository_name="the_repo",
        ),
        instigator_name="simple_schedule",
    )
    instigator_origin_str = serialize_dagster_namedtuple(instigator_origin)
    instigator_to_job = _deserialize_json(instigator_origin_str, legacy_env)
    assert isinstance(instigator_to_job, klass)

    # ensure that the origin id is stable
    assert instigator_to_job.get_id() == instigator_origin.get_id()

    # serialize from old code, compare against current code
    job_origin = klass(
        external_repository_origin=repo_klass(
            repository_location_origin=location_klass(
                host="localhost", port=1234, location_name="test_location"
            ),
            repository_name="the_repo",
        ),
        job_name="simple_schedule",
    )
    job_origin_str = serialize_value(job_origin, legacy_env)

    from dagster.serdes.serdes import _WHITELIST_MAP

    job_to_instigator = deserialize_json_to_dagster_namedtuple(job_origin_str)
    assert isinstance(job_to_instigator, ExternalInstigatorOrigin)
    # ensure that the origin id is stable
    assert job_to_instigator.get_id() == job_origin.get_id()
def test_legacy_event_log_load():
    # ensure EventLogEntry 0.14.3+ can still be loaded by older dagster versions
    # to avoid downgrades etc. from creating operational issues
    legacy_env = WhitelistMap.create()

    # snapshot of EventLogEntry pre commit ea19544
    @_whitelist_for_serdes(
        whitelist_map=legacy_env,
        storage_name="EventLogEntry",  # use this to avoid collision with current EventLogEntry
    )
    class OldEventLogEntry(  # pylint: disable=unused-variable
        NamedTuple(
            "_OldEventLogEntry",
            [
                ("error_info", Optional[SerializableErrorInfo]),
                ("message", str),
                ("level", Union[str, int]),
                ("user_message", str),
                ("run_id", str),
                ("timestamp", float),
                ("step_key", Optional[str]),
                ("pipeline_name", Optional[str]),
                ("dagster_event", Optional[DagsterEvent]),
            ],
        )
    ):
        def __new__(
            cls,
            error_info,
            message,
            level,
            user_message,
            run_id,
            timestamp,
            step_key=None,
            pipeline_name=None,
            dagster_event=None,
            job_name=None,
        ):
            pipeline_name = pipeline_name or job_name
            return super().__new__(
                cls,
                check.opt_inst_param(error_info, "error_info", SerializableErrorInfo),
                check.str_param(message, "message"),
                level,  # coerce_valid_log_level call omitted
                check.str_param(user_message, "user_message"),
                check.str_param(run_id, "run_id"),
                check.float_param(timestamp, "timestamp"),
                check.opt_str_param(step_key, "step_key"),
                check.opt_str_param(pipeline_name, "pipeline_name"),
                check.opt_inst_param(dagster_event, "dagster_event", DagsterEvent),
            )

    # current event log entry
    new_event = EventLogEntry(
        user_message="test 1 2 3",
        error_info=None,
        level="debug",
        run_id="fake_run_id",
        timestamp=time.time(),
    )

    storage_str = serialize_dagster_namedtuple(new_event)

    result = _deserialize_json(storage_str, legacy_env)
    assert result.message is not None