def validate_tags(tags):
    """Validate a tag dictionary and return it wrapped in ``frozentags``.

    String values are stored unchanged. Any other value is accepted only when
    ``json.loads(json.dumps(value)) == value``; in that case the JSON encoding
    of the value is stored in its place.

    Args:
        tags: Optional dict with string keys (checked via
            ``check.opt_dict_param``).

    Raises:
        DagsterInvalidDefinitionError: if a non-string value cannot be JSON
            encoded, or its encoding does not decode back to an equal value.
    """
    checked_tags = check.opt_dict_param(tags, "tags", key_type=str)
    result = {}

    for key, value in checked_tags.items():
        # Strings need no conversion.
        if isinstance(value, str):
            result[key] = value
            continue

        round_trips = False
        err_reason = 'Could not JSON encode value "{}"'.format(value)
        try:
            str_val = seven.json.dumps(value)
            # Encoding worked; from here on a failure means a lossy round trip.
            err_reason = 'JSON encoding "{json}" of value "{val}" is not equivalent to original value'.format(
                json=str_val, val=value
            )
            round_trips = seven.json.loads(str_val) == value
        except Exception:  # pylint: disable=broad-except
            # Any failure is reported below through ``err_reason``.
            pass

        if not round_trips:
            raise DagsterInvalidDefinitionError(
                'Invalid value for tag "{key}", {err_reason}. Tag values must be strings '
                "or meet the constraint that json.loads(json.dumps(value)) == value.".format(
                    key=key, err_reason=err_reason
                )
            )

        result[key] = str_val

    return frozentags(result)
def launch_step(self, step_handler_context: StepHandlerContext):
    """Create a Kubernetes job that executes a single step.

    Exactly one step key must be present in the execute-step args. The job
    image comes from the executor's job config, falling back to the container
    image recorded on the pipeline's repository origin.

    Returns:
        A one-element list holding the engine event that announces the job.

    Raises:
        Exception: if neither the executor config nor the pipeline supplies
            an image.
    """
    step_args = step_handler_context.execute_step_args
    assert (
        len(step_args.step_keys_to_execute) == 1
    ), "Launching multiple steps is not currently supported"
    step_key = step_args.step_keys_to_execute[0]

    k8s_name_key = get_k8s_job_name(step_args.pipeline_run_id, step_key)
    job_name = "dagster-job-%s" % (k8s_name_key)
    # The pod shares the job's name.
    pod_name = job_name

    command_args = [
        "dagster",
        "api",
        "execute_step",
        serialize_dagster_namedtuple(step_args),
    ]

    job_config = self._job_config
    if not job_config.job_image:
        job_config = job_config.with_image(
            step_args.pipeline_origin.repository_origin.container_image
        )
        if not job_config.job_image:
            raise Exception(
                "No image included in either executor config or the pipeline")

    step_tags = frozentags(step_handler_context.step_tags[step_key])
    job = construct_dagster_k8s_job(
        job_config=job_config,
        args=command_args,
        job_name=job_name,
        pod_name=pod_name,
        component="step_worker",
        user_defined_k8s_config=get_user_defined_k8s_config(step_tags),
    )

    # The event is built before the job is submitted, as in the original flow.
    launch_event = DagsterEvent(
        event_type_value=DagsterEventType.ENGINE_EVENT.value,
        pipeline_name=step_args.pipeline_origin.pipeline_name,
        step_key=step_key,
        message=f"Executing step {step_key} in Kubernetes job {job_name}",
        event_specific_data=EngineEventData(
            [
                EventMetadataEntry.text(step_key, "Step key"),
                EventMetadataEntry.text(job_name, "Kubernetes Job name"),
            ],
        ),
    )

    self._batch_api.create_namespaced_job(body=job, namespace=self._job_namespace)
    return [launch_event]
def __new__(cls, name, alias=None, tags=None):
    """Build a SolidInvocation after validating its fields.

    Args:
        name: Required string name.
        alias: Optional string alias.
        tags: Optional dict of string keys to string values; stored as
            ``frozentags``.
    """
    name = check.str_param(name, 'name')
    alias = check.opt_str_param(alias, 'alias')
    tags = frozentags(
        check.opt_dict_param(tags, 'tags', value_type=str, key_type=str))
    # super() takes the defining class first and the class being instantiated
    # second. The reversed form only works when cls is SolidInvocation itself
    # and raises TypeError for any subclass.
    return super(SolidInvocation, cls).__new__(cls, name, alias, tags)
def tag(self, tags):
    """Return a new CallableSolidNode carrying the validated ``tags``.

    When this node has no tags yet the validated tags are used directly;
    otherwise they are applied on top of the existing ones through
    ``updated_with``.
    """
    validated = validate_tags(tags)
    if self.tags is None:
        new_tags = frozentags(validated)
    else:
        new_tags = self.tags.updated_with(validated)
    return CallableSolidNode(self.solid_def, self.given_alias, new_tags)
def tag(self, tags):
    """Return a new PendingNodeInvocation carrying the validated ``tags``.

    When this invocation has no tags yet the validated tags are used
    directly; otherwise they are applied on top of the existing ones through
    ``updated_with``.
    """
    validated = validate_tags(tags)
    if self.tags is None:
        new_tags = frozentags(validated)
    else:
        new_tags = self.tags.updated_with(validated)
    return PendingNodeInvocation(self.node_def, self.given_alias, new_tags)
def tag(self, tags):
    """Return a new PendingNodeInvocation carrying the validated ``tags``.

    The node definition, alias and hook definitions are passed through
    unchanged. Tags replace ``None`` or are applied on top of the existing
    tags through ``updated_with``.
    """
    validated = validate_tags(tags)
    if self.tags is None:
        new_tags = frozentags(validated)
    else:
        new_tags = self.tags.updated_with(validated)
    return PendingNodeInvocation(
        node_def=self.node_def,
        given_alias=self.given_alias,
        tags=new_tags,
        hook_defs=self.hook_defs,
    )
def __new__(cls, name, alias=None, tags=None, hook_defs=None):
    """Build a SolidInvocation after validating its fields.

    Args:
        name: Required string name.
        alias: Optional string alias.
        tags: Optional dict of string keys to string values; stored as
            ``frozentags``.
        hook_defs: Optional set of ``HookDefinition``; stored as a
            ``frozenset``.
    """
    name = check.str_param(name, "name")
    alias = check.opt_str_param(alias, "alias")
    tags = frozentags(
        check.opt_dict_param(tags, "tags", value_type=str, key_type=str))
    hook_defs = frozenset(
        check.opt_set_param(hook_defs, "hook_defs", of_type=HookDefinition))
    # super() takes the defining class first and the class being instantiated
    # second. The reversed form only works when cls is SolidInvocation itself
    # and raises TypeError for any subclass.
    return super(SolidInvocation, cls).__new__(cls, name, alias, tags, hook_defs)
def _launch_k8s_job_with_args(self, job_name, args, run, pipeline_origin):
    """Create a Kubernetes "run_worker" job for ``run`` with the given args.

    The job config comes from the gRPC path when the repository origin
    carries a container image, and from the static config otherwise. The
    chosen image is recorded on the run as a tag, and an engine event is
    reported before and after the job is submitted.
    """

    def _job_metadata():
        # Built fresh for each event so the two events do not share an object.
        return EngineEventData(
            [
                EventMetadataEntry.text(job_name, "Kubernetes Job name"),
                EventMetadataEntry.text(self.job_namespace, "Kubernetes Namespace"),
                EventMetadataEntry.text(run.run_id, "Run ID"),
            ]
        )

    pod_name = job_name
    user_defined_k8s_config = get_user_defined_k8s_config(frozentags(run.tags))

    repository_origin = pipeline_origin.repository_origin
    if repository_origin.container_image:
        job_config = self._get_grpc_job_config(repository_origin.container_image)
    else:
        job_config = self.get_static_job_config()

    self._instance.add_run_tags(run.run_id, {DOCKER_IMAGE_TAG: job_config.job_image})

    job = construct_dagster_k8s_job(
        job_config=job_config,
        args=args,
        job_name=job_name,
        pod_name=pod_name,
        component="run_worker",
        user_defined_k8s_config=user_defined_k8s_config,
        labels={"dagster/job": pipeline_origin.pipeline_name},
    )

    self._instance.report_engine_event(
        "Creating Kubernetes run worker job",
        run,
        _job_metadata(),
        cls=self.__class__,
    )

    self._batch_api.create_namespaced_job(body=job, namespace=self.job_namespace)

    self._instance.report_engine_event(
        "Kubernetes run worker job created",
        run,
        _job_metadata(),
        cls=self.__class__,
    )
def __new__(cls, name, alias=None, resource_mapper_fn=None, tags=None):
    """Build a SolidInvocation after validating its fields.

    Args:
        name: Required string name.
        alias: Optional string alias.
        resource_mapper_fn: Optional callable; defaults to
            ``SolidInvocation.default_resource_mapper_fn``.
        tags: Optional dict of string keys to string values; stored as
            ``frozentags``.
    """
    name = check.str_param(name, 'name')
    alias = check.opt_str_param(alias, 'alias')
    resource_mapper_fn = check.opt_callable_param(
        resource_mapper_fn, 'resource_mapper_fn',
        SolidInvocation.default_resource_mapper_fn)
    tags = frozentags(
        check.opt_dict_param(tags, 'tags', value_type=str, key_type=str))
    # super() takes the defining class first and the class being instantiated
    # second. The reversed form only works when cls is SolidInvocation itself
    # and raises TypeError for any subclass.
    return super(SolidInvocation, cls).__new__(cls, name, alias, resource_mapper_fn, tags)
def launch_run(self, run, external_pipeline):
    """Submit a Kubernetes job that runs ``dagster api execute_run`` for ``run``.

    The job config comes from the gRPC path when the repository origin
    carries a container image, and from the static config otherwise. The
    chosen image is recorded as a run tag and an engine event is reported
    once the job has been submitted.

    Returns:
        The ``run`` that was passed in.
    """
    check.inst_param(run, "run", PipelineRun)
    check.inst_param(external_pipeline, "external_pipeline", ExternalPipeline)

    job_name = "dagster-run-{}".format(run.run_id)
    pod_name = job_name

    user_defined_k8s_config = get_user_defined_k8s_config(frozentags(run.tags))

    pipeline_origin = external_pipeline.get_python_origin()
    repository_origin = pipeline_origin.repository_origin
    if repository_origin.container_image:
        job_config = self._get_grpc_job_config(repository_origin.container_image)
    else:
        job_config = self.get_static_job_config()

    self._instance.add_run_tags(run.run_id, {DOCKER_IMAGE_TAG: job_config.job_image})

    run_args = ExecuteRunArgs(
        pipeline_origin=pipeline_origin,
        pipeline_run_id=run.run_id,
        instance_ref=None,
    )
    input_json = serialize_dagster_namedtuple(run_args)

    job = construct_dagster_k8s_job(
        job_config=job_config,
        args=["dagster", "api", "execute_run", input_json],
        job_name=job_name,
        pod_name=pod_name,
        component="run_coordinator",
        user_defined_k8s_config=user_defined_k8s_config,
    )

    self._batch_api.create_namespaced_job(body=job, namespace=self.job_namespace)

    launch_metadata = EngineEventData(
        [
            EventMetadataEntry.text(job_name, "Kubernetes Job name"),
            EventMetadataEntry.text(self.job_namespace, "Kubernetes Namespace"),
            EventMetadataEntry.text(run.run_id, "Run ID"),
        ]
    )
    self._instance.report_engine_event(
        "Kubernetes run worker job launched",
        run,
        launch_metadata,
        cls=self.__class__,
    )
    return run
def launch_steps(
    self,
    step_contexts: List[IStepContext],
    known_state: KnownExecutionState,
):
    """Create a Kubernetes job that executes the single given step.

    Exactly one step context is supported. The job image comes from the
    executor's job config, falling back to the container image recorded on
    the pipeline's repository origin.

    Raises:
        Exception: if neither the executor config nor the pipeline supplies
            an image.
    """
    assert len(
        step_contexts
    ) == 1, "Launching multiple steps is not currently supported"
    step_context = step_contexts[0]

    k8s_name_key = get_k8s_job_name(
        self.pipeline_context.plan_data.pipeline_run.run_id,
        step_context.step.key,
    )
    job_name = "dagster-job-%s" % (k8s_name_key)
    # The pod shares the job's name.
    pod_name = job_name

    pipeline_origin = self.pipeline_context.reconstructable_pipeline.get_python_origin()

    execute_step_args = ExecuteStepArgs(
        pipeline_origin=pipeline_origin,
        pipeline_run_id=self.pipeline_context.pipeline_run.run_id,
        step_keys_to_execute=[step_context.step.key],
        instance_ref=self.pipeline_context.instance.get_ref(),
        retry_mode=self.retries.for_inner_plan(),
        known_state=known_state,
        should_verify_step=True,
    )
    command_args = [
        "dagster",
        "api",
        "execute_step",
        serialize_dagster_namedtuple(execute_step_args),
    ]

    job_config = self._job_config
    if not job_config.job_image:
        job_config = job_config.with_image(
            pipeline_origin.repository_origin.container_image)
        if not job_config.job_image:
            raise Exception(
                "No image included in either executor config or the pipeline")

    # Positional order is kept exactly as the original call passed it.
    empty_tag_config = get_user_defined_k8s_config(frozentags())
    job = construct_dagster_k8s_job(
        job_config,
        command_args,
        job_name,
        empty_tag_config,
        pod_name,
    )

    kubernetes.config.load_incluster_config()
    batch_api = kubernetes.client.BatchV1Api()
    batch_api.create_namespaced_job(body=job, namespace=self._job_namespace)
def _launch_k8s_job_with_args(self, job_name, args, run):
    """Create a Kubernetes "run_worker" job for ``run`` with the given args.

    The job config and namespace come from the run's container context. The
    chosen image is recorded on the run as a tag, and an engine event is
    reported before and after the job is submitted.
    """
    container_context = self.get_container_context_for_run(run)
    pod_name = job_name

    pipeline_origin = run.pipeline_code_origin
    user_defined_k8s_config = get_user_defined_k8s_config(frozentags(run.tags))

    job_config = container_context.get_k8s_job_config(
        job_image=pipeline_origin.repository_origin.container_image,
        run_launcher=self,
    )

    self._instance.add_run_tags(run.run_id, {DOCKER_IMAGE_TAG: job_config.job_image})

    job_labels = {
        "dagster/job": pipeline_origin.pipeline_name,
        "dagster/run-id": run.run_id,
    }
    job = construct_dagster_k8s_job(
        job_config=job_config,
        args=args,
        job_name=job_name,
        pod_name=pod_name,
        component="run_worker",
        user_defined_k8s_config=user_defined_k8s_config,
        labels=job_labels,
    )

    creation_metadata = EngineEventData(
        [
            MetadataEntry("Kubernetes Job name", value=job_name),
            MetadataEntry("Kubernetes Namespace", value=container_context.namespace),
            MetadataEntry("Run ID", value=run.run_id),
        ]
    )
    self._instance.report_engine_event(
        "Creating Kubernetes run worker job",
        run,
        creation_metadata,
        cls=self.__class__,
    )

    self._batch_api.create_namespaced_job(
        body=job, namespace=container_context.namespace)

    # The follow-up event carries no metadata.
    self._instance.report_engine_event(
        "Kubernetes run worker job created",
        run,
        cls=self.__class__,
    )
def launch_run(self, instance, run, external_pipeline):
    """Submit a Kubernetes "runmaster" job that executes ``run`` via dagster-graphql.

    Resource requirements are derived from the external pipeline's tags. An
    engine event is reported once the job has been submitted.

    Returns:
        The ``run`` that was passed in.
    """
    check.inst_param(run, 'run', PipelineRun)
    check.inst_param(external_pipeline, 'external_pipeline', ExternalPipeline)

    job_name = 'dagster-run-{}'.format(run.run_id)
    pod_name = job_name

    resources = get_k8s_resource_requirements(frozentags(external_pipeline.tags))

    # Variables handed to the executeRunInProcess GraphQL operation.
    graphql_variables = seven.json.dumps({
        'runId': run.run_id,
        'repositoryName': external_pipeline.handle.repository_name,
        'repositoryLocationName': external_pipeline.handle.location_name,
    })

    job = construct_dagster_k8s_job(
        job_config=self.job_config,
        command=['dagster-graphql'],
        args=['-p', 'executeRunInProcess', '-v', graphql_variables, '--remap-sigterm'],
        job_name=job_name,
        pod_name=pod_name,
        component='runmaster',
        resources=resources,
    )

    self._batch_api.create_namespaced_job(body=job, namespace=self.job_namespace)

    launch_metadata = EngineEventData([
        EventMetadataEntry.text(job_name, 'Kubernetes Job name'),
        EventMetadataEntry.text(pod_name, 'Kubernetes Pod name'),
        EventMetadataEntry.text(self.job_namespace, 'Kubernetes Namespace'),
        EventMetadataEntry.text(run.run_id, 'Run ID'),
    ])
    self._instance.report_engine_event(
        'Kubernetes runmaster job launched',
        run,
        launch_metadata,
        cls=K8sRunLauncher,
    )
    return run
def __new__(
    cls,
    name: str,
    alias: Optional[str] = None,
    tags: Optional[Dict[str, str]] = None,
    hook_defs: Optional[AbstractSet[HookDefinition]] = None,
    retry_policy: Optional[RetryPolicy] = None,
):
    """Build the invocation tuple after validating each field.

    Args:
        name: Required string name.
        alias: Optional string alias.
        tags: Optional dict of string keys to string values; stored as
            ``frozentags``.
        hook_defs: Optional set of ``HookDefinition``; stored as a
            ``frozenset``.
        retry_policy: Optional ``RetryPolicy``.
    """
    # ``tags`` and ``hook_defs`` accept None, so they are annotated Optional
    # like the other None-defaulted parameters.
    return super().__new__(
        cls,
        name=check.str_param(name, "name"),
        alias=check.opt_str_param(alias, "alias"),
        tags=frozentags(check.opt_dict_param(tags, "tags", value_type=str, key_type=str)),
        hook_defs=frozenset(
            check.opt_set_param(hook_defs, "hook_defs", of_type=HookDefinition)
        ),
        retry_policy=check.opt_inst_param(retry_policy, "retry_policy", RetryPolicy),
    )
def launch_run(self, instance, run, external_pipeline):
    """Submit a Kubernetes "runmaster" job running ``dagster api execute_run_with_structured_logs``.

    Job settings (image, pull policy, secrets, namespace, ...) are read from
    the validated celery-k8s executor config in the run config. An engine
    event is reported once the job has been submitted.

    Returns:
        The ``run`` that was passed in.
    """
    check.inst_param(run, 'run', PipelineRun)

    job_name = get_job_name_from_run_id(run.run_id)
    pod_name = job_name
    exc_config = _get_validated_celery_k8s_executor_config(run.run_config)

    job_config = DagsterK8sJobConfig(
        dagster_home=self.dagster_home,
        instance_config_map=self.instance_config_map,
        postgres_password_secret=self.postgres_password_secret,
        job_image=exc_config.get('job_image'),
        image_pull_policy=exc_config.get('image_pull_policy'),
        image_pull_secrets=exc_config.get('image_pull_secrets'),
        service_account_name=exc_config.get('service_account_name'),
        env_config_maps=exc_config.get('env_config_maps'),
        env_secrets=exc_config.get('env_secrets'),
    )

    resources = get_k8s_resource_requirements(frozentags(external_pipeline.tags))

    from dagster.cli.api import ExecuteRunArgs

    # depends on DagsterInstance.get() returning the same instance
    # https://github.com/dagster-io/dagster/issues/2757
    run_args = ExecuteRunArgs(
        pipeline_origin=external_pipeline.get_origin(),
        pipeline_run_id=run.run_id,
        instance_ref=None,
    )
    input_json = serialize_dagster_namedtuple(run_args)

    job = construct_dagster_k8s_job(
        job_config,
        command=['dagster'],
        args=['api', 'execute_run_with_structured_logs', input_json],
        job_name=job_name,
        pod_name=pod_name,
        component='runmaster',
        resources=resources,
    )

    job_namespace = exc_config.get('job_namespace')

    batch_api = kubernetes.client.BatchV1Api()
    batch_api.create_namespaced_job(body=job, namespace=job_namespace)

    launch_metadata = EngineEventData(
        [
            EventMetadataEntry.text(job_name, 'Kubernetes Job name'),
            EventMetadataEntry.text(pod_name, 'Kubernetes Pod name'),
            EventMetadataEntry.text(job_namespace, 'Kubernetes Namespace'),
            EventMetadataEntry.text(run.run_id, 'Run ID'),
        ]
    )
    self._instance.report_engine_event(
        'Kubernetes runmaster job launched',
        run,
        launch_metadata,
        cls=CeleryK8sRunLauncher,
    )
    return run
def launch_run(self, instance, run, external_pipeline):
    """Submit a Kubernetes "run_coordinator" job for a celery-k8s run.

    For pipelines loaded from a gRPC server the job image is taken from the
    repository location handle and the executor config must not set
    ``job_image``. Otherwise the image must come from the executor config.

    Returns:
        The ``run`` that was passed in.

    Raises:
        DagsterInvariantViolationError: if the image configuration is
            contradictory or missing, or the location handle has an
            unexpected type.
    """
    check.inst_param(run, 'run', PipelineRun)

    job_name = get_job_name_from_run_id(run.run_id)
    pod_name = job_name
    exc_config = _get_validated_celery_k8s_executor_config(run.run_config)

    env_vars = None

    if isinstance(external_pipeline.get_origin(), PipelineGrpcServerOrigin):
        if exc_config.get('job_image'):
            raise DagsterInvariantViolationError(
                'Cannot specify job_image in executor config when loading pipeline '
                'from GRPC server.')

        repository_location_handle = (
            external_pipeline.repository_handle.repository_location_handle)
        if not isinstance(repository_location_handle,
                          GrpcServerRepositoryLocationHandle):
            raise DagsterInvariantViolationError(
                'Expected RepositoryLocationHandle to be of type '
                'GrpcServerRepositoryLocationHandle but found type {}'.format(
                    type(repository_location_handle)))

        job_image = repository_location_handle.get_current_image()
        env_vars = {'DAGSTER_CURRENT_IMAGE': job_image}

        repository_name = external_pipeline.repository_handle.repository_name
        repository_python_origin = repository_location_handle.get_repository_python_origin(
            repository_name)
        pipeline_origin = PipelinePythonOrigin(
            pipeline_name=external_pipeline.name,
            repository_origin=repository_python_origin,
        )
    else:
        job_image = exc_config.get('job_image')
        if not job_image:
            raise DagsterInvariantViolationError(
                'Cannot find job_image in celery-k8s executor config.')
        pipeline_origin = external_pipeline.get_origin()

    job_config = DagsterK8sJobConfig(
        dagster_home=self.dagster_home,
        instance_config_map=self.instance_config_map,
        postgres_password_secret=self.postgres_password_secret,
        job_image=check.str_param(job_image, 'job_image'),
        image_pull_policy=exc_config.get('image_pull_policy'),
        image_pull_secrets=exc_config.get('image_pull_secrets'),
        service_account_name=exc_config.get('service_account_name'),
        env_config_maps=exc_config.get('env_config_maps'),
        env_secrets=exc_config.get('env_secrets'),
    )

    resources = get_k8s_resource_requirements(frozentags(external_pipeline.tags))

    from dagster.cli.api import ExecuteRunArgs

    # depends on DagsterInstance.get() returning the same instance
    # https://github.com/dagster-io/dagster/issues/2757
    run_args = ExecuteRunArgs(
        pipeline_origin=pipeline_origin,
        pipeline_run_id=run.run_id,
        instance_ref=None,
    )
    input_json = serialize_dagster_namedtuple(run_args)

    job = construct_dagster_k8s_job(
        job_config,
        command=['dagster'],
        args=['api', 'execute_run_with_structured_logs', input_json],
        job_name=job_name,
        pod_name=pod_name,
        component='run_coordinator',
        resources=resources,
        env_vars=env_vars,
    )

    job_namespace = exc_config.get('job_namespace')

    batch_api = kubernetes.client.BatchV1Api()
    batch_api.create_namespaced_job(body=job, namespace=job_namespace)

    launch_metadata = EngineEventData([
        EventMetadataEntry.text(job_name, 'Kubernetes Job name'),
        EventMetadataEntry.text(pod_name, 'Kubernetes Pod name'),
        EventMetadataEntry.text(job_namespace, 'Kubernetes Namespace'),
        EventMetadataEntry.text(run.run_id, 'Run ID'),
    ])
    self._instance.report_engine_event(
        'Kubernetes run_coordinator job launched',
        run,
        launch_metadata,
        cls=CeleryK8sRunLauncher,
    )
    return run
def tags(self):
    """Return the graph definition's tags combined with this object's own tags.

    The two dicts are combined with ``merge_dicts`` and the result is
    expanded into ``frozentags`` as keyword arguments.
    """
    combined = merge_dicts(self._graph_def.tags, self._tags)
    return frozentags(**combined)
def launch_run(self, instance, run, external_pipeline):
    """Submit a Kubernetes "runmaster" job that executes ``run`` via the GraphQL job builder.

    Job settings (image, pull policy, secrets, namespace, ...) are read from
    the validated celery-k8s executor config in the run config. An engine
    event is reported once the job has been submitted.

    Returns:
        The ``run`` that was passed in.
    """
    check.inst_param(run, 'run', PipelineRun)

    job_name = get_job_name_from_run_id(run.run_id)
    pod_name = job_name
    exc_config = _get_validated_celery_k8s_executor_config(run.run_config)

    job_config = DagsterK8sJobConfig(
        dagster_home=self.dagster_home,
        instance_config_map=self.instance_config_map,
        postgres_password_secret=self.postgres_password_secret,
        job_image=exc_config.get('job_image'),
        image_pull_policy=exc_config.get('image_pull_policy'),
        image_pull_secrets=exc_config.get('image_pull_secrets'),
        service_account_name=exc_config.get('service_account_name'),
        env_config_maps=exc_config.get('env_config_maps'),
        env_secrets=exc_config.get('env_secrets'),
    )

    resources = get_k8s_resource_requirements(frozentags(external_pipeline.tags))

    # Variables handed to the executeRunInProcess GraphQL operation.
    graphql_variables = seven.json.dumps({
        'runId': run.run_id,
        'repositoryName': external_pipeline.handle.repository_name,
        'repositoryLocationName': external_pipeline.handle.location_name,
    })

    job = construct_dagster_graphql_k8s_job(
        job_config,
        args=['-p', 'executeRunInProcess', '-v', graphql_variables, '--remap-sigterm'],
        job_name=job_name,
        pod_name=pod_name,
        component='runmaster',
        resources=resources,
    )

    job_namespace = exc_config.get('job_namespace')

    batch_api = kubernetes.client.BatchV1Api()
    batch_api.create_namespaced_job(body=job, namespace=job_namespace)

    launch_metadata = EngineEventData([
        EventMetadataEntry.text(job_name, 'Kubernetes Job name'),
        EventMetadataEntry.text(pod_name, 'Kubernetes Pod name'),
        EventMetadataEntry.text(job_namespace, 'Kubernetes Namespace'),
        EventMetadataEntry.text(run.run_id, 'Run ID'),
    ])
    self._instance.report_engine_event(
        'Kubernetes runmaster job launched',
        run,
        launch_metadata,
        cls=CeleryK8sRunLauncher,
    )
    return run
def launch_run(self, instance, run, external_pipeline):
    """Submit a Kubernetes "run_coordinator" job for a celery-k8s run.

    With a user-code deployment (gRPC repository location) the job image is
    taken from the reloaded repository origin and the executor config must
    not set ``job_image``. Otherwise ``job_image`` must be set in the
    executor config.

    Returns:
        The ``run`` that was passed in.

    Raises:
        DagsterInvariantViolationError: if the image configuration is
            contradictory or missing, or the location handle has an
            unexpected type.
    """
    check.inst_param(instance, "instance", DagsterInstance)
    check.inst_param(run, "run", PipelineRun)
    check.inst_param(external_pipeline, "external_pipeline", ExternalPipeline)

    job_name = get_job_name_from_run_id(run.run_id)
    pod_name = job_name
    exc_config = _get_validated_celery_k8s_executor_config(run.run_config)

    job_image = None
    pipeline_origin = None
    env_vars = None

    job_image_from_executor_config = exc_config.get("job_image")

    # If the user is using user-code deployments, we grab the image from the gRPC server.
    if isinstance(
        external_pipeline.get_external_origin().external_repository_origin.repository_location_origin,
        GrpcServerRepositoryLocationOrigin,
    ):
        repository_location_handle = (
            external_pipeline.repository_handle.repository_location_handle
        )

        if not isinstance(repository_location_handle, GrpcServerRepositoryLocationHandle):
            raise DagsterInvariantViolationError(
                "Expected RepositoryLocationHandle to be of type "
                "GrpcServerRepositoryLocationHandle but found type {}".format(
                    type(repository_location_handle)
                )
            )

        repository_name = external_pipeline.repository_handle.repository_name
        repository_origin = repository_location_handle.reload_repository_python_origin(
            repository_name
        )
        pipeline_origin = PipelinePythonOrigin(
            pipeline_name=external_pipeline.name, repository_origin=repository_origin
        )

        job_image = repository_origin.container_image
        env_vars = {"DAGSTER_CURRENT_IMAGE": job_image}

        if job_image_from_executor_config:
            # The first two fragments are f-strings so both image names are
            # interpolated; previously the braces were emitted literally.
            raise DagsterInvariantViolationError(
                f"You have specified a job_image {job_image_from_executor_config} in your executor configuration, "
                f"but also {job_image} in your user-code deployment. You cannot specify a job_image "
                "in your executor config when using user-code deployments because the job image is "
                "pulled from the deployment. To resolve this error, remove the job_image "
                "configuration from your executor configuration (which is a part of your run configuration)"
            )
    else:
        if not job_image_from_executor_config:
            raise DagsterInvariantViolationError(
                "You have not specified a job_image in your executor configuration. "
                "To resolve this error, specify the job_image configuration in the executor "
                "config section in your run config. \n"
                "Note: You may also be seeing this error because you are using the configured API. "
                "Using configured with the celery-k8s executor is not supported at this time, "
                "and the job_image must be configured at the top-level executor config without "
                "using configured."
            )

        job_image = job_image_from_executor_config
        pipeline_origin = external_pipeline.get_python_origin()

    job_config = DagsterK8sJobConfig(
        dagster_home=self.dagster_home,
        instance_config_map=self.instance_config_map,
        postgres_password_secret=self.postgres_password_secret,
        job_image=check.str_param(job_image, "job_image"),
        image_pull_policy=exc_config.get("image_pull_policy"),
        image_pull_secrets=exc_config.get("image_pull_secrets"),
        service_account_name=exc_config.get("service_account_name"),
        env_config_maps=exc_config.get("env_config_maps"),
        env_secrets=exc_config.get("env_secrets"),
    )

    user_defined_k8s_config = get_user_defined_k8s_config(frozentags(run.tags))

    from dagster.cli.api import ExecuteRunArgs

    input_json = serialize_dagster_namedtuple(
        # depends on DagsterInstance.get() returning the same instance
        # https://github.com/dagster-io/dagster/issues/2757
        ExecuteRunArgs(
            pipeline_origin=pipeline_origin,
            pipeline_run_id=run.run_id,
            instance_ref=None,
        )
    )

    job = construct_dagster_k8s_job(
        job_config,
        args=["dagster", "api", "execute_run", input_json],
        job_name=job_name,
        pod_name=pod_name,
        component="run_coordinator",
        user_defined_k8s_config=user_defined_k8s_config,
        env_vars=env_vars,
    )

    job_namespace = exc_config.get("job_namespace")

    self._batch_api.create_namespaced_job(body=job, namespace=job_namespace)
    self._instance.report_engine_event(
        "Kubernetes run_coordinator job launched",
        run,
        EngineEventData(
            [
                EventMetadataEntry.text(job_name, "Kubernetes Job name"),
                EventMetadataEntry.text(job_namespace, "Kubernetes Namespace"),
                EventMetadataEntry.text(run.run_id, "Run ID"),
            ]
        ),
        cls=self.__class__,
    )
    return run
def launch_run(self, instance, run, external_pipeline):
    """Submit a Kubernetes job that runs ``dagster api execute_run`` for ``run``.

    For pipelines loaded from a gRPC repository location the job image comes
    from the reloaded repository origin and the launcher must not have its
    own ``_job_image``. Otherwise the static job config is used.

    Returns:
        The ``run`` that was passed in.

    Raises:
        DagsterInvariantViolationError: if a job image is configured on the
            launcher while loading from a gRPC server, or the location
            handle has an unexpected type.
    """
    check.inst_param(run, "run", PipelineRun)
    check.inst_param(external_pipeline, "external_pipeline", ExternalPipeline)

    job_name = "dagster-run-{}".format(run.run_id)
    pod_name = job_name

    user_defined_k8s_config = get_user_defined_k8s_config(frozentags(run.tags))

    location_origin = (
        external_pipeline.get_external_origin().external_repository_origin.repository_location_origin
    )
    if isinstance(location_origin, GrpcServerRepositoryLocationOrigin):
        if self._job_image:
            raise DagsterInvariantViolationError(
                "Cannot specify job_image in run launcher config when loading pipeline "
                "from GRPC server."
            )

        repository_location_handle = (
            external_pipeline.repository_handle.repository_location_handle
        )
        if not isinstance(repository_location_handle, GrpcServerRepositoryLocationHandle):
            raise DagsterInvariantViolationError(
                "Expected RepositoryLocationHandle to be of type "
                "GrpcServerRepositoryLocationHandle but found type {}".format(
                    type(repository_location_handle)
                )
            )

        repository_name = external_pipeline.repository_handle.repository_name
        repository_origin = repository_location_handle.reload_repository_python_origin(
            repository_name
        )
        job_image = repository_origin.container_image

        pipeline_origin = PipelinePythonOrigin(
            pipeline_name=external_pipeline.name, repository_origin=repository_origin
        )
        job_config = self._get_grpc_job_config(job_image)
    else:
        pipeline_origin = external_pipeline.get_python_origin()
        job_config = self._get_static_job_config()

    run_args = ExecuteRunArgs(
        pipeline_origin=pipeline_origin,
        pipeline_run_id=run.run_id,
        instance_ref=None,
    )
    input_json = serialize_dagster_namedtuple(run_args)

    job = construct_dagster_k8s_job(
        job_config=job_config,
        args=["dagster", "api", "execute_run", input_json],
        job_name=job_name,
        pod_name=pod_name,
        component="run_coordinator",
        user_defined_k8s_config=user_defined_k8s_config,
    )

    self._batch_api.create_namespaced_job(body=job, namespace=self.job_namespace)

    launch_metadata = EngineEventData(
        [
            EventMetadataEntry.text(job_name, "Kubernetes Job name"),
            EventMetadataEntry.text(self.job_namespace, "Kubernetes Namespace"),
            EventMetadataEntry.text(run.run_id, "Run ID"),
        ]
    )
    self._instance.report_engine_event(
        "Kubernetes run worker job launched",
        run,
        launch_metadata,
        cls=self.__class__,
    )
    return run
def launch_step(self, step_handler_context: StepHandlerContext):
    """Create a Kubernetes job that executes a single step.

    Exactly one step key must be present in the execute-step args. The job
    config and namespace come from the container context. The image falls
    back to the container image recorded on the pipeline's repository origin.

    Returns:
        A one-element list holding the engine event that announces the job.

    Raises:
        Exception: if neither the executor config nor the job supplies an
            image.
    """
    step_args = step_handler_context.execute_step_args
    assert (
        len(step_args.step_keys_to_execute) == 1
    ), "Launching multiple steps is not currently supported"
    step_key = step_args.step_keys_to_execute[0]

    job_name = self._get_k8s_step_job_name(step_handler_context)
    pod_name = job_name

    command_args = step_args.get_command_args()

    container_context = self._get_container_context(step_handler_context)
    job_config = container_context.get_k8s_job_config(
        self._executor_image, step_handler_context.instance.run_launcher
    )
    if not job_config.job_image:
        job_config = job_config.with_image(
            step_args.pipeline_origin.repository_origin.container_image
        )
        if not job_config.job_image:
            raise Exception("No image included in either executor config or the job")

    step_tags = frozentags(step_handler_context.step_tags[step_key])
    job_labels = {
        "dagster/job": step_args.pipeline_origin.pipeline_name,
        "dagster/op": step_key,
        "dagster/run-id": step_args.pipeline_run_id,
    }
    job = construct_dagster_k8s_job(
        job_config=job_config,
        args=command_args,
        job_name=job_name,
        pod_name=pod_name,
        component="step_worker",
        user_defined_k8s_config=get_user_defined_k8s_config(step_tags),
        labels=job_labels,
    )

    # The event is built before the job is submitted, as in the original flow.
    launch_event = DagsterEvent(
        event_type_value=DagsterEventType.ENGINE_EVENT.value,
        pipeline_name=step_args.pipeline_origin.pipeline_name,
        step_key=step_key,
        message=f"Executing step {step_key} in Kubernetes job {job_name}",
        event_specific_data=EngineEventData(
            [
                MetadataEntry("Step key", value=step_key),
                MetadataEntry("Kubernetes Job name", value=job_name),
            ],
        ),
    )

    self._batch_api.create_namespaced_job(body=job, namespace=container_context.namespace)
    return [launch_event]
def launch_run(self, instance, run, external_pipeline):
    """Submit a Kubernetes "run_coordinator" job for a celery-k8s run.

    For pipelines loaded from a gRPC server the job image is taken from the
    repository location handle and the executor config must not set
    ``job_image``. Otherwise the image must come from the executor config.

    Returns:
        The ``run`` that was passed in.

    Raises:
        DagsterInvariantViolationError: if the image configuration is
            contradictory or missing, or the location handle has an
            unexpected type.
    """
    check.inst_param(instance, "instance", DagsterInstance)
    check.inst_param(run, "run", PipelineRun)
    check.inst_param(external_pipeline, "external_pipeline", ExternalPipeline)

    job_name = get_job_name_from_run_id(run.run_id)
    pod_name = job_name
    exc_config = _get_validated_celery_k8s_executor_config(run.run_config)

    env_vars = None

    if isinstance(external_pipeline.get_origin(), PipelineGrpcServerOrigin):
        if exc_config.get("job_image"):
            raise DagsterInvariantViolationError(
                "Cannot specify job_image in executor config when loading pipeline "
                "from GRPC server."
            )

        repository_location_handle = (
            external_pipeline.repository_handle.repository_location_handle
        )
        if not isinstance(repository_location_handle, GrpcServerRepositoryLocationHandle):
            raise DagsterInvariantViolationError(
                "Expected RepositoryLocationHandle to be of type "
                "GrpcServerRepositoryLocationHandle but found type {}".format(
                    type(repository_location_handle)
                )
            )

        job_image = repository_location_handle.get_current_image()
        env_vars = {"DAGSTER_CURRENT_IMAGE": job_image}

        repository_name = external_pipeline.repository_handle.repository_name
        repository_python_origin = repository_location_handle.get_repository_python_origin(
            repository_name
        )
        pipeline_origin = PipelinePythonOrigin(
            pipeline_name=external_pipeline.name,
            repository_origin=repository_python_origin,
        )
    else:
        job_image = exc_config.get("job_image")
        if not job_image:
            raise DagsterInvariantViolationError(
                "Cannot find job_image in celery-k8s executor config."
            )
        pipeline_origin = external_pipeline.get_origin()

    job_config = DagsterK8sJobConfig(
        dagster_home=self.dagster_home,
        instance_config_map=self.instance_config_map,
        postgres_password_secret=self.postgres_password_secret,
        job_image=check.str_param(job_image, "job_image"),
        image_pull_policy=exc_config.get("image_pull_policy"),
        image_pull_secrets=exc_config.get("image_pull_secrets"),
        service_account_name=exc_config.get("service_account_name"),
        env_config_maps=exc_config.get("env_config_maps"),
        env_secrets=exc_config.get("env_secrets"),
    )

    user_defined_k8s_config = get_user_defined_k8s_config(frozentags(external_pipeline.tags))

    from dagster.cli.api import ExecuteRunArgs

    # depends on DagsterInstance.get() returning the same instance
    # https://github.com/dagster-io/dagster/issues/2757
    run_args = ExecuteRunArgs(
        pipeline_origin=pipeline_origin,
        pipeline_run_id=run.run_id,
        instance_ref=None,
    )
    input_json = serialize_dagster_namedtuple(run_args)

    job = construct_dagster_k8s_job(
        job_config,
        command=["dagster"],
        args=["api", "execute_run_with_structured_logs", input_json],
        job_name=job_name,
        pod_name=pod_name,
        component="run_coordinator",
        user_defined_k8s_config=user_defined_k8s_config,
        env_vars=env_vars,
    )

    job_namespace = exc_config.get("job_namespace")

    batch_api = kubernetes.client.BatchV1Api()
    batch_api.create_namespaced_job(body=job, namespace=job_namespace)

    launch_metadata = EngineEventData(
        [
            EventMetadataEntry.text(job_name, "Kubernetes Job name"),
            EventMetadataEntry.text(pod_name, "Kubernetes Pod name"),
            EventMetadataEntry.text(job_namespace, "Kubernetes Namespace"),
            EventMetadataEntry.text(run.run_id, "Run ID"),
        ]
    )
    self._instance.report_engine_event(
        "Kubernetes run_coordinator job launched",
        run,
        launch_metadata,
        cls=self.__class__,
    )
    return run
def launch_run(self, run, external_pipeline):
    """Submit a Kubernetes "run_coordinator" job for a celery-k8s run.

    The job image is the executor config's ``job_image`` when set, otherwise
    the container image recorded on the pipeline's repository origin. When
    both are present the executor config wins and an engine event records
    the override. The chosen image is stored as a run tag, and engine events
    are reported before and after the job is submitted.

    Returns:
        The ``run`` that was passed in.

    Raises:
        DagsterInvariantViolationError: if no job image is available from
            either source.
    """
    check.inst_param(run, "run", PipelineRun)
    check.inst_param(external_pipeline, "external_pipeline", ExternalPipeline)

    job_name = get_job_name_from_run_id(run.run_id)
    pod_name = job_name
    exc_config = _get_validated_celery_k8s_executor_config(run.run_config)
    env_vars = None

    job_image_from_executor_config = exc_config.get("job_image")

    pipeline_origin = external_pipeline.get_python_origin()
    repository_origin = pipeline_origin.repository_origin

    # Held in its own name so the override message can still report it after
    # the executor image is chosen. Previously it was overwritten first, so
    # the message showed the executor image twice.
    deployment_image = repository_origin.container_image

    if not deployment_image and not job_image_from_executor_config:
        raise DagsterInvariantViolationError(
            "You have not specified a job_image in your executor configuration. "
            "To resolve this error, specify the job_image configuration in the executor "
            "config section in your run config. \n"
            "Note: You may also be seeing this error because you are using the configured API. "
            "Using configured with the celery-k8s executor is not supported at this time, "
            "and the job_image must be configured at the top-level executor config without "
            "using configured."
        )

    if deployment_image and job_image_from_executor_config:
        self._instance.report_engine_event(
            f"You have specified a job_image {job_image_from_executor_config} in your executor configuration, "
            f"but also {deployment_image} in your user-code deployment. Using the job image {job_image_from_executor_config} "
            "from executor configuration as it takes precedence.",
            run,
            cls=self.__class__,
        )

    # The executor config takes precedence over the deployment image.
    job_image = job_image_from_executor_config or deployment_image

    job_config = DagsterK8sJobConfig(
        dagster_home=self.dagster_home,
        instance_config_map=self.instance_config_map,
        postgres_password_secret=self.postgres_password_secret,
        job_image=check.str_param(job_image, "job_image"),
        image_pull_policy=exc_config.get("image_pull_policy"),
        image_pull_secrets=exc_config.get("image_pull_secrets"),
        service_account_name=exc_config.get("service_account_name"),
        env_config_maps=exc_config.get("env_config_maps"),
        env_secrets=exc_config.get("env_secrets"),
    )

    self._instance.add_run_tags(
        run.run_id,
        {DOCKER_IMAGE_TAG: job_config.job_image},
    )

    user_defined_k8s_config = get_user_defined_k8s_config(frozentags(run.tags))

    from dagster.cli.api import ExecuteRunArgs

    input_json = serialize_dagster_namedtuple(
        # depends on DagsterInstance.get() returning the same instance
        # https://github.com/dagster-io/dagster/issues/2757
        ExecuteRunArgs(
            pipeline_origin=pipeline_origin,
            pipeline_run_id=run.run_id,
            instance_ref=None,
        )
    )

    job = construct_dagster_k8s_job(
        job_config,
        args=["dagster", "api", "execute_run", input_json],
        job_name=job_name,
        pod_name=pod_name,
        component="run_coordinator",
        user_defined_k8s_config=user_defined_k8s_config,
        env_vars=env_vars,
    )

    job_namespace = exc_config.get("job_namespace")

    self._instance.report_engine_event(
        "Creating Kubernetes run worker job",
        run,
        EngineEventData(
            [
                EventMetadataEntry.text(job_name, "Kubernetes Job name"),
                EventMetadataEntry.text(job_namespace, "Kubernetes Namespace"),
                EventMetadataEntry.text(run.run_id, "Run ID"),
            ]
        ),
        cls=self.__class__,
    )

    self._batch_api.create_namespaced_job(body=job, namespace=job_namespace)
    self._instance.report_engine_event(
        "Kubernetes run worker job created",
        run,
        EngineEventData(
            [
                EventMetadataEntry.text(job_name, "Kubernetes Job name"),
                EventMetadataEntry.text(job_namespace, "Kubernetes Namespace"),
                EventMetadataEntry.text(run.run_id, "Run ID"),
            ]
        ),
        cls=self.__class__,
    )
    return run
def launch_run(self, context: LaunchRunContext) -> None:
    """Create the Kubernetes job that executes the run carried by ``context``.

    The job image is resolved from two places: the ``job_image`` key of the
    validated celery-k8s executor config and the ``container_image`` of the
    pipeline's repository origin. When both are set, the executor config
    wins and an engine event naming both images is reported.

    Args:
        context: Launch context; ``context.pipeline_run`` is the run to
            launch and ``context.pipeline_code_origin`` is the origin passed
            to ``ExecuteRunArgs``.

    Raises:
        DagsterInvariantViolationError: If neither the executor config nor
            the repository origin provides a job image.
    """
    run = context.pipeline_run

    job_name = get_job_name_from_run_id(run.run_id)
    pod_name = job_name
    exc_config = _get_validated_celery_k8s_executor_config(run.run_config)

    job_image_from_executor_config = exc_config.get("job_image")

    pipeline_origin = context.pipeline_code_origin
    repository_origin = pipeline_origin.repository_origin

    # Keep the user-code image under its own name: the precedence message
    # below must show the image that is being overridden, not the override.
    job_image_from_user_code = repository_origin.container_image

    if job_image_from_user_code:
        job_image = job_image_from_user_code
        if job_image_from_executor_config:
            job_image = job_image_from_executor_config
            self._instance.report_engine_event(
                f"You have specified a job_image {job_image_from_executor_config} in your executor configuration, "
                f"but also {job_image_from_user_code} in your user-code deployment. Using the job image {job_image_from_executor_config} "
                f"from executor configuration as it takes precedence.",
                run,
                cls=self.__class__,
            )
    else:
        if not job_image_from_executor_config:
            raise DagsterInvariantViolationError(
                "You have not specified a job_image in your executor configuration. "
                "To resolve this error, specify the job_image configuration in the executor "
                "config section in your run config. \n"
                "Note: You may also be seeing this error because you are using the configured API. "
                "Using configured with the celery-k8s executor is not supported at this time, "
                "and the job_image must be configured at the top-level executor config without "
                "using configured."
            )
        job_image = job_image_from_executor_config

    job_config = self.get_k8s_job_config(job_image, exc_config)

    # Record the image that was actually chosen on the run itself.
    self._instance.add_run_tags(
        run.run_id,
        {DOCKER_IMAGE_TAG: job_config.job_image},
    )

    user_defined_k8s_config = get_user_defined_k8s_config(frozentags(run.tags))

    from dagster.cli.api import ExecuteRunArgs

    run_args = ExecuteRunArgs(
        pipeline_origin=pipeline_origin,
        pipeline_run_id=run.run_id,
        instance_ref=self._instance.get_ref(),
    )

    job = construct_dagster_k8s_job(
        job_config,
        args=run_args.get_command_args(),
        job_name=job_name,
        pod_name=pod_name,
        component="run_worker",
        user_defined_k8s_config=user_defined_k8s_config,
        env_vars=None,
        labels={
            "dagster/job": pipeline_origin.pipeline_name,
        },
    )

    job_namespace = exc_config.get("job_namespace")

    def _job_event_data():
        # Both engine events carry the same metadata; build it fresh each time.
        return EngineEventData(
            [
                EventMetadataEntry.text(job_name, "Kubernetes Job name"),
                EventMetadataEntry.text(job_namespace, "Kubernetes Namespace"),
                EventMetadataEntry.text(run.run_id, "Run ID"),
            ]
        )

    self._instance.report_engine_event(
        "Creating Kubernetes run worker job",
        run,
        _job_event_data(),
        cls=self.__class__,
    )

    self._batch_api.create_namespaced_job(body=job, namespace=job_namespace)

    self._instance.report_engine_event(
        "Kubernetes run worker job created",
        run,
        _job_event_data(),
        cls=self.__class__,
    )
def launch_run(self, instance, run, external_pipeline):
    """Create a Kubernetes job that executes ``run`` and return the run.

    For a pipeline whose origin is a ``PipelineGrpcServerOrigin``, the job
    image is taken from the repository location handle and a
    ``PipelinePythonOrigin`` is rebuilt for the job to load. Otherwise the
    pipeline's own origin and the static job config are used.

    Args:
        instance: Not referenced in this method body; events are reported
            through ``self._instance``.
        run: The ``PipelineRun`` to launch.
        external_pipeline: The ``ExternalPipeline`` being run; its tags are
            passed to ``get_k8s_resource_requirements``.

    Returns:
        The ``run`` that was passed in.

    Raises:
        DagsterInvariantViolationError: For a gRPC-server origin, if
            ``self._job_image`` is set or the repository location handle is
            not a ``GrpcServerRepositoryLocationHandle``.
    """
    check.inst_param(run, 'run', PipelineRun)
    check.inst_param(external_pipeline, 'external_pipeline', ExternalPipeline)

    # The job and its pod share a single name derived from the run id.
    k8s_name = 'dagster-run-{}'.format(run.run_id)

    resource_requirements = get_k8s_resource_requirements(
        frozentags(external_pipeline.tags)
    )

    if not isinstance(external_pipeline.get_origin(), PipelineGrpcServerOrigin):
        origin = external_pipeline.get_origin()
        k8s_job_config = self._get_static_job_config()
    else:
        if self._job_image:
            raise DagsterInvariantViolationError(
                'Cannot specify job_image in run launcher config when loading pipeline '
                'from GRPC server.'
            )

        location_handle = external_pipeline.repository_handle.repository_location_handle
        if not isinstance(location_handle, GrpcServerRepositoryLocationHandle):
            raise DagsterInvariantViolationError(
                'Expected RepositoryLocationHandle to be of type '
                'GrpcServerRepositoryLocationHandle but found type {}'.format(
                    type(location_handle)
                )
            )

        k8s_job_config = self._get_grpc_job_config(location_handle.get_current_image())

        repo_name = external_pipeline.repository_handle.repository_name
        origin = PipelinePythonOrigin(
            pipeline_name=external_pipeline.name,
            repository_origin=location_handle.get_repository_python_origin(repo_name),
        )

    execute_run_args = ExecuteRunArgs(
        pipeline_origin=origin,
        pipeline_run_id=run.run_id,
        instance_ref=None,
    )
    serialized_args = serialize_dagster_namedtuple(execute_run_args)

    k8s_job = construct_dagster_k8s_job(
        job_config=k8s_job_config,
        command=['dagster'],
        args=['api', 'execute_run_with_structured_logs', serialized_args],
        job_name=k8s_name,
        pod_name=k8s_name,
        component='runmaster',
        resources=resource_requirements,
    )

    self._batch_api.create_namespaced_job(body=k8s_job, namespace=self.job_namespace)

    launch_metadata = [
        EventMetadataEntry.text(k8s_name, 'Kubernetes Job name'),
        EventMetadataEntry.text(k8s_name, 'Kubernetes Pod name'),
        EventMetadataEntry.text(self.job_namespace, 'Kubernetes Namespace'),
        EventMetadataEntry.text(run.run_id, 'Run ID'),
    ]
    self._instance.report_engine_event(
        'Kubernetes runmaster job launched',
        run,
        EngineEventData(launch_metadata),
        cls=K8sRunLauncher,
    )
    return run