def launch_run(self, instance, run, external_pipeline):
    """Create a Kubernetes job that executes ``run`` and report it as an engine event.

    When the pipeline's repository location origin is a
    ``GrpcServerRepositoryLocationOrigin`` the job image is read from the
    reloaded repository python origin; otherwise it is read from the
    ``job_image`` key of the validated celery-k8s executor config.

    Args:
        instance: Must be a ``DagsterInstance``; only type-checked here.
        run: The ``PipelineRun`` to launch.
        external_pipeline: The ``ExternalPipeline`` the run belongs to.

    Returns:
        The ``run`` that was passed in.

    Raises:
        DagsterInvariantViolationError: if the repository location handle is
            not a gRPC handle for a gRPC origin, if ``job_image`` is set in the
            executor config while a gRPC origin is in use, or if no
            ``job_image`` is configured for a non-gRPC origin.
    """
    check.inst_param(instance, "instance", DagsterInstance)
    check.inst_param(run, "run", PipelineRun)
    check.inst_param(external_pipeline, "external_pipeline", ExternalPipeline)

    job_name = get_job_name_from_run_id(run.run_id)
    pod_name = job_name
    exc_config = _get_validated_celery_k8s_executor_config(run.run_config)

    env_vars = None
    job_image_from_executor_config = exc_config.get("job_image")

    # If the user is using user-code deployments, we grab the image from the gRPC server.
    if isinstance(
        external_pipeline.get_external_origin().external_repository_origin.repository_location_origin,
        GrpcServerRepositoryLocationOrigin,
    ):
        repository_location_handle = (
            external_pipeline.repository_handle.repository_location_handle
        )

        if not isinstance(repository_location_handle, GrpcServerRepositoryLocationHandle):
            raise DagsterInvariantViolationError(
                "Expected RepositoryLocationHandle to be of type "
                "GrpcServerRepositoryLocationHandle but found type {}".format(
                    type(repository_location_handle)
                )
            )

        repository_name = external_pipeline.repository_handle.repository_name
        repository_origin = repository_location_handle.reload_repository_python_origin(
            repository_name
        )
        pipeline_origin = PipelinePythonOrigin(
            pipeline_name=external_pipeline.name, repository_origin=repository_origin
        )

        job_image = repository_origin.container_image
        env_vars = {"DAGSTER_CURRENT_IMAGE": job_image}

        if job_image_from_executor_config:
            # The placeholders must be filled in explicitly; without .format()
            # the user would see the literal "{job_image}" text.
            raise DagsterInvariantViolationError(
                (
                    "You have specified a job_image {job_image_from_executor_config} in your executor configuration, "
                    "but also {job_image} in your user-code deployment. You cannot specify a job_image "
                    "in your executor config when using user-code deployments because the job image is "
                    "pulled from the deployment. To resolve this error, remove the job_image "
                    "configuration from your executor configuration (which is a part of your run configuration)"
                ).format(
                    job_image_from_executor_config=job_image_from_executor_config,
                    job_image=job_image,
                )
            )
    else:
        if not job_image_from_executor_config:
            raise DagsterInvariantViolationError(
                "You have not specified a job_image in your executor configuration. "
                "To resolve this error, specify the job_image configuration in the executor "
                "config section in your run config. \n"
                "Note: You may also be seeing this error because you are using the configured API. "
                "Using configured with the celery-k8s executor is not supported at this time, "
                "and the job_image must be configured at the top-level executor config without "
                "using configured."
            )

        job_image = job_image_from_executor_config
        pipeline_origin = external_pipeline.get_python_origin()

    job_config = DagsterK8sJobConfig(
        dagster_home=self.dagster_home,
        instance_config_map=self.instance_config_map,
        postgres_password_secret=self.postgres_password_secret,
        job_image=check.str_param(job_image, "job_image"),
        image_pull_policy=exc_config.get("image_pull_policy"),
        image_pull_secrets=exc_config.get("image_pull_secrets"),
        service_account_name=exc_config.get("service_account_name"),
        env_config_maps=exc_config.get("env_config_maps"),
        env_secrets=exc_config.get("env_secrets"),
    )

    user_defined_k8s_config = get_user_defined_k8s_config(frozentags(run.tags))

    from dagster.cli.api import ExecuteRunArgs

    input_json = serialize_dagster_namedtuple(
        # depends on DagsterInstance.get() returning the same instance
        # https://github.com/dagster-io/dagster/issues/2757
        ExecuteRunArgs(
            pipeline_origin=pipeline_origin,
            pipeline_run_id=run.run_id,
            instance_ref=None,
        )
    )

    job = construct_dagster_k8s_job(
        job_config,
        args=["dagster", "api", "execute_run", input_json],
        job_name=job_name,
        pod_name=pod_name,
        component="run_coordinator",
        user_defined_k8s_config=user_defined_k8s_config,
        env_vars=env_vars,
    )

    job_namespace = exc_config.get("job_namespace")

    self._batch_api.create_namespaced_job(body=job, namespace=job_namespace)
    self._instance.report_engine_event(
        "Kubernetes run_coordinator job launched",
        run,
        EngineEventData(
            [
                EventMetadataEntry.text(job_name, "Kubernetes Job name"),
                EventMetadataEntry.text(job_namespace, "Kubernetes Namespace"),
                EventMetadataEntry.text(run.run_id, "Run ID"),
            ]
        ),
        cls=self.__class__,
    )
    return run
def launch_run(self, instance, run, external_pipeline):
    """Submit a Kubernetes job that executes ``run`` and emit an engine event.

    For a pipeline whose repository location origin is a
    ``GrpcServerRepositoryLocationOrigin`` the job config is built from the
    container image of the reloaded repository python origin; for any other
    origin the launcher's static job config is used.

    Args:
        instance: Not used by this method.
        run: The ``PipelineRun`` to launch.
        external_pipeline: The ``ExternalPipeline`` the run belongs to.

    Returns:
        The ``run`` that was passed in.

    Raises:
        DagsterInvariantViolationError: if ``self._job_image`` is set while the
            pipeline comes from a gRPC server, or if the repository location
            handle is not a ``GrpcServerRepositoryLocationHandle``.
    """
    check.inst_param(run, "run", PipelineRun)
    check.inst_param(external_pipeline, "external_pipeline", ExternalPipeline)

    job_name = pod_name = "dagster-run-{}".format(run.run_id)
    k8s_overrides = get_user_defined_k8s_config(frozentags(run.tags))

    location_origin = (
        external_pipeline.get_external_origin().external_repository_origin.repository_location_origin
    )

    if not isinstance(location_origin, GrpcServerRepositoryLocationOrigin):
        # Non-gRPC origin: rely entirely on the launcher's own configuration.
        pipeline_origin = external_pipeline.get_python_origin()
        job_config = self._get_static_job_config()
    else:
        if self._job_image:
            raise DagsterInvariantViolationError(
                "Cannot specify job_image in run launcher config when loading pipeline "
                "from GRPC server."
            )

        location_handle = external_pipeline.repository_handle.repository_location_handle
        if not isinstance(location_handle, GrpcServerRepositoryLocationHandle):
            handle_type = type(location_handle)
            raise DagsterInvariantViolationError(
                "Expected RepositoryLocationHandle to be of type "
                "GrpcServerRepositoryLocationHandle but found type {}".format(handle_type)
            )

        repository_origin = location_handle.reload_repository_python_origin(
            external_pipeline.repository_handle.repository_name
        )
        image = repository_origin.container_image
        pipeline_origin = PipelinePythonOrigin(
            pipeline_name=external_pipeline.name,
            repository_origin=repository_origin,
        )
        job_config = self._get_grpc_job_config(image)

    execute_args = ExecuteRunArgs(
        pipeline_origin=pipeline_origin,
        pipeline_run_id=run.run_id,
        instance_ref=None,
    )
    input_json = serialize_dagster_namedtuple(execute_args)

    k8s_job = construct_dagster_k8s_job(
        job_config=job_config,
        args=["dagster", "api", "execute_run", input_json],
        job_name=job_name,
        pod_name=pod_name,
        component="run_coordinator",
        user_defined_k8s_config=k8s_overrides,
    )
    self._batch_api.create_namespaced_job(body=k8s_job, namespace=self.job_namespace)

    metadata = [
        EventMetadataEntry.text(job_name, "Kubernetes Job name"),
        EventMetadataEntry.text(self.job_namespace, "Kubernetes Namespace"),
        EventMetadataEntry.text(run.run_id, "Run ID"),
    ]
    self._instance.report_engine_event(
        "Kubernetes run worker job launched",
        run,
        EngineEventData(metadata),
        cls=self.__class__,
    )
    return run
def run_iteration_error(_, _instance, _workspace):
    """Generator that fails on first advance while ``should_raise_errors`` is truthy.

    When ``should_raise_errors`` (a name from the enclosing scope) is falsy the
    generator yields ``None`` once and then finishes; otherwise it raises
    ``DagsterInvariantViolationError("foobar")``. All three arguments are ignored.
    """
    if not should_raise_errors:
        yield
        return
    raise DagsterInvariantViolationError("foobar")
def run_iteration_error(_, _instance, _workspace):
    """Generator that raises ``DagsterInvariantViolationError("foobar")`` when first advanced.

    All three arguments are ignored.
    """
    failure = DagsterInvariantViolationError("foobar")
    raise failure
    # Never reached; its presence is what makes this function a generator.
    yield  # pylint: disable=unreachable
def launch_scheduled_execution(output_file, schedule_name, override_system_timezone, **kwargs):
    """Launch the executions for one schedule tick, writing results to an IPC stream.

    Args:
        output_file: Path handed to ``ipc_write_stream``.
        schedule_name: Name of the schedule to launch; must exist in the single
            repository referenced by ``kwargs``.
        override_system_timezone: When truthy, the system timezone is mocked to
            this value for the duration of the call.
        **kwargs: Forwarded to ``get_repository_origin_from_kwargs`` and
            ``get_repository_location_from_kwargs``.

    Raises:
        DagsterInvariantViolationError: if the schedule cannot be found, or if
            the schedule declares an execution timezone that differs from the
            system timezone.
    """
    with (
        mock_system_timezone(override_system_timezone)
        if override_system_timezone
        else nullcontext()
    ):
        with ipc_write_stream(output_file) as stream:
            with DagsterInstance.get() as instance:
                repository_origin = get_repository_origin_from_kwargs(kwargs)
                job_origin = repository_origin.get_job_origin(schedule_name)

                # open the tick scope before we load any external artifacts so that
                # load errors are stored in DB
                with _schedule_tick_context(
                    instance,
                    stream,
                    JobTickData(
                        job_origin_id=job_origin.get_id(),
                        job_name=schedule_name,
                        job_type=JobType.SCHEDULE,
                        status=JobTickStatus.STARTED,
                        timestamp=time.time(),
                    ),
                ) as tick_context:
                    with get_repository_location_from_kwargs(kwargs) as repo_location:
                        repo_dict = repo_location.get_repositories()
                        check.invariant(
                            repo_dict and len(repo_dict) == 1,
                            # The message is built eagerly, so guard len() against a
                            # None/empty repo_dict; otherwise a TypeError would mask
                            # the invariant failure.
                            "Passed in arguments should reference exactly one repository, instead there are {num_repos}".format(
                                num_repos=len(repo_dict) if repo_dict else 0
                            ),
                        )
                        external_repo = next(iter(repo_dict.values()))

                        known_schedule_names = [
                            schedule.name for schedule in external_repo.get_external_schedules()
                        ]
                        if schedule_name not in known_schedule_names:
                            raise DagsterInvariantViolationError(
                                "Could not find schedule named {schedule_name}".format(
                                    schedule_name=schedule_name
                                ),
                            )

                        external_schedule = external_repo.get_external_schedule(schedule_name)

                        # Validate that either the schedule has no timezone or it matches
                        # the system timezone
                        schedule_timezone = external_schedule.execution_timezone
                        if schedule_timezone:
                            system_timezone = pendulum.now().timezone.name

                            if system_timezone != schedule_timezone:
                                raise DagsterInvariantViolationError(
                                    "Schedule {schedule_name} is set to execute in {schedule_timezone}, "
                                    "but this scheduler can only run in the system timezone, "
                                    "{system_timezone}. Use DagsterDaemonScheduler if you want to be able "
                                    "to execute schedules in arbitrary timezones.".format(
                                        schedule_name=external_schedule.name,
                                        schedule_timezone=schedule_timezone,
                                        system_timezone=system_timezone,
                                    ),
                                )

                        _launch_scheduled_executions(
                            instance, repo_location, external_repo, external_schedule, tick_context
                        )
def loadable_targets_from_loaded_module(module: ModuleType) -> Sequence[LoadableTarget]:
    """Pick the loadable targets exposed by ``module``.

    Candidates are searched in this order, and the first category that
    matches wins: repositories (any number), a single pipeline/job, a single
    graph, a single asset group, and finally an asset group assembled from the
    module's assets.

    Raises:
        DagsterInvariantViolationError: if more than one pipeline/job, graph or
            asset group is found in a category that allows only one, or if
            nothing loadable is found at all.
    """
    repos = _loadable_targets_of_type(module, RepositoryDefinition)
    if repos:
        return repos

    pipelines = _loadable_targets_of_type(module, PipelineDefinition)
    jobs = _loadable_targets_of_type(module, JobDefinition)
    if len(pipelines) > 1:
        target_type = "pipeline" if len(jobs) <= 1 else "job"
        pipeline_symbols = repr([target.attribute for target in pipelines])
        raise DagsterInvariantViolationError(
            f'No repository and more than one {target_type} found in "{module.__name__}". If you load '
            f"a file or module directly it must have only one {target_type} "
            f"in scope. Found {target_type}s defined in variables or decorated "
            f"functions: {pipeline_symbols}."
        )
    if len(pipelines) == 1:
        return pipelines

    graphs = _loadable_targets_of_type(module, GraphDefinition)
    if len(graphs) > 1:
        graph_symbols = repr([target.attribute for target in graphs])
        raise DagsterInvariantViolationError(
            f'More than one graph found in "{module.__name__}". '
            "If you load a file or module directly and it has no repositories, jobs, or "
            "pipelines in scope, it must have no more than one graph in scope. "
            f"Found graphs defined in variables or decorated functions: {graph_symbols}."
        )
    if len(graphs) == 1:
        return graphs

    asset_groups = _loadable_targets_of_type(module, AssetGroup)
    if len(asset_groups) > 1:
        var_names = repr([target.attribute for target in asset_groups])
        raise DagsterInvariantViolationError(
            f'More than one asset group found in "{module.__name__}". '
            "If you load a file or module directly and it has no repositories, jobs, "
            "pipeline, or graphs in scope, it must have no more than one asset group in scope. "
            f"Found asset groups defined in variables: {var_names}."
        )
    if len(asset_groups) == 1:
        return asset_groups

    # Last resort: collect whatever assets the module itself defines.
    module_group = AssetGroup.from_modules([module])
    if len(module_group.assets) == 0 and len(module_group.source_assets) == 0:
        raise DagsterInvariantViolationError(
            "No repositories, jobs, pipelines, graphs, asset groups, or asset definitions found in "
            f'"{module.__name__}".'
        )
    return [LoadableTarget(LOAD_ALL_ASSETS, module_group)]
def launch_run(self, instance, run, external_pipeline):
    """Submit a Kubernetes job that executes ``run`` and emit an engine event.

    If the pipeline origin is a ``PipelineGrpcServerOrigin`` the image is read
    from the gRPC repository location handle; otherwise it is read from the
    ``job_image`` key of the validated celery-k8s executor config.

    Args:
        instance: Not used by this method.
        run: The ``PipelineRun`` to launch.
        external_pipeline: Pipeline the run belongs to; provides the origin,
            repository handle, name and tags.

    Returns:
        The ``run`` that was passed in.

    Raises:
        DagsterInvariantViolationError: if ``job_image`` is set in the executor
            config for a gRPC origin, if the repository location handle is not
            a ``GrpcServerRepositoryLocationHandle``, or if ``job_image`` is
            missing for a non-gRPC origin.
    """
    check.inst_param(run, "run", PipelineRun)

    job_name = pod_name = get_job_name_from_run_id(run.run_id)
    exc_config = _get_validated_celery_k8s_executor_config(run.run_config)
    env_vars = None

    if not isinstance(external_pipeline.get_origin(), PipelineGrpcServerOrigin):
        job_image = exc_config.get("job_image")
        if not job_image:
            raise DagsterInvariantViolationError(
                "Cannot find job_image in celery-k8s executor config."
            )
        pipeline_origin = external_pipeline.get_origin()
    else:
        if exc_config.get("job_image"):
            raise DagsterInvariantViolationError(
                "Cannot specify job_image in executor config when loading pipeline "
                "from GRPC server."
            )

        location_handle = external_pipeline.repository_handle.repository_location_handle
        if not isinstance(location_handle, GrpcServerRepositoryLocationHandle):
            handle_type = type(location_handle)
            raise DagsterInvariantViolationError(
                "Expected RepositoryLocationHandle to be of type "
                "GrpcServerRepositoryLocationHandle but found type {}".format(handle_type)
            )

        job_image = location_handle.get_current_image()
        env_vars = {"DAGSTER_CURRENT_IMAGE": job_image}

        repository_name = external_pipeline.repository_handle.repository_name
        pipeline_name = external_pipeline.name
        python_origin = location_handle.get_repository_python_origin(repository_name)
        pipeline_origin = PipelinePythonOrigin(
            pipeline_name=pipeline_name,
            repository_origin=python_origin,
        )

    job_config = DagsterK8sJobConfig(
        dagster_home=self.dagster_home,
        instance_config_map=self.instance_config_map,
        postgres_password_secret=self.postgres_password_secret,
        job_image=check.str_param(job_image, "job_image"),
        image_pull_policy=exc_config.get("image_pull_policy"),
        image_pull_secrets=exc_config.get("image_pull_secrets"),
        service_account_name=exc_config.get("service_account_name"),
        env_config_maps=exc_config.get("env_config_maps"),
        env_secrets=exc_config.get("env_secrets"),
    )

    # Note: overrides are read from the pipeline's tags here, not the run's.
    k8s_overrides = get_user_defined_k8s_config(frozentags(external_pipeline.tags))

    from dagster.cli.api import ExecuteRunArgs

    # depends on DagsterInstance.get() returning the same instance
    # https://github.com/dagster-io/dagster/issues/2757
    execute_args = ExecuteRunArgs(
        pipeline_origin=pipeline_origin,
        pipeline_run_id=run.run_id,
        instance_ref=None,
    )
    input_json = serialize_dagster_namedtuple(execute_args)

    k8s_job = construct_dagster_k8s_job(
        job_config,
        command=["dagster"],
        args=["api", "execute_run_with_structured_logs", input_json],
        job_name=job_name,
        pod_name=pod_name,
        component="run_coordinator",
        user_defined_k8s_config=k8s_overrides,
        env_vars=env_vars,
    )

    job_namespace = exc_config.get("job_namespace")

    batch_api = kubernetes.client.BatchV1Api()
    batch_api.create_namespaced_job(body=k8s_job, namespace=job_namespace)

    metadata = [
        EventMetadataEntry.text(job_name, "Kubernetes Job name"),
        EventMetadataEntry.text(pod_name, "Kubernetes Pod name"),
        EventMetadataEntry.text(job_namespace, "Kubernetes Namespace"),
        EventMetadataEntry.text(run.run_id, "Run ID"),
    ]
    self._instance.report_engine_event(
        "Kubernetes run_coordinator job launched",
        run,
        EngineEventData(metadata),
        cls=CeleryK8sRunLauncher,
    )
    return run
def launch_run(self, context: LaunchRunContext) -> None:
    """Create a Kubernetes run worker job for the run carried by ``context``.

    The job image comes from the repository origin's ``container_image``; a
    ``job_image`` in the celery-k8s executor config overrides it, and an
    engine event is reported when both are present. The chosen image is also
    recorded on the run under ``DOCKER_IMAGE_TAG``.

    Args:
        context: Supplies ``pipeline_run`` and ``pipeline_code_origin``.

    Raises:
        DagsterInvariantViolationError: if neither the repository origin nor
            the executor config provides a job image.
    """
    run = context.pipeline_run

    job_name = get_job_name_from_run_id(run.run_id)
    pod_name = job_name
    exc_config = _get_validated_celery_k8s_executor_config(run.run_config)
    env_vars = None

    job_image_from_executor_config = exc_config.get("job_image")

    pipeline_origin = context.pipeline_code_origin
    repository_origin = pipeline_origin.repository_origin

    job_image = repository_origin.container_image

    if job_image:
        if job_image_from_executor_config:
            # Report BEFORE overriding job_image: the message must name the
            # user-code deployment image, not the executor image twice.
            self._instance.report_engine_event(
                f"You have specified a job_image {job_image_from_executor_config} in your executor configuration, "
                f"but also {job_image} in your user-code deployment. Using the job image {job_image_from_executor_config} "
                f"from executor configuration as it takes precedence.",
                run,
                cls=self.__class__,
            )
            job_image = job_image_from_executor_config
    else:
        if not job_image_from_executor_config:
            raise DagsterInvariantViolationError(
                "You have not specified a job_image in your executor configuration. "
                "To resolve this error, specify the job_image configuration in the executor "
                "config section in your run config. \n"
                "Note: You may also be seeing this error because you are using the configured API. "
                "Using configured with the celery-k8s executor is not supported at this time, "
                "and the job_image must be configured at the top-level executor config without "
                "using configured."
            )

        job_image = job_image_from_executor_config

    job_config = self.get_k8s_job_config(job_image, exc_config)

    self._instance.add_run_tags(
        run.run_id,
        {DOCKER_IMAGE_TAG: job_config.job_image},
    )

    user_defined_k8s_config = get_user_defined_k8s_config(frozentags(run.tags))

    from dagster.cli.api import ExecuteRunArgs

    run_args = ExecuteRunArgs(
        pipeline_origin=pipeline_origin,
        pipeline_run_id=run.run_id,
        instance_ref=self._instance.get_ref(),
        set_exit_code_on_failure=self._fail_pod_on_run_failure,
    ).get_command_args()

    job = construct_dagster_k8s_job(
        job_config,
        args=run_args,
        job_name=job_name,
        pod_name=pod_name,
        component="run_worker",
        user_defined_k8s_config=user_defined_k8s_config,
        env_vars=env_vars,
        labels={
            "dagster/job": pipeline_origin.pipeline_name,
        },
    )

    job_namespace = exc_config.get("job_namespace")

    # One event before and one after the API call, so a failed creation is
    # still visible in the run's event log.
    self._instance.report_engine_event(
        "Creating Kubernetes run worker job",
        run,
        EngineEventData(
            [
                EventMetadataEntry.text(job_name, "Kubernetes Job name"),
                EventMetadataEntry.text(job_namespace, "Kubernetes Namespace"),
                EventMetadataEntry.text(run.run_id, "Run ID"),
            ]
        ),
        cls=self.__class__,
    )

    self._batch_api.create_namespaced_job(body=job, namespace=job_namespace)
    self._instance.report_engine_event(
        "Kubernetes run worker job created",
        run,
        EngineEventData(
            [
                EventMetadataEntry.text(job_name, "Kubernetes Job name"),
                EventMetadataEntry.text(job_namespace, "Kubernetes Namespace"),
                EventMetadataEntry.text(run.run_id, "Run ID"),
            ]
        ),
        cls=self.__class__,
    )
def launch_run(self, instance, run, external_pipeline):
    """Submit a Kubernetes "runmaster" job that executes ``run`` and emit an engine event.

    For a ``PipelineGrpcServerOrigin`` the job config is built from the image
    reported by the gRPC repository location handle; for any other origin the
    launcher's static job config is used.

    Args:
        instance: Not used by this method.
        run: The ``PipelineRun`` to launch.
        external_pipeline: The ``ExternalPipeline`` the run belongs to.

    Returns:
        The ``run`` that was passed in.

    Raises:
        DagsterInvariantViolationError: if ``self._job_image`` is set while the
            pipeline comes from a gRPC server, or if the repository location
            handle is not a ``GrpcServerRepositoryLocationHandle``.
    """
    check.inst_param(run, 'run', PipelineRun)
    check.inst_param(external_pipeline, 'external_pipeline', ExternalPipeline)

    job_name = pod_name = 'dagster-run-{}'.format(run.run_id)
    resource_requirements = get_k8s_resource_requirements(frozentags(external_pipeline.tags))

    if not isinstance(external_pipeline.get_origin(), PipelineGrpcServerOrigin):
        # Non-gRPC origin: rely entirely on the launcher's own configuration.
        pipeline_origin = external_pipeline.get_origin()
        job_config = self._get_static_job_config()
    else:
        if self._job_image:
            raise DagsterInvariantViolationError(
                'Cannot specify job_image in run launcher config when loading pipeline '
                'from GRPC server.'
            )

        location_handle = external_pipeline.repository_handle.repository_location_handle
        if not isinstance(location_handle, GrpcServerRepositoryLocationHandle):
            handle_type = type(location_handle)
            raise DagsterInvariantViolationError(
                'Expected RepositoryLocationHandle to be of type '
                'GrpcServerRepositoryLocationHandle but found type {}'.format(handle_type)
            )

        image = location_handle.get_current_image()
        job_config = self._get_grpc_job_config(image)

        repository_name = external_pipeline.repository_handle.repository_name
        pipeline_name = external_pipeline.name
        python_origin = location_handle.get_repository_python_origin(repository_name)
        pipeline_origin = PipelinePythonOrigin(
            pipeline_name=pipeline_name,
            repository_origin=python_origin,
        )

    execute_args = ExecuteRunArgs(
        pipeline_origin=pipeline_origin,
        pipeline_run_id=run.run_id,
        instance_ref=None,
    )
    input_json = serialize_dagster_namedtuple(execute_args)

    k8s_job = construct_dagster_k8s_job(
        job_config=job_config,
        command=['dagster'],
        args=['api', 'execute_run_with_structured_logs', input_json],
        job_name=job_name,
        pod_name=pod_name,
        component='runmaster',
        resources=resource_requirements,
    )
    self._batch_api.create_namespaced_job(body=k8s_job, namespace=self.job_namespace)

    metadata = [
        EventMetadataEntry.text(job_name, 'Kubernetes Job name'),
        EventMetadataEntry.text(pod_name, 'Kubernetes Pod name'),
        EventMetadataEntry.text(self.job_namespace, 'Kubernetes Namespace'),
        EventMetadataEntry.text(run.run_id, 'Run ID'),
    ]
    self._instance.report_engine_event(
        'Kubernetes runmaster job launched',
        run,
        EngineEventData(metadata),
        cls=K8sRunLauncher,
    )
    return run
def launch_run(self, run, external_pipeline):
    """Create a Kubernetes job that executes ``run`` and report engine events.

    The job image comes from the ``container_image`` of the pipeline's python
    origin; a ``job_image`` in the celery-k8s executor config overrides it,
    and an engine event is reported when both are present. The chosen image is
    also recorded on the run under ``DOCKER_IMAGE_TAG``.

    Args:
        run: The ``PipelineRun`` to launch.
        external_pipeline: The ``ExternalPipeline`` the run belongs to.

    Returns:
        The ``run`` that was passed in.

    Raises:
        DagsterInvariantViolationError: if neither the repository origin nor
            the executor config provides a job image.
    """
    check.inst_param(run, "run", PipelineRun)
    check.inst_param(external_pipeline, "external_pipeline", ExternalPipeline)

    job_name = get_job_name_from_run_id(run.run_id)
    pod_name = job_name
    exc_config = _get_validated_celery_k8s_executor_config(run.run_config)
    env_vars = None

    job_image_from_executor_config = exc_config.get("job_image")

    pipeline_origin = external_pipeline.get_python_origin()
    repository_origin = pipeline_origin.repository_origin

    job_image = repository_origin.container_image

    if job_image:
        if job_image_from_executor_config:
            # Report BEFORE overriding job_image: the message must name the
            # user-code deployment image, not the executor image twice.
            self._instance.report_engine_event(
                f"You have specified a job_image {job_image_from_executor_config} in your executor configuration, "
                f"but also {job_image} in your user-code deployment. Using the job image {job_image_from_executor_config} "
                f"from executor configuration as it takes precedence.",
                run,
                cls=self.__class__,
            )
            job_image = job_image_from_executor_config
    else:
        if not job_image_from_executor_config:
            raise DagsterInvariantViolationError(
                "You have not specified a job_image in your executor configuration. "
                "To resolve this error, specify the job_image configuration in the executor "
                "config section in your run config. \n"
                "Note: You may also be seeing this error because you are using the configured API. "
                "Using configured with the celery-k8s executor is not supported at this time, "
                "and the job_image must be configured at the top-level executor config without "
                "using configured."
            )

        job_image = job_image_from_executor_config

    job_config = DagsterK8sJobConfig(
        dagster_home=self.dagster_home,
        instance_config_map=self.instance_config_map,
        postgres_password_secret=self.postgres_password_secret,
        job_image=check.str_param(job_image, "job_image"),
        image_pull_policy=exc_config.get("image_pull_policy"),
        image_pull_secrets=exc_config.get("image_pull_secrets"),
        service_account_name=exc_config.get("service_account_name"),
        env_config_maps=exc_config.get("env_config_maps"),
        env_secrets=exc_config.get("env_secrets"),
    )

    self._instance.add_run_tags(
        run.run_id,
        {DOCKER_IMAGE_TAG: job_config.job_image},
    )

    user_defined_k8s_config = get_user_defined_k8s_config(frozentags(run.tags))

    from dagster.cli.api import ExecuteRunArgs

    input_json = serialize_dagster_namedtuple(
        # depends on DagsterInstance.get() returning the same instance
        # https://github.com/dagster-io/dagster/issues/2757
        ExecuteRunArgs(
            pipeline_origin=pipeline_origin,
            pipeline_run_id=run.run_id,
            instance_ref=None,
        )
    )

    job = construct_dagster_k8s_job(
        job_config,
        args=["dagster", "api", "execute_run", input_json],
        job_name=job_name,
        pod_name=pod_name,
        component="run_coordinator",
        user_defined_k8s_config=user_defined_k8s_config,
        env_vars=env_vars,
    )

    job_namespace = exc_config.get("job_namespace")

    # One event before and one after the API call, so a failed creation is
    # still visible in the run's event log.
    self._instance.report_engine_event(
        "Creating Kubernetes run worker job",
        run,
        EngineEventData(
            [
                EventMetadataEntry.text(job_name, "Kubernetes Job name"),
                EventMetadataEntry.text(job_namespace, "Kubernetes Namespace"),
                EventMetadataEntry.text(run.run_id, "Run ID"),
            ]
        ),
        cls=self.__class__,
    )

    self._batch_api.create_namespaced_job(body=job, namespace=job_namespace)
    self._instance.report_engine_event(
        "Kubernetes run worker job created",
        run,
        EngineEventData(
            [
                EventMetadataEntry.text(job_name, "Kubernetes Job name"),
                EventMetadataEntry.text(job_namespace, "Kubernetes Namespace"),
                EventMetadataEntry.text(run.run_id, "Run ID"),
            ]
        ),
        cls=self.__class__,
    )
    return run
def run_iteration_error(_, _instance, _grpc_server_registry):
    """Generator that raises ``DagsterInvariantViolationError("foobar")`` when first advanced.

    All three arguments are ignored.
    """
    # Delegating to an empty iterable yields nothing; it only serves to make
    # this function a generator, so the error surfaces on the first advance.
    yield from ()
    raise DagsterInvariantViolationError("foobar")
def run_iteration_error(_, _instance, _workspace):
    """Generator that fails with a numbered error while ``should_raise_errors`` is truthy.

    When ``should_raise_errors`` (a name from the enclosing scope) is falsy the
    generator yields ``None`` once and finishes. Otherwise it increments
    ``error_count["count"]`` and raises ``DagsterInvariantViolationError``
    with the message ``"foobar:<count>"``. All three arguments are ignored.
    """
    if not should_raise_errors:
        yield
        return

    attempt = error_count["count"] + 1
    error_count["count"] = attempt
    raise DagsterInvariantViolationError("foobar:" + str(attempt))
def run_iteration_error(_):
    """Generator that raises ``DagsterInvariantViolationError("foobar")`` when first advanced.

    The single argument is ignored.
    """
    failure = DagsterInvariantViolationError("foobar")
    raise failure
    # Never reached; its presence is what makes this function a generator.
    yield  # pylint: disable=unreachable
def run_iteration_error(_):
    """Raise ``DagsterInvariantViolationError("foobar")`` as soon as it is called.

    This is a plain function, not a generator: the error is raised at call
    time. The single argument is ignored.
    """
    failure = DagsterInvariantViolationError("foobar")
    raise failure