def __new__(
    cls,
    handle: Union[StepHandle, ResolvedFromDynamicStepHandle],
    pipeline_name: str,
    step_inputs: List[StepInput],
    step_outputs: List[StepOutput],
    tags: Optional[Dict[str, str]],
    logging_tags: Optional[Dict[str, str]] = None,
):
    return super(ExecutionStep, cls).__new__(
        cls,
        handle=check.inst_param(handle, "handle", (StepHandle, ResolvedFromDynamicStepHandle)),
        pipeline_name=check.str_param(pipeline_name, "pipeline_name"),
        step_input_dict={
            si.name: si
            for si in check.list_param(step_inputs, "step_inputs", of_type=StepInput)
        },
        step_output_dict={
            so.name: so
            for so in check.list_param(step_outputs, "step_outputs", of_type=StepOutput)
        },
        tags=validate_tags(check.opt_dict_param(tags, "tags", key_type=str)),
        logging_tags=merge_dicts(
            {
                "step_key": handle.to_key(),
                "pipeline": pipeline_name,
                "solid": handle.solid_handle.name,
            },
            check.opt_dict_param(logging_tags, "logging_tags"),
        ),
    )
def make_dagster_pipeline_from_airflow_dag(dag, tags=None):
    '''Construct a Dagster pipeline corresponding to a given Airflow DAG.

    Tasks in the resulting pipeline will execute the execute() method on the corresponding
    Airflow Operator. Dagster, any dependencies required by Airflow Operators, and the module
    containing your DAG definition must be available in the Python environment within which
    your Dagster solids execute.

    To set Airflow's `execution_date` for use with Airflow Operator's execute() methods, either

    (1) (Best for ad hoc runs) Run Pipeline with 'default' preset, which sets execution_date to
        the time (in UTC) of pipeline invocation

        ```
        execute_pipeline(
            pipeline=make_dagster_pipeline_from_airflow_dag(dag),
            preset='default')
        ```

    (2) Add {'airflow_execution_date': utc_date_string} to the PipelineDefinition tags. This
        will override behavior from (1).

        ```
        execute_pipeline(
            make_dagster_pipeline_from_airflow_dag(
                dag,
                {'airflow_execution_date': utc_execution_date_str}
            )
        )
        ```

    (3) (Recommended) Add {'airflow_execution_date': utc_date_string} to the PipelineRun tags,
        such as in the Dagit UI. This will override behavior from (1) and (2).

    Args:
        dag (DAG): The Airflow DAG to compile into a Dagster pipeline
        tags (Dict[str, Field]): Pipeline tags. Optionally include
            `tags={'airflow_execution_date': utc_date_string}` to specify execution_date used
            within execution of Airflow Operators.

    Returns:
        pipeline_def (PipelineDefinition): The generated Dagster pipeline
    '''
    check.inst_param(dag, 'dag', DAG)
    tags = check.opt_dict_param(tags, 'tags')

    if IS_AIRFLOW_INGEST_PIPELINE_STR not in tags:
        tags[IS_AIRFLOW_INGEST_PIPELINE_STR] = 'true'

    tags = validate_tags(tags)

    pipeline_dependencies, solid_defs = _get_pipeline_definition_args(dag)
    pipeline_def = PipelineDefinition(
        name='airflow_' + dag.dag_id,
        solid_defs=solid_defs,
        dependencies=pipeline_dependencies,
        tags=tags,
    )
    return pipeline_def
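# A minimal, hedged sketch of option (2) from the docstring above: pinning
# `airflow_execution_date` at definition time. The DAG, task, and date below
# are hypothetical stand-ins built with Airflow 1.x-era imports; they are not
# part of the function above.
from datetime import datetime

from airflow.models import DAG
from airflow.operators.dummy_operator import DummyOperator

from dagster import execute_pipeline

my_dag = DAG(dag_id='my_dag', default_args={'start_date': datetime(2019, 6, 25)})
DummyOperator(task_id='my_task', dag=my_dag)

result = execute_pipeline(
    make_dagster_pipeline_from_airflow_dag(
        my_dag, {'airflow_execution_date': '2019-06-25T00:00:00+00:00'}
    )
)
assert result.success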
def test_valid_job_format_with_resources(run_launcher):
    docker_image = test_project_docker_image()

    run_config = load_yaml_from_path(os.path.join(test_project_environments_path(), 'env.yaml'))
    pipeline_name = 'demo_pipeline'
    run = PipelineRun(pipeline_name=pipeline_name, run_config=run_config)

    tags = validate_tags(
        {
            K8S_RESOURCE_REQUIREMENTS_KEY: {
                'requests': {'cpu': '250m', 'memory': '64Mi'},
                'limits': {'cpu': '500m', 'memory': '2560Mi'},
            }
        }
    )
    resources = get_k8s_resource_requirements(tags)

    job_name = 'dagster-run-%s' % run.run_id
    pod_name = 'dagster-run-%s' % run.run_id
    job = construct_dagster_k8s_job(
        job_config=run_launcher.job_config,
        command=['dagster-graphql'],
        args=['-p', 'executeRunInProcess', '-v', seven.json.dumps({'runId': run.run_id})],
        job_name=job_name,
        resources=resources,
        pod_name=pod_name,
        component='runmaster',
    )

    assert (
        yaml.dump(remove_none_recursively(job.to_dict()), default_flow_style=False).strip()
        == EXPECTED_JOB_SPEC.format(
            run_id=run.run_id,
            job_image=docker_image,
            image_pull_policy=image_pull_policy(),
            dagster_version=dagster_version,
            resources='''
    resources:
      limits:
        cpu: 500m
        memory: 2560Mi
      requests:
        cpu: 250m
        memory: 64Mi''',
        ).strip()
    )
def test_valid_job_format_with_backcompat_resources(run_launcher):
    docker_image = test_project_docker_image()

    run_config = load_yaml_from_path(os.path.join(test_project_environments_path(), "env.yaml"))
    pipeline_name = "demo_pipeline"
    run = PipelineRun(pipeline_name=pipeline_name, run_config=run_config)

    tags = validate_tags(
        {
            K8S_RESOURCE_REQUIREMENTS_KEY: {
                "requests": {"cpu": "250m", "memory": "64Mi"},
                "limits": {"cpu": "500m", "memory": "2560Mi"},
            }
        }
    )
    user_defined_k8s_config = get_user_defined_k8s_config(tags)

    job_name = "dagster-run-%s" % run.run_id
    pod_name = "dagster-run-%s" % run.run_id
    job = construct_dagster_k8s_job(
        job_config=run_launcher.job_config,
        command=["dagster-graphql"],
        args=["-p", "executeRunInProcess", "-v", seven.json.dumps({"runId": run.run_id})],
        job_name=job_name,
        user_defined_k8s_config=user_defined_k8s_config,
        pod_name=pod_name,
        component="run_coordinator",
    )

    assert (
        yaml.dump(remove_none_recursively(job.to_dict()), default_flow_style=False).strip()
        == EXPECTED_JOB_SPEC.format(
            run_id=run.run_id,
            job_image=docker_image,
            image_pull_policy=image_pull_policy(),
            dagster_version=dagster_version,
            resources="""
    resources:
      limits:
        cpu: 500m
        memory: 2560Mi
      requests:
        cpu: 250m
        memory: 64Mi""",
        ).strip()
    )
def test_valid_job_format_with_user_defined_k8s_config(run_launcher):
    docker_image = test_project_docker_image()

    run_config = load_yaml_from_path(os.path.join(test_project_environments_path(), "env.yaml"))
    pipeline_name = "demo_pipeline"
    run = PipelineRun(pipeline_name=pipeline_name, run_config=run_config)

    tags = validate_tags(
        {
            USER_DEFINED_K8S_CONFIG_KEY: {
                "container_config": {
                    "resources": {
                        "requests": {"cpu": "250m", "memory": "64Mi"},
                        "limits": {"cpu": "500m", "memory": "2560Mi"},
                    }
                },
                "pod_template_spec_metadata": {
                    "annotations": {"cluster-autoscaler.kubernetes.io/safe-to-evict": "true"},
                    "labels": {"spotinst.io/restrict-scale-down": "true"},
                },
                "pod_spec_config": {
                    "affinity": {
                        "nodeAffinity": {
                            "requiredDuringSchedulingIgnoredDuringExecution": {
                                "nodeSelectorTerms": [
                                    {
                                        "matchExpressions": [
                                            {
                                                "key": "kubernetes.io/e2e-az-name",
                                                "operator": "In",
                                                "values": ["e2e-az1", "e2e-az2"],
                                            }
                                        ]
                                    }
                                ]
                            }
                        }
                    }
                },
            }
        }
    )
    user_defined_k8s_config = get_user_defined_k8s_config(tags)

    job_name = "dagster-run-%s" % run.run_id
    pod_name = "dagster-run-%s" % run.run_id
    job = construct_dagster_k8s_job(
        job_config=run_launcher.job_config,
        command=["dagster"],
        args=["api", "execute_run_with_structured_logs"],
        job_name=job_name,
        user_defined_k8s_config=user_defined_k8s_config,
        pod_name=pod_name,
        component="run_coordinator",
    )

    assert (
        yaml.dump(remove_none_recursively(job.to_dict()), default_flow_style=False).strip()
        == EXPECTED_CONFIGURED_JOB_SPEC.format(
            run_id=run.run_id,
            job_image=docker_image,
            image_pull_policy=image_pull_policy(),
            dagster_version=dagster_version,
            labels="spotinst.io/restrict-scale-down: 'true'",
            resources="""
        resources:
          limits:
            cpu: 500m
            memory: 2560Mi
          requests:
            cpu: 250m
            memory: 64Mi""",
            annotations="""annotations:
        cluster-autoscaler.kubernetes.io/safe-to-evict: 'true'""",
            affinity="""affinity:
        nodeAffinity:
          requiredDuringSchedulingIgnoredDuringExecution:
            nodeSelectorTerms:
            - matchExpressions:
              - key: kubernetes.io/e2e-az-name
                operator: In
                values:
                - e2e-az1
                - e2e-az2""",
        ).strip()
    )
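# A hedged sketch of how the configuration exercised by the test above would be
# attached in user code: the same dict goes under USER_DEFINED_K8S_CONFIG_KEY
# (the "dagster-k8s/config" tag) on a solid definition, and validate_tags
# JSON-encodes the non-string value. `my_solid` and the import path are
# assumptions, not part of the test.
from dagster import solid
from dagster_k8s.job import USER_DEFINED_K8S_CONFIG_KEY


@solid(
    tags={
        USER_DEFINED_K8S_CONFIG_KEY: {
            "container_config": {
                "resources": {
                    "requests": {"cpu": "250m", "memory": "64Mi"},
                    "limits": {"cpu": "500m", "memory": "2560Mi"},
                }
            }
        }
    }
)
def my_solid(_):
    pass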
def submit_pipeline_execution(
    self,
    pipeline_name: str,
    repository_location_name: Optional[str] = None,
    repository_name: Optional[str] = None,
    run_config: Optional[Any] = None,
    mode: Optional[str] = None,
    preset: Optional[str] = None,
    tags: Optional[Dict[str, Any]] = None,
) -> str:
    """Submits a Pipeline with attached configuration for execution.

    Args:
        pipeline_name (str): The pipeline's name
        repository_location_name (Optional[str], optional): The name of the repository location
            where the pipeline is located. If omitted, the client will try to infer the
            repository location from the available options on the Dagster deployment.
            Defaults to None.
        repository_name (Optional[str], optional): The name of the repository where the
            pipeline is located. If omitted, the client will try to infer the repository from
            the available options on the Dagster deployment. Defaults to None.
        run_config (Optional[Any], optional): This is the run config to execute the pipeline
            with. Note that runConfigData is any-typed in the GraphQL type system. This type
            is used when passing in an arbitrary object for run config. However, it must
            conform to the constraints of the config schema for this pipeline. If it does not,
            the client will throw a DagsterGraphQLClientError with a message of
            PipelineConfigValidationInvalid. Defaults to None.
        mode (Optional[str], optional): The mode to run the pipeline with. If you have not
            defined any custom modes for your pipeline, the default mode is "default".
            Defaults to None.
        preset (Optional[str], optional): The name of a pre-defined preset to use instead of a
            run config. Defaults to None.
        tags (Optional[Dict[str, Any]], optional): A set of tags to add to the pipeline
            execution.

    Raises:
        DagsterGraphQLClientError("InvalidStepError", invalid_step_key): the pipeline has an
            invalid step
        DagsterGraphQLClientError("InvalidOutputError", body=error_object): some solid has an
            invalid output within the pipeline. The error_object is of type
            dagster_graphql.InvalidOutputErrorInfo.
        DagsterGraphQLClientError("ConflictingExecutionParamsError", invalid_step_key): a
            preset and a run_config & mode are present that conflict with one another
        DagsterGraphQLClientError("PresetNotFoundError", message): if the provided preset name
            is not found
        DagsterGraphQLClientError("PipelineRunConflict", message): a `DagsterRunConflict`
            occurred during execution. This indicates that a conflicting pipeline run already
            exists in run storage.
        DagsterGraphQLClientError("PipelineConfigurationInvalid", invalid_step_key): the
            run_config is not in the expected format for the pipeline
        DagsterGraphQLClientError("PipelineNotFoundError", message): the requested pipeline
            does not exist
        DagsterGraphQLClientError("PythonError", message): an internal framework error occurred

    Returns:
        str: run id of the submitted pipeline run
    """
    check.opt_str_param(repository_location_name, "repository_location_name")
    check.opt_str_param(repository_name, "repository_name")
    check.str_param(pipeline_name, "pipeline_name")
    check.opt_str_param(mode, "mode")
    check.opt_str_param(preset, "preset")
    check.invariant(
        (mode is not None and run_config is not None) or preset is not None,
        "Either a mode and run_config or a preset must be specified in order to "
        f"submit the pipeline {pipeline_name} for execution",
    )
    tags = validate_tags(tags)

    if not repository_location_name or not repository_name:
        pipeline_info_lst = self._get_repo_locations_and_names_with_pipeline(pipeline_name)
        if len(pipeline_info_lst) == 0:
            raise DagsterGraphQLClientError(
                "PipelineNotFoundError",
                f"No pipelines with the name `{pipeline_name}` exist",
            )
        elif len(pipeline_info_lst) == 1:
            pipeline_info = pipeline_info_lst[0]
            repository_location_name = pipeline_info.repository_location_name
            repository_name = pipeline_info.repository_name
        else:
            raise DagsterGraphQLClientError(
                "Must specify repository_location_name and repository_name"
                f" since there are multiple pipelines with the name {pipeline_name}."
                f"\n\tchoose one of: {pipeline_info_lst}"
            )

    variables = {
        "executionParams": {
            "selector": {
                "repositoryLocationName": repository_location_name,
                "repositoryName": repository_name,
                "pipelineName": pipeline_name,
            }
        }
    }
    if preset is not None:
        variables["executionParams"]["preset"] = preset
    if mode is not None and run_config is not None:
        variables["executionParams"] = {
            **variables["executionParams"],
            "runConfigData": run_config,
            "mode": mode,
            "executionMetadata": {
                "tags": [{"key": k, "value": v} for k, v in tags.items()]
            }
            if tags
            else {},
        }

    res_data: Dict[str, Any] = self._execute(CLIENT_SUBMIT_PIPELINE_RUN_MUTATION, variables)
    query_result = res_data["launchPipelineExecution"]
    query_result_type = query_result["__typename"]
    if query_result_type == "LaunchPipelineRunSuccess":
        return query_result["run"]["runId"]
    elif query_result_type == "InvalidStepError":
        raise DagsterGraphQLClientError(query_result_type, query_result["invalidStepKey"])
    elif query_result_type == "InvalidOutputError":
        error_info = InvalidOutputErrorInfo(
            step_key=query_result["stepKey"],
            invalid_output_name=query_result["invalidOutputName"],
        )
        raise DagsterGraphQLClientError(query_result_type, body=error_info)
    elif query_result_type == "PipelineConfigValidationInvalid":
        raise DagsterGraphQLClientError(query_result_type, query_result["errors"])
    else:
        # query_result_type is a ConflictingExecutionParamsError, a PresetNotFoundError,
        # a PipelineNotFoundError, a PipelineRunConflict, or a PythonError
        raise DagsterGraphQLClientError(query_result_type, query_result["message"])
def define_dagstermill_solid(
    name,
    notebook_path,
    input_defs=None,
    output_defs=None,
    config_schema=None,
    required_resource_keys=None,
    output_notebook=None,
    asset_key_prefix=None,
    description=None,
    tags=None,
):
    """Wrap a Jupyter notebook in a solid.

    Arguments:
        name (str): The name of the solid.
        notebook_path (str): Path to the backing notebook.
        input_defs (Optional[List[InputDefinition]]): The solid's inputs.
        output_defs (Optional[List[OutputDefinition]]): The solid's outputs. Your notebook
            should call :py:func:`~dagstermill.yield_result` to yield each of these outputs.
        required_resource_keys (Optional[Set[str]]): The string names of any required
            resources.
        output_notebook (Optional[str]): If set, will be used as the name of an injected output
            of type :py:class:`~dagster.FileHandle` that will point to the executed notebook
            (in addition to the :py:class:`~dagster.AssetMaterialization` that is always
            created). This respects the
            :py:class:`~dagster.core.storage.file_manager.FileManager` configured on the
            pipeline resources via the "file_manager" resource key, so, e.g., if
            :py:class:`~dagster_aws.s3.s3_file_manager` is configured, the output will be a
            :py:class:`~dagster_aws.s3.S3FileHandle`.
        asset_key_prefix (Optional[Union[List[str], str]]): If set, will be used to prefix the
            asset keys for materialized notebooks.
        description (Optional[str]): If set, description used for solid.
        tags (Optional[Dict[str, str]]): If set, additional tags used to annotate solid.
            Dagster uses the tag keys `notebook_path` and `kind`, which cannot be overwritten
            by the user.

    Returns:
        :py:class:`~dagster.SolidDefinition`
    """
    check.str_param(name, "name")
    check.str_param(notebook_path, "notebook_path")

    input_defs = check.opt_list_param(input_defs, "input_defs", of_type=InputDefinition)
    output_defs = check.opt_list_param(output_defs, "output_defs", of_type=OutputDefinition)
    required_resource_keys = check.opt_set_param(
        required_resource_keys, "required_resource_keys", of_type=str
    )
    if output_notebook is not None:
        required_resource_keys.add("file_manager")
    if isinstance(asset_key_prefix, str):
        asset_key_prefix = [asset_key_prefix]

    asset_key_prefix = check.opt_list_param(asset_key_prefix, "asset_key_prefix", of_type=str)

    default_description = f"This solid is backed by the notebook at {notebook_path}"
    description = check.opt_str_param(description, "description", default=default_description)

    user_tags = validate_tags(tags)
    if tags is not None:
        check.invariant(
            "notebook_path" not in tags,
            "user-defined solid tags contains the `notebook_path` key, but the `notebook_path` "
            "key is reserved for use by Dagster",
        )
        check.invariant(
            "kind" not in tags,
            "user-defined solid tags contains the `kind` key, but the `kind` key is reserved "
            "for use by Dagster",
        )
    default_tags = {"notebook_path": notebook_path, "kind": "ipynb"}

    return SolidDefinition(
        name=name,
        input_defs=input_defs,
        compute_fn=_dm_solid_compute(
            name, notebook_path, output_notebook, asset_key_prefix=asset_key_prefix
        ),
        output_defs=output_defs
        + (
            [OutputDefinition(dagster_type=FileHandle, name=output_notebook)]
            if output_notebook
            else []
        ),
        config_schema=config_schema,
        required_resource_keys=required_resource_keys,
        description=description,
        tags={**user_tags, **default_tags},
    )
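# A short, hedged example of wrapping a notebook with the factory above; the
# notebook path, solid name, and tag are hypothetical.
from dagster import InputDefinition, Int, OutputDefinition
from dagstermill import define_dagstermill_solid

my_notebook_solid = define_dagstermill_solid(
    name="add_two_numbers",
    notebook_path="notebooks/add_two_numbers.ipynb",  # hypothetical path
    input_defs=[InputDefinition("a", Int), InputDefinition("b", Int)],
    output_defs=[OutputDefinition(Int)],
    tags={"team": "data-platform"},  # merged with the reserved notebook_path/kind tags
)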
def test_valid_job_format_with_user_defined_k8s_config(run_launcher):
    docker_image = test_project_docker_image()

    run_config = load_yaml_from_path(os.path.join(test_project_environments_path(), 'env.yaml'))
    pipeline_name = 'demo_pipeline'
    run = PipelineRun(pipeline_name=pipeline_name, run_config=run_config)

    tags = validate_tags(
        {
            USER_DEFINED_K8S_CONFIG_KEY: {
                'container_config': {
                    'resources': {
                        'requests': {'cpu': '250m', 'memory': '64Mi'},
                        'limits': {'cpu': '500m', 'memory': '2560Mi'},
                    }
                },
                'pod_template_spec_metadata': {
                    'annotations': {"cluster-autoscaler.kubernetes.io/safe-to-evict": "true"}
                },
                'pod_spec_config': {
                    'affinity': {
                        'nodeAffinity': {
                            'requiredDuringSchedulingIgnoredDuringExecution': {
                                'nodeSelectorTerms': [
                                    {
                                        'matchExpressions': [
                                            {
                                                'key': 'kubernetes.io/e2e-az-name',
                                                'operator': 'In',
                                                'values': ['e2e-az1', 'e2e-az2'],
                                            }
                                        ]
                                    }
                                ]
                            }
                        }
                    }
                },
            }
        }
    )
    user_defined_k8s_config = get_user_defined_k8s_config(tags)

    job_name = 'dagster-run-%s' % run.run_id
    pod_name = 'dagster-run-%s' % run.run_id
    job = construct_dagster_k8s_job(
        job_config=run_launcher.job_config,
        command=['dagster-graphql'],
        args=['-p', 'executeRunInProcess', '-v', seven.json.dumps({'runId': run.run_id})],
        job_name=job_name,
        user_defined_k8s_config=user_defined_k8s_config,
        pod_name=pod_name,
        component='run_coordinator',
    )

    assert (
        yaml.dump(remove_none_recursively(job.to_dict()), default_flow_style=False).strip()
        == EXPECTED_CONFIGURED_JOB_SPEC.format(
            run_id=run.run_id,
            job_image=docker_image,
            image_pull_policy=image_pull_policy(),
            dagster_version=dagster_version,
            resources='''
        resources:
          limits:
            cpu: 500m
            memory: 2560Mi
          requests:
            cpu: 250m
            memory: 64Mi''',
            annotations='''annotations:
        cluster-autoscaler.kubernetes.io/safe-to-evict: \'true\'''',
            affinity='''affinity:
        nodeAffinity:
          requiredDuringSchedulingIgnoredDuringExecution:
            nodeSelectorTerms:
            - matchExpressions:
              - key: kubernetes.io/e2e-az-name
                operator: In
                values:
                - e2e-az1
                - e2e-az2''',
        ).strip()
    )
def make_dagster_pipeline_from_airflow_dag(
    dag, tags=None, use_airflow_template_context=False, unique_id=None
):
    '''Construct a Dagster pipeline corresponding to a given Airflow DAG.

    Tasks in the resulting pipeline will execute the execute() method on the corresponding
    Airflow Operator. Dagster, any dependencies required by Airflow Operators, and the module
    containing your DAG definition must be available in the Python environment within which
    your Dagster solids execute.

    To set Airflow's `execution_date` for use with Airflow Operator's execute() methods, either

    (1) (Best for ad hoc runs) Run Pipeline with 'default' preset, which sets execution_date to
        the time (in UTC) of pipeline invocation

        ```
        execute_pipeline(
            pipeline=make_dagster_pipeline_from_airflow_dag(dag=dag),
            preset='default')
        ```

    (2) Add {'airflow_execution_date': utc_date_string} to the PipelineDefinition tags. This
        will override behavior from (1).

        ```
        execute_pipeline(
            make_dagster_pipeline_from_airflow_dag(
                dag=dag,
                tags={'airflow_execution_date': utc_execution_date_str}
            )
        )
        ```

    (3) (Recommended) Add {'airflow_execution_date': utc_date_string} to the PipelineRun tags,
        such as in the Dagit UI. This will override behavior from (1) and (2).

    We apply normalized_name() to the dag id and task ids when generating pipeline name and
    solid names to ensure that names conform to Dagster's naming conventions.

    Args:
        dag (DAG): The Airflow DAG to compile into a Dagster pipeline
        tags (Dict[str, Field]): Pipeline tags. Optionally include
            `tags={'airflow_execution_date': utc_date_string}` to specify execution_date used
            within execution of Airflow Operators.
        use_airflow_template_context (bool): If True, will call get_template_context() on the
            Airflow TaskInstance model, which requires and modifies the DagRun table.
            (default: False)
        unique_id (int): If not None, this id will be appended to generated solid names. Used
            by framework authors to enforce unique solid names within a repo.

    Returns:
        pipeline_def (PipelineDefinition): The generated Dagster pipeline
    '''
    check.inst_param(dag, 'dag', DAG)
    tags = check.opt_dict_param(tags, 'tags')
    check.bool_param(use_airflow_template_context, 'use_airflow_template_context')
    unique_id = check.opt_int_param(unique_id, 'unique_id')

    if IS_AIRFLOW_INGEST_PIPELINE_STR not in tags:
        tags[IS_AIRFLOW_INGEST_PIPELINE_STR] = 'true'

    tags = validate_tags(tags)

    pipeline_dependencies, solid_defs = _get_pipeline_definition_args(
        dag, use_airflow_template_context, unique_id
    )
    pipeline_def = PipelineDefinition(
        name=normalized_name(dag.dag_id, None),
        solid_defs=solid_defs,
        dependencies=pipeline_dependencies,
        tags=tags,
    )
    return pipeline_def
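# A hedged usage sketch exercising the optional arguments documented above;
# `my_dag` stands in for any airflow.models.DAG instance (see the sketch after
# the earlier variant of this function) and the execution date is hypothetical.
pipeline_def = make_dagster_pipeline_from_airflow_dag(
    dag=my_dag,
    tags={'airflow_execution_date': '2019-06-25T00:00:00+00:00'},
    use_airflow_template_context=True,  # hydrate operator context from the DagRun table
    unique_id=1,  # appended to generated solid names to avoid collisions
)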
def define_dagstermill_op(
    name: str,
    notebook_path: str,
    input_defs: Optional[Sequence[InputDefinition]] = None,
    output_defs: Optional[Sequence[OutputDefinition]] = None,
    config_schema: Optional[Union[Any, Dict[str, Any]]] = None,
    required_resource_keys: Optional[Set[str]] = None,
    output_notebook_name: Optional[str] = None,
    asset_key_prefix: Optional[Union[List[str], str]] = None,
    description: Optional[str] = None,
    tags: Optional[Dict[str, Any]] = None,
):
    """Wrap a Jupyter notebook in an op.

    Arguments:
        name (str): The name of the op.
        notebook_path (str): Path to the backing notebook.
        input_defs (Optional[List[InputDefinition]]): The op's inputs.
        output_defs (Optional[List[OutputDefinition]]): The op's outputs. Your notebook should
            call :py:func:`~dagstermill.yield_result` to yield each of these outputs.
        required_resource_keys (Optional[Set[str]]): The string names of any required
            resources.
        output_notebook_name (Optional[str]): If set, will be used as the name of an injected
            output of type :py:class:`~dagster.BufferedIOBase` that is the file object of the
            executed notebook (in addition to the :py:class:`~dagster.AssetMaterialization`
            that is always created). It allows the downstream ops to access the executed
            notebook via a file object.
        asset_key_prefix (Optional[Union[List[str], str]]): If set, will be used to prefix the
            asset keys for materialized notebooks.
        description (Optional[str]): If set, description used for op.
        tags (Optional[Dict[str, str]]): If set, additional tags used to annotate op. Dagster
            uses the tag keys `notebook_path` and `kind`, which cannot be overwritten by the
            user.

    Returns:
        :py:class:`~dagster.OpDefinition`
    """
    check.str_param(name, "name")
    check.str_param(notebook_path, "notebook_path")

    input_defs = check.opt_list_param(input_defs, "input_defs", of_type=InputDefinition)
    output_defs = check.opt_list_param(output_defs, "output_defs", of_type=OutputDefinition)
    required_resource_keys = check.opt_set_param(
        required_resource_keys, "required_resource_keys", of_type=str
    )

    extra_output_defs = []
    if output_notebook_name is not None:
        required_resource_keys.add("output_notebook_io_manager")
        extra_output_defs.append(
            OutputDefinition(
                name=output_notebook_name, io_manager_key="output_notebook_io_manager"
            )
        )
    if isinstance(asset_key_prefix, str):
        asset_key_prefix = [asset_key_prefix]

    asset_key_prefix = check.opt_list_param(asset_key_prefix, "asset_key_prefix", of_type=str)

    default_description = f"This op is backed by the notebook at {notebook_path}"
    description = check.opt_str_param(description, "description", default=default_description)

    user_tags = validate_tags(tags)
    if tags is not None:
        check.invariant(
            "notebook_path" not in tags,
            "user-defined solid tags contains the `notebook_path` key, but the `notebook_path` "
            "key is reserved for use by Dagster",
        )
        check.invariant(
            "kind" not in tags,
            "user-defined solid tags contains the `kind` key, but the `kind` key is reserved "
            "for use by Dagster",
        )
    default_tags = {"notebook_path": notebook_path, "kind": "ipynb"}

    return OpDefinition(
        name=name,
        input_defs=input_defs,
        compute_fn=_dm_compute(
            "define_dagstermill_op",
            name,
            notebook_path,
            output_notebook_name,
            asset_key_prefix=asset_key_prefix,
        ),
        output_defs=output_defs + extra_output_defs,
        config_schema=config_schema,
        required_resource_keys=required_resource_keys,
        description=description,
        tags={**user_tags, **default_tags},
    )
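# A hedged example of the op-flavored factory above; the notebook path and
# names are hypothetical, and the top-level dagstermill export is assumed to
# match recent releases. The injected output named by `output_notebook_name`
# lets a downstream op receive the executed notebook as a file object.
from dagstermill import define_dagstermill_op

my_notebook_op = define_dagstermill_op(
    name="clean_data",
    notebook_path="notebooks/clean_data.ipynb",  # hypothetical path
    output_notebook_name="output_notebook",
)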
def _core_submit_execution(
    self,
    pipeline_name: str,
    repository_location_name: Optional[str] = None,
    repository_name: Optional[str] = None,
    run_config: Optional[Any] = None,
    mode: Optional[str] = None,
    preset: Optional[str] = None,
    tags: Optional[Dict[str, Any]] = None,
    solid_selection: Optional[List[str]] = None,
    is_using_job_op_graph_apis: Optional[bool] = False,
):
    check.opt_str_param(repository_location_name, "repository_location_name")
    check.opt_str_param(repository_name, "repository_name")
    check.str_param(pipeline_name, "pipeline_name")
    check.opt_str_param(mode, "mode")
    check.opt_str_param(preset, "preset")
    run_config = check.opt_dict_param(run_config, "run_config")

    # The following invariant will never fail when a job is executed
    check.invariant(
        (mode is not None and run_config is not None) or preset is not None,
        "Either a mode and run_config or a preset must be specified in order to "
        f"submit the pipeline {pipeline_name} for execution",
    )
    tags = validate_tags(tags)

    pipeline_or_job = "Job" if is_using_job_op_graph_apis else "Pipeline"

    if not repository_location_name or not repository_name:
        pipeline_info_lst = self._get_repo_locations_and_names_with_pipeline(pipeline_name)
        if len(pipeline_info_lst) == 0:
            raise DagsterGraphQLClientError(
                f"{pipeline_or_job}NotFoundError",
                f"No {'jobs' if is_using_job_op_graph_apis else 'pipelines'} with the name `{pipeline_name}` exist",
            )
        elif len(pipeline_info_lst) == 1:
            pipeline_info = pipeline_info_lst[0]
            repository_location_name = pipeline_info.repository_location_name
            repository_name = pipeline_info.repository_name
        else:
            raise DagsterGraphQLClientError(
                "Must specify repository_location_name and repository_name"
                f" since there are multiple {'jobs' if is_using_job_op_graph_apis else 'pipelines'} with the name {pipeline_name}."
                f"\n\tchoose one of: {pipeline_info_lst}"
            )

    variables: Dict[str, Any] = {
        "executionParams": {
            "selector": {
                "repositoryLocationName": repository_location_name,
                "repositoryName": repository_name,
                "pipelineName": pipeline_name,
                "solidSelection": solid_selection,
            }
        }
    }
    if preset is not None:
        variables["executionParams"]["preset"] = preset
    if mode is not None and run_config is not None:
        variables["executionParams"] = {
            **variables["executionParams"],
            "runConfigData": run_config,
            "mode": mode,
            "executionMetadata": {
                "tags": [{"key": k, "value": v} for k, v in tags.items()]
            }
            if tags
            else {},
        }

    res_data: Dict[str, Any] = self._execute(CLIENT_SUBMIT_PIPELINE_RUN_MUTATION, variables)
    query_result = res_data["launchPipelineExecution"]
    query_result_type = query_result["__typename"]
    if (
        query_result_type == "LaunchRunSuccess"
        or query_result_type == "LaunchPipelineRunSuccess"
    ):
        return query_result["run"]["runId"]
    elif query_result_type == "InvalidStepError":
        raise DagsterGraphQLClientError(query_result_type, query_result["invalidStepKey"])
    elif query_result_type == "InvalidOutputError":
        error_info = InvalidOutputErrorInfo(
            step_key=query_result["stepKey"],
            invalid_output_name=query_result["invalidOutputName"],
        )
        raise DagsterGraphQLClientError(query_result_type, body=error_info)
    elif (
        query_result_type == "RunConfigValidationInvalid"
        or query_result_type == "PipelineConfigValidationInvalid"
    ):
        raise DagsterGraphQLClientError(query_result_type, query_result["errors"])
    else:
        # query_result_type is a ConflictingExecutionParamsError, a PresetNotFoundError,
        # a PipelineNotFoundError, a RunConflict, or a PythonError
        raise DagsterGraphQLClientError(query_result_type, query_result["message"])
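# A hedged sketch of consuming the typed error branches raised above, via the
# public job-oriented wrapper that is assumed to delegate to this helper. The
# host, port, and job name are hypothetical, and the `body` attribute is
# assumed to carry the InvalidOutputErrorInfo documented earlier.
from dagster_graphql import DagsterGraphQLClient, DagsterGraphQLClientError

client = DagsterGraphQLClient("localhost", port_number=3000)
try:
    run_id = client.submit_job_execution("my_job", run_config={})  # hypothetical job
except DagsterGraphQLClientError as exc:
    if exc.args[0] == "InvalidOutputError":
        # assumed: exc.body is an InvalidOutputErrorInfo with step_key and invalid_output_name
        print(f"Bad output {exc.body.invalid_output_name} on step {exc.body.step_key}")
    else:
        raise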