def sync_get_external_repositories_grpc(api_client, repository_location_handle):
    check.inst_param(
        repository_location_handle, 'repository_location_handle', RepositoryLocationHandle
    )

    repos = []
    for repository_name in repository_location_handle.repository_names:
        external_repository_data = check.inst(
            api_client.external_repository(
                repository_grpc_server_origin=RepositoryGrpcServerOrigin(
                    repository_location_handle.host,
                    repository_location_handle.port,
                    repository_location_handle.socket,
                    repository_name,
                )
            ),
            ExternalRepositoryData,
        )
        repos.append(
            ExternalRepository(
                external_repository_data,
                RepositoryHandle(
                    repository_name=external_repository_data.name,
                    repository_location_handle=repository_location_handle,
                ),
            )
        )
    return repos

def sync_get_streaming_external_repositories_grpc(api_client, repository_location_handle):
    check.inst_param(
        repository_location_handle, "repository_location_handle", RepositoryLocationHandle
    )

    repos = []
    for repository_name in repository_location_handle.repository_names:
        external_repository_chunks = list(
            api_client.streaming_external_repository(
                external_repository_origin=ExternalRepositoryOrigin(
                    repository_location_handle.origin,
                    repository_name,
                )
            )
        )

        external_repository_data = deserialize_json_to_dagster_namedtuple(
            "".join(
                [
                    chunk["serialized_external_repository_chunk"]
                    for chunk in external_repository_chunks
                ]
            )
        )

        repos.append(
            ExternalRepository(
                external_repository_data,
                RepositoryHandle(
                    repository_name=external_repository_data.name,
                    repository_location_handle=repository_location_handle,
                ),
            )
        )

    return repos

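# --- Illustrative sketch (not from the source above): the chunk-reassembly
# pattern used by sync_get_streaming_external_repositories_grpc. A streaming
# RPC yields dicts carrying string fragments of a single serialized document;
# the fragments are concatenated and deserialized in one shot. json.loads
# stands in here for deserialize_json_to_dagster_namedtuple, and the payload
# is made up for the example.
import json

_chunks = [
    {"serialized_external_repository_chunk": '{"na'},
    {"serialized_external_repository_chunk": 'me": "my_repo"}'},
]
_payload = "".join(chunk["serialized_external_repository_chunk"] for chunk in _chunks)
assert json.loads(_payload) == {"name": "my_repo"}
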
def sync_get_external_repositories(repository_location_handle):
    check.inst_param(
        repository_location_handle,
        'repository_location_handle',
        PythonEnvRepositoryLocationHandle,
    )

    repos = []
    for key, pointer in repository_location_handle.repository_code_pointer_dict.items():
        external_repository_data = check.inst(
            execute_unary_api_cli_command(
                repository_location_handle.executable_path,
                'repository',
                RepositoryPythonOrigin(repository_location_handle.executable_path, pointer),
            ),
            ExternalRepositoryData,
        )
        repos.append(
            ExternalRepository(
                external_repository_data,
                RepositoryHandle(
                    repository_name=external_repository_data.name,
                    repository_key=key,
                    repository_location_handle=repository_location_handle,
                ),
            )
        )
    return repos

def test_raise_on_error(kubeconfig_file):
    # Construct a K8s run launcher in a fake k8s environment.
    mock_k8s_client_batch_api = mock.MagicMock()
    k8s_run_launcher = K8sRunLauncher(
        service_account_name="dagit-admin",
        instance_config_map="dagster-instance",
        postgres_password_secret="dagster-postgresql-secret",
        dagster_home="/opt/dagster/dagster_home",
        job_image="fake_job_image",
        load_incluster_config=False,
        kubeconfig_file=kubeconfig_file,
        k8s_client_batch_api=mock_k8s_client_batch_api,
        fail_pod_on_run_failure=True,
    )

    # Create fake external pipeline.
    recon_pipeline = reconstructable(fake_pipeline)
    recon_repo = recon_pipeline.repository
    repo_def = recon_repo.get_definition()

    loadable_target_origin = LoadableTargetOrigin(python_file=__file__)

    with instance_for_test() as instance:
        with in_process_test_workspace(instance, loadable_target_origin) as workspace:
            location = workspace.get_repository_location(workspace.repository_location_names[0])
            repo_handle = RepositoryHandle(
                repository_name=repo_def.name,
                repository_location=location,
            )
            fake_external_pipeline = external_pipeline_from_recon_pipeline(
                recon_pipeline,
                solid_selection=None,
                repository_handle=repo_handle,
            )

            # Launch the run in a fake Dagster instance.
            pipeline_name = "demo_pipeline"
            run = create_run_for_test(
                instance,
                pipeline_name=pipeline_name,
                external_pipeline_origin=fake_external_pipeline.get_external_origin(),
                pipeline_code_origin=fake_external_pipeline.get_python_origin(),
            )
            k8s_run_launcher.register_instance(instance)
            k8s_run_launcher.launch_run(LaunchRunContext(run, workspace))

            mock_method_calls = mock_k8s_client_batch_api.method_calls
            assert len(mock_method_calls) > 0
            method_name, _args, kwargs = mock_method_calls[0]
            assert method_name == "create_namespaced_job"

            container = kwargs["body"].spec.template.spec.containers[0]
            args = container.args
            assert (
                args
                == ExecuteRunArgs(
                    pipeline_origin=run.pipeline_code_origin,
                    pipeline_run_id=run.run_id,
                    instance_ref=instance.get_ref(),
                    set_exit_code_on_failure=True,
                ).get_command_args()
            )

def sync_get_external_repositories_grpc(repository_location_handle):
    check.inst_param(
        repository_location_handle,
        'repository_location_handle',
        PythonEnvRepositoryLocationHandle,
    )

    repos = []
    with ephemeral_grpc_api_client(
        python_executable_path=repository_location_handle.executable_path
    ) as api_client:
        for key, pointer in repository_location_handle.repository_code_pointer_dict.items():
            external_repository_data = check.inst(
                api_client.external_repository(
                    repository_python_origin=RepositoryPythonOrigin(
                        repository_location_handle.executable_path, pointer
                    )
                ),
                ExternalRepositoryData,
            )
            repos.append(
                ExternalRepository(
                    external_repository_data,
                    RepositoryHandle(
                        repository_name=external_repository_data.name,
                        repository_key=key,
                        repository_location_handle=repository_location_handle,
                    ),
                )
            )
    return repos

def sync_get_external_repositories_grpc(api_client, repository_location_handle):
    check.inst_param(
        repository_location_handle, "repository_location_handle", RepositoryLocationHandle
    )

    repos = []
    for repository_name in repository_location_handle.repository_names:
        external_repository_data = check.inst(
            api_client.external_repository(
                external_repository_origin=ExternalRepositoryOrigin(
                    repository_location_handle.origin,
                    repository_name,
                )
            ),
            ExternalRepositoryData,
        )
        repos.append(
            ExternalRepository(
                external_repository_data,
                RepositoryHandle(
                    repository_name=external_repository_data.name,
                    repository_location_handle=repository_location_handle,
                ),
            )
        )
    return repos

def test_user_defined_k8s_config_in_run_tags(kubeconfig_file):
    # Construct a K8s run launcher in a fake k8s environment.
    mock_k8s_client_batch_api = mock.MagicMock()
    celery_k8s_run_launcher = CeleryK8sRunLauncher(
        instance_config_map="dagster-instance",
        postgres_password_secret="dagster-postgresql-secret",
        dagster_home="/opt/dagster/dagster_home",
        load_incluster_config=False,
        kubeconfig_file=kubeconfig_file,
        k8s_client_batch_api=mock_k8s_client_batch_api,
    )

    # Construct Dagster run tags with user defined k8s config.
    expected_resources = {
        "requests": {"cpu": "250m", "memory": "64Mi"},
        "limits": {"cpu": "500m", "memory": "2560Mi"},
    }
    user_defined_k8s_config = UserDefinedDagsterK8sConfig(
        container_config={"resources": expected_resources},
    )
    user_defined_k8s_config_json = json.dumps(user_defined_k8s_config.to_dict())
    tags = {"dagster-k8s/config": user_defined_k8s_config_json}

    # Create fake external pipeline.
    recon_pipeline = reconstructable(fake_pipeline)
    recon_repo = recon_pipeline.repository
    location_origin = InProcessRepositoryLocationOrigin(recon_repo)
    location_handle = location_origin.create_handle()
    repo_def = recon_repo.get_definition()
    repo_handle = RepositoryHandle(
        repository_name=repo_def.name,
        repository_location_handle=location_handle,
    )
    fake_external_pipeline = external_pipeline_from_recon_pipeline(
        recon_pipeline,
        solid_selection=None,
        repository_handle=repo_handle,
    )

    # Launch the run in a fake Dagster instance.
    with instance_for_test() as instance:
        celery_k8s_run_launcher.initialize(instance)
        pipeline_name = "demo_pipeline"
        run_config = {"execution": {"celery-k8s": {"config": {"job_image": "fake-image-name"}}}}
        run = create_run_for_test(
            instance,
            pipeline_name=pipeline_name,
            run_config=run_config,
            tags=tags,
        )
        celery_k8s_run_launcher.launch_run(instance, run, fake_external_pipeline)

        # Check that user defined k8s config was passed down to the k8s job.
        mock_method_calls = mock_k8s_client_batch_api.method_calls
        assert len(mock_method_calls) > 0
        method_name, _args, kwargs = mock_method_calls[0]
        assert method_name == "create_namespaced_job"
        job_resources = kwargs["body"].spec.template.spec.containers[0].resources
        assert job_resources == expected_resources

def test_no_postgres(kubeconfig_file):
    # Construct a K8s run launcher in a fake k8s environment.
    mock_k8s_client_batch_api = mock.MagicMock()
    k8s_run_launcher = K8sRunLauncher(
        service_account_name="dagit-admin",
        instance_config_map="dagster-instance",
        dagster_home="/opt/dagster/dagster_home",
        job_image="fake_job_image",
        load_incluster_config=False,
        kubeconfig_file=kubeconfig_file,
        k8s_client_batch_api=mock_k8s_client_batch_api,
    )

    # Create fake external pipeline.
    recon_pipeline = reconstructable(fake_pipeline)
    recon_repo = recon_pipeline.repository
    repo_def = recon_repo.get_definition()

    with instance_for_test() as instance:
        with in_process_test_workspace(instance, recon_repo) as workspace:
            location = workspace.get_repository_location(workspace.repository_location_names[0])
            repo_handle = RepositoryHandle(
                repository_name=repo_def.name,
                repository_location=location,
            )
            fake_external_pipeline = external_pipeline_from_recon_pipeline(
                recon_pipeline,
                solid_selection=None,
                repository_handle=repo_handle,
            )

            # Launch the run in a fake Dagster instance.
            pipeline_name = "demo_pipeline"
            run = create_run_for_test(
                instance,
                pipeline_name=pipeline_name,
                external_pipeline_origin=fake_external_pipeline.get_external_origin(),
                pipeline_code_origin=fake_external_pipeline.get_python_origin(),
            )
            k8s_run_launcher.register_instance(instance)
            k8s_run_launcher.launch_run(LaunchRunContext(run, workspace))

            updated_run = instance.get_run_by_id(run.run_id)
            assert updated_run.tags[DOCKER_IMAGE_TAG] == "fake_job_image"

            # Check that the postgres password env var was not set on the k8s job.
            mock_method_calls = mock_k8s_client_batch_api.method_calls
            assert len(mock_method_calls) > 0
            method_name, _args, kwargs = mock_method_calls[0]
            assert method_name == "create_namespaced_job"
            assert DAGSTER_PG_PASSWORD_ENV_VAR not in [
                env.name for env in kwargs["body"].spec.template.spec.containers[0].env
            ]

def test_check_run_health(kubeconfig_file):
    labels = {"foo_label_key": "bar_label_value"}

    # Construct a K8s run launcher in a fake k8s environment.
    mock_k8s_client_batch_api = mock.Mock(spec_set=["read_namespaced_job"])
    mock_k8s_client_batch_api.read_namespaced_job.side_effect = [
        V1Job(status=V1JobStatus(failed=0, succeeded=0)),
        V1Job(status=V1JobStatus(failed=0, succeeded=1)),
        V1Job(status=V1JobStatus(failed=1, succeeded=0)),
    ]
    k8s_run_launcher = K8sRunLauncher(
        service_account_name="dagit-admin",
        instance_config_map="dagster-instance",
        postgres_password_secret="dagster-postgresql-secret",
        dagster_home="/opt/dagster/dagster_home",
        job_image="fake_job_image",
        load_incluster_config=False,
        kubeconfig_file=kubeconfig_file,
        k8s_client_batch_api=mock_k8s_client_batch_api,
        labels=labels,
    )

    # Create fake external pipeline.
    recon_pipeline = reconstructable(fake_pipeline)
    recon_repo = recon_pipeline.repository
    repo_def = recon_repo.get_definition()

    loadable_target_origin = LoadableTargetOrigin(python_file=__file__)

    with instance_for_test() as instance:
        with in_process_test_workspace(instance, loadable_target_origin) as workspace:
            location = workspace.get_repository_location(workspace.repository_location_names[0])
            repo_handle = RepositoryHandle(
                repository_name=repo_def.name,
                repository_location=location,
            )
            fake_external_pipeline = external_pipeline_from_recon_pipeline(
                recon_pipeline,
                solid_selection=None,
                repository_handle=repo_handle,
            )

            # Launch the run in a fake Dagster instance.
            pipeline_name = "demo_pipeline"
            run = create_run_for_test(
                instance,
                pipeline_name=pipeline_name,
                external_pipeline_origin=fake_external_pipeline.get_external_origin(),
                pipeline_code_origin=fake_external_pipeline.get_python_origin(),
            )
            k8s_run_launcher.register_instance(instance)

            # same order as side effects
            assert k8s_run_launcher.check_run_worker_health(run).status == WorkerStatus.RUNNING
            assert k8s_run_launcher.check_run_worker_health(run).status == WorkerStatus.SUCCESS
            assert k8s_run_launcher.check_run_worker_health(run).status == WorkerStatus.FAILED

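# --- Illustrative sketch (not from the source above): the unittest.mock behavior
# test_check_run_health relies on. When side_effect is a list, each successive
# call to the mock returns the next element in order, and a call past the end of
# the list raises StopIteration. spec_set restricts the mock to the named attributes.
from unittest import mock

_batch_api = mock.Mock(spec_set=["read_namespaced_job"])
_batch_api.read_namespaced_job.side_effect = ["running", "succeeded", "failed"]
assert _batch_api.read_namespaced_job() == "running"
assert _batch_api.read_namespaced_job() == "succeeded"
assert _batch_api.read_namespaced_job() == "failed"  # a fourth call would raise StopIteration
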
def get_full_external_pipeline(repo_yaml, pipeline_name):
    recon_repo = ReconstructableRepository.from_yaml(repo_yaml)
    repo_def = recon_repo.get_definition()
    external_repo = external_repo_from_def(
        repo_def,
        RepositoryHandle(
            repo_def.name,
            LocationHandle('<<MOCK_FOR_TEST>>', recon_repo.pointer),
        ),
    )
    return external_repo.get_full_external_pipeline(pipeline_name)

def __init__(self, recon_repo):
    self._recon_repo = check.inst_param(recon_repo, 'recon_repo', ReconstructableRepository)
    self._handle = RepositoryLocationHandle.create_in_process_location(recon_repo.pointer)

    repo_def = recon_repo.get_definition()
    def_name = repo_def.name
    self._external_repo = external_repo_from_def(
        repo_def,
        RepositoryHandle(repository_name=def_name, repository_location_handle=self._handle),
    )
    self._repositories = {self._external_repo.name: self._external_repo}

def __init__(self, handle):
    self._handle = check.inst_param(handle, "handle", InProcessRepositoryLocationHandle)
    self._recon_repo = self._handle.origin.recon_repo

    repo_def = self._recon_repo.get_definition()
    def_name = repo_def.name
    self._external_repo = external_repo_from_def(
        repo_def,
        RepositoryHandle(repository_name=def_name, repository_location_handle=self._handle),
    )
    self._repositories = {self._external_repo.name: self._external_repo}

def __init__(self, recon_repo, reloader=None):
    self._recon_repo = check.inst_param(recon_repo, 'recon_repo', ReconstructableRepository)
    self._handle = LocationHandle(self.name, recon_repo.pointer)
    self._external_repo = external_repo_from_def(
        recon_repo.get_definition(),
        RepositoryHandle(recon_repo.get_definition().name, self._handle),
    )
    self._repositories = {self._external_repo.name: self._external_repo}
    self.reloader = check.opt_inst_param(reloader, 'reloader', Reloader)

def sync_get_external_repositories(repository_location_handle):
    check.inst_param(
        repository_location_handle,
        'repository_location_handle',
        RepositoryLocationHandle,
    )
    check.param_invariant(
        isinstance(repository_location_handle, PythonEnvRepositoryLocationHandle),
        'repository_location_handle',
    )

    repos = []
    for key, pointer in repository_location_handle.repository_code_pointer_dict.items():
        with get_temp_file_name() as output_file:
            parts = [
                repository_location_handle.executable_path,
                '-m',
                'dagster',
                'api',
                'snapshot',
                'repository',
                output_file,
            ] + xplat_shlex_split(pointer.get_cli_args())

            execute_command_in_subprocess(parts)

            external_repository_data = read_unary_response(output_file)
            check.inst(external_repository_data, ExternalRepositoryData)

            repository_handle = RepositoryHandle(
                repository_name=external_repository_data.name,
                repository_key=key,
                repository_location_handle=repository_location_handle,
            )
            repos.append(ExternalRepository(external_repository_data, repository_handle))

    return repos

def sync_get_external_repository(location_handle):
    check.inst_param(location_handle, 'location_handle', LocationHandle)

    with get_temp_file_name() as output_file:
        parts = ['dagster', 'api', 'snapshot', 'repository', output_file] + xplat_shlex_split(
            location_handle.pointer.get_cli_args()
        )
        # subprocess.check_call raises CalledProcessError on a nonzero exit code
        # and returns 0 otherwise, so this invariant double-checks success.
        returncode = subprocess.check_call(parts)
        check.invariant(returncode == 0, 'dagster api cli invocation did not complete successfully')

        messages = list(ipc_read_event_stream(output_file))
        check.invariant(len(messages) == 1)

        external_repository_data = messages[0]
        check.inst(external_repository_data, ExternalRepositoryData)

        return ExternalRepository(
            external_repository_data,
            RepositoryHandle(external_repository_data.name, location_handle),
        )

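# --- Illustrative sketch (not from the source above): the subprocess.check_call
# contract that sync_get_external_repository depends on. It returns 0 when the
# command succeeds and raises CalledProcessError on a nonzero exit code, so the
# returncode invariant above can only ever observe 0. POSIX-only example commands.
import subprocess

assert subprocess.check_call(["true"]) == 0
try:
    subprocess.check_call(["false"])
except subprocess.CalledProcessError as err:
    assert err.returncode == 1
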
def _execute_backfill_command_at_location(cli_args, print_fn, instance, workspace, repo_location):
    external_repo = get_external_repository_from_repo_location(
        repo_location, cli_args.get("repository")
    )

    external_pipeline = get_external_pipeline_from_external_repo(
        external_repo,
        cli_args.get("pipeline"),
    )

    noprompt = cli_args.get("noprompt")

    pipeline_partition_set_names = {
        external_partition_set.name: external_partition_set
        for external_partition_set in external_repo.get_external_partition_sets()
        if external_partition_set.pipeline_name == external_pipeline.name
    }

    if not pipeline_partition_set_names:
        raise click.UsageError(
            "No partition sets found for pipeline `{}`".format(external_pipeline.name)
        )
    partition_set_name = cli_args.get("partition_set")
    if not partition_set_name:
        if len(pipeline_partition_set_names) == 1:
            partition_set_name = next(iter(pipeline_partition_set_names.keys()))
        elif noprompt:
            raise click.UsageError("No partition set specified (see option `--partition-set`)")
        else:
            partition_set_name = click.prompt(
                "Select a partition set to use for backfill: {}".format(
                    ", ".join(x for x in pipeline_partition_set_names.keys())
                )
            )

    partition_set = pipeline_partition_set_names.get(partition_set_name)

    if not partition_set:
        raise click.UsageError("No partition set found named `{}`".format(partition_set_name))

    run_tags = get_tags_from_args(cli_args)

    repo_handle = RepositoryHandle(
        repository_name=external_repo.name,
        repository_location=repo_location,
    )

    try:
        partition_names_or_error = repo_location.get_external_partition_names(
            repo_handle,
            partition_set_name,
        )
    except Exception:  # pylint: disable=broad-except
        error_info = serializable_error_info_from_exc_info(sys.exc_info())
        raise DagsterBackfillFailedError(
            "Failure fetching partition names for {partition_set_name}: {error_message}".format(
                partition_set_name=partition_set_name,
                error_message=error_info.message,
            ),
            serialized_error_info=error_info,
        )

    partition_names = gen_partition_names_from_args(
        partition_names_or_error.partition_names, cli_args
    )

    # Print backfill info
    print_fn("\n Pipeline: {}".format(external_pipeline.name))
    print_fn("Partition set: {}".format(partition_set_name))
    print_fn(" Partitions: {}\n".format(print_partition_format(partition_names, indent_level=15)))

    # Confirm and launch
    if noprompt or click.confirm(
        "Do you want to proceed with the backfill ({} partitions)?".format(len(partition_names))
    ):
        print_fn("Launching runs... \n")

        backfill_id = make_new_backfill_id()
        backfill_job = PartitionBackfill(
            backfill_id=backfill_id,
            partition_set_origin=partition_set.get_external_origin(),
            status=BulkActionStatus.REQUESTED,
            partition_names=partition_names,
            from_failure=False,
            reexecution_steps=None,
            tags=run_tags,
            backfill_timestamp=pendulum.now("UTC").timestamp(),
        )
        try:
            partition_execution_data = (
                repo_location.get_external_partition_set_execution_param_data(
                    repository_handle=repo_handle,
                    partition_set_name=partition_set_name,
                    partition_names=partition_names,
                )
            )
        except Exception:  # pylint: disable=broad-except
            error_info = serializable_error_info_from_exc_info(sys.exc_info())
            instance.add_backfill(
                backfill_job.with_status(BulkActionStatus.FAILED).with_error(error_info)
            )
            return print_fn("Backfill failed: {}".format(error_info))

        assert isinstance(partition_execution_data, ExternalPartitionSetExecutionParamData)

        for partition_data in partition_execution_data.partition_data:
            pipeline_run = create_backfill_run(
                instance,
                repo_location,
                external_pipeline,
                partition_set,
                backfill_job,
                partition_data,
            )
            if pipeline_run:
                instance.submit_run(pipeline_run.run_id, workspace)

        instance.add_backfill(backfill_job.with_status(BulkActionStatus.COMPLETED))

        print_fn("Launched backfill job `{}`".format(backfill_id))
    else:
        print_fn("Aborted!")

def execute_backfill_command(cli_args, print_fn, instance=None):
    instance = instance or DagsterInstance.get()
    repo_location = get_repository_location_from_kwargs(cli_args, instance)
    external_repo = get_external_repository_from_repo_location(
        repo_location, cli_args.get('repository')
    )

    external_pipeline = get_external_pipeline_from_external_repo(
        external_repo,
        cli_args.get('pipeline'),
    )

    noprompt = cli_args.get('noprompt')

    pipeline_partition_set_names = {
        external_partition_set.name: external_partition_set
        for external_partition_set in external_repo.get_external_partition_sets()
        if external_partition_set.pipeline_name == external_pipeline.name
    }

    if not pipeline_partition_set_names:
        raise click.UsageError(
            'No partition sets found for pipeline `{}`'.format(external_pipeline.name)
        )
    partition_set_name = cli_args.get('partition_set')
    if not partition_set_name:
        if len(pipeline_partition_set_names) == 1:
            partition_set_name = next(iter(pipeline_partition_set_names.keys()))
        elif noprompt:
            raise click.UsageError('No partition set specified (see option `--partition-set`)')
        else:
            partition_set_name = click.prompt(
                'Select a partition set to use for backfill: {}'.format(
                    ', '.join(x for x in pipeline_partition_set_names.keys())
                )
            )

    partition_set = pipeline_partition_set_names.get(partition_set_name)

    if not partition_set:
        raise click.UsageError('No partition set found named `{}`'.format(partition_set_name))

    mode = partition_set.mode
    solid_selection = partition_set.solid_selection

    repo_handle = RepositoryHandle(
        repository_name=external_repo.name,
        repository_location_handle=repo_location.location_handle,
    )

    # Resolve partitions to backfill
    partition_names_or_error = repo_location.get_external_partition_names(
        repo_handle,
        partition_set_name,
    )

    if isinstance(partition_names_or_error, ExternalPartitionExecutionErrorData):
        raise DagsterBackfillFailedError(
            'Failure fetching partition names for {partition_set_name}: {error_message}'.format(
                partition_set_name=partition_set_name,
                error_message=partition_names_or_error.error.message,
            ),
            serialized_error_info=partition_names_or_error.error,
        )

    partition_names = gen_partition_names_from_args(
        partition_names_or_error.partition_names, cli_args
    )

    # Print backfill info
    print_fn('\n Pipeline: {}'.format(external_pipeline.name))
    print_fn('Partition set: {}'.format(partition_set_name))
    print_fn(' Partitions: {}\n'.format(print_partition_format(partition_names, indent_level=15)))

    # Confirm and launch
    if noprompt or click.confirm(
        'Do you want to proceed with the backfill ({} partitions)?'.format(len(partition_names))
    ):
        print_fn('Launching runs... \n')
        backfill_id = make_new_backfill_id()

        run_tags = merge_dicts(
            PipelineRun.tags_for_backfill_id(backfill_id),
            get_tags_from_args(cli_args),
        )

        for partition_name in partition_names:
            run_config_or_error = repo_location.get_external_partition_config(
                repo_handle, partition_set_name, partition_name
            )
            if isinstance(run_config_or_error, ExternalPartitionExecutionErrorData):
                raise DagsterBackfillFailedError(
                    'Failure fetching run config for partition {partition_name} in {partition_set_name}: {error_message}'.format(
                        partition_name=partition_name,
                        partition_set_name=partition_set_name,
                        error_message=run_config_or_error.error.message,
                    ),
                    serialized_error_info=run_config_or_error.error,
                )

            tags_or_error = repo_location.get_external_partition_tags(
                repo_handle, partition_set_name, partition_name
            )
            if isinstance(tags_or_error, ExternalPartitionExecutionErrorData):
                raise DagsterBackfillFailedError(
                    'Failure fetching tags for partition {partition_name} in {partition_set_name}: {error_message}'.format(
                        partition_name=partition_name,
                        partition_set_name=partition_set_name,
                        error_message=tags_or_error.error.message,
                    ),
                    serialized_error_info=tags_or_error.error,
                )

            run = _create_external_pipeline_run(
                instance=instance,
                repo_location=repo_location,
                external_repo=external_repo,
                external_pipeline=external_pipeline,
                run_config=run_config_or_error.run_config,
                mode=mode,
                preset=None,
                tags=merge_dicts(tags_or_error.tags, run_tags),
                solid_selection=frozenset(solid_selection) if solid_selection else None,
            )

            instance.launch_run(run.run_id, external_pipeline)
            # Remove once we can handle synchronous execution... currently limited by sqlite
            time.sleep(0.1)

        print_fn('Launched backfill job `{}`'.format(backfill_id))
    else:
        print_fn('Aborted!')

def test_out_of_process_reload_location(self, graphql_context):
    result = execute_dagster_graphql(
        graphql_context, RELOAD_REPOSITORY_LOCATION_QUERY, {"repositoryLocationName": "test"}
    )

    assert result
    assert result.data
    assert result.data["reloadRepositoryLocation"]
    assert result.data["reloadRepositoryLocation"]["__typename"] == "RepositoryLocation"
    assert result.data["reloadRepositoryLocation"]["name"] == "test"
    assert result.data["reloadRepositoryLocation"]["repositories"] == [{"name": "test_repo"}]
    assert result.data["reloadRepositoryLocation"]["isReloadSupported"] is True

    with mock.patch(
        # note it is where the function is *used* that needs to be mocked, not
        # where it is defined.
        # see https://docs.python.org/3/library/unittest.mock.html#where-to-patch
        "dagster.core.host_representation.handle.sync_list_repositories_grpc"
    ) as cli_command_mock:
        with mock.patch(
            # note it is where the function is *used* that needs to be mocked, not
            # where it is defined.
            # see https://docs.python.org/3/library/unittest.mock.html#where-to-patch
            "dagster.core.host_representation.repository_location.sync_get_streaming_external_repositories_grpc"
        ) as external_repository_mock:

            @repository
            def new_repo():
                return []

            new_repo_data = external_repository_data_from_def(new_repo)

            external_repository_mock.return_value = [
                ExternalRepository(
                    new_repo_data,
                    RepositoryHandle(
                        "new_repo", graphql_context.repository_locations[0].location_handle
                    ),
                )
            ]

            cli_command_mock.return_value = ListRepositoriesResponse(
                repository_symbols=[],
                executable_path=sys.executable,
                repository_code_pointer_dict={
                    "new_repo": CodePointer.from_python_file(__file__, "new_repo", None)
                },
            )

            result = execute_dagster_graphql(
                graphql_context,
                RELOAD_REPOSITORY_LOCATION_QUERY,
                {"repositoryLocationName": "test"},
            )

            assert cli_command_mock.call_count == 1
            assert external_repository_mock.call_count == 1

            assert result.data["reloadRepositoryLocation"]["repositories"] == [
                {"name": "new_repo"}
            ]

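# --- Illustrative sketch (hypothetical module names): the "where to patch" rule
# referenced in the comments above. Patch the name in the namespace that looks
# it up at call time, not the module that defines it.
#
#   # pkg/util.py defines helper(); pkg/caller.py does `from pkg.util import helper`.
#   with mock.patch("pkg.caller.helper"):  # replaces the reference caller actually uses
#       ...
#   with mock.patch("pkg.util.helper"):    # leaves caller's already-bound reference intact
#       ...
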
def test_launcher_with_container_context(kubeconfig_file):
    # Construct a K8s run launcher in a fake k8s environment.
    mock_k8s_client_batch_api = mock.MagicMock()
    k8s_run_launcher = K8sRunLauncher(
        service_account_name="dagit-admin",
        instance_config_map="dagster-instance",
        postgres_password_secret="dagster-postgresql-secret",
        dagster_home="/opt/dagster/dagster_home",
        job_image="fake_job_image",
        load_incluster_config=False,
        kubeconfig_file=kubeconfig_file,
        k8s_client_batch_api=mock_k8s_client_batch_api,
        env_vars=["FOO_TEST"],
    )

    container_context_config = {"k8s": {"env_vars": ["BAR_TEST"]}}

    # Create fake external pipeline.
    recon_pipeline = reconstructable(fake_pipeline)
    recon_repo = recon_pipeline.repository
    repo_def = recon_repo.get_definition()

    python_origin = recon_pipeline.get_python_origin()
    python_origin = python_origin._replace(
        repository_origin=python_origin.repository_origin._replace(
            container_context=container_context_config,
        )
    )
    loadable_target_origin = LoadableTargetOrigin(python_file=__file__)

    with instance_for_test() as instance:
        with in_process_test_workspace(instance, loadable_target_origin) as workspace:
            location = workspace.get_repository_location(workspace.repository_location_names[0])
            repo_handle = RepositoryHandle(
                repository_name=repo_def.name,
                repository_location=location,
            )
            fake_external_pipeline = external_pipeline_from_recon_pipeline(
                recon_pipeline,
                solid_selection=None,
                repository_handle=repo_handle,
            )

            # Launch the run in a fake Dagster instance.
            pipeline_name = "demo_pipeline"
            run = create_run_for_test(
                instance,
                pipeline_name=pipeline_name,
                external_pipeline_origin=fake_external_pipeline.get_external_origin(),
                pipeline_code_origin=python_origin,
            )
            k8s_run_launcher.register_instance(instance)
            k8s_run_launcher.launch_run(LaunchRunContext(run, workspace))

            updated_run = instance.get_run_by_id(run.run_id)
            assert updated_run.tags[DOCKER_IMAGE_TAG] == "fake_job_image"

            # Check that the env vars from the container context were merged
            # with those on the launcher and passed down to the k8s job.
            mock_method_calls = mock_k8s_client_batch_api.method_calls
            assert len(mock_method_calls) > 0
            method_name, _args, kwargs = mock_method_calls[0]
            assert method_name == "create_namespaced_job"

            container = kwargs["body"].spec.template.spec.containers[0]

            env_names = [env.name for env in container.env]
            assert "BAR_TEST" in env_names
            assert "FOO_TEST" in env_names

            args = container.args
            assert (
                args
                == ExecuteRunArgs(
                    pipeline_origin=run.pipeline_code_origin,
                    pipeline_run_id=run.run_id,
                    instance_ref=instance.get_ref(),
                    set_exit_code_on_failure=None,
                ).get_command_args()
            )

def _execute_backfill_command_at_location(cli_args, print_fn, instance, repo_location):
    external_repo = get_external_repository_from_repo_location(
        repo_location, cli_args.get("repository")
    )

    external_pipeline = get_external_pipeline_from_external_repo(
        external_repo,
        cli_args.get("pipeline"),
    )

    noprompt = cli_args.get("noprompt")

    pipeline_partition_set_names = {
        external_partition_set.name: external_partition_set
        for external_partition_set in external_repo.get_external_partition_sets()
        if external_partition_set.pipeline_name == external_pipeline.name
    }

    if not pipeline_partition_set_names:
        raise click.UsageError(
            "No partition sets found for pipeline `{}`".format(external_pipeline.name)
        )
    partition_set_name = cli_args.get("partition_set")
    if not partition_set_name:
        if len(pipeline_partition_set_names) == 1:
            partition_set_name = next(iter(pipeline_partition_set_names.keys()))
        elif noprompt:
            raise click.UsageError("No partition set specified (see option `--partition-set`)")
        else:
            partition_set_name = click.prompt(
                "Select a partition set to use for backfill: {}".format(
                    ", ".join(x for x in pipeline_partition_set_names.keys())
                )
            )

    partition_set = pipeline_partition_set_names.get(partition_set_name)

    if not partition_set:
        raise click.UsageError("No partition set found named `{}`".format(partition_set_name))

    mode = partition_set.mode
    solid_selection = partition_set.solid_selection
    run_tags = get_tags_from_args(cli_args)

    repo_handle = RepositoryHandle(
        repository_name=external_repo.name,
        repository_location_handle=repo_location.location_handle,
    )

    # Resolve partitions to backfill
    partition_names_or_error = repo_location.get_external_partition_names(
        repo_handle,
        partition_set_name,
    )

    if isinstance(partition_names_or_error, ExternalPartitionExecutionErrorData):
        raise DagsterBackfillFailedError(
            "Failure fetching partition names for {partition_set_name}: {error_message}".format(
                partition_set_name=partition_set_name,
                error_message=partition_names_or_error.error.message,
            ),
            serialized_error_info=partition_names_or_error.error,
        )

    partition_names = gen_partition_names_from_args(
        partition_names_or_error.partition_names, cli_args
    )

    # Print backfill info
    print_fn("\n Pipeline: {}".format(external_pipeline.name))
    print_fn("Partition set: {}".format(partition_set_name))
    print_fn(" Partitions: {}\n".format(print_partition_format(partition_names, indent_level=15)))

    # Confirm and launch
    if noprompt or click.confirm(
        "Do you want to proceed with the backfill ({} partitions)?".format(len(partition_names))
    ):
        print_fn("Launching runs... \n")
        backfill_id = make_new_backfill_id()
        backfill_tags = PipelineRun.tags_for_backfill_id(backfill_id)

        partition_execution_data = repo_location.get_external_partition_set_execution_param_data(
            repository_handle=repo_handle,
            partition_set_name=partition_set_name,
            partition_names=partition_names,
        )

        if isinstance(partition_execution_data, ExternalPartitionExecutionErrorData):
            return print_fn("Backfill failed: {}".format(partition_execution_data.error))

        assert isinstance(partition_execution_data, ExternalPartitionSetExecutionParamData)

        for partition_data in partition_execution_data.partition_data:
            run = _create_external_pipeline_run(
                instance=instance,
                repo_location=repo_location,
                external_repo=external_repo,
                external_pipeline=external_pipeline,
                run_config=partition_data.run_config,
                mode=mode,
                preset=None,
                tags=merge_dicts(merge_dicts(partition_data.tags, backfill_tags), run_tags),
                solid_selection=frozenset(solid_selection) if solid_selection else None,
            )

            instance.launch_run(run.run_id, external_pipeline)

        print_fn("Launched backfill job `{}`".format(backfill_id))
    else:
        print_fn("Aborted!")

def test_user_defined_k8s_config_in_run_tags(kubeconfig_file):
    # Construct a K8s run launcher in a fake k8s environment.
    mock_k8s_client_batch_api = mock.MagicMock()
    k8s_run_launcher = K8sRunLauncher(
        service_account_name="dagit-admin",
        instance_config_map="dagster-instance",
        postgres_password_secret="dagster-postgresql-secret",
        dagster_home="/opt/dagster/dagster_home",
        job_image="fake_job_image",
        load_incluster_config=False,
        kubeconfig_file=kubeconfig_file,
        k8s_client_batch_api=mock_k8s_client_batch_api,
    )

    # Construct Dagster run tags with user defined k8s config.
    expected_resources = {
        "requests": {"cpu": "250m", "memory": "64Mi"},
        "limits": {"cpu": "500m", "memory": "2560Mi"},
    }
    user_defined_k8s_config = UserDefinedDagsterK8sConfig(
        container_config={"resources": expected_resources},
    )
    user_defined_k8s_config_json = json.dumps(user_defined_k8s_config.to_dict())
    tags = {"dagster-k8s/config": user_defined_k8s_config_json}

    # Create fake external pipeline.
    recon_pipeline = reconstructable(fake_pipeline)
    recon_repo = recon_pipeline.repository
    repo_def = recon_repo.get_definition()
    location_origin = InProcessRepositoryLocationOrigin(recon_repo)
    with location_origin.create_location() as location:
        repo_handle = RepositoryHandle(
            repository_name=repo_def.name,
            repository_location=location,
        )
        fake_external_pipeline = external_pipeline_from_recon_pipeline(
            recon_pipeline,
            solid_selection=None,
            repository_handle=repo_handle,
        )

        # Launch the run in a fake Dagster instance.
        with instance_for_test() as instance:
            pipeline_name = "demo_pipeline"
            run = create_run_for_test(instance, pipeline_name=pipeline_name, tags=tags)
            k8s_run_launcher.register_instance(instance)
            run = k8s_run_launcher.launch_run(run, fake_external_pipeline)

            updated_run = instance.get_run_by_id(run.run_id)
            assert updated_run.tags[DOCKER_IMAGE_TAG] == "fake_job_image"

            # Check that user defined k8s config was passed down to the k8s job.
            mock_method_calls = mock_k8s_client_batch_api.method_calls
            assert len(mock_method_calls) > 0
            method_name, _args, kwargs = mock_method_calls[0]
            assert method_name == "create_namespaced_job"
            job_resources = kwargs["body"].spec.template.spec.containers[0].resources
            assert job_resources == expected_resources

            assert DAGSTER_PG_PASSWORD_ENV_VAR in [
                env.name for env in kwargs["body"].spec.template.spec.containers[0].env
            ]

def test_user_defined_k8s_config_in_run_tags(kubeconfig_file):
    labels = {"foo_label_key": "bar_label_value"}

    # Construct a K8s run launcher in a fake k8s environment.
    mock_k8s_client_batch_api = mock.MagicMock()
    celery_k8s_run_launcher = CeleryK8sRunLauncher(
        instance_config_map="dagster-instance",
        postgres_password_secret="dagster-postgresql-secret",
        dagster_home="/opt/dagster/dagster_home",
        load_incluster_config=False,
        kubeconfig_file=kubeconfig_file,
        k8s_client_batch_api=mock_k8s_client_batch_api,
        labels=labels,
    )

    # Construct Dagster run tags with user defined k8s config.
    expected_resources = {
        "requests": {"cpu": "250m", "memory": "64Mi"},
        "limits": {"cpu": "500m", "memory": "2560Mi"},
    }
    user_defined_k8s_config = UserDefinedDagsterK8sConfig(
        container_config={"resources": expected_resources},
    )
    user_defined_k8s_config_json = json.dumps(user_defined_k8s_config.to_dict())
    tags = {"dagster-k8s/config": user_defined_k8s_config_json}

    # Create fake external pipeline.
    recon_pipeline = reconstructable(fake_pipeline)
    recon_repo = recon_pipeline.repository
    with instance_for_test() as instance:
        with in_process_test_workspace(instance, recon_repo) as workspace:
            location = workspace.get_repository_location(workspace.repository_location_names[0])
            repo_def = recon_repo.get_definition()
            repo_handle = RepositoryHandle(
                repository_name=repo_def.name,
                repository_location=location,
            )
            fake_external_pipeline = external_pipeline_from_recon_pipeline(
                recon_pipeline,
                solid_selection=None,
                repository_handle=repo_handle,
            )
            celery_k8s_run_launcher.register_instance(instance)

            pipeline_name = "demo_pipeline"
            run_config = {"execution": {"celery-k8s": {"config": {"job_image": "fake-image-name"}}}}
            run = create_run_for_test(
                instance,
                pipeline_name=pipeline_name,
                run_config=run_config,
                tags=tags,
                external_pipeline_origin=fake_external_pipeline.get_external_origin(),
                pipeline_code_origin=fake_external_pipeline.get_python_origin(),
            )
            celery_k8s_run_launcher.launch_run(LaunchRunContext(run, workspace))

            updated_run = instance.get_run_by_id(run.run_id)
            assert updated_run.tags[DOCKER_IMAGE_TAG] == "fake-image-name"

            # Check that user defined k8s config was passed down to the k8s job.
            mock_method_calls = mock_k8s_client_batch_api.method_calls
            assert len(mock_method_calls) > 0
            method_name, _args, kwargs = mock_method_calls[0]
            assert method_name == "create_namespaced_job"

            container = kwargs["body"].spec.template.spec.containers[0]
            job_resources = container.resources
            assert job_resources == expected_resources

            labels = kwargs["body"].spec.template.metadata.labels
            assert labels["foo_label_key"] == "bar_label_value"

            args = container.args
            assert (
                args
                == ExecuteRunArgs(
                    pipeline_origin=run.pipeline_code_origin,
                    pipeline_run_id=run.run_id,
                    instance_ref=instance.get_ref(),
                    set_exit_code_on_failure=None,
                ).get_command_args()
            )

def test_raise_on_error(kubeconfig_file):
    mock_k8s_client_batch_api = mock.MagicMock()
    celery_k8s_run_launcher = CeleryK8sRunLauncher(
        instance_config_map="dagster-instance",
        postgres_password_secret="dagster-postgresql-secret",
        dagster_home="/opt/dagster/dagster_home",
        load_incluster_config=False,
        kubeconfig_file=kubeconfig_file,
        k8s_client_batch_api=mock_k8s_client_batch_api,
        fail_pod_on_run_failure=True,
    )

    # Create fake external pipeline.
    recon_pipeline = reconstructable(fake_pipeline)
    recon_repo = recon_pipeline.repository
    with instance_for_test() as instance:
        with in_process_test_workspace(instance, recon_repo) as workspace:
            location = workspace.get_repository_location(workspace.repository_location_names[0])
            repo_def = recon_repo.get_definition()
            repo_handle = RepositoryHandle(
                repository_name=repo_def.name,
                repository_location=location,
            )
            fake_external_pipeline = external_pipeline_from_recon_pipeline(
                recon_pipeline,
                solid_selection=None,
                repository_handle=repo_handle,
            )
            celery_k8s_run_launcher.register_instance(instance)

            pipeline_name = "demo_pipeline"
            run_config = {"execution": {"celery-k8s": {"config": {"job_image": "fake-image-name"}}}}
            run = create_run_for_test(
                instance,
                pipeline_name=pipeline_name,
                run_config=run_config,
                external_pipeline_origin=fake_external_pipeline.get_external_origin(),
                pipeline_code_origin=fake_external_pipeline.get_python_origin(),
            )
            celery_k8s_run_launcher.launch_run(LaunchRunContext(run, workspace))

            # Check the command-line args passed down to the k8s job.
            mock_method_calls = mock_k8s_client_batch_api.method_calls
            assert len(mock_method_calls) > 0
            method_name, _args, kwargs = mock_method_calls[0]
            assert method_name == "create_namespaced_job"

            container = kwargs["body"].spec.template.spec.containers[0]
            args = container.args
            assert (
                args
                == ExecuteRunArgs(
                    pipeline_origin=run.pipeline_code_origin,
                    pipeline_run_id=run.run_id,
                    instance_ref=instance.get_ref(),
                    set_exit_code_on_failure=True,
                ).get_command_args()
            )