def create_run(instance, **kwargs):  # pylint: disable=redefined-outer-name
    """Create a test run of the "foo" pipeline on ``instance``.

    The external origin comes from the handle yielded by
    ``get_foo_pipeline_handle``; any extra keyword arguments are forwarded
    unchanged to ``create_run_for_test``. Nothing is returned.
    """
    with get_foo_pipeline_handle() as foo_handle:
        foo_origin = foo_handle.get_external_origin()
        create_run_for_test(
            instance,
            external_pipeline_origin=foo_origin,
            pipeline_name="foo",
            **kwargs,
        )
def create_run(instance, **kwargs):
    """Register a run for pipeline "foo" on ``instance``.

    Extra keyword arguments are passed straight through to
    ``create_run_for_test``. The created run is not returned.
    """
    with get_foo_pipeline_handle() as handle:
        external_origin = handle.get_external_origin()
        create_run_for_test(
            instance,
            external_pipeline_origin=external_origin,
            pipeline_name="foo",
            **kwargs,
        )
def test_execute_run_fail_pipeline():
    """Check exit codes and output when executing the "fail" pipeline.

    Covers three invocations: a default one (exit code 0 expected), one with
    ``set_exit_code_on_failure=True`` (non-zero expected), and a repeat of
    the latter on the already-executed run (non-zero expected).
    """
    noop_compute_logs = {
        "compute_logs": {
            "module": "dagster.core.storage.noop_compute_log_manager",
            "class": "NoOpComputeLogManager",
        }
    }

    with get_bar_repo_handle() as repo_handle:
        pipeline_handle = PipelineHandle("fail", repo_handle)
        runner = CliRunner()

        with instance_for_test(overrides=noop_compute_logs) as instance:
            # The context-managed instance is immediately rebound to whatever
            # DagsterInstance.get() returns.
            instance = DagsterInstance.get()

            # 1) Default arguments: the run fails but the process exits with 0.
            run = create_run_for_test(instance, pipeline_name="foo", run_id="new_run")
            default_args = ExecuteRunArgs(
                pipeline_origin=pipeline_handle.get_python_origin(),
                pipeline_run_id=run.run_id,
                instance_ref=instance.get_ref(),
            )
            input_json = serialize_dagster_namedtuple(default_args)

            result = runner_execute_run(runner, [input_json])

            assert result.exit_code == 0
            assert "RUN_FAILURE" in result.stdout, "no match, result: {}".format(result)

            # 2) set_exit_code_on_failure=True: the failure surfaces in the exit code.
            run = create_run_for_test(
                instance, pipeline_name="foo", run_id="new_run_raise_on_error"
            )
            raising_args = ExecuteRunArgs(
                pipeline_origin=pipeline_handle.get_python_origin(),
                pipeline_run_id=run.run_id,
                instance_ref=instance.get_ref(),
                set_exit_code_on_failure=True,
            )
            input_json_raise_on_failure = serialize_dagster_namedtuple(raising_args)

            result = runner.invoke(api.execute_run_command, [input_json_raise_on_failure])

            assert result.exit_code != 0, str(result.stdout)
            assert "RUN_FAILURE" in result.stdout, "no match, result: {}".format(result)

            # 3) Framework errors (e.g. running a run that has already run)
            # also result in a non-zero error code.
            result = runner.invoke(api.execute_run_command, [input_json_raise_on_failure])
            assert result.exit_code != 0, str(result.stdout)
def test_k8s_executor_config_override(kubeconfig_file):
    """Check which container image the launcher puts on the created k8s job.

    Two runs are launched against a mocked batch API: one with an empty
    ``celery-k8s`` executor config and one that sets ``job_image``. The first
    job is expected to use "my_image:tag", the second "fake-image-name".
    """
    # Fake k8s environment: every batch API call is recorded on a MagicMock.
    batch_api_mock = mock.MagicMock()
    launcher = CeleryK8sRunLauncher(
        instance_config_map="dagster-instance",
        postgres_password_secret="dagster-postgresql-secret",
        dagster_home="/opt/dagster/dagster_home",
        load_incluster_config=False,
        kubeconfig_file=kubeconfig_file,
        k8s_client_batch_api=batch_api_mock,
    )

    external_pipeline = get_test_project_external_pipeline("demo_pipeline", "my_image:tag")

    # Launch the runs in a fake Dagster instance.
    with instance_for_test() as instance:
        launcher.register_instance(instance)
        pipeline_name = "demo_pipeline"

        # First launch: no custom job_image.
        default_config = {"execution": {"celery-k8s": {}}}
        run = create_run_for_test(
            instance,
            pipeline_name=pipeline_name,
            run_config=default_config,
        )
        launcher.launch_run(run, external_pipeline)

        # Second launch: custom job_image in the executor config.
        override_config = {
            "execution": {"celery-k8s": {"config": {"job_image": "fake-image-name"}}}
        }
        run = create_run_for_test(
            instance,
            pipeline_name=pipeline_name,
            run_config=override_config,
        )
        launcher.launch_run(run, external_pipeline)

        # Inspect the recorded batch API calls, in launch order.
        recorded_calls = batch_api_mock.method_calls
        assert len(recorded_calls) > 0

        _, _args, kwargs = recorded_calls[0]
        first_container = kwargs["body"].spec.template.spec.containers[0]
        assert first_container.image == "my_image:tag"

        _, _args, kwargs = recorded_calls[1]
        second_container = kwargs["body"].spec.template.spec.containers[0]
        assert second_container.image == "fake-image-name"
def create_invalid_run(instance, **kwargs):
    """Create a test run whose external origin points at module "fake".

    The origin is built from ``ModuleCodePointer("fake", "fake")``, repository
    name "foo" and pipeline name "wrong-pipeline". Extra keyword arguments are
    forwarded to ``create_run_for_test``. Nothing is returned.
    """
    fake_pointer = ModuleCodePointer("fake", "fake")
    location_origin = InProcessRepositoryLocationOrigin(
        ReconstructableRepository(fake_pointer)
    )
    repository_origin = ExternalRepositoryOrigin(location_origin, "foo")
    pipeline_origin = ExternalPipelineOrigin(repository_origin, "wrong-pipeline")

    create_run_for_test(
        instance,
        external_pipeline_origin=pipeline_origin,
        pipeline_name="wrong-pipeline",
        **kwargs,
    )
def test_raise_on_error(kubeconfig_file):
    """Check the job args when the launcher has ``fail_pod_on_run_failure=True``.

    The container args of the created job are expected to equal the command
    args of an ``ExecuteRunArgs`` built with ``set_exit_code_on_failure=True``.
    """
    # Fake k8s environment: batch API calls are recorded on a MagicMock.
    batch_api_mock = mock.MagicMock()
    k8s_run_launcher = K8sRunLauncher(
        service_account_name="dagit-admin",
        instance_config_map="dagster-instance",
        postgres_password_secret="dagster-postgresql-secret",
        dagster_home="/opt/dagster/dagster_home",
        job_image="fake_job_image",
        load_incluster_config=False,
        kubeconfig_file=kubeconfig_file,
        k8s_client_batch_api=batch_api_mock,
        fail_pod_on_run_failure=True,
    )

    # Build a fake external pipeline out of the pipeline defined in this file.
    recon_pipeline = reconstructable(fake_pipeline)
    recon_repo = recon_pipeline.repository
    repo_def = recon_repo.get_definition()
    loadable_target_origin = LoadableTargetOrigin(python_file=__file__)

    with instance_for_test() as instance:
        with in_process_test_workspace(instance, loadable_target_origin) as workspace:
            first_location_name = workspace.repository_location_names[0]
            location = workspace.get_repository_location(first_location_name)
            repo_handle = RepositoryHandle(
                repository_name=repo_def.name,
                repository_location=location,
            )
            fake_external_pipeline = external_pipeline_from_recon_pipeline(
                recon_pipeline,
                solid_selection=None,
                repository_handle=repo_handle,
            )

            # Launch the run in the fake Dagster instance.
            pipeline_name = "demo_pipeline"
            run = create_run_for_test(
                instance,
                pipeline_name=pipeline_name,
                external_pipeline_origin=fake_external_pipeline.get_external_origin(),
                pipeline_code_origin=fake_external_pipeline.get_python_origin(),
            )
            k8s_run_launcher.register_instance(instance)
            k8s_run_launcher.launch_run(LaunchRunContext(run, workspace))

            recorded_calls = batch_api_mock.method_calls
            assert len(recorded_calls) > 0

            method_name, _args, kwargs = recorded_calls[0]
            assert method_name == "create_namespaced_job"

            container = kwargs["body"].spec.template.spec.containers[0]
            args = container.args
            expected_args = ExecuteRunArgs(
                pipeline_origin=run.pipeline_code_origin,
                pipeline_run_id=run.run_id,
                instance_ref=instance.get_ref(),
                set_exit_code_on_failure=True,
            ).get_command_args()
            assert args == expected_args
def test_execute_on_celery(  # pylint: disable=redefined-outer-name
    dagster_docker_image, dagster_instance, helm_namespace
):
    """Launch 'demo_pipeline_celery' and check the job logs report success."""
    environments_dir = test_project_environments_path()
    yaml_config = merge_yamls(
        [
            os.path.join(environments_dir, 'env.yaml'),
            os.path.join(test_project_environments_path(), 'env_s3.yaml'),
        ]
    )
    celery_config = get_celery_engine_config(
        dagster_docker_image=dagster_docker_image, helm_namespace=helm_namespace
    )
    run_config = merge_dicts(yaml_config, celery_config)

    pipeline_name = 'demo_pipeline_celery'
    run = create_run_for_test(
        dagster_instance,
        pipeline_name=pipeline_name,
        run_config=run_config,
        mode='default',
    )

    dagster_instance.launch_run(run.run_id, get_test_project_external_pipeline(pipeline_name))

    job_name = 'dagster-run-%s' % run.run_id
    result = wait_for_job_and_get_logs(job_name=job_name, namespace=helm_namespace)

    assert not result.get('errors')
    assert result['data']
    typename = result['data']['executeRunInProcess']['__typename']
    assert typename == 'ExecuteRunInProcessSuccess', 'no match, result: {}'.format(result)
def test_execute_on_celery_k8s_default(  # pylint: disable=redefined-outer-name
    dagster_docker_image, dagster_instance, helm_namespace
):
    """Launch "demo_pipeline_celery" and check success plus the image tag.

    After the job's raw logs show PIPELINE_SUCCESS, the stored run is expected
    to carry ``DOCKER_IMAGE_TAG`` equal to ``dagster_docker_image``.
    """
    yaml_paths = [
        os.path.join(get_test_project_environments_path(), "env.yaml"),
        os.path.join(get_test_project_environments_path(), "env_s3.yaml"),
    ]
    run_config = merge_dicts(
        merge_yamls(yaml_paths),
        get_celery_engine_config(
            dagster_docker_image=dagster_docker_image, job_namespace=helm_namespace
        ),
    )

    pipeline_name = "demo_pipeline_celery"
    run = create_run_for_test(
        dagster_instance,
        pipeline_name=pipeline_name,
        run_config=run_config,
        mode="default",
    )

    with get_test_project_external_pipeline(pipeline_name) as external_pipeline:
        reoriginated = ReOriginatedExternalPipelineForTest(external_pipeline)
        dagster_instance.launch_run(run.run_id, reoriginated)

        job_name = "dagster-run-%s" % run.run_id
        result = wait_for_job_and_get_raw_logs(job_name=job_name, namespace=helm_namespace)

        assert "PIPELINE_SUCCESS" in result, "no match, result: {}".format(result)

        updated_run = dagster_instance.get_run_by_id(run.run_id)
        assert updated_run.tags[DOCKER_IMAGE_TAG] == dagster_docker_image
def test_single():
    """Check captured stdout for in-process steps and for the outer run.

    Each step's log file must equal ``expected_inner_output(step_key)``; the
    log keyed by the pipeline name must start with ``expected_outer_prefix()``.
    """
    instance = DagsterInstance.local_temp()
    pipeline_name = 'foo_pipeline'
    pipeline_run = create_run_for_test(instance, pipeline_name=pipeline_name)
    log_manager = instance.compute_log_manager

    step_keys = ['A', 'B', 'C']

    with instance.compute_log_manager.watch(pipeline_run):
        for line in ('outer 1', 'outer 2', 'outer 3'):
            print(line)

        for key in step_keys:
            inner_step(instance, pipeline_run, key)

    for key in step_keys:
        step_stdout = log_manager.read_logs_file(pipeline_run.run_id, key, ComputeIOType.STDOUT)
        assert normalize_file_content(step_stdout.data) == expected_inner_output(key)

    full_out = log_manager.read_logs_file(
        pipeline_run.run_id, pipeline_name, ComputeIOType.STDOUT
    )
    assert normalize_file_content(full_out.data).startswith(expected_outer_prefix())
def test_sync_run_launcher_run():
    """Launch "noop_pipeline" via SyncInMemoryRunLauncher and expect success."""
    launcher_overrides = {
        "run_launcher": {
            "module": "dagster.core.launcher.sync_in_memory_run_launcher",
            "class": "SyncInMemoryRunLauncher",
        }
    }

    with seven.TemporaryDirectory() as temp_dir:
        instance = DagsterInstance.local_temp(temp_dir, overrides=launcher_overrides)

        external_repo = get_main_external_repo(instance)
        external_pipeline = external_repo.get_full_external_pipeline("noop_pipeline")

        created_run = create_run_for_test(
            instance=instance, pipeline_name=external_pipeline.name
        )
        launched_run = instance.run_launcher.launch_run(
            instance=instance, run=created_run, external_pipeline=external_pipeline
        )

        completed_run = instance.get_run_by_id(launched_run.run_id)
        assert completed_run.is_success
def test_k8s_run_launcher(
    dagster_instance_for_k8s_run_launcher, helm_namespace_for_k8s_run_launcher
):
    """Launch "demo_pipeline" through the instance and expect PIPELINE_SUCCESS."""
    instance = dagster_instance_for_k8s_run_launcher
    env_yaml = os.path.join(get_test_project_environments_path(), "env.yaml")
    run_config = load_yaml_from_path(env_yaml)
    pipeline_name = "demo_pipeline"

    with get_test_project_workspace_and_external_pipeline(instance, pipeline_name) as (
        workspace,
        external_pipeline,
    ):
        reoriginated_pipeline = ReOriginatedExternalPipelineForTest(external_pipeline)
        run = create_run_for_test(
            instance,
            pipeline_name=pipeline_name,
            run_config=run_config,
            mode="default",
            external_pipeline_origin=reoriginated_pipeline.get_external_origin(),
            pipeline_code_origin=reoriginated_pipeline.get_python_origin(),
        )

        instance.launch_run(run.run_id, workspace)

        result = wait_for_job_and_get_raw_logs(
            job_name="dagster-run-%s" % run.run_id,
            namespace=helm_namespace_for_k8s_run_launcher,
        )

        assert "PIPELINE_SUCCESS" in result, "no match, result: {}".format(result)
def test_k8s_run_launcher_default(
    dagster_instance_for_k8s_run_launcher, helm_namespace_for_k8s_run_launcher, dagster_docker_image
):
    """Launch "demo_k8s_executor_pipeline" with the k8s executor and expect success.

    Also checks that the stored run's ``DOCKER_IMAGE_TAG`` equals
    ``get_test_project_docker_image()``.
    """
    instance = dagster_instance_for_k8s_run_launcher
    namespace = helm_namespace_for_k8s_run_launcher

    # sanity check that we have a K8sRunLauncher
    check.inst(instance.run_launcher, K8sRunLauncher)
    pods = DagsterKubernetesClient.production_client().core_api.list_namespaced_pod(
        namespace=namespace
    )
    # No pod in this namespace may be a celery worker.
    celery_pod_names = [
        pod.metadata.name for pod in pods.items if "celery-workers" in pod.metadata.name
    ]
    check.invariant(not celery_pod_names)

    env_yaml = load_yaml_from_path(
        os.path.join(get_test_project_environments_path(), "env.yaml")
    )
    env_s3_yaml = load_yaml_from_path(
        os.path.join(get_test_project_environments_path(), "env_s3.yaml")
    )
    executor_config = {
        "execution": {
            "k8s": {
                "config": {
                    "job_namespace": namespace,
                    "job_image": dagster_docker_image,
                    "image_pull_policy": image_pull_policy(),
                    "env_config_maps": ["dagster-pipeline-env"]
                    + ([TEST_AWS_CONFIGMAP_NAME] if not IS_BUILDKITE else []),
                }
            }
        },
    }
    run_config = merge_dicts(env_yaml, env_s3_yaml, executor_config)

    pipeline_name = "demo_k8s_executor_pipeline"
    tags = {"key": "value"}

    with get_test_project_location_and_external_pipeline(pipeline_name) as (
        location,
        external_pipeline,
    ):
        external_plan = location.get_external_execution_plan(
            external_pipeline, run_config, "default", None, None
        )
        run = create_run_for_test(
            instance,
            pipeline_name=pipeline_name,
            run_config=run_config,
            tags=tags,
            mode="default",
            pipeline_snapshot=external_pipeline.pipeline_snapshot,
            execution_plan_snapshot=external_plan.execution_plan_snapshot,
        )
        instance.launch_run(
            run.run_id,
            ReOriginatedExternalPipelineForTest(external_pipeline),
        )

        result = wait_for_job_and_get_raw_logs(
            job_name="dagster-run-%s" % run.run_id, namespace=namespace
        )

        assert "PIPELINE_SUCCESS" in result, "no match, result: {}".format(result)

        updated_run = instance.get_run_by_id(run.run_id)
        assert updated_run.tags[DOCKER_IMAGE_TAG] == get_test_project_docker_image()
def test_multi():
    """Check captured stdout when each step runs in its own child process.

    Child processes are started and joined one at a time while the run is
    being watched by the compute log manager.
    """
    instance = DagsterInstance.local_temp()
    pipeline_name = 'foo_pipeline'
    pipeline_run = create_run_for_test(instance, pipeline_name=pipeline_name)
    context = get_multiprocessing_context()
    log_manager = instance.compute_log_manager

    step_keys = ['A', 'B', 'C']

    with instance.compute_log_manager.watch(pipeline_run):
        for line in ('outer 1', 'outer 2', 'outer 3'):
            print(line)

        for key in step_keys:
            child = context.Process(
                target=execute_inner, args=(key, pipeline_run, instance.get_ref())
            )
            child.start()
            child.join()

    for key in step_keys:
        step_stdout = log_manager.read_logs_file(pipeline_run.run_id, key, ComputeIOType.STDOUT)
        assert normalize_file_content(step_stdout.data) == expected_inner_output(key)

    full_out = log_manager.read_logs_file(
        pipeline_run.run_id, pipeline_name, ComputeIOType.STDOUT
    )

    # The way that the multiprocess compute-logging interacts with pytest (which stubs out the
    # sys.stdout fileno) makes this difficult to test. The pytest-captured stdout only captures
    # the stdout from the outer process, not also the inner process
    assert normalize_file_content(full_out.data).startswith(expected_outer_prefix())
def test_k8s_run_launcher_default(
    dagster_instance_for_k8s_run_launcher, helm_namespace_for_k8s_run_launcher
):
    """Launch "demo_pipeline" with the K8sRunLauncher and expect PIPELINE_SUCCESS."""
    instance = dagster_instance_for_k8s_run_launcher
    namespace = helm_namespace_for_k8s_run_launcher

    # sanity check that we have a K8sRunLauncher
    check.inst(instance.run_launcher, K8sRunLauncher)
    pods = DagsterKubernetesClient.production_client().core_api.list_namespaced_pod(
        namespace=namespace
    )
    # No pod in this namespace may be a celery worker.
    celery_pod_names = [
        pod.metadata.name for pod in pods.items if "celery-workers" in pod.metadata.name
    ]
    check.invariant(not celery_pod_names)

    env_yaml = os.path.join(test_project_environments_path(), "env.yaml")
    run_config = load_yaml_from_path(env_yaml)
    pipeline_name = "demo_pipeline"
    tags = {"key": "value"}
    run = create_run_for_test(
        instance,
        pipeline_name=pipeline_name,
        run_config=run_config,
        tags=tags,
        mode="default",
    )

    external_pipeline = get_test_project_external_pipeline(pipeline_name)
    instance.launch_run(run.run_id, ReOriginatedExternalPipelineForTest(external_pipeline))

    result = wait_for_job_and_get_raw_logs(
        job_name="dagster-run-%s" % run.run_id, namespace=namespace
    )

    assert "PIPELINE_SUCCESS" in result, "no match, result: {}".format(result)
def test_k8s_run_launcher_celery(dagster_instance, helm_namespace):
    """Launch 'demo_pipeline_celery' and check the job logs report a started run.

    The merged config must contain a 'celery-k8s' execution entry before the
    run is created.
    """
    yaml_paths = [
        os.path.join(test_project_environments_path(), 'env.yaml'),
        os.path.join(test_project_environments_path(), 'env_s3.yaml'),
    ]
    environment_dict = merge_dicts(merge_yamls(yaml_paths), get_celery_engine_config())

    assert 'celery-k8s' in environment_dict['execution']

    pipeline_name = 'demo_pipeline_celery'
    tags = {'key': 'value'}
    run = create_run_for_test(
        dagster_instance,
        pipeline_name=pipeline_name,
        environment_dict=environment_dict,
        tags=tags,
        mode='default',
    )

    dagster_instance.launch_run(run.run_id)

    job_name = 'dagster-run-%s' % run.run_id
    result = wait_for_job_and_get_logs(job_name=job_name, namespace=helm_namespace)

    assert not result.get('errors')
    assert result['data']
    typename = result['data']['startPipelineExecutionForCreatedRun']['__typename']
    assert typename == 'StartPipelineRunSuccess'
def test_submit_run():
    """Submit a run with the mocked coordinator and check it lands in the queue.

    Exactly one queued run is expected, with run id "foo-bar".
    """
    coordinator_overrides = {
        "run_coordinator": {
            "module": "dagster.core.test_utils",
            "class": "MockedRunCoordinator",
        }
    }

    with instance_for_test(overrides=coordinator_overrides) as instance:
        with get_bar_workspace(instance) as workspace:
            location = workspace.get_repository_location("bar_repo_location")
            repository = location.get_repository("bar_repo")
            external_pipeline = repository.get_full_external_pipeline("foo")

            run = create_run_for_test(
                instance=instance,
                pipeline_name=external_pipeline.name,
                run_id="foo-bar",
                external_pipeline_origin=external_pipeline.get_external_origin(),
                pipeline_code_origin=external_pipeline.get_python_origin(),
            )

            instance.submit_run(run.run_id, workspace)

            assert len(instance.run_coordinator.queue()) == 1
            assert instance.run_coordinator.queue()[0].run_id == "foo-bar"
def test_execute_run():
    """Execute the "foo" pipeline via the CLI API and expect PIPELINE_SUCCESS.

    The command is then invoked a second time with the same arguments and is
    expected to exit with code 0.
    """
    noop_compute_logs = {
        "compute_logs": {
            "module": "dagster.core.storage.noop_compute_log_manager",
            "class": "NoOpComputeLogManager",
        }
    }

    with get_foo_pipeline_handle() as pipeline_handle:
        runner = CliRunner()

        with instance_for_test(overrides=noop_compute_logs) as instance:
            # The context-managed instance is immediately rebound to whatever
            # DagsterInstance.get() returns.
            instance = DagsterInstance.get()
            run = create_run_for_test(instance, pipeline_name="foo", run_id="new_run")

            run_args = ExecuteRunArgs(
                pipeline_origin=pipeline_handle.get_python_origin(),
                pipeline_run_id=run.run_id,
                instance_ref=instance.get_ref(),
            )
            input_json = serialize_dagster_namedtuple(run_args)

            result = runner_execute_run(runner, [input_json])

            assert "PIPELINE_SUCCESS" in result.stdout, "no match, result: {}".format(
                result.stdout
            )

            # Re-invoke the command for the run that has already executed.
            # set_exit_code_on_failure is not set on these args, and this test
            # expects the process to exit with code 0.
            # NOTE(review): the previous comment here said "non-zero error
            # code", contradicting the assertion below - confirm which is intended.
            result = runner.invoke(api.execute_run_command, [input_json])
            assert result.exit_code == 0
def test_execute_run_with_structured_logs():
    """Execute the "foo" pipeline with structured logs and expect PIPELINE_SUCCESS."""
    noop_compute_logs = {
        "compute_logs": {
            "module": "dagster.core.storage.noop_compute_log_manager",
            "class": "NoOpComputeLogManager",
        }
    }

    with get_foo_pipeline_handle() as pipeline_handle:
        runner = CliRunner()

        with instance_for_test(overrides=noop_compute_logs) as instance:
            # The context-managed instance is immediately rebound to whatever
            # DagsterInstance.get() returns.
            instance = DagsterInstance.get()
            run = create_run_for_test(instance, pipeline_name="foo", run_id="new_run")

            run_args = ExecuteRunArgs(
                pipeline_origin=pipeline_handle.get_origin(),
                pipeline_run_id=run.run_id,
                instance_ref=instance.get_ref(),
            )
            input_json = serialize_dagster_namedtuple(run_args)

            result = runner_execute_run_with_structured_logs(runner, [input_json])

            assert "PIPELINE_SUCCESS" in result.stdout, "no match, result: {}".format(result)
def test_execute_on_celery_resource_requirements(  # pylint: disable=redefined-outer-name
    dagster_docker_image, dagster_instance, helm_namespace
):
    """Launch 'resources_limit_pipeline_celery' and check the logs report success."""
    s3_yaml = os.path.join(test_project_environments_path(), 'env_s3.yaml')
    celery_k8s_config = {
        'execution': {
            'celery-k8s': {
                'config': {
                    'job_image': dagster_docker_image,
                    'job_namespace': helm_namespace,
                    'image_pull_policy': 'Always',
                    'env_config_maps': ['dagster-pipeline-env'],
                }
            }
        },
    }
    run_config = merge_dicts(merge_yamls([s3_yaml]), celery_k8s_config)

    pipeline_name = 'resources_limit_pipeline_celery'
    run = create_run_for_test(
        dagster_instance,
        pipeline_name=pipeline_name,
        run_config=run_config,
        mode='default',
    )

    dagster_instance.launch_run(run.run_id, get_test_project_external_pipeline(pipeline_name))

    job_name = 'dagster-run-%s' % run.run_id
    result = wait_for_job_and_get_logs(job_name=job_name, namespace=helm_namespace)

    assert not result.get('errors')
    assert result['data']
    typename = result['data']['executeRunInProcess']['__typename']
    assert typename == 'ExecuteRunInProcessSuccess', 'no match, result: {}'.format(result)
def test_sync_run_launcher_run():
    """Launch "noop_pipeline" through the instance with SyncInMemoryRunLauncher.

    The run fetched back by id after launching is expected to be successful.
    """
    launcher_overrides = {
        "run_launcher": {
            "module": "dagster.core.launcher.sync_in_memory_run_launcher",
            "class": "SyncInMemoryRunLauncher",
        }
    }

    with instance_for_test(overrides=launcher_overrides) as instance:
        with get_main_workspace(instance) as workspace:
            location = workspace.get_repository_location(main_repo_location_name())
            external_repo = location.get_repository(main_repo_name())
            external_pipeline = external_repo.get_full_external_pipeline("noop_pipeline")

            created_run = create_run_for_test(
                instance=instance,
                pipeline_name=external_pipeline.name,
                external_pipeline_origin=external_pipeline.get_external_origin(),
                pipeline_code_origin=external_pipeline.get_python_origin(),
            )

            launched_run = instance.launch_run(run_id=created_run.run_id, workspace=workspace)

            completed_run = instance.get_run_by_id(launched_run.run_id)
            assert completed_run.is_success
def test_execute_subset_on_celery_k8s(  # pylint: disable=redefined-outer-name
    dagster_docker_image, dagster_instance, helm_namespace
):
    """Launch only the "count_letters" solid of "demo_pipeline_celery"; expect success."""
    yaml_paths = [
        os.path.join(get_test_project_environments_path(), "env_subset.yaml"),
        os.path.join(get_test_project_environments_path(), "env_s3.yaml"),
    ]
    run_config = merge_dicts(
        merge_yamls(yaml_paths),
        get_celery_engine_config(
            dagster_docker_image=dagster_docker_image, job_namespace=helm_namespace
        ),
    )

    pipeline_name = "demo_pipeline_celery"
    run = create_run_for_test(
        dagster_instance,
        pipeline_name=pipeline_name,
        run_config=run_config,
        mode="default",
        solids_to_execute={"count_letters"},
    )

    external_pipeline = get_test_project_external_pipeline(pipeline_name)
    dagster_instance.launch_run(
        run.run_id, ReOriginatedExternalPipelineForTest(external_pipeline)
    )

    job_name = "dagster-run-%s" % run.run_id
    result = wait_for_job_and_get_raw_logs(job_name=job_name, namespace=helm_namespace)

    assert "PIPELINE_SUCCESS" in result, "no match, result: {}".format(result)
def test_failing_k8s_run_launcher(dagster_instance, helm_namespace):
    """Launch "demo_pipeline" with an invalid run config and check the errors.

    The raw job logs must not contain PIPELINE_SUCCESS, and the run's event
    log must mention both the undefined field and the missing "solids" field.
    """
    run_config = {"blah blah this is wrong": {}}
    pipeline_name = "demo_pipeline"
    run = create_run_for_test(
        dagster_instance, pipeline_name=pipeline_name, run_config=run_config
    )

    external_pipeline = get_test_project_external_pipeline(pipeline_name)
    dagster_instance.launch_run(
        run.run_id, ReOriginatedExternalPipelineForTest(external_pipeline)
    )

    job_name = "dagster-run-%s" % run.run_id
    result = wait_for_job_and_get_raw_logs(job_name=job_name, namespace=helm_namespace)

    assert "PIPELINE_SUCCESS" not in result, "no match, result: {}".format(result)

    event_records = dagster_instance.all_logs(run.run_id)

    assert any(
        'Undefined field "blah blah this is wrong"' in str(event) for event in event_records
    )
    assert any('Missing required field "solids"' in str(event) for event in event_records)
def test_execute_step_with_structured_logs(pipeline_handle):
    """Execute a step of "foo" with structured logs and expect STEP_SUCCESS."""
    runner = CliRunner()
    noop_compute_logs = {
        "compute_logs": {
            "module": "dagster.core.storage.noop_compute_log_manager",
            "class": "NoOpComputeLogManager",
        }
    }

    with instance_for_test(overrides=noop_compute_logs) as instance:
        run = create_run_for_test(instance, pipeline_name="foo", run_id="new_run")

        step_args = ExecuteStepArgs(
            pipeline_origin=pipeline_handle.get_origin(),
            pipeline_run_id=run.run_id,
            instance_ref=instance.get_ref(),
        )
        input_json = serialize_dagster_namedtuple(step_args)

        result = runner_execute_step_with_structured_logs(runner, [input_json])

        assert "STEP_SUCCESS" in result.stdout
def test_step_handler(kubeconfig_file):
    """Launch one step through K8sStepHandler and check the created job.

    The first recorded batch API call must be ``create_namespaced_job`` and
    its first container must use image "bizbuz".
    """
    batch_api_mock = mock.MagicMock()
    job_config = DagsterK8sJobConfig(instance_config_map="foobar", job_image="bizbuz")
    handler = K8sStepHandler(
        job_config=job_config,
        job_namespace="foo",
        load_incluster_config=False,
        kubeconfig_file=kubeconfig_file,
        k8s_client_batch_api=batch_api_mock,
    )

    with instance_for_test() as instance:
        run = create_run_for_test(instance, pipeline_name="bar")

        step_args = ExecuteStepArgs(
            reconstructable(bar).get_python_origin(), run.run_id, ["foo_solid"]
        )
        handler.launch_step(StepHandlerContext(instance, step_args, {"foo_solid": {}}))

        # Check that the job config's image was passed down to the k8s job.
        recorded_calls = batch_api_mock.method_calls
        assert len(recorded_calls) > 0

        method_name, _args, kwargs = recorded_calls[0]
        assert method_name == "create_namespaced_job"

        first_container = kwargs["body"].spec.template.spec.containers[0]
        assert first_container.image == "bizbuz"
def test_execute_on_celery_k8s(  # pylint: disable=redefined-outer-name
    dagster_docker_image, dagster_instance, helm_namespace
):
    """Launch 'demo_pipeline_celery' and expect PIPELINE_SUCCESS in the raw logs."""
    yaml_paths = [
        os.path.join(test_project_environments_path(), 'env.yaml'),
        os.path.join(test_project_environments_path(), 'env_s3.yaml'),
    ]
    run_config = merge_dicts(
        merge_yamls(yaml_paths),
        get_celery_engine_config(
            dagster_docker_image=dagster_docker_image, job_namespace=helm_namespace
        ),
    )

    pipeline_name = 'demo_pipeline_celery'
    run = create_run_for_test(
        dagster_instance,
        pipeline_name=pipeline_name,
        run_config=run_config,
        mode='default',
    )

    external_pipeline = get_test_project_external_pipeline(pipeline_name)
    dagster_instance.launch_run(
        run.run_id, ReOriginatedExternalPipelineForTest(external_pipeline)
    )

    job_name = 'dagster-run-%s' % run.run_id
    result = wait_for_job_and_get_raw_logs(job_name=job_name, namespace=helm_namespace)

    assert 'PIPELINE_SUCCESS' in result, 'no match, result: {}'.format(result)
def test_multi():
    """Check captured stdout when each step runs in its own child process.

    Child processes are started and joined one at a time while the run is
    being watched by the compute log manager.
    """
    with instance_for_test() as instance:
        pipeline_name = "foo_pipeline"
        pipeline_run = create_run_for_test(instance, pipeline_name=pipeline_name)

        step_keys = ["A", "B", "C"]

        with instance.compute_log_manager.watch(pipeline_run):
            for line in ("outer 1", "outer 2", "outer 3"):
                print(line)  # pylint: disable=print-call

            for key in step_keys:
                child = multiprocessing.Process(
                    target=execute_inner, args=(key, pipeline_run, instance.get_ref())
                )
                child.start()
                child.join()

        for key in step_keys:
            step_stdout = instance.compute_log_manager.read_logs_file(
                pipeline_run.run_id, key, ComputeIOType.STDOUT
            )
            assert normalize_file_content(step_stdout.data) == expected_inner_output(key)

        full_out = instance.compute_log_manager.read_logs_file(
            pipeline_run.run_id, pipeline_name, ComputeIOType.STDOUT
        )

        # The way that the multiprocess compute-logging interacts with pytest (which stubs out the
        # sys.stdout fileno) makes this difficult to test. The pytest-captured stdout only captures
        # the stdout from the outer process, not also the inner process
        assert normalize_file_content(full_out.data).startswith(expected_outer_prefix())
def test_single():
    """Check captured stdout for in-process steps and for the outer run.

    Each step's log file must equal ``expected_inner_output(step_key)``; the
    log keyed by the pipeline name must start with ``expected_outer_prefix()``.
    """
    with instance_for_test() as instance:
        pipeline_name = "foo_pipeline"
        pipeline_run = create_run_for_test(instance, pipeline_name=pipeline_name)

        step_keys = ["A", "B", "C"]

        with instance.compute_log_manager.watch(pipeline_run):
            for line in ("outer 1", "outer 2", "outer 3"):
                print(line)  # pylint: disable=print-call

            for key in step_keys:
                inner_step(instance, pipeline_run, key)

        for key in step_keys:
            step_stdout = instance.compute_log_manager.read_logs_file(
                pipeline_run.run_id, key, ComputeIOType.STDOUT
            )
            assert normalize_file_content(step_stdout.data) == expected_inner_output(key)

        full_out = instance.compute_log_manager.read_logs_file(
            pipeline_run.run_id, pipeline_name, ComputeIOType.STDOUT
        )
        assert normalize_file_content(full_out.data).startswith(expected_outer_prefix())
def test_k8s_run_launcher_celery(dagster_instance, helm_namespace):
    """Launch 'demo_pipeline_celery' and check the job logs report success.

    The merged config must contain a 'celery-k8s' execution entry before the
    run is created.
    """
    yaml_paths = [
        os.path.join(test_project_environments_path(), 'env.yaml'),
        os.path.join(test_project_environments_path(), 'env_s3.yaml'),
    ]
    run_config = merge_dicts(merge_yamls(yaml_paths), get_celery_engine_config())

    assert 'celery-k8s' in run_config['execution']

    pipeline_name = 'demo_pipeline_celery'
    tags = {'key': 'value'}
    run = create_run_for_test(
        dagster_instance,
        pipeline_name=pipeline_name,
        run_config=run_config,
        tags=tags,
        mode='default',
    )

    dagster_instance.launch_run(run.run_id, get_test_project_external_pipeline(pipeline_name))

    job_name = 'dagster-run-%s' % run.run_id
    result = wait_for_job_and_get_logs(job_name=job_name, namespace=helm_namespace)

    assert not result.get('errors')
    assert result['data']
    typename = result['data']['executeRunInProcess']['__typename']
    assert typename == 'ExecuteRunInProcessSuccess', 'no match, result: {}'.format(result)
def test_k8s_run_launcher_with_celery_executor_fails(
    dagster_docker_image, dagster_instance_for_k8s_run_launcher, helm_namespace_for_k8s_run_launcher
):
    """Launch a celery-configured pipeline on this instance and expect it to fail.

    The run's event log is polled every 5 seconds for up to 120 seconds until
    a PIPELINE_INIT_FAILURE event shows up; the run must end in FAILURE status.
    """
    instance = dagster_instance_for_k8s_run_launcher

    yaml_paths = [
        os.path.join(get_test_project_environments_path(), "env.yaml"),
        os.path.join(get_test_project_environments_path(), "env_s3.yaml"),
    ]
    run_config = merge_dicts(
        merge_yamls(yaml_paths),
        get_celery_engine_config(
            dagster_docker_image=dagster_docker_image,
            job_namespace=helm_namespace_for_k8s_run_launcher,
        ),
    )

    pipeline_name = "demo_pipeline_celery"

    with get_test_project_location_and_external_pipeline(pipeline_name) as (
        location,
        external_pipeline,
    ):
        external_plan = location.get_external_execution_plan(
            external_pipeline, run_config, "default", None, None
        )
        run = create_run_for_test(
            instance,
            pipeline_name=pipeline_name,
            run_config=run_config,
            mode="default",
            pipeline_snapshot=external_pipeline.pipeline_snapshot,
            execution_plan_snapshot=external_plan.execution_plan_snapshot,
        )
        instance.launch_run(
            run.run_id,
            ReOriginatedExternalPipelineForTest(external_pipeline),
        )

        timeout = datetime.timedelta(0, 120)
        found_pipeline_failure = False
        deadline = datetime.datetime.now() + timeout

        while datetime.datetime.now() < deadline:
            event_records = instance.all_logs(run.run_id)

            # Stop polling as soon as any event is a pipeline init failure.
            if any(
                record.dagster_event
                and record.dagster_event.event_type == DagsterEventType.PIPELINE_INIT_FAILURE
                for record in event_records
            ):
                found_pipeline_failure = True
                break

            time.sleep(5)

        assert found_pipeline_failure
        assert instance.get_run_by_id(run.run_id).status == PipelineRunStatus.FAILURE
def test_user_defined_k8s_config_in_run_tags(kubeconfig_file):
    """Check that k8s config carried in run tags reaches the created job.

    Resource requests/limits are serialized into the "dagster-k8s/config" tag;
    the first container of the created job must report the same resources.
    """
    # Fake k8s environment: batch API calls are recorded on a MagicMock.
    batch_api_mock = mock.MagicMock()
    celery_k8s_run_launcher = CeleryK8sRunLauncher(
        instance_config_map="dagster-instance",
        postgres_password_secret="dagster-postgresql-secret",
        dagster_home="/opt/dagster/dagster_home",
        load_incluster_config=False,
        kubeconfig_file=kubeconfig_file,
        k8s_client_batch_api=batch_api_mock,
    )

    # Construct Dagster run tags with user defined k8s config.
    expected_resources = {
        "requests": {"cpu": "250m", "memory": "64Mi"},
        "limits": {"cpu": "500m", "memory": "2560Mi"},
    }
    user_defined_k8s_config = UserDefinedDagsterK8sConfig(
        container_config={"resources": expected_resources},
    )
    tags = {"dagster-k8s/config": json.dumps(user_defined_k8s_config.to_dict())}

    # Create fake external pipeline.
    recon_pipeline = reconstructable(fake_pipeline)
    recon_repo = recon_pipeline.repository
    location_handle = InProcessRepositoryLocationOrigin(recon_repo).create_handle()
    repo_def = recon_repo.get_definition()
    repo_handle = RepositoryHandle(
        repository_name=repo_def.name,
        repository_location_handle=location_handle,
    )
    fake_external_pipeline = external_pipeline_from_recon_pipeline(
        recon_pipeline,
        solid_selection=None,
        repository_handle=repo_handle,
    )

    # Launch the run in a fake Dagster instance.
    with instance_for_test() as instance:
        celery_k8s_run_launcher.initialize(instance)
        pipeline_name = "demo_pipeline"
        run_config = {"execution": {"celery-k8s": {"config": {"job_image": "fake-image-name"}}}}
        run = create_run_for_test(
            instance,
            pipeline_name=pipeline_name,
            run_config=run_config,
            tags=tags,
        )
        celery_k8s_run_launcher.launch_run(instance, run, fake_external_pipeline)

    # Check that user defined k8s config was passed down to the k8s job.
    recorded_calls = batch_api_mock.method_calls
    assert len(recorded_calls) > 0

    method_name, _args, kwargs = recorded_calls[0]
    assert method_name == "create_namespaced_job"

    job_resources = kwargs["body"].spec.template.spec.containers[0].resources
    assert job_resources == expected_resources