def test_k8s_run_launcher_default(
    dagster_instance_for_k8s_run_launcher, helm_namespace_for_k8s_run_launcher
):
    # Sanity check that we have a K8sRunLauncher
    check.inst(dagster_instance_for_k8s_run_launcher.run_launcher, K8sRunLauncher)
    pods = DagsterKubernetesClient.production_client().core_api.list_namespaced_pod(
        namespace=helm_namespace_for_k8s_run_launcher
    )
    celery_pod_names = [p.metadata.name for p in pods.items if "celery-workers" in p.metadata.name]
    check.invariant(not celery_pod_names)

    run_config = load_yaml_from_path(os.path.join(get_test_project_environments_path(), "env.yaml"))
    pipeline_name = "demo_pipeline"
    tags = {"key": "value"}

    with get_test_project_location_and_external_pipeline(pipeline_name) as (
        location,
        external_pipeline,
    ):
        run = create_run_for_test(
            dagster_instance_for_k8s_run_launcher,
            pipeline_name=pipeline_name,
            run_config=run_config,
            tags=tags,
            mode="default",
            pipeline_snapshot=external_pipeline.pipeline_snapshot,
            execution_plan_snapshot=location.get_external_execution_plan(
                external_pipeline, run_config, "default", None, None
            ).execution_plan_snapshot,
        )
        dagster_instance_for_k8s_run_launcher.launch_run(
            run.run_id,
            ReOriginatedExternalPipelineForTest(external_pipeline),
        )

        result = wait_for_job_and_get_raw_logs(
            job_name="dagster-run-%s" % run.run_id,
            namespace=helm_namespace_for_k8s_run_launcher,
        )
        assert "PIPELINE_SUCCESS" in result, "no match, result: {}".format(result)

        updated_run = dagster_instance_for_k8s_run_launcher.get_run_by_id(run.run_id)
        assert updated_run.tags[DOCKER_IMAGE_TAG] == get_test_project_docker_image()


def test_k8s_executor_resource_requirements(
    dagster_instance_for_k8s_run_launcher,
    helm_namespace_for_k8s_run_launcher,
    dagster_docker_image,
    dagit_url_for_k8s_run_launcher,
):
    # Sanity check that we have a K8sRunLauncher
    pods = DagsterKubernetesClient.production_client().core_api.list_namespaced_pod(
        namespace=helm_namespace_for_k8s_run_launcher
    )
    celery_pod_names = [p.metadata.name for p in pods.items if "celery-workers" in p.metadata.name]
    check.invariant(not celery_pod_names)

    run_config = merge_dicts(
        load_yaml_from_path(os.path.join(get_test_project_environments_path(), "env_s3.yaml")),
        {
            "execution": {
                "k8s": {
                    "config": {
                        "job_namespace": helm_namespace_for_k8s_run_launcher,
                        "job_image": dagster_docker_image,
                        "image_pull_policy": image_pull_policy(),
                    }
                }
            },
        },
    )

    pipeline_name = "resources_limit_pipeline"

    run_id = launch_run_over_graphql(
        dagit_url_for_k8s_run_launcher,
        run_config=run_config,
        pipeline_name=pipeline_name,
        mode="k8s",
    )

    result = wait_for_job_and_get_raw_logs(
        job_name="dagster-run-%s" % run_id, namespace=helm_namespace_for_k8s_run_launcher
    )
    assert "PIPELINE_SUCCESS" in result, "no match, result: {}".format(result)

    updated_run = dagster_instance_for_k8s_run_launcher.get_run_by_id(run_id)
    assert updated_run.tags[DOCKER_IMAGE_TAG] == get_test_project_docker_image()


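# `image_pull_policy()` is used throughout these tests but not defined in this
# section. A minimal sketch of such a helper, assuming the IS_BUILDKITE flag
# used elsewhere in this section: on CI the image lives in a registry and must
# be pulled, while locally it is pre-loaded into the cluster, so pulling is
# skipped.
def image_pull_policy():
    if IS_BUILDKITE:
        return "Always"
    else:
        return "IfNotPresent"

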
def test_execute_subset_on_celery_k8s(  # pylint: disable=redefined-outer-name
    dagster_docker_image, helm_namespace, dagit_url
):
    run_config = merge_dicts(
        merge_yamls(
            [
                os.path.join(get_test_project_environments_path(), "env_subset.yaml"),
                os.path.join(get_test_project_environments_path(), "env_s3.yaml"),
            ]
        ),
        get_celery_engine_config(
            dagster_docker_image=dagster_docker_image, job_namespace=helm_namespace
        ),
    )

    run_id = launch_run_over_graphql(
        dagit_url,
        run_config=run_config,
        pipeline_name="demo_pipeline_celery",
        solid_selection=["count_letters"],
    )

    result = wait_for_job_and_get_raw_logs(
        job_name="dagster-run-%s" % run_id, namespace=helm_namespace
    )
    assert "PIPELINE_SUCCESS" in result, "no match, result: {}".format(result)


def test_execute_on_celery_k8s_with_env_var_and_termination(  # pylint: disable=redefined-outer-name
    dagster_docker_image, dagster_instance, set_dagster_k8s_pipeline_run_namespace_env
):
    run_config = merge_dicts(
        merge_yamls(
            [
                os.path.join(get_test_project_environments_path(), "env_s3.yaml"),
            ]
        ),
        get_celery_engine_config(
            dagster_docker_image=dagster_docker_image,
            job_namespace={"env": "DAGSTER_K8S_PIPELINE_RUN_NAMESPACE"},
        ),
    )

    _test_termination(dagster_instance, run_config)


def test_valid_job_format_with_backcompat_resources(run_launcher):
    docker_image = get_test_project_docker_image()

    run_config = load_yaml_from_path(os.path.join(get_test_project_environments_path(), "env.yaml"))
    pipeline_name = "demo_pipeline"
    run = PipelineRun(pipeline_name=pipeline_name, run_config=run_config)

    tags = validate_tags(
        {
            K8S_RESOURCE_REQUIREMENTS_KEY: (
                {
                    "requests": {"cpu": "250m", "memory": "64Mi"},
                    "limits": {"cpu": "500m", "memory": "2560Mi"},
                }
            )
        }
    )
    user_defined_k8s_config = get_user_defined_k8s_config(tags)
    job_name = "dagster-run-%s" % run.run_id
    pod_name = "dagster-run-%s" % run.run_id
    job = construct_dagster_k8s_job(
        job_config=run_launcher.get_static_job_config(),
        args=["dagster", "api", "execute_run"],
        job_name=job_name,
        user_defined_k8s_config=user_defined_k8s_config,
        pod_name=pod_name,
        component="run_coordinator",
    )

    assert (
        yaml.dump(remove_none_recursively(job.to_dict()), default_flow_style=False).strip()
        == EXPECTED_JOB_SPEC.format(
            run_id=run.run_id,
            job_image=docker_image,
            image_pull_policy=image_pull_policy(),
            dagster_version=dagster_version,
            env_from=ENV_FROM,
            resources="""
        resources:
          limits:
            cpu: 500m
            memory: 2560Mi
          requests:
            cpu: 250m
            memory: 64Mi""",
        ).strip()
    )


def test_execute_on_celery_k8s_with_termination(  # pylint: disable=redefined-outer-name
    dagster_docker_image,
    dagster_instance,
    helm_namespace,
    dagit_url,
):
    run_config = merge_dicts(
        merge_yamls(
            [
                os.path.join(get_test_project_environments_path(), "env_s3.yaml"),
            ]
        ),
        get_celery_engine_config(
            dagster_docker_image=dagster_docker_image, job_namespace=helm_namespace
        ),
    )

    _test_termination(dagit_url, dagster_instance, run_config)


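# `_test_termination` is called above (and, with a different signature, in
# test_execute_on_celery_k8s_with_env_var_and_termination, which likely comes
# from another revision of this suite) but is not defined in this section. A
# minimal sketch of the graphql-based variant, assuming the
# (dagit_url, instance, run_config) signature and reusing "slow_pipeline" from
# the terminate test below as a stand-in target:
def _test_termination(dagit_url, instance, run_config):
    run_id = launch_run_over_graphql(
        dagit_url, run_config=run_config, pipeline_name="slow_pipeline"
    )

    # Wait until the run reports as terminable, then request termination.
    timeout = datetime.timedelta(0, 30)
    start_time = datetime.datetime.now()
    while True:
        assert datetime.datetime.now() < start_time + timeout, "Timed out waiting for can_terminate"
        if can_terminate_run_over_graphql(dagit_url, run_id):
            break
        time.sleep(5)

    terminate_run_over_graphql(dagit_url, run_id=run_id)

    # Poll until the instance reports the run as CANCELED.
    start_time = datetime.datetime.now()
    while True:
        assert datetime.datetime.now() < start_time + timeout, "Timed out waiting for termination"
        if instance.get_run_by_id(run_id).status == PipelineRunStatus.CANCELED:
            break
        time.sleep(5)

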
def test_k8s_executor_get_config_from_run_launcher(
    dagster_instance_for_k8s_run_launcher,
    helm_namespace_for_k8s_run_launcher,
    dagster_docker_image,
):
    # Verify that if you do not specify executor config, it is delegated by the run launcher.
    run_config = merge_dicts(
        load_yaml_from_path(os.path.join(get_test_project_environments_path(), "env.yaml")),
        load_yaml_from_path(os.path.join(get_test_project_environments_path(), "env_s3.yaml")),
        {
            "execution": {"k8s": {"config": {"job_image": dagster_docker_image}}},
        },
    )
    _launch_executor_run(
        run_config,
        dagster_instance_for_k8s_run_launcher,
        helm_namespace_for_k8s_run_launcher,
    )


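# `_launch_executor_run` is used above but not defined in this section. A
# minimal sketch, assuming it follows the same launch-and-assert pattern as
# test_k8s_run_launcher_default above; the "demo_pipeline" target is
# illustrative, not confirmed by the source.
def _launch_executor_run(run_config, instance, namespace):
    pipeline_name = "demo_pipeline"  # hypothetical target pipeline
    with get_test_project_location_and_external_pipeline(pipeline_name) as (
        location,
        external_pipeline,
    ):
        run = create_run_for_test(
            instance,
            pipeline_name=pipeline_name,
            run_config=run_config,
            mode="k8s",
            pipeline_snapshot=external_pipeline.pipeline_snapshot,
            execution_plan_snapshot=location.get_external_execution_plan(
                external_pipeline, run_config, "k8s", None, None
            ).execution_plan_snapshot,
        )
        instance.launch_run(run.run_id, ReOriginatedExternalPipelineForTest(external_pipeline))

        result = wait_for_job_and_get_raw_logs(
            job_name="dagster-run-%s" % run.run_id, namespace=namespace
        )
        assert "PIPELINE_SUCCESS" in result, "no match, result: {}".format(result)

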
def test_execute_on_celery_k8s_image_from_origin(  # pylint: disable=redefined-outer-name
    dagster_docker_image, dagster_instance, helm_namespace, dagit_url
):
    # Like the previous test, but the image is found from the pipeline origin
    # rather than the executor config.
    run_config = merge_dicts(
        merge_yamls(
            [
                os.path.join(get_test_project_environments_path(), "env.yaml"),
                os.path.join(get_test_project_environments_path(), "env_s3.yaml"),
            ]
        ),
        get_celery_engine_config(dagster_docker_image=None, job_namespace=helm_namespace),
    )

    run_id = launch_run_over_graphql(
        dagit_url, run_config=run_config, pipeline_name="demo_pipeline_celery"
    )

    result = wait_for_job_and_get_raw_logs(
        job_name="dagster-run-%s" % run_id, namespace=helm_namespace
    )
    assert "PIPELINE_SUCCESS" in result, "no match, result: {}".format(result)

    updated_run = dagster_instance.get_run_by_id(run_id)
    assert updated_run.tags[DOCKER_IMAGE_TAG] == dagster_docker_image


def test_skip_operator(
    dagster_airflow_docker_operator_pipeline, dagster_docker_image
):  # pylint: disable=redefined-outer-name
    pipeline_name = "optional_outputs"
    environments_path = get_test_project_environments_path()
    results = dagster_airflow_docker_operator_pipeline(
        pipeline_name=pipeline_name,
        recon_repo=ReconstructableRepository.for_module(
            "dagster_test.test_project.test_pipelines.repo",
            "define_demo_execution_repo",
        ),
        environment_yaml=[os.path.join(environments_path, "env_filesystem.yaml")],
        op_kwargs={"host_tmp_dir": "/tmp"},
        image=dagster_docker_image,
    )
    validate_skip_pipeline_execution(results)


def test_execute_subset_on_celery_k8s(  # pylint: disable=redefined-outer-name
    dagster_docker_image, dagster_instance, helm_namespace
):
    run_config = merge_dicts(
        merge_yamls(
            [
                os.path.join(get_test_project_environments_path(), "env_subset.yaml"),
                os.path.join(get_test_project_environments_path(), "env_s3.yaml"),
            ]
        ),
        get_celery_engine_config(
            dagster_docker_image=dagster_docker_image, job_namespace=helm_namespace
        ),
    )

    pipeline_name = "demo_pipeline_celery"

    run = create_run_for_test(
        dagster_instance,
        pipeline_name=pipeline_name,
        run_config=run_config,
        mode="default",
        solids_to_execute={"count_letters"},
    )

    with get_test_project_external_pipeline(pipeline_name) as external_pipeline:
        dagster_instance.launch_run(
            run.run_id,
            ReOriginatedExternalPipelineForTest(external_pipeline),
        )

        result = wait_for_job_and_get_raw_logs(
            job_name="dagster-run-%s" % run.run_id, namespace=helm_namespace
        )
        assert "PIPELINE_SUCCESS" in result, "no match, result: {}".format(result)


def test_fs_storage_no_explicit_base_dir(
    dagster_airflow_python_operator_pipeline,
):  # pylint: disable=redefined-outer-name
    pipeline_name = "demo_pipeline"
    environments_path = get_test_project_environments_path()
    results = dagster_airflow_python_operator_pipeline(
        pipeline_name=pipeline_name,
        recon_repo=ReconstructableRepository.for_module(
            "dagster_test.test_project.test_pipelines.repo", pipeline_name
        ),
        environment_yaml=[
            os.path.join(environments_path, "env.yaml"),
        ],
    )
    validate_pipeline_execution(results)


def test_s3_storage(
    dagster_airflow_docker_operator_pipeline, dagster_docker_image
):  # pylint: disable=redefined-outer-name
    pipeline_name = "demo_pipeline_s3"
    environments_path = get_test_project_environments_path()
    results = dagster_airflow_docker_operator_pipeline(
        pipeline_name=pipeline_name,
        recon_repo=ReconstructableRepository.for_module(
            "dagster_test.test_project.test_pipelines.repo",
            "define_demo_execution_repo",
        ),
        environment_yaml=[
            os.path.join(environments_path, "env.yaml"),
            os.path.join(environments_path, "env_s3.yaml"),
        ],
        image=dagster_docker_image,
    )
    validate_pipeline_execution(results)


def test_execute_on_celery_k8s_retry_pipeline(  # pylint: disable=redefined-outer-name
    dagster_docker_image, dagster_instance, helm_namespace, dagit_url
):
    run_config = merge_dicts(
        merge_yamls([os.path.join(get_test_project_environments_path(), "env_s3.yaml")]),
        get_celery_engine_config(
            dagster_docker_image=dagster_docker_image, job_namespace=helm_namespace
        ),
    )

    run_id = launch_run_over_graphql(
        dagit_url, run_config=run_config, pipeline_name="retry_pipeline"
    )

    result = wait_for_job_and_get_raw_logs(
        job_name="dagster-run-%s" % run_id, namespace=helm_namespace
    )
    assert "PIPELINE_SUCCESS" in result, "no match, result: {}".format(result)

    stats = dagster_instance.get_run_stats(run_id)
    assert stats.steps_succeeded == 1

    # Collect the Dagster event types once rather than re-scanning the event log
    # for each assertion.
    event_types = [
        event.dagster_event.event_type
        for event in dagster_instance.all_logs(run_id)
        if event.is_dagster_event
    ]
    assert DagsterEventType.STEP_START in event_types
    assert DagsterEventType.STEP_UP_FOR_RETRY in event_types
    assert DagsterEventType.STEP_RESTARTED in event_types
    assert DagsterEventType.STEP_SUCCESS in event_types


def test_error_dag_python_job():
    job_name = "demo_error_job"
    recon_repo = ReconstructableRepository.for_module(
        "dagster_test.test_project.test_pipelines.repo", job_name
    )
    environments_path = get_test_project_environments_path()
    environment_yaml = [
        os.path.join(environments_path, "env_filesystem.yaml"),
    ]
    run_config = load_yaml_from_glob_list(environment_yaml)
    execution_date = timezone.utcnow()

    dag, tasks = make_airflow_dag_for_recon_repo(recon_repo, job_name, run_config)

    with pytest.raises(AirflowException) as exc_info:
        execute_tasks_in_dag(dag, tasks, run_id=make_new_run_id(), execution_date=execution_date)

    assert "Exception: Unusual error" in str(exc_info.value)


def test_execute_on_celery_k8s_with_resource_requirements(  # pylint: disable=redefined-outer-name
    dagster_docker_image, dagster_instance, helm_namespace, dagit_url
):
    run_config = merge_dicts(
        merge_yamls(
            [
                os.path.join(get_test_project_environments_path(), "env_s3.yaml"),
            ]
        ),
        get_celery_engine_config(
            dagster_docker_image=dagster_docker_image, job_namespace=helm_namespace
        ),
    )

    run_id = launch_run_over_graphql(
        dagit_url, run_config=run_config, pipeline_name="resources_limit_pipeline"
    )

    result = wait_for_job_and_get_raw_logs(
        job_name="dagster-run-%s" % run_id, namespace=helm_namespace
    )
    assert "PIPELINE_SUCCESS" in result, "no match, result: {}".format(result)


def test_k8s_run_launcher_terminate(
    dagster_instance_for_k8s_run_launcher,
    helm_namespace_for_k8s_run_launcher,
    dagit_url_for_k8s_run_launcher,
):
    pipeline_name = "slow_pipeline"

    run_config = load_yaml_from_path(
        os.path.join(get_test_project_environments_path(), "env_s3.yaml")
    )

    run_id = launch_run_over_graphql(
        dagit_url_for_k8s_run_launcher, run_config=run_config, pipeline_name=pipeline_name
    )

    wait_for_job(job_name="dagster-run-%s" % run_id, namespace=helm_namespace_for_k8s_run_launcher)

    timeout = datetime.timedelta(0, 30)
    start_time = datetime.datetime.now()
    while True:
        assert datetime.datetime.now() < start_time + timeout, "Timed out waiting for can_terminate"
        if can_terminate_run_over_graphql(dagit_url_for_k8s_run_launcher, run_id):
            break
        time.sleep(5)

    terminate_run_over_graphql(dagit_url_for_k8s_run_launcher, run_id=run_id)

    start_time = datetime.datetime.now()
    pipeline_run = None
    while True:
        assert datetime.datetime.now() < start_time + timeout, "Timed out waiting for termination"
        pipeline_run = dagster_instance_for_k8s_run_launcher.get_run_by_id(run_id)
        if pipeline_run.status == PipelineRunStatus.CANCELED:
            break
        time.sleep(5)

    assert pipeline_run.status == PipelineRunStatus.CANCELED
    assert not can_terminate_run_over_graphql(dagit_url_for_k8s_run_launcher, run_id)


def test_volume_mounts(dagster_docker_image, dagster_instance, helm_namespace, dagit_url):
    run_config = deep_merge_dicts(
        merge_yamls([os.path.join(get_test_project_environments_path(), "env_s3.yaml")]),
        get_celery_engine_config(
            dagster_docker_image=dagster_docker_image, job_namespace=helm_namespace
        ),
    )

    run_id = launch_run_over_graphql(
        dagit_url,
        run_config=run_config,
        pipeline_name="volume_mount_pipeline",
        mode="celery",
    )

    result = wait_for_job_and_get_raw_logs(
        job_name="dagster-run-%s" % run_id, namespace=helm_namespace
    )
    assert "PIPELINE_SUCCESS" in result, "no match, result: {}".format(result)


def test_k8s_run_launcher(
    dagster_instance_for_k8s_run_launcher, helm_namespace_for_k8s_run_launcher
):
    run_config = load_yaml_from_path(os.path.join(get_test_project_environments_path(), "env.yaml"))
    pipeline_name = "demo_pipeline"
    run = create_run_for_test(
        dagster_instance_for_k8s_run_launcher,
        pipeline_name=pipeline_name,
        run_config=run_config,
        mode="default",
    )

    dagster_instance_for_k8s_run_launcher.launch_run(
        run.run_id,
        ReOriginatedExternalPipelineForTest(get_test_project_external_pipeline(pipeline_name)),
    )

    result = wait_for_job_and_get_raw_logs(
        job_name="dagster-run-%s" % run.run_id, namespace=helm_namespace_for_k8s_run_launcher
    )
    assert "PIPELINE_SUCCESS" in result, "no match, result: {}".format(result)


def test_k8s_run_launcher_secret_from_deployment(
    helm_namespace_for_k8s_run_launcher,
    dagit_url_for_k8s_run_launcher,
):
    # This run_config requires that WORD_FACTOR be set on both the user code deployment
    # and the run launcher. It will only work if secrets are propagated from the deployment
    # to the run launcher, since TEST_DEPLOYMENT_SECRET_NAME is only set on the user code
    # deployment but not on the run launcher config.
    run_config = load_yaml_from_path(
        os.path.join(get_test_project_environments_path(), "env_config_from_secrets.yaml")
    )
    pipeline_name = "demo_pipeline"

    run_id = launch_run_over_graphql(
        dagit_url_for_k8s_run_launcher, run_config=run_config, pipeline_name=pipeline_name
    )

    result = wait_for_job_and_get_raw_logs(
        job_name="dagster-run-%s" % run_id, namespace=helm_namespace_for_k8s_run_launcher
    )
    assert "PIPELINE_SUCCESS" in result, "no match, result: {}".format(result)


def test_airflow_execution_date_tags_containerized(
    dagster_docker_image,
):  # pylint: disable=redefined-outer-name, unused-argument
    pipeline_name = "demo_airflow_execution_date_pipeline_s3"
    recon_repo = ReconstructableRepository.for_module(
        "dagster_test.test_project.test_pipelines.repo", "define_demo_execution_repo"
    )
    environments_path = get_test_project_environments_path()
    environment_yaml = [
        os.path.join(environments_path, "env_s3.yaml"),
    ]
    run_config = load_yaml_from_glob_list(environment_yaml)

    execution_date = timezone.utcnow()

    with postgres_instance() as instance:
        dag, tasks = make_airflow_dag_containerized_for_recon_repo(
            recon_repo,
            pipeline_name,
            dagster_docker_image,
            run_config,
            instance=instance,
            op_kwargs={"network_mode": "container:test-postgres-db-airflow"},
        )
        results = execute_tasks_in_dag(
            dag, tasks, run_id=make_new_run_id(), execution_date=execution_date
        )

        materialized_airflow_execution_date = None
        for result in results.values():
            for event in result:
                if event.event_type_value == "ASSET_MATERIALIZATION":
                    materialization = event.event_specific_data.materialization
                    materialization_entry = materialization.metadata_entries[0]
                    materialized_airflow_execution_date = materialization_entry.entry_data.text

        assert execution_date.isoformat() == materialized_airflow_execution_date


def test_k8s_run_monitoring(
    dagster_instance_for_k8s_run_launcher,
    helm_namespace_for_k8s_run_launcher,
    dagit_url_for_k8s_run_launcher,
):
    run_config = merge_dicts(
        load_yaml_from_path(os.path.join(get_test_project_environments_path(), "env_s3.yaml")),
        {
            "execution": {
                "k8s": {
                    "config": {
                        "job_namespace": helm_namespace_for_k8s_run_launcher,
                        "image_pull_policy": image_pull_policy(),
                    }
                }
            },
        },
    )
    _launch_run_and_wait_for_resume(
        dagit_url_for_k8s_run_launcher,
        run_config,
        dagster_instance_for_k8s_run_launcher,
        helm_namespace_for_k8s_run_launcher,
    )


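# `_launch_run_and_wait_for_resume` is used above but not defined in this
# section. A minimal sketch of one plausible implementation: start a slow run,
# wait for it to reach STARTED, delete the run worker's Kubernetes job, and
# rely on the run monitoring daemon to resume the run to success. The
# `delete_job` call and the "slow_pipeline" target are assumptions, not
# confirmed by the source.
def _launch_run_and_wait_for_resume(dagit_url, run_config, instance, namespace):
    run_id = launch_run_over_graphql(
        dagit_url, run_config=run_config, pipeline_name="slow_pipeline", mode="k8s"
    )

    # Wait for the run worker to come up.
    start_time = time.time()
    while True:
        assert time.time() - start_time < 60, "Timed out waiting for run to start"
        if instance.get_run_by_id(run_id).status == PipelineRunStatus.STARTED:
            break
        time.sleep(1)

    # Simulate a crashed run worker by deleting its job out from under it.
    DagsterKubernetesClient.production_client().delete_job(
        job_name="dagster-run-%s" % run_id, namespace=namespace
    )

    # The monitoring daemon should detect the dead worker and resume the run.
    poll_for_finished_run(instance, run_id, timeout=120)
    assert instance.get_run_by_id(run_id).status == PipelineRunStatus.SUCCESS

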
def test_execute_on_celery_k8s_with_resource_requirements(  # pylint: disable=redefined-outer-name
    dagster_docker_image, dagster_instance, helm_namespace
):
    run_config = merge_dicts(
        merge_yamls(
            [
                os.path.join(get_test_project_environments_path(), "env_s3.yaml"),
            ]
        ),
        get_celery_engine_config(
            dagster_docker_image=dagster_docker_image, job_namespace=helm_namespace
        ),
    )

    pipeline_name = "resources_limit_pipeline"

    with get_test_project_workspace_and_external_pipeline(dagster_instance, pipeline_name) as (
        workspace,
        external_pipeline,
    ):
        reoriginated_pipeline = ReOriginatedExternalPipelineForTest(external_pipeline)
        run = create_run_for_test(
            dagster_instance,
            pipeline_name=pipeline_name,
            run_config=run_config,
            mode="default",
            external_pipeline_origin=reoriginated_pipeline.get_external_origin(),
            pipeline_code_origin=reoriginated_pipeline.get_python_origin(),
        )

        dagster_instance.launch_run(run.run_id, workspace)

        result = wait_for_job_and_get_raw_logs(
            job_name="dagster-run-%s" % run.run_id, namespace=helm_namespace
        )
        assert "PIPELINE_SUCCESS" in result, "no match, result: {}".format(result)


def test_k8s_run_launcher_with_celery_executor_fails(
    dagster_docker_image,
    dagster_instance_for_k8s_run_launcher,
    helm_namespace_for_k8s_run_launcher,
):
    run_config = merge_dicts(
        merge_yamls(
            [
                os.path.join(get_test_project_environments_path(), "env.yaml"),
                os.path.join(get_test_project_environments_path(), "env_s3.yaml"),
            ]
        ),
        get_celery_engine_config(
            dagster_docker_image=dagster_docker_image,
            job_namespace=helm_namespace_for_k8s_run_launcher,
        ),
    )

    pipeline_name = "demo_pipeline_celery"

    with get_test_project_external_pipeline_hierarchy(
        dagster_instance_for_k8s_run_launcher, pipeline_name
    ) as (
        workspace,
        location,
        _repo,
        external_pipeline,
    ):
        reoriginated_pipeline = ReOriginatedExternalPipelineForTest(external_pipeline)
        run = create_run_for_test(
            dagster_instance_for_k8s_run_launcher,
            pipeline_name=pipeline_name,
            run_config=run_config,
            mode="default",
            pipeline_snapshot=external_pipeline.pipeline_snapshot,
            execution_plan_snapshot=location.get_external_execution_plan(
                external_pipeline, run_config, "default", None, None
            ).execution_plan_snapshot,
            external_pipeline_origin=reoriginated_pipeline.get_external_origin(),
            pipeline_code_origin=reoriginated_pipeline.get_python_origin(),
        )

        dagster_instance_for_k8s_run_launcher.launch_run(run.run_id, workspace)

        timeout = datetime.timedelta(0, 120)
        found_pipeline_failure = False
        start_time = datetime.datetime.now()

        while datetime.datetime.now() < start_time + timeout:
            event_records = dagster_instance_for_k8s_run_launcher.all_logs(run.run_id)

            for event_record in event_records:
                if event_record.dagster_event:
                    if event_record.dagster_event.event_type == DagsterEventType.PIPELINE_FAILURE:
                        found_pipeline_failure = True

            if found_pipeline_failure:
                break

            time.sleep(5)

        assert found_pipeline_failure
        assert (
            dagster_instance_for_k8s_run_launcher.get_run_by_id(run.run_id).status
            == PipelineRunStatus.FAILURE
        )


def test_memoization_k8s_executor(
    dagster_instance_for_k8s_run_launcher,
    helm_namespace_for_k8s_run_launcher,
    dagster_docker_image,
    dagit_url_for_k8s_run_launcher,
):
    ephemeral_path = str(uuid.uuid4())
    run_config = deep_merge_dicts(
        load_yaml_from_path(os.path.join(get_test_project_environments_path(), "env_s3.yaml")),
        {
            "execution": {
                "k8s": {
                    "config": {
                        "job_namespace": helm_namespace_for_k8s_run_launcher,
                        "job_image": dagster_docker_image,
                        "image_pull_policy": image_pull_policy(),
                    }
                }
            },
        },
    )

    run_config = deep_merge_dicts(
        run_config,
        {"resources": {"io_manager": {"config": {"s3_prefix": ephemeral_path}}}},
    )

    # Wrap in try-finally to ensure that memoized results are always cleaned from the s3 bucket.
    try:
        pipeline_name = "memoization_pipeline"

        run_ids = []
        for _ in range(2):
            run_id = launch_run_over_graphql(
                dagit_url_for_k8s_run_launcher,
                run_config=run_config,
                pipeline_name=pipeline_name,
                mode="k8s",
            )

            result = wait_for_job_and_get_raw_logs(
                job_name="dagster-run-%s" % run_id,
                namespace=helm_namespace_for_k8s_run_launcher,
            )
            assert "PIPELINE_SUCCESS" in result, "no match, result: {}".format(result)

            run_ids.append(run_id)

        # We expect that the first run should have to run the step, since it has not yet been
        # memoized.
        unmemoized_run_id = run_ids[0]
        events = dagster_instance_for_k8s_run_launcher.all_logs(unmemoized_run_id)
        assert len(_get_step_execution_events(events)) == 1

        # We expect that the second run should not have to run the step, since it has been
        # memoized.
        memoized_run_id = run_ids[1]
        events = dagster_instance_for_k8s_run_launcher.all_logs(memoized_run_id)
        assert len(_get_step_execution_events(events)) == 0
    finally:
        cleanup_memoized_results(
            define_memoization_pipeline(),
            "k8s",
            dagster_instance_for_k8s_run_launcher,
            run_config,
        )


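# `_get_step_execution_events` is used above but not defined in this section. A
# minimal sketch, assuming it simply filters the event log down to events that
# mark a step starting to execute, using the same event-log idioms as the retry
# test above:
def _get_step_execution_events(events):
    return [
        event
        for event in events
        if event.is_dagster_event
        and event.dagster_event.event_type == DagsterEventType.STEP_START
    ]

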
def test_launch_docker_image_on_pipeline_config():
    # Docker image name to use for launch specified as part of the pipeline origin
    # rather than in the run launcher instance config
    docker_image = get_test_project_docker_image()
    launcher_config = {
        "env_vars": [
            "AWS_ACCESS_KEY_ID",
            "AWS_SECRET_ACCESS_KEY",
            "DOCKER_LAUNCHER_NETWORK",
        ],
        "network": {"env": "DOCKER_LAUNCHER_NETWORK"},
        "container_kwargs": {
            "auto_remove": True,
        },
    }

    if IS_BUILDKITE:
        launcher_config["registry"] = get_buildkite_registry_config()
    else:
        find_local_test_image(docker_image)

    run_config = merge_yamls(
        [
            os.path.join(get_test_project_environments_path(), "env.yaml"),
            os.path.join(get_test_project_environments_path(), "env_s3.yaml"),
        ]
    )

    with environ({"DOCKER_LAUNCHER_NETWORK": "container:test-postgres-db-docker"}):
        with docker_postgres_instance(
            overrides={
                "run_launcher": {
                    "class": "DockerRunLauncher",
                    "module": "dagster_docker",
                    "config": launcher_config,
                }
            }
        ) as instance:
            recon_pipeline = get_test_project_recon_pipeline("demo_pipeline_s3", docker_image)
            with get_test_project_workspace_and_external_pipeline(
                instance, "demo_pipeline_s3", container_image=docker_image
            ) as (workspace, orig_pipeline):
                external_pipeline = ReOriginatedExternalPipelineForTest(
                    orig_pipeline,
                    container_image=docker_image,
                )
                run = instance.create_run_for_pipeline(
                    pipeline_def=recon_pipeline.get_definition(),
                    run_config=run_config,
                    external_pipeline_origin=external_pipeline.get_external_origin(),
                    pipeline_code_origin=external_pipeline.get_python_origin(),
                )
                instance.launch_run(run.run_id, workspace)

                poll_for_finished_run(instance, run.run_id, timeout=60)

                run = instance.get_run_by_id(run.run_id)
                assert run.status == PipelineRunStatus.SUCCESS
                assert run.tags[DOCKER_IMAGE_TAG] == docker_image


def test_launch_docker_no_network():
    docker_image = get_test_project_docker_image()
    launcher_config = {
        "env_vars": [
            "AWS_ACCESS_KEY_ID",
            "AWS_SECRET_ACCESS_KEY",
        ],
    }
    if IS_BUILDKITE:
        launcher_config["registry"] = get_buildkite_registry_config()
    else:
        find_local_test_image(docker_image)

    run_config = merge_yamls(
        [
            os.path.join(get_test_project_environments_path(), "env.yaml"),
            os.path.join(get_test_project_environments_path(), "env_s3.yaml"),
        ]
    )
    with docker_postgres_instance(
        overrides={
            "run_launcher": {
                "class": "DockerRunLauncher",
                "module": "dagster_docker",
                "config": launcher_config,
            }
        },
        # Ensure the container will time out and fail quickly
        conn_args={
            "params": {"connect_timeout": 2},
        },
    ) as instance:
        recon_pipeline = get_test_project_recon_pipeline("demo_pipeline_s3", docker_image)
        with get_test_project_workspace_and_external_pipeline(
            instance, "demo_pipeline_s3", container_image=docker_image
        ) as (workspace, orig_pipeline):
            external_pipeline = ReOriginatedExternalPipelineForTest(
                orig_pipeline,
                container_image=docker_image,
            )
            run = instance.create_run_for_pipeline(
                pipeline_def=recon_pipeline.get_definition(),
                run_config=run_config,
                external_pipeline_origin=external_pipeline.get_external_origin(),
                pipeline_code_origin=external_pipeline.get_python_origin(),
            )
            instance.launch_run(run.run_id, workspace)

            # Container launches, but run is stuck in STARTING state
            # due to not being able to access the network
            run = instance.get_run_by_id(run.run_id)
            assert run.tags[DOCKER_IMAGE_TAG] == docker_image

            container_id = run.tags[DOCKER_CONTAINER_ID_TAG]

            run = instance.get_run_by_id(run.run_id)
            assert run.status == PipelineRunStatus.STARTING
            assert run.tags[DOCKER_IMAGE_TAG] == docker_image

            client = docker.client.from_env()
            container = None
            try:
                start_time = time.time()
                while True:
                    container = client.containers.get(container_id)
                    if time.time() - start_time > 60:
                        raise Exception("Timed out waiting for container to exit")

                    if container.status == "exited":
                        break

                    time.sleep(3)
            finally:
                if container:
                    container.remove(force=True)


def _test_launch(docker_image, launcher_config, terminate=False):
    if IS_BUILDKITE:
        launcher_config["registry"] = get_buildkite_registry_config()
    else:
        find_local_test_image(docker_image)

    run_config = merge_yamls(
        [
            os.path.join(get_test_project_environments_path(), "env.yaml"),
            os.path.join(get_test_project_environments_path(), "env_s3.yaml"),
        ]
    )

    with docker_postgres_instance(
        overrides={
            "run_launcher": {
                "class": "DockerRunLauncher",
                "module": "dagster_docker",
                "config": launcher_config,
            }
        }
    ) as instance:
        recon_pipeline = get_test_project_recon_pipeline("demo_pipeline_s3")
        with get_test_project_workspace_and_external_pipeline(instance, "demo_pipeline_s3") as (
            workspace,
            orig_pipeline,
        ):
            external_pipeline = ReOriginatedExternalPipelineForTest(orig_pipeline)

            run = instance.create_run_for_pipeline(
                pipeline_def=recon_pipeline.get_definition(),
                run_config=run_config,
                external_pipeline_origin=external_pipeline.get_external_origin(),
                pipeline_code_origin=external_pipeline.get_python_origin(),
            )

            instance.launch_run(run.run_id, workspace)

            if not terminate:
                poll_for_finished_run(instance, run.run_id, timeout=60)
                assert instance.get_run_by_id(run.run_id).status == PipelineRunStatus.SUCCESS
            else:
                start_time = time.time()

                filters = PipelineRunsFilter(
                    run_ids=[run.run_id],
                    statuses=[
                        PipelineRunStatus.STARTED,
                    ],
                )

                # Wait for the run to reach STARTED before attempting to terminate it.
                while True:
                    runs = instance.get_runs(filters, limit=1)
                    if runs:
                        break
                    else:
                        time.sleep(0.1)
                        if time.time() - start_time > 60:
                            raise Exception("Timed out waiting for run to start")

                launcher = instance.run_launcher
                assert launcher.can_terminate(run.run_id)
                assert launcher.terminate(run.run_id)

                poll_for_finished_run(instance, run.run_id, timeout=60)
                assert instance.get_run_by_id(run.run_id).status == PipelineRunStatus.CANCELED


def test_execute_on_celery_k8s_with_hard_failure(  # pylint: disable=redefined-outer-name
    dagster_docker_image, dagster_instance, set_dagster_k8s_pipeline_run_namespace_env
):
    run_config = merge_dicts(
        merge_dicts(
            merge_yamls(
                [
                    os.path.join(get_test_project_environments_path(), "env_s3.yaml"),
                ]
            ),
            get_celery_engine_config(
                dagster_docker_image=dagster_docker_image,
                job_namespace={"env": "DAGSTER_K8S_PIPELINE_RUN_NAMESPACE"},
            ),
        ),
        {"solids": {"hard_fail_or_0": {"config": {"fail": True}}}},
    )

    pipeline_name = "hard_failer"
    run = create_run_for_test(
        dagster_instance,
        pipeline_name=pipeline_name,
        run_config=run_config,
        mode="default",
    )

    dagster_instance.launch_run(
        run.run_id,
        ReOriginatedExternalPipelineForTest(get_test_project_external_pipeline(pipeline_name)),
    )
    assert isinstance(dagster_instance.run_launcher, CeleryK8sRunLauncher)

    # Check that the pipeline run is marked as failed
    pipeline_run_status_failure = False
    start_time = datetime.datetime.now()
    timeout = datetime.timedelta(0, 120)

    while datetime.datetime.now() < start_time + timeout:
        pipeline_run = dagster_instance.get_run_by_id(run.run_id)
        if pipeline_run.status == PipelineRunStatus.FAILURE:
            pipeline_run_status_failure = True
            break
        time.sleep(5)
    assert pipeline_run_status_failure

    # Check for step failure for hard_fail_or_0.compute
    start_time = datetime.datetime.now()
    step_failure_found = False
    while datetime.datetime.now() < start_time + timeout:
        event_records = dagster_instance.all_logs(run.run_id)
        for event_record in event_records:
            if event_record.dagster_event:
                if (
                    event_record.dagster_event.event_type == DagsterEventType.STEP_FAILURE
                    and event_record.dagster_event.step_key == "hard_fail_or_0"
                ):
                    step_failure_found = True
                    break
        if step_failure_found:
            break
        time.sleep(5)
    assert step_failure_found


def test_terminate_launched_docker_run():
    docker_image = get_test_project_docker_image()
    launcher_config = {
        "env_vars": [
            "AWS_ACCESS_KEY_ID",
            "AWS_SECRET_ACCESS_KEY",
        ],
        "network": "container:test-postgres-db-docker",
    }

    if IS_BUILDKITE:
        launcher_config["registry"] = get_buildkite_registry_config()
    else:
        find_local_test_image(docker_image)

    run_config = merge_yamls(
        [
            os.path.join(get_test_project_environments_path(), "env_s3.yaml"),
        ]
    )

    with docker_postgres_instance(
        overrides={
            "run_launcher": {
                "class": "DockerRunLauncher",
                "module": "dagster_docker",
                "config": launcher_config,
            }
        }
    ) as instance:
        recon_pipeline = get_test_project_recon_pipeline("hanging_pipeline", docker_image)
        run = instance.create_run_for_pipeline(
            pipeline_def=recon_pipeline.get_definition(),
            run_config=run_config,
        )

        run_id = run.run_id

        external_pipeline = ReOriginatedExternalPipelineForTest(
            get_test_project_external_pipeline("hanging_pipeline", container_image=docker_image),
            container_image=docker_image,
        )
        instance.launch_run(run_id, external_pipeline)

        poll_for_step_start(instance, run_id)

        assert instance.run_launcher.can_terminate(run_id)
        assert instance.run_launcher.terminate(run_id)

        poll_for_finished_run(instance, run_id, timeout=30)

        terminated_pipeline_run = instance.get_run_by_id(run_id)
        assert terminated_pipeline_run.status == PipelineRunStatus.CANCELED

        run_logs = instance.all_logs(run_id)

        _check_event_log_contains(
            run_logs,
            [
                ("PIPELINE_CANCELING", "Sending pipeline termination request"),
                ("STEP_FAILURE", 'Execution of step "hanging_solid" failed.'),
                ("PIPELINE_CANCELED", 'Execution of pipeline "hanging_pipeline" canceled.'),
                ("ENGINE_EVENT", "Pipeline execution terminated by interrupt"),
                ("ENGINE_EVENT", "Process for pipeline exited"),
            ],
        )


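# `_check_event_log_contains` is used above but not defined in this section. A
# minimal sketch, assuming it asserts that for each (event type, message
# fragment) pair, some Dagster event of that type contains the fragment:
def _check_event_log_contains(event_log, expected_type_and_message):
    types_and_messages = [
        (e.dagster_event.event_type_value, e.message) for e in event_log if e.is_dagster_event
    ]
    for expected_event_type, expected_message_fragment in expected_type_and_message:
        assert any(
            event_type == expected_event_type and expected_message_fragment in message
            for event_type, message in types_and_messages
        ), "Missing event {}: {}".format(expected_event_type, expected_message_fragment)

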
def test_docker_monitoring():
    docker_image = get_test_project_docker_image()

    launcher_config = {
        "env_vars": [
            "AWS_ACCESS_KEY_ID",
            "AWS_SECRET_ACCESS_KEY",
        ],
        "networks": ["container:test-postgres-db-docker"],
        "container_kwargs": {
            # "auto_remove": True,
            "volumes": ["/var/run/docker.sock:/var/run/docker.sock"],
        },
    }

    if IS_BUILDKITE:
        launcher_config["registry"] = get_buildkite_registry_config()
    else:
        find_local_test_image(docker_image)

    run_config = merge_dicts(
        load_yaml_from_path(os.path.join(get_test_project_environments_path(), "env_s3.yaml")),
        {
            "solids": {
                "multiply_the_word_slow": {
                    "inputs": {"word": "bar"},
                    "config": {"factor": 2, "sleep_time": 20},
                }
            },
            "execution": {"docker": {"config": {}}},
        },
    )

    with docker_postgres_instance(
        {
            "run_monitoring": {"enabled": True},
            "run_launcher": {
                "class": "DockerRunLauncher",
                "module": "dagster_docker",
                "config": launcher_config,
            },
        }
    ) as instance:
        recon_pipeline = get_test_project_recon_pipeline("demo_pipeline_docker_slow", docker_image)
        with get_test_project_workspace_and_external_pipeline(
            instance, "demo_pipeline_docker_slow", container_image=docker_image
        ) as (
            workspace,
            orig_pipeline,
        ):
            with start_daemon():
                external_pipeline = ReOriginatedExternalPipelineForTest(
                    orig_pipeline, container_image=docker_image
                )

                run = instance.create_run_for_pipeline(
                    pipeline_def=recon_pipeline.get_definition(),
                    run_config=run_config,
                    external_pipeline_origin=external_pipeline.get_external_origin(),
                    pipeline_code_origin=external_pipeline.get_python_origin(),
                )

                with log_run_events(instance, run.run_id):
                    instance.launch_run(run.run_id, workspace)

                    start_time = time.time()
                    while time.time() - start_time < 60:
                        run = instance.get_run_by_id(run.run_id)
                        if run.status == PipelineRunStatus.STARTED:
                            break
                        assert run.status == PipelineRunStatus.STARTING
                        time.sleep(1)

                    time.sleep(3)

                    instance.run_launcher._get_container(  # pylint:disable=protected-access
                        instance.get_run_by_id(run.run_id)
                    ).stop()

                    # The daemon resumes the run
                    poll_for_finished_run(instance, run.run_id, timeout=90)
                    assert instance.get_run_by_id(run.run_id).status == PipelineRunStatus.SUCCESS