def test_execute_on_celery_k8s(  # pylint: disable=redefined-outer-name
    dagster_docker_image, dagster_instance, helm_namespace
):
    """Launch demo_pipeline_celery via the celery-k8s engine and assert the
    run's raw job logs report pipeline success."""
    env_dir = test_project_environments_path()
    run_config = merge_dicts(
        merge_yamls(
            [os.path.join(env_dir, "env.yaml"), os.path.join(env_dir, "env_s3.yaml")]
        ),
        get_celery_engine_config(
            dagster_docker_image=dagster_docker_image, job_namespace=helm_namespace
        ),
    )

    pipeline_name = "demo_pipeline_celery"
    run = create_run_for_test(
        dagster_instance,
        pipeline_name=pipeline_name,
        run_config=run_config,
        mode="default",
    )
    dagster_instance.launch_run(
        run.run_id,
        ReOriginatedExternalPipelineForTest(
            get_test_project_external_pipeline(pipeline_name)
        ),
    )

    logs = wait_for_job_and_get_raw_logs(
        job_name="dagster-run-%s" % run.run_id, namespace=helm_namespace
    )
    assert "PIPELINE_SUCCESS" in logs, "no match, result: {}".format(logs)
def test_k8s_run_launcher_celery(dagster_instance, helm_namespace):
    """Launch demo_pipeline_celery through the run launcher and verify the
    GraphQL result reports ExecuteRunInProcessSuccess."""
    env_dir = test_project_environments_path()
    run_config = merge_dicts(
        merge_yamls(
            [os.path.join(env_dir, "env.yaml"), os.path.join(env_dir, "env_s3.yaml")]
        ),
        get_celery_engine_config(),
    )
    # The merged config must select the celery-k8s executor.
    assert "celery-k8s" in run_config["execution"]

    pipeline_name = "demo_pipeline_celery"
    run = create_run_for_test(
        dagster_instance,
        pipeline_name=pipeline_name,
        run_config=run_config,
        tags={"key": "value"},
        mode="default",
    )
    dagster_instance.launch_run(
        run.run_id, get_test_project_external_pipeline(pipeline_name)
    )

    result = wait_for_job_and_get_logs(
        job_name="dagster-run-%s" % run.run_id, namespace=helm_namespace
    )
    assert not result.get("errors")
    assert result["data"]
    assert (
        result["data"]["executeRunInProcess"]["__typename"]
        == "ExecuteRunInProcessSuccess"
    ), "no match, result: {}".format(result)
def test_execute_subset_on_celery_k8s(  # pylint: disable=redefined-outer-name
    dagster_docker_image, dagster_instance, helm_namespace
):
    """Execute only the count_letters solid of demo_pipeline_celery on the
    celery-k8s engine and assert the run succeeds."""
    env_dir = test_project_environments_path()
    run_config = merge_dicts(
        merge_yamls(
            [
                os.path.join(env_dir, "env_subset.yaml"),
                os.path.join(env_dir, "env_s3.yaml"),
            ]
        ),
        get_celery_engine_config(
            dagster_docker_image=dagster_docker_image, job_namespace=helm_namespace
        ),
    )

    pipeline_name = "demo_pipeline_celery"
    run = create_run_for_test(
        dagster_instance,
        pipeline_name=pipeline_name,
        run_config=run_config,
        mode="default",
        solids_to_execute={"count_letters"},
    )
    dagster_instance.launch_run(
        run.run_id,
        ReOriginatedExternalPipelineForTest(
            get_test_project_external_pipeline(pipeline_name)
        ),
    )

    logs = wait_for_job_and_get_raw_logs(
        job_name="dagster-run-%s" % run.run_id, namespace=helm_namespace
    )
    assert "PIPELINE_SUCCESS" in logs, "no match, result: {}".format(logs)
def test_execute_on_celery(  # pylint: disable=redefined-outer-name
    dagster_docker_image, dagster_instance, helm_namespace
):
    """Run demo_pipeline_celery on the celery-k8s engine and verify the
    GraphQL result reports ExecuteRunInProcessSuccess.

    Fix: every other call site in this file passes the target namespace to
    get_celery_engine_config as ``job_namespace=``; this one passed
    ``helm_namespace=``, which does not match that helper's keyword.
    """
    run_config = merge_dicts(
        merge_yamls(
            [
                os.path.join(test_project_environments_path(), "env.yaml"),
                os.path.join(test_project_environments_path(), "env_s3.yaml"),
            ]
        ),
        # BUG FIX: was helm_namespace=helm_namespace; the helper's keyword is
        # job_namespace (see the sibling celery-k8s tests in this file).
        get_celery_engine_config(
            dagster_docker_image=dagster_docker_image, job_namespace=helm_namespace
        ),
    )

    pipeline_name = "demo_pipeline_celery"
    run = create_run_for_test(
        dagster_instance,
        pipeline_name=pipeline_name,
        run_config=run_config,
        mode="default",
    )
    dagster_instance.launch_run(
        run.run_id, get_test_project_external_pipeline(pipeline_name)
    )

    result = wait_for_job_and_get_logs(
        job_name="dagster-run-%s" % run.run_id, namespace=helm_namespace
    )
    assert not result.get("errors")
    assert result["data"]
    assert (
        result["data"]["executeRunInProcess"]["__typename"]
        == "ExecuteRunInProcessSuccess"
    ), "no match, result: {}".format(result)
def test_k8s_run_launcher_celery(dagster_instance, helm_namespace):
    """Legacy (environment_dict era) launcher test: launch
    demo_pipeline_celery and verify the startPipelineExecutionForCreatedRun
    GraphQL mutation succeeded."""
    env_dir = test_project_environments_path()
    environment_dict = merge_dicts(
        merge_yamls(
            [os.path.join(env_dir, "env.yaml"), os.path.join(env_dir, "env_s3.yaml")]
        ),
        get_celery_engine_config(),
    )
    # The merged config must select the celery-k8s executor.
    assert "celery-k8s" in environment_dict["execution"]

    run = create_run_for_test(
        dagster_instance,
        pipeline_name="demo_pipeline_celery",
        environment_dict=environment_dict,
        tags={"key": "value"},
        mode="default",
    )
    dagster_instance.launch_run(run.run_id)

    result = wait_for_job_and_get_logs(
        job_name="dagster-run-%s" % run.run_id, namespace=helm_namespace
    )
    assert not result.get("errors")
    assert result["data"]
    assert (
        result["data"]["startPipelineExecutionForCreatedRun"]["__typename"]
        == "StartPipelineRunSuccess"
    )
def test_execute_celery_docker():
    """Execute docker_celery_pipeline with the celery-docker executor in
    eager mode, building (or pulling) the test image first."""
    docker_image = test_project_docker_image()
    docker_config = {
        "image": docker_image,
        "env_vars": ["AWS_ACCESS_KEY_ID", "AWS_SECRET_ACCESS_KEY"],
    }

    if IS_BUILDKITE:
        # On Buildkite, authorize against ECR so the image can be pulled.
        ecr_client = boto3.client("ecr", region_name="us-west-1")
        token = ecr_client.get_authorization_token()
        auth_data = token["authorizationData"][0]
        username, password = (
            base64.b64decode(auth_data["authorizationToken"]).decode().split(":")
        )
        docker_config["registry"] = {
            "url": auth_data["proxyEndpoint"],
            "username": username,
            "password": password,
        }
    else:
        # Locally, reuse an existing image if present; otherwise build it.
        try:
            docker.from_env().images.get(docker_image)
            print(  # pylint: disable=print-call
                "Found existing image tagged {image}, skipping image build. To rebuild, first run: "
                "docker rmi {image}".format(image=docker_image)
            )
        except docker.errors.ImageNotFound:
            build_and_tag_test_image(docker_image)

    with seven.TemporaryDirectory() as temp_dir:
        env_dir = test_project_environments_path()
        run_config = merge_dicts(
            merge_yamls(
                [
                    os.path.join(env_dir, "env.yaml"),
                    os.path.join(env_dir, "env_s3.yaml"),
                ]
            ),
            {
                "execution": {
                    "celery-docker": {
                        "config": {
                            "docker": docker_config,
                            # Run tasks synchronously inside the test process.
                            "config_source": {"task_always_eager": True},
                        }
                    }
                },
            },
        )
        result = execute_pipeline(
            get_test_project_recon_pipeline("docker_celery_pipeline"),
            run_config=run_config,
            instance=DagsterInstance.local_temp(temp_dir),
        )
        assert result.success
def test_execute_celery_docker():
    """Legacy (single-quote era) variant: execute docker_celery_pipeline with
    the celery-docker executor in eager mode."""
    docker_image = test_project_docker_image()
    docker_config = {
        "image": docker_image,
        "env_vars": ["AWS_ACCESS_KEY_ID", "AWS_SECRET_ACCESS_KEY"],
    }

    if IS_BUILDKITE:
        # Authorize against ECR so the worker container can pull the image.
        token = boto3.client("ecr", region_name="us-west-1").get_authorization_token()
        auth_data = token["authorizationData"][0]
        username, password = (
            base64.b64decode(auth_data["authorizationToken"]).decode().split(":")
        )
        docker_config["registry"] = {
            "url": auth_data["proxyEndpoint"],
            "username": username,
            "password": password,
        }
    else:
        # Reuse a locally built image when available.
        try:
            client = docker.from_env()
            client.images.get(docker_image)
            print(  # pylint: disable=print-call
                "Found existing image tagged {image}, skipping image build. To rebuild, first run: "
                "docker rmi {image}".format(image=docker_image)
            )
        except docker.errors.ImageNotFound:
            build_and_tag_test_image(docker_image)

    with seven.TemporaryDirectory() as temp_dir:
        env_dir = test_project_environments_path()
        run_config = merge_dicts(
            merge_yamls(
                [
                    os.path.join(env_dir, "env.yaml"),
                    os.path.join(env_dir, "env_s3.yaml"),
                ]
            ),
            {
                "execution": {
                    "celery-docker": {
                        "config": {
                            "docker": docker_config,
                            # Execute celery tasks inline for the test.
                            "config_source": {"task_always_eager": True},
                        }
                    }
                },
            },
        )
        result = execute_pipeline(
            get_test_project_recon_pipeline("docker_celery_pipeline"),
            run_config=run_config,
            instance=DagsterInstance.local_temp(temp_dir),
        )
        assert result.success
def test_execute_on_celery(  # pylint: disable=redefined-outer-name
    dagster_docker_image, dagster_instance, helm_namespace
):
    """Oldest celery-k8s variant: builds the engine config inline and checks
    the startPipelineExecutionForCreatedRun GraphQL result."""
    env_dir = test_project_environments_path()
    celery_engine_config = {
        "execution": {
            "celery-k8s": {
                "config": {
                    # Broker/backend are resolved from the helm-installed env.
                    "broker": {"env": "DAGSTER_K8S_CELERY_BROKER"},
                    "backend": {"env": "DAGSTER_K8S_CELERY_BACKEND"},
                    "job_image": dagster_docker_image,
                    "job_namespace": helm_namespace,
                    "instance_config_map": "dagster-instance",
                    "postgres_password_secret": "dagster-postgresql-secret",
                    "image_pull_policy": "Always",
                    "env_config_maps": ["dagster-pipeline-env"],
                }
            }
        },
    }
    environment_dict = merge_dicts(
        merge_yamls(
            [os.path.join(env_dir, "env.yaml"), os.path.join(env_dir, "env_s3.yaml")]
        ),
        celery_engine_config,
    )

    run = create_run_for_test(
        dagster_instance,
        pipeline_name="demo_pipeline_celery",
        environment_dict=environment_dict,
        mode="default",
    )
    dagster_instance.launch_run(run.run_id)

    result = wait_for_job_and_get_logs(
        job_name="dagster-run-%s" % run.run_id, namespace=helm_namespace
    )
    assert not result.get("errors")
    assert result["data"]
    assert (
        result["data"]["startPipelineExecutionForCreatedRun"]["__typename"]
        == "StartPipelineRunSuccess"
    )
def test_error_dag_k8s(dagster_docker_image, cluster_provider):
    """The kubernetized airflow DAG for demo_error_pipeline should surface
    the solid's failure as an AirflowException."""
    _check_aws_creds_available()

    pipeline_name = "demo_error_pipeline"
    recon_repo = ReconstructableRepository.for_module(
        "dagster_test.test_project.test_pipelines.repo", "define_demo_execution_repo"
    )
    run_config = load_yaml_from_glob_list(
        [os.path.join(test_project_environments_path(), "env_s3.yaml")]
    )
    run_id = make_new_run_id()
    execution_date = timezone.utcnow()

    dag, tasks = make_airflow_dag_kubernetized_for_recon_repo(
        recon_repo=recon_repo,
        pipeline_name=pipeline_name,
        image=dagster_docker_image,
        namespace="default",
        run_config=run_config,
        op_kwargs={
            "config_file": os.environ["KUBECONFIG"],
            # Forward AWS credentials into the launched pods.
            "env_vars": {
                "AWS_ACCESS_KEY_ID": os.environ["AWS_ACCESS_KEY_ID"],
                "AWS_SECRET_ACCESS_KEY": os.environ["AWS_SECRET_ACCESS_KEY"],
            },
        },
    )

    with pytest.raises(AirflowException) as exc_info:
        execute_tasks_in_dag(dag, tasks, run_id, execution_date)
    assert "Exception: Unusual error" in str(exc_info.value)
def test_s3_storage(dagster_airflow_k8s_operator_pipeline, dagster_docker_image, cluster_provider):
    """demo_pipeline with S3 storage, executed through the K8s airflow
    operator fixture; results are validated for full execution."""
    _check_aws_creds_available()
    environments_path = test_project_environments_path()

    results = dagster_airflow_k8s_operator_pipeline(
        pipeline_name="demo_pipeline",
        recon_repo=ReconstructableRepository.for_module(
            "dagster_test.test_project.test_pipelines.repo",
            "define_demo_execution_repo",
        ),
        environment_yaml=[
            os.path.join(environments_path, "env.yaml"),
            os.path.join(environments_path, "env_s3.yaml"),
        ],
        image=dagster_docker_image,
        op_kwargs={
            "config_file": os.environ["KUBECONFIG"],
            # Forward AWS credentials into the launched pods.
            "env_vars": {
                "AWS_ACCESS_KEY_ID": os.environ["AWS_ACCESS_KEY_ID"],
                "AWS_SECRET_ACCESS_KEY": os.environ["AWS_SECRET_ACCESS_KEY"],
            },
        },
    )
    validate_pipeline_execution(results)
def test_airflow_execution_date_tags():
    """The execution-date demo pipeline should materialize the Airflow
    execution_date; the materialized text must equal its ISO form."""
    pipeline_name = "demo_airflow_execution_date_pipeline"
    recon_repo = ReconstructableRepository.for_module(
        "dagster_test.test_project.test_pipelines.repo", pipeline_name
    )
    run_config = load_yaml_from_glob_list(
        [os.path.join(test_project_environments_path(), "env_filesystem.yaml")]
    )
    execution_date = timezone.utcnow()

    dag, tasks = make_airflow_dag_for_recon_repo(recon_repo, pipeline_name, run_config)
    results = execute_tasks_in_dag(
        dag, tasks, run_id=make_new_run_id(), execution_date=execution_date
    )

    # Scan all task results for the STEP_MATERIALIZATION event and pull the
    # text of its first metadata entry.
    materialized_date = None
    for events in results.values():
        for event in events:
            if event.event_type_value == "STEP_MATERIALIZATION":
                materialization = event.event_specific_data.materialization
                materialized_date = materialization.metadata_entries[0].entry_data.text

    assert execution_date.isoformat() == materialized_date
def test_error_dag_containerized(dagster_docker_image):  # pylint: disable=redefined-outer-name
    """The containerized airflow DAG for demo_error_pipeline should raise
    AirflowException carrying the solid's 'Unusual error' message."""
    pipeline_name = "demo_error_pipeline"
    recon_repo = ReconstructableRepository.for_module(
        "dagster_test.test_project.test_pipelines.repo", "define_demo_execution_repo"
    )
    run_config = load_yaml_from_glob_list(
        [os.path.join(test_project_environments_path(), "env_s3.yaml")]
    )
    run_id = make_new_run_id()
    execution_date = timezone.utcnow()

    with postgres_instance() as instance:
        dag, tasks = make_airflow_dag_containerized_for_recon_repo(
            recon_repo,
            pipeline_name,
            dagster_docker_image,
            run_config,
            instance=instance,
            # Join the test postgres container's network so the DB resolves.
            op_kwargs={"network_mode": "container:test-postgres-db-airflow"},
        )

        with pytest.raises(AirflowException) as exc_info:
            execute_tasks_in_dag(dag, tasks, run_id, execution_date)
        assert "Exception: Unusual error" in str(exc_info.value)
def test_s3_storage(dagster_airflow_k8s_operator_pipeline, dagster_docker_image, cluster_provider):
    """Legacy k8s-operator S3 storage test (prints a buildkite section
    header before running)."""
    print("--- :airflow: test_kubernetes.test_s3_storage")
    _check_aws_creds_available()
    environments_path = test_project_environments_path()

    results = dagster_airflow_k8s_operator_pipeline(
        pipeline_name="demo_pipeline",
        recon_repo=ReconstructableRepository.for_module(
            "dagster_test.test_project.test_pipelines.repo",
            "define_demo_execution_repo",
        ),
        environment_yaml=[
            os.path.join(environments_path, "env.yaml"),
            os.path.join(environments_path, "env_s3.yaml"),
        ],
        image=dagster_docker_image,
        op_kwargs={
            "config_file": os.environ["KUBECONFIG"],
            # Forward AWS credentials into the launched pods.
            "env_vars": {
                "AWS_ACCESS_KEY_ID": os.environ["AWS_ACCESS_KEY_ID"],
                "AWS_SECRET_ACCESS_KEY": os.environ["AWS_SECRET_ACCESS_KEY"],
            },
        },
    )
    validate_pipeline_execution(results)
def test_valid_job_format(run_launcher):
    """The launcher's job config should render to the expected Kubernetes
    Job YAML for the executeRunInProcess GraphQL entrypoint."""
    docker_image = test_project_docker_image()
    run_config = load_yaml_from_path(
        os.path.join(test_project_environments_path(), "env.yaml")
    )
    run = PipelineRun(pipeline_name="demo_pipeline", run_config=run_config)
    job_name = pod_name = "dagster-run-%s" % run.run_id

    job = construct_dagster_k8s_job(
        job_config=run_launcher.job_config,
        command=["dagster-graphql"],
        args=["-p", "executeRunInProcess", "-v", seven.json.dumps({"runId": run.run_id})],
        job_name=job_name,
        pod_name=pod_name,
        component="run_coordinator",
    )

    rendered = yaml.dump(remove_none_recursively(job.to_dict()), default_flow_style=False)
    expected = EXPECTED_JOB_SPEC.format(
        run_id=run.run_id,
        job_image=docker_image,
        image_pull_policy=image_pull_policy(),
        dagster_version=dagster_version,
        resources="",
    )
    assert rendered.strip() == expected.strip()
def test_valid_job_format(run_launcher):
    """Legacy variant: the GraphQL job constructor should render to the
    expected Kubernetes Job YAML for startPipelineExecutionForCreatedRun."""
    docker_image = test_project_docker_image()
    environment_dict = load_yaml_from_path(
        os.path.join(test_project_environments_path(), "env.yaml")
    )
    run = PipelineRun(pipeline_name="demo_pipeline", environment_dict=environment_dict)
    job_name = pod_name = "dagster-run-%s" % run.run_id

    job = construct_dagster_graphql_k8s_job(
        run_launcher.job_config,
        args=[
            "-p",
            "startPipelineExecutionForCreatedRun",
            "-v",
            seven.json.dumps({"runId": run.run_id}),
        ],
        job_name=job_name,
        pod_name=pod_name,
        component="runmaster",
    )

    rendered = yaml.dump(remove_none_recursively(job.to_dict()), default_flow_style=False)
    expected = EXPECTED_JOB_SPEC.format(
        run_id=run.run_id,
        job_image=docker_image,
        image_pull_policy=image_pull_policy(),
        dagster_version=dagster_version,
    )
    assert rendered.strip() == expected.strip()
def test_airflow_execution_date_tags_containerized(
    dagster_docker_image,
):  # pylint: disable=redefined-outer-name, unused-argument
    """Containerized variant: the materialized text metadata should equal the
    DAG's execution_date in ISO format."""
    pipeline_name = "demo_airflow_execution_date_pipeline"
    recon_repo = ReconstructableRepository.for_module(
        "dagster_test.test_project.test_pipelines.repo", "define_demo_execution_repo"
    )
    run_config = load_yaml_from_glob_list(
        [os.path.join(test_project_environments_path(), "env_s3.yaml")]
    )
    execution_date = timezone.utcnow()

    dag, tasks = make_airflow_dag_containerized_for_recon_repo(
        recon_repo, pipeline_name, dagster_docker_image, run_config
    )
    results = execute_tasks_in_dag(
        dag, tasks, run_id=make_new_run_id(), execution_date=execution_date
    )

    # Find the STEP_MATERIALIZATION event and read its first metadata entry.
    materialized_date = None
    for events in results.values():
        for event in events:
            if event.event_type_value == "STEP_MATERIALIZATION":
                materialization = event.event_specific_data.materialization
                materialized_date = materialization.metadata_entries[0].entry_data.text

    assert execution_date.isoformat() == materialized_date
def test_error_dag_k8s(dagster_docker_image, cluster_provider):
    """Legacy (handle/environment_dict era) kubernetized error-DAG test; the
    solid failure should surface as an AirflowException."""
    print("--- :airflow: test_kubernetes.test_error_dag_k8s")
    _check_aws_creds_available()

    pipeline_name = "demo_error_pipeline"
    handle = ReconstructableRepository.for_module("test_pipelines.repo", pipeline_name)
    environment_dict = load_yaml_from_glob_list(
        [os.path.join(test_project_environments_path(), "env_s3.yaml")]
    )
    run_id = make_new_run_id()
    execution_date = timezone.utcnow()

    dag, tasks = make_airflow_dag_kubernetized_for_handle(
        handle=handle,
        pipeline_name=pipeline_name,
        image=dagster_docker_image,
        namespace="default",
        environment_dict=environment_dict,
        op_kwargs={
            "config_file": os.environ["KUBECONFIG"],
            # Forward AWS credentials into the launched pods.
            "env_vars": {
                "AWS_ACCESS_KEY_ID": os.environ["AWS_ACCESS_KEY_ID"],
                "AWS_SECRET_ACCESS_KEY": os.environ["AWS_SECRET_ACCESS_KEY"],
            },
        },
    )

    with pytest.raises(AirflowException) as exc_info:
        execute_tasks_in_dag(dag, tasks, run_id, execution_date)
    assert "Exception: Unusual error" in str(exc_info.value)
def test_k8s_run_launcher_default(
    dagster_instance_for_k8s_run_launcher, helm_namespace_for_k8s_run_launcher
):
    """demo_pipeline should succeed under the plain K8sRunLauncher, with no
    celery worker pods present in the namespace."""
    # Sanity check: the instance must be configured with a K8sRunLauncher.
    check.inst(dagster_instance_for_k8s_run_launcher.run_launcher, K8sRunLauncher)

    # The k8s-run-launcher helm deployment must not spin up celery workers.
    pods = DagsterKubernetesClient.production_client().core_api.list_namespaced_pod(
        namespace=helm_namespace_for_k8s_run_launcher
    )
    celery_pod_names = [
        pod.metadata.name for pod in pods.items if "celery-workers" in pod.metadata.name
    ]
    check.invariant(not celery_pod_names)

    run_config = load_yaml_from_path(
        os.path.join(test_project_environments_path(), "env.yaml")
    )
    pipeline_name = "demo_pipeline"
    run = create_run_for_test(
        dagster_instance_for_k8s_run_launcher,
        pipeline_name=pipeline_name,
        run_config=run_config,
        tags={"key": "value"},
        mode="default",
    )
    dagster_instance_for_k8s_run_launcher.launch_run(
        run.run_id,
        ReOriginatedExternalPipelineForTest(
            get_test_project_external_pipeline(pipeline_name)
        ),
    )

    logs = wait_for_job_and_get_raw_logs(
        job_name="dagster-run-%s" % run.run_id,
        namespace=helm_namespace_for_k8s_run_launcher,
    )
    assert "PIPELINE_SUCCESS" in logs, "no match, result: {}".format(logs)
def test_valid_job_format(run_launcher):
    """The launcher's job config should render to the expected Kubernetes
    Job YAML for the structured-logs CLI entrypoint."""
    docker_image = test_project_docker_image()
    run_config = load_yaml_from_path(
        os.path.join(test_project_environments_path(), "env.yaml")
    )
    run = PipelineRun(pipeline_name="demo_pipeline", run_config=run_config)
    job_name = pod_name = "dagster-run-%s" % run.run_id

    job = construct_dagster_k8s_job(
        job_config=run_launcher.job_config,
        command=["dagster"],
        args=["api", "execute_run_with_structured_logs"],
        job_name=job_name,
        pod_name=pod_name,
        component="run_coordinator",
    )

    rendered = yaml.dump(remove_none_recursively(job.to_dict()), default_flow_style=False)
    expected = EXPECTED_JOB_SPEC.format(
        run_id=run.run_id,
        job_image=docker_image,
        image_pull_policy=image_pull_policy(),
        dagster_version=dagster_version,
        resources="",
    )
    assert rendered.strip() == expected.strip()
def test_my_custom_operator(
    dagster_airflow_custom_operator_pipeline, caplog,
):  # pylint: disable=redefined-outer-name
    """demo_pipeline run through CustomOperator should execute fully and log
    exactly two 'CustomOperator is called' lines."""
    caplog.set_level(logging.INFO, logger="CustomOperatorLogger")

    pipeline_name = "demo_pipeline"
    environments_path = test_project_environments_path()
    results = dagster_airflow_custom_operator_pipeline(
        pipeline_name=pipeline_name,
        handle=ReconstructableRepository.for_module("test_pipelines.repo", pipeline_name),
        operator=CustomOperator,
        environment_yaml=[
            os.path.join(environments_path, "env.yaml"),
            os.path.join(environments_path, "env_filesystem_no_explicit_base_dir.yaml"),
        ],
    )
    validate_pipeline_execution(results)

    # The custom operator logs once per task; this pipeline has two tasks.
    custom_records = [r for r in caplog.records if r.name == "CustomOperatorLogger"]
    for record in custom_records:
        assert record.message == "CustomOperator is called"
    assert len(custom_records) == 2
def test_execute_on_celery_resource_requirements(  # pylint: disable=redefined-outer-name
    dagster_docker_image, dagster_instance, helm_namespace
):
    """resources_limit_pipeline_celery should succeed on celery-k8s with an
    inline executor config (no broker/backend overrides)."""
    run_config = merge_dicts(
        merge_yamls([os.path.join(test_project_environments_path(), "env_s3.yaml")]),
        {
            "execution": {
                "celery-k8s": {
                    "config": {
                        "job_image": dagster_docker_image,
                        "job_namespace": helm_namespace,
                        "image_pull_policy": "Always",
                        "env_config_maps": ["dagster-pipeline-env"],
                    }
                }
            },
        },
    )

    pipeline_name = "resources_limit_pipeline_celery"
    run = create_run_for_test(
        dagster_instance,
        pipeline_name=pipeline_name,
        run_config=run_config,
        mode="default",
    )
    dagster_instance.launch_run(
        run.run_id, get_test_project_external_pipeline(pipeline_name)
    )

    result = wait_for_job_and_get_logs(
        job_name="dagster-run-%s" % run.run_id, namespace=helm_namespace
    )
    assert not result.get("errors")
    assert result["data"]
    assert (
        result["data"]["executeRunInProcess"]["__typename"]
        == "ExecuteRunInProcessSuccess"
    ), "no match, result: {}".format(result)
def test_execute_on_celery_k8s_retry_pipeline(  # pylint: disable=redefined-outer-name
    dagster_docker_image, dagster_instance, helm_namespace
):
    """retry_pipeline should succeed after one retry, emitting the full
    start / up-for-retry / restarted / success step event sequence."""
    run_config = merge_dicts(
        merge_yamls([os.path.join(test_project_environments_path(), "env_s3.yaml")]),
        get_celery_engine_config(
            dagster_docker_image=dagster_docker_image, job_namespace=helm_namespace
        ),
    )

    pipeline_name = "retry_pipeline"
    run = create_run_for_test(
        dagster_instance,
        pipeline_name=pipeline_name,
        run_config=run_config,
        mode="default",
    )
    dagster_instance.launch_run(
        run.run_id,
        ReOriginatedExternalPipelineForTest(
            get_test_project_external_pipeline(pipeline_name)
        ),
    )

    logs = wait_for_job_and_get_raw_logs(
        job_name="dagster-run-%s" % run.run_id, namespace=helm_namespace
    )
    assert "PIPELINE_SUCCESS" in logs, "no match, result: {}".format(logs)

    stats = dagster_instance.get_run_stats(run.run_id)
    assert stats.steps_succeeded == 1

    # Query the event log once and check all expected step events against it,
    # instead of re-reading the log for each membership test.
    observed_event_types = [
        record.dagster_event.event_type
        for record in dagster_instance.all_logs(run.run_id)
        if record.is_dagster_event
    ]
    for expected_event_type in (
        DagsterEventType.STEP_START,
        DagsterEventType.STEP_UP_FOR_RETRY,
        DagsterEventType.STEP_RESTARTED,
        DagsterEventType.STEP_SUCCESS,
    ):
        assert expected_event_type in observed_event_types
def test_execute_on_celery_k8s_with_termination(  # pylint: disable=redefined-outer-name
    dagster_docker_image, dagster_instance, helm_namespace
):
    """Run-termination behavior on celery-k8s; the actual assertions live in
    the shared _test_termination helper."""
    run_config = merge_dicts(
        merge_yamls([os.path.join(test_project_environments_path(), "env_s3.yaml")]),
        get_celery_engine_config(
            dagster_docker_image=dagster_docker_image, job_namespace=helm_namespace
        ),
    )
    _test_termination(dagster_instance, run_config)
def test_valid_job_format_with_backcompat_resources(run_launcher):
    """Legacy K8S_RESOURCE_REQUIREMENTS_KEY tags should still flow through
    get_user_defined_k8s_config into the rendered Job's container resources."""
    docker_image = test_project_docker_image()
    run_config = load_yaml_from_path(
        os.path.join(test_project_environments_path(), "env.yaml")
    )
    run = PipelineRun(pipeline_name="demo_pipeline", run_config=run_config)

    tags = validate_tags(
        {
            K8S_RESOURCE_REQUIREMENTS_KEY: (
                {
                    "requests": {"cpu": "250m", "memory": "64Mi"},
                    "limits": {"cpu": "500m", "memory": "2560Mi"},
                }
            )
        }
    )
    user_defined_k8s_config = get_user_defined_k8s_config(tags)
    job_name = pod_name = "dagster-run-%s" % run.run_id

    job = construct_dagster_k8s_job(
        job_config=run_launcher.job_config,
        command=["dagster-graphql"],
        args=["-p", "executeRunInProcess", "-v", seven.json.dumps({"runId": run.run_id})],
        job_name=job_name,
        user_defined_k8s_config=user_defined_k8s_config,
        pod_name=pod_name,
        component="run_coordinator",
    )

    rendered = yaml.dump(remove_none_recursively(job.to_dict()), default_flow_style=False)
    # NOTE(review): the indentation inside this literal must line up with the
    # container spec in EXPECTED_JOB_SPEC — confirm against that fixture.
    expected = EXPECTED_JOB_SPEC.format(
        run_id=run.run_id,
        job_image=docker_image,
        image_pull_policy=image_pull_policy(),
        dagster_version=dagster_version,
        resources="""
            resources:
              limits:
                cpu: 500m
                memory: 2560Mi
              requests:
                cpu: 250m
                memory: 64Mi""",
    )
    assert rendered.strip() == expected.strip()
def test_skip_operator(
    dagster_airflow_python_operator_pipeline,
):  # pylint: disable=redefined-outer-name
    """optional_outputs pipeline: downstream solids without inputs should be
    skipped, which validate_skip_pipeline_execution verifies."""
    pipeline_name = "optional_outputs"
    environments_path = test_project_environments_path()
    results = dagster_airflow_python_operator_pipeline(
        pipeline_name=pipeline_name,
        recon_repo=ReconstructableRepository.for_module("test_pipelines.repo", pipeline_name),
        environment_yaml=[os.path.join(environments_path, "env_filesystem.yaml")],
    )
    validate_skip_pipeline_execution(results)
def test_valid_job_format_with_resources(run_launcher):
    """K8S_RESOURCE_REQUIREMENTS_KEY tags should flow through
    get_k8s_resource_requirements into the rendered Job's container spec."""
    docker_image = test_project_docker_image()
    run_config = load_yaml_from_path(
        os.path.join(test_project_environments_path(), "env.yaml")
    )
    run = PipelineRun(pipeline_name="demo_pipeline", run_config=run_config)

    tags = validate_tags(
        {
            K8S_RESOURCE_REQUIREMENTS_KEY: (
                {
                    "requests": {"cpu": "250m", "memory": "64Mi"},
                    "limits": {"cpu": "500m", "memory": "2560Mi"},
                }
            )
        }
    )
    resources = get_k8s_resource_requirements(tags)
    job_name = pod_name = "dagster-run-%s" % run.run_id

    job = construct_dagster_k8s_job(
        job_config=run_launcher.job_config,
        command=["dagster-graphql"],
        args=["-p", "executeRunInProcess", "-v", seven.json.dumps({"runId": run.run_id})],
        job_name=job_name,
        resources=resources,
        pod_name=pod_name,
        component="runmaster",
    )

    rendered = yaml.dump(remove_none_recursively(job.to_dict()), default_flow_style=False)
    # NOTE(review): the indentation inside this literal must line up with the
    # container spec in EXPECTED_JOB_SPEC — confirm against that fixture.
    expected = EXPECTED_JOB_SPEC.format(
        run_id=run.run_id,
        job_image=docker_image,
        image_pull_policy=image_pull_policy(),
        dagster_version=dagster_version,
        resources="""
            resources:
              limits:
                cpu: 500m
                memory: 2560Mi
              requests:
                cpu: 250m
                memory: 64Mi""",
    )
    assert rendered.strip() == expected.strip()
def test_execute_on_celery_k8s_with_hard_failure(  # pylint: disable=redefined-outer-name
    dagster_docker_image, dagster_instance, set_dagster_k8s_pipeline_run_namespace_env
):
    """hard_failer configured to fail should mark the run FAILURE and record a
    STEP_FAILURE event for hard_fail_or_0.compute.

    Fix: the step-failure polling loop only broke out of the inner ``for``;
    the ``while`` kept re-polling until the full 120s timeout elapsed even
    after the failure event was found. An early exit is added.
    """
    run_config = merge_dicts(
        merge_dicts(
            merge_yamls([os.path.join(test_project_environments_path(), "env_s3.yaml")]),
            get_celery_engine_config(
                dagster_docker_image=dagster_docker_image,
                # Namespace is resolved at runtime from the env var set by the
                # set_dagster_k8s_pipeline_run_namespace_env fixture.
                job_namespace={"env": "DAGSTER_K8S_PIPELINE_RUN_NAMESPACE"},
            ),
        ),
        {"solids": {"hard_fail_or_0": {"config": {"fail": True}}}},
    )

    pipeline_name = "hard_failer"
    run = create_run_for_test(
        dagster_instance,
        pipeline_name=pipeline_name,
        run_config=run_config,
        mode="default",
    )
    dagster_instance.launch_run(
        run.run_id,
        ReOriginatedExternalPipelineForTest(get_test_project_external_pipeline(pipeline_name)),
    )
    assert isinstance(dagster_instance.run_launcher, CeleryK8sRunLauncher)

    # Poll (up to 120s) until the pipeline run is marked as failed.
    pipeline_run_status_failure = False
    timeout = datetime.timedelta(0, 120)
    start_time = datetime.datetime.now()
    while datetime.datetime.now() < start_time + timeout:
        pipeline_run = dagster_instance.get_run_by_id(run.run_id)
        if pipeline_run.status == PipelineRunStatus.FAILURE:
            pipeline_run_status_failure = True
            break
        time.sleep(5)
    assert pipeline_run_status_failure

    # Poll (up to 120s) for the step failure event of hard_fail_or_0.compute.
    step_failure_found = False
    start_time = datetime.datetime.now()
    while datetime.datetime.now() < start_time + timeout:
        for event_record in dagster_instance.all_logs(run.run_id):
            if (
                event_record.dagster_event
                and event_record.dagster_event.event_type == DagsterEventType.STEP_FAILURE
                and event_record.dagster_event.step_key == "hard_fail_or_0.compute"
            ):
                step_failure_found = True
                break
        # BUG FIX: previously only the inner for-loop broke, so polling
        # continued until the timeout even after the event was found.
        if step_failure_found:
            break
        time.sleep(5)
    assert step_failure_found
def test_execute_on_celery_k8s_with_env_var_and_termination(  # pylint: disable=redefined-outer-name
    dagster_docker_image, dagster_instance, set_dagster_k8s_pipeline_run_namespace_env
):
    """Termination test with the job namespace resolved from an env var set
    by the fixture; assertions live in _test_termination."""
    run_config = merge_dicts(
        merge_yamls([os.path.join(test_project_environments_path(), "env_s3.yaml")]),
        get_celery_engine_config(
            dagster_docker_image=dagster_docker_image,
            job_namespace={"env": "DAGSTER_K8S_PIPELINE_RUN_NAMESPACE"},
        ),
    )
    _test_termination(dagster_instance, run_config)
def test_gcs_storage(dagster_airflow_python_operator_pipeline, ):  # pylint: disable=redefined-outer-name
    """demo_pipeline_gcs with GCS storage, executed through the python
    airflow operator fixture; results are validated for full execution."""
    pipeline_name = "demo_pipeline_gcs"
    environments_path = test_project_environments_path()
    results = dagster_airflow_python_operator_pipeline(
        pipeline_name=pipeline_name,
        handle=ReconstructableRepository.for_module("test_pipelines.repo", pipeline_name),
        environment_yaml=[
            os.path.join(environments_path, "env.yaml"),
            os.path.join(environments_path, "env_gcs.yaml"),
        ],
    )
    validate_pipeline_execution(results)
def test_s3_storage(dagster_airflow_python_operator_pipeline, ):  # pylint: disable=redefined-outer-name
    """demo_pipeline with S3 storage, executed through the python airflow
    operator fixture; results are validated for full execution."""
    pipeline_name = "demo_pipeline"
    environments_path = test_project_environments_path()
    results = dagster_airflow_python_operator_pipeline(
        pipeline_name=pipeline_name,
        recon_repo=ReconstructableRepository.for_module(
            "dagster_test.test_project.test_pipelines.repo", pipeline_name
        ),
        environment_yaml=[
            os.path.join(environments_path, "env.yaml"),
            os.path.join(environments_path, "env_s3.yaml"),
        ],
    )
    validate_pipeline_execution(results)