def test_execute_on_celery_k8s_default(  # pylint: disable=redefined-outer-name
    dagster_docker_image, dagster_instance, helm_namespace
):
    run_config = merge_dicts(
        merge_yamls(
            [
                os.path.join(get_test_project_environments_path(), "env.yaml"),
                os.path.join(get_test_project_environments_path(), "env_s3.yaml"),
            ]
        ),
        get_celery_engine_config(
            dagster_docker_image=dagster_docker_image, job_namespace=helm_namespace
        ),
    )

    pipeline_name = "demo_pipeline_celery"

    run = create_run_for_test(
        dagster_instance,
        pipeline_name=pipeline_name,
        run_config=run_config,
        mode="default",
    )

    with get_test_project_external_pipeline(pipeline_name) as external_pipeline:
        dagster_instance.launch_run(
            run.run_id,
            ReOriginatedExternalPipelineForTest(external_pipeline),
        )

        result = wait_for_job_and_get_raw_logs(
            job_name="dagster-run-%s" % run.run_id, namespace=helm_namespace
        )

        assert "PIPELINE_SUCCESS" in result, "no match, result: {}".format(result)

        updated_run = dagster_instance.get_run_by_id(run.run_id)
        assert updated_run.tags[DOCKER_IMAGE_TAG] == dagster_docker_image

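# These tests depend on a get_celery_engine_config helper whose body is not shown here. A minimal
# sketch of the config shape it plausibly returns, mirroring the inline dict used in
# test_execute_on_celery_resource_requirements further below; the helper name and exact keys
# are assumptions for illustration only:
def _sketch_get_celery_engine_config(dagster_docker_image, job_namespace):  # hypothetical
    return {
        "execution": {
            "celery-k8s": {
                "config": {
                    "job_image": dagster_docker_image,  # image used for the launched k8s jobs
                    "job_namespace": job_namespace,  # namespace where the jobs are created
                    "image_pull_policy": "Always",
                }
            }
        }
    }
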
def test_k8s_run_launcher_celery(dagster_instance, helm_namespace):
    run_config = merge_dicts(
        merge_yamls(
            [
                os.path.join(test_project_environments_path(), 'env.yaml'),
                os.path.join(test_project_environments_path(), 'env_s3.yaml'),
            ]
        ),
        get_celery_engine_config(),
    )

    assert 'celery-k8s' in run_config['execution']

    pipeline_name = 'demo_pipeline_celery'
    tags = {'key': 'value'}

    run = create_run_for_test(
        dagster_instance,
        pipeline_name=pipeline_name,
        run_config=run_config,
        tags=tags,
        mode='default',
    )

    dagster_instance.launch_run(run.run_id, get_test_project_external_pipeline(pipeline_name))

    result = wait_for_job_and_get_logs(
        job_name='dagster-run-%s' % run.run_id, namespace=helm_namespace
    )

    assert not result.get('errors')
    assert result['data']
    assert (
        result['data']['executeRunInProcess']['__typename'] == 'ExecuteRunInProcessSuccess'
    ), 'no match, result: {}'.format(result)

def environment_dict(self):
    if self.environment_files is None:
        return None

    file_set = set()
    for file_glob in self.environment_files:
        files = glob(file_glob)
        if not files:
            raise DagsterInvalidDefinitionError(
                'File or glob pattern "{file_glob}" for "environment_files" in preset '
                '"{name}" for pipeline "{pipeline_name}" produced no results.'.format(
                    name=self.name, file_glob=file_glob, pipeline_name=self.pipeline_name
                )
            )
        file_set.update(map(os.path.realpath, files))

    try:
        merged = merge_yamls(list(file_set))
    except yaml.YAMLError as err:
        six.raise_from(
            DagsterInvariantViolationError(
                'Encountered error attempting to parse yaml. Parsing files {file_set} '
                'loaded by file/patterns {files} on preset "{name}" for pipeline '
                '"{pipeline_name}".'.format(
                    file_set=file_set,
                    files=self.environment_files,
                    name=self.name,
                    pipeline_name=self.pipeline_name,
                )
            ),
            err,
        )

    return merged

def test_execute_on_celery(  # pylint: disable=redefined-outer-name
    dagster_docker_image, dagster_instance, helm_namespace
):
    run_config = merge_dicts(
        merge_yamls(
            [
                os.path.join(test_project_environments_path(), 'env.yaml'),
                os.path.join(test_project_environments_path(), 'env_s3.yaml'),
            ]
        ),
        get_celery_engine_config(
            dagster_docker_image=dagster_docker_image, helm_namespace=helm_namespace
        ),
    )

    pipeline_name = 'demo_pipeline_celery'

    run = create_run_for_test(
        dagster_instance,
        pipeline_name=pipeline_name,
        run_config=run_config,
        mode='default',
    )

    dagster_instance.launch_run(run.run_id, get_test_project_external_pipeline(pipeline_name))

    result = wait_for_job_and_get_logs(
        job_name='dagster-run-%s' % run.run_id, namespace=helm_namespace
    )

    assert not result.get('errors')
    assert result['data']
    assert (
        result['data']['executeRunInProcess']['__typename'] == 'ExecuteRunInProcessSuccess'
    ), 'no match, result: {}'.format(result)

def test_k8s_run_launcher_celery(dagster_instance, helm_namespace):
    environment_dict = merge_dicts(
        merge_yamls(
            [
                os.path.join(test_project_environments_path(), 'env.yaml'),
                os.path.join(test_project_environments_path(), 'env_s3.yaml'),
            ]
        ),
        get_celery_engine_config(),
    )

    assert 'celery-k8s' in environment_dict['execution']

    pipeline_name = 'demo_pipeline_celery'
    tags = {'key': 'value'}

    run = create_run_for_test(
        dagster_instance,
        pipeline_name=pipeline_name,
        environment_dict=environment_dict,
        tags=tags,
        mode='default',
    )

    dagster_instance.launch_run(run.run_id)

    result = wait_for_job_and_get_logs(
        job_name='dagster-run-%s' % run.run_id, namespace=helm_namespace
    )

    assert not result.get('errors')
    assert result['data']
    assert (
        result['data']['startPipelineExecutionForCreatedRun']['__typename']
        == 'StartPipelineRunSuccess'
    )

def test_execute_on_celery_resource_requirements(  # pylint: disable=redefined-outer-name
    dagster_docker_image, dagster_instance, helm_namespace
):
    run_config = merge_dicts(
        merge_yamls([os.path.join(test_project_environments_path(), 'env_s3.yaml')]),
        {
            'execution': {
                'celery-k8s': {
                    'config': {
                        'job_image': dagster_docker_image,
                        'job_namespace': helm_namespace,
                        'image_pull_policy': 'Always',
                        'env_config_maps': ['dagster-pipeline-env'],
                    }
                }
            },
        },
    )

    pipeline_name = 'resources_limit_pipeline_celery'

    run = create_run_for_test(
        dagster_instance,
        pipeline_name=pipeline_name,
        run_config=run_config,
        mode='default',
    )

    dagster_instance.launch_run(run.run_id, get_test_project_external_pipeline(pipeline_name))

    result = wait_for_job_and_get_logs(
        job_name='dagster-run-%s' % run.run_id, namespace=helm_namespace
    )

    assert not result.get('errors')
    assert result['data']
    assert (
        result['data']['executeRunInProcess']['__typename'] == 'ExecuteRunInProcessSuccess'
    ), 'no match, result: {}'.format(result)

def from_files(name, environment_files=None, solid_subset=None, mode=None):
    check.str_param(name, 'name')
    environment_files = check.opt_list_param(environment_files, 'environment_files')
    solid_subset = check.opt_nullable_list_param(solid_subset, 'solid_subset', of_type=str)
    mode = check.opt_str_param(mode, 'mode', DEFAULT_MODE_NAME)

    file_set = set()
    for file_glob in environment_files or []:
        files = glob(file_glob)
        if not files:
            raise DagsterInvalidDefinitionError(
                'File or glob pattern "{file_glob}" for "environment_files" in preset '
                '"{name}" produced no results.'.format(name=name, file_glob=file_glob)
            )
        file_set.update(map(os.path.realpath, files))

    try:
        merged = merge_yamls(list(file_set))
    except yaml.YAMLError as err:
        six.raise_from(
            DagsterInvariantViolationError(
                'Encountered error attempting to parse yaml. Parsing files {file_set} '
                'loaded by file/patterns {files} on preset "{name}".'.format(
                    file_set=file_set, files=environment_files, name=name
                )
            ),
            err,
        )

    return PresetDefinition(name, merged, solid_subset, mode)

def test_execute_subset_on_celery_k8s(  # pylint: disable=redefined-outer-name
    dagster_docker_image, dagster_instance, helm_namespace
):
    run_config = merge_dicts(
        merge_yamls(
            [
                os.path.join(get_test_project_environments_path(), "env_subset.yaml"),
                os.path.join(get_test_project_environments_path(), "env_s3.yaml"),
            ]
        ),
        get_celery_engine_config(
            dagster_docker_image=dagster_docker_image, job_namespace=helm_namespace
        ),
    )

    pipeline_name = "demo_pipeline_celery"

    run = create_run_for_test(
        dagster_instance,
        pipeline_name=pipeline_name,
        run_config=run_config,
        mode="default",
        solids_to_execute={"count_letters"},
    )

    dagster_instance.launch_run(
        run.run_id,
        ReOriginatedExternalPipelineForTest(get_test_project_external_pipeline(pipeline_name)),
    )

    result = wait_for_job_and_get_raw_logs(
        job_name="dagster-run-%s" % run.run_id, namespace=helm_namespace
    )

    assert "PIPELINE_SUCCESS" in result, "no match, result: {}".format(result)

def test_execute_on_celery_k8s(  # pylint: disable=redefined-outer-name
    dagster_docker_image, dagster_instance, helm_namespace
):
    run_config = merge_dicts(
        merge_yamls(
            [
                os.path.join(test_project_environments_path(), 'env.yaml'),
                os.path.join(test_project_environments_path(), 'env_s3.yaml'),
            ]
        ),
        get_celery_engine_config(
            dagster_docker_image=dagster_docker_image, job_namespace=helm_namespace
        ),
    )

    pipeline_name = 'demo_pipeline_celery'

    run = create_run_for_test(
        dagster_instance,
        pipeline_name=pipeline_name,
        run_config=run_config,
        mode='default',
    )

    dagster_instance.launch_run(
        run.run_id,
        ReOriginatedExternalPipelineForTest(get_test_project_external_pipeline(pipeline_name)),
    )

    result = wait_for_job_and_get_raw_logs(
        job_name='dagster-run-%s' % run.run_id, namespace=helm_namespace
    )

    assert 'PIPELINE_SUCCESS' in result, 'no match, result: {}'.format(result)

def test_execute_on_celery_k8s_default(  # pylint: disable=redefined-outer-name
    dagster_docker_image,
    dagster_instance,
    helm_namespace,
    dagit_url,
):
    run_config = merge_dicts(
        merge_yamls(
            [
                os.path.join(get_test_project_environments_path(), "env.yaml"),
                os.path.join(get_test_project_environments_path(), "env_s3.yaml"),
            ]
        ),
        get_celery_engine_config(
            dagster_docker_image=dagster_docker_image, job_namespace=helm_namespace
        ),
    )

    run_id = launch_run_over_graphql(
        dagit_url, run_config=run_config, pipeline_name="demo_pipeline_celery"
    )

    result = wait_for_job_and_get_raw_logs(
        job_name="dagster-run-%s" % run_id, namespace=helm_namespace
    )

    assert "PIPELINE_SUCCESS" in result, "no match, result: {}".format(result)

    updated_run = dagster_instance.get_run_by_id(run_id)
    assert updated_run.tags[DOCKER_IMAGE_TAG] == dagster_docker_image

def test_execute_on_celery_k8s_job_api_with_legacy_configmap_set(  # pylint: disable=redefined-outer-name
    dagster_docker_image, dagster_instance, helm_namespace, dagit_url
):
    # Originally, jobs needed to include "dagster-pipeline-env" to pick up needed config when
    # using the helm chart - it's no longer needed, but verify that nothing breaks if it's included
    run_config = merge_dicts(
        merge_yamls(
            [
                os.path.join(get_test_project_environments_path(), "env.yaml"),
                os.path.join(get_test_project_environments_path(), "env_s3.yaml"),
            ]
        ),
        get_celery_job_engine_config(
            dagster_docker_image=dagster_docker_image,
            job_namespace=helm_namespace,
            include_dagster_pipeline_env=True,
        ),
    )

    run_id = launch_run_over_graphql(
        dagit_url, run_config=run_config, pipeline_name="demo_job_celery"
    )

    result = wait_for_job_and_get_raw_logs(
        job_name="dagster-run-%s" % run_id, namespace=helm_namespace
    )

    assert "PIPELINE_SUCCESS" in result, "no match, result: {}".format(result)

    updated_run = dagster_instance.get_run_by_id(run_id)
    assert updated_run.tags[DOCKER_IMAGE_TAG] == dagster_docker_image

def test_run_monitoring_fails_on_interrupt(  # pylint: disable=redefined-outer-name
    dagster_docker_image, dagster_instance, helm_namespace, dagit_url
):
    run_config = merge_dicts(
        merge_yamls(
            [
                os.path.join(get_test_project_environments_path(), "env.yaml"),
                os.path.join(get_test_project_environments_path(), "env_s3.yaml"),
            ]
        ),
        get_celery_job_engine_config(
            dagster_docker_image=dagster_docker_image, job_namespace=helm_namespace
        ),
    )

    pipeline_name = "demo_job_celery"

    try:
        run_id = launch_run_over_graphql(
            dagit_url, run_config=run_config, pipeline_name=pipeline_name
        )

        start_time = time.time()
        while time.time() - start_time < 60:
            run = dagster_instance.get_run_by_id(run_id)
            if run.status == PipelineRunStatus.STARTED:
                break
            assert run.status == PipelineRunStatus.STARTING
            time.sleep(1)

        assert delete_job(get_job_name_from_run_id(run_id), helm_namespace)

        poll_for_finished_run(dagster_instance, run.run_id, timeout=120)
        assert dagster_instance.get_run_by_id(run_id).status == PipelineRunStatus.FAILURE
    finally:
        log_run_events(dagster_instance, run_id)

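# get_job_name_from_run_id (used above) presumably maps a run id to its Kubernetes job name.
# A sketch consistent with the "dagster-run-%s" naming used throughout these tests (an
# assumption; the real helper's body is not shown here):
def _sketch_get_job_name_from_run_id(run_id):  # hypothetical
    return "dagster-run-%s" % run_id
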
def test_k8s_run_launcher_celery(dagster_instance, helm_namespace):
    environment_dict = merge_dicts(
        merge_yamls(
            [
                os.path.join(test_project_environments_path(), 'env.yaml'),
                os.path.join(test_project_environments_path(), 'env_filesystem.yaml'),
            ]
        ),
        get_celery_engine_config(),
    )

    assert 'celery' in environment_dict['execution']

    pipeline_name = 'demo_pipeline_celery'
    tags = {'key': 'value'}

    run = dagster_instance.create_run(
        pipeline_name=pipeline_name, environment_dict=environment_dict, tags=tags, mode='default'
    )

    dagster_instance.launch_run(run.run_id)

    result = wait_for_job_and_get_logs(
        job_name='dagster-run-%s' % run.run_id, namespace=helm_namespace
    )

    assert not result.get('errors')
    assert result['data']
    # This is not an ideal test, but it proves that Celery was configured properly;
    # getting the run to succeed would involve relying on S3 / GCS for storage.
    assert result['data']['startPipelineExecutionForCreatedRun']['__typename'] == 'PythonError'
    assert (
        'Must use S3 or GCS storage with non-local Celery broker: '
        'pyamqp://test:test@dagster-rabbitmq:5672// and backend: amqp'
        in result['data']['startPipelineExecutionForCreatedRun']['message']
    )

def test_execute_queued_run_on_celery_k8s(  # pylint: disable=redefined-outer-name
    dagster_docker_image,
    dagster_instance_for_daemon,
    helm_namespace_for_daemon,
    dagit_url_for_daemon,
):
    run_config = merge_dicts(
        merge_yamls(
            [
                os.path.join(get_test_project_environments_path(), "env.yaml"),
                os.path.join(get_test_project_environments_path(), "env_s3.yaml"),
            ]
        ),
        get_celery_engine_config(
            dagster_docker_image=dagster_docker_image,
            job_namespace=helm_namespace_for_daemon,
        ),
    )

    run_id = launch_run_over_graphql(
        dagit_url_for_daemon, run_config=run_config, pipeline_name="demo_pipeline_celery"
    )

    wait_for_job_and_get_raw_logs(
        job_name="dagster-run-%s" % run_id, namespace=helm_namespace_for_daemon
    )

    logs = dagster_instance_for_daemon.all_logs(run_id)

    assert_events_in_order(
        logs,
        ["PIPELINE_ENQUEUED", "PIPELINE_DEQUEUED", "PIPELINE_STARTING", "PIPELINE_SUCCESS"],
    )

def test_k8s_run_launcher_celery(dagster_instance, helm_namespace):
    environment_dict = merge_dicts(
        merge_yamls(
            [
                os.path.join(test_project_environments_path(), 'env.yaml'),
                os.path.join(test_project_environments_path(), 'env_filesystem.yaml'),
            ]
        ),
        get_celery_engine_config(),
    )

    assert 'celery' in environment_dict['execution']

    pipeline_name = 'demo_pipeline_celery'
    tags = {'key': 'value'}

    run = dagster_instance.create_run(
        pipeline_name=pipeline_name, environment_dict=environment_dict, tags=tags, mode='default'
    )

    dagster_instance.launch_run(run.run_id)

    result = wait_for_job_and_get_logs(
        job_name='dagster-run-%s' % run.run_id, namespace=helm_namespace
    )

    assert not result.get('errors')
    assert result['data']
    # This is not an ideal test, but it proves that we got Celery configured properly.
    # We detect that by seeing an error that we know happens after config.
    assert result['data']['startPipelineExecutionForCreatedRun']['__typename'] == 'PythonError'
    assert (
        'When executing count_letters.compute discovered required outputs missing from previous step'
        in result['data']['startPipelineExecutionForCreatedRun']['message']
    )

def test_k8s_run_launcher_celery(dagster_instance):  # pylint: disable=redefined-outer-name
    run_id = uuid.uuid4().hex
    environment_dict = merge_dicts(
        merge_yamls(
            [
                os.path.join(environments_path(), 'env.yaml'),
                os.path.join(environments_path(), 'env_filesystem.yaml'),
            ]
        ),
        get_celery_engine_config(),
    )

    assert 'celery' in environment_dict['execution']

    pipeline_name = 'demo_pipeline_celery'
    tags = {'key': 'value'}
    run = PipelineRun.create_empty_run(pipeline_name, run_id, environment_dict, tags)

    dagster_instance.launch_run(run)

    success, raw_logs = wait_for_job_success('dagster-job-%s' % run_id)
    result = parse_raw_res(raw_logs.split('\n'))

    assert success
    assert not result.get('errors')
    assert result['data']
    # This is not an ideal test, but it proves that Celery was configured properly;
    # getting the run to succeed would involve relying on S3 / GCS for storage.
    assert result['data']['startPipelineExecutionForCreatedRun']['__typename'] == 'PythonError'
    assert (
        'Must use S3 or GCS storage with non-local Celery broker: '
        'pyamqp://test:test@dagster-rabbitmq:5672// and backend: amqp'
        in result['data']['startPipelineExecutionForCreatedRun']['message']
    )

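# For contrast with the env_filesystem.yaml failure above: the passing variants of this test
# load env_s3.yaml instead, which presumably enables S3-backed storage. A hedged sketch of what
# such a file could contain in this era's run-config schema (the bucket name is a placeholder
# and the exact schema may differ):
#
#   storage:
#     s3:
#       config:
#         s3_bucket: dagster-scratch-placeholder
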
def test_k8s_run_launcher_with_celery_executor_fails(
    dagster_docker_image, dagster_instance_for_k8s_run_launcher, helm_namespace_for_k8s_run_launcher
):
    run_config = merge_dicts(
        merge_yamls(
            [
                os.path.join(get_test_project_environments_path(), "env.yaml"),
                os.path.join(get_test_project_environments_path(), "env_s3.yaml"),
            ]
        ),
        get_celery_engine_config(
            dagster_docker_image=dagster_docker_image,
            job_namespace=helm_namespace_for_k8s_run_launcher,
        ),
    )

    pipeline_name = "demo_pipeline_celery"

    with get_test_project_location_and_external_pipeline(pipeline_name) as (
        location,
        external_pipeline,
    ):
        run = create_run_for_test(
            dagster_instance_for_k8s_run_launcher,
            pipeline_name=pipeline_name,
            run_config=run_config,
            mode="default",
            pipeline_snapshot=external_pipeline.pipeline_snapshot,
            execution_plan_snapshot=location.get_external_execution_plan(
                external_pipeline, run_config, "default", None, None
            ).execution_plan_snapshot,
        )

        dagster_instance_for_k8s_run_launcher.launch_run(
            run.run_id,
            ReOriginatedExternalPipelineForTest(external_pipeline),
        )

        timeout = datetime.timedelta(0, 120)
        found_pipeline_failure = False
        start_time = datetime.datetime.now()

        while datetime.datetime.now() < start_time + timeout:
            event_records = dagster_instance_for_k8s_run_launcher.all_logs(run.run_id)
            for event_record in event_records:
                if event_record.dagster_event:
                    if (
                        event_record.dagster_event.event_type
                        == DagsterEventType.PIPELINE_INIT_FAILURE
                    ):
                        found_pipeline_failure = True
            if found_pipeline_failure:
                break
            time.sleep(5)

        assert found_pipeline_failure
        assert (
            dagster_instance_for_k8s_run_launcher.get_run_by_id(run.run_id).status
            == PipelineRunStatus.FAILURE
        )

def from_files(name, environment_files=None, solid_selection=None, mode=None):
    '''Static constructor for presets from YAML files.

    Args:
        name (str): The name of this preset. Must be unique in the presets defined on a given
            pipeline.
        environment_files (Optional[List[str]]): List of paths or glob patterns for yaml files
            to load and parse as the environment config for this preset.
        solid_selection (Optional[List[str]]): A list of solid subselection (including single
            solid names) to execute with the preset. e.g. ['*some_solid+', 'other_solid']
        mode (Optional[str]): The mode to apply when executing this preset. (default: 'default')

    Returns:
        PresetDefinition: A PresetDefinition constructed from the provided YAML files.

    Raises:
        DagsterInvariantViolationError: When one of the YAML files is invalid and has a parse
            error.
    '''
    check.str_param(name, 'name')
    environment_files = check.opt_list_param(environment_files, 'environment_files')
    solid_selection = check.opt_nullable_list_param(solid_selection, 'solid_selection', of_type=str)
    mode = check.opt_str_param(mode, 'mode', DEFAULT_MODE_NAME)

    filenames = []
    for file_glob in environment_files or []:
        globbed_files = glob(file_glob)
        if not globbed_files:
            raise DagsterInvalidDefinitionError(
                'File or glob pattern "{file_glob}" for "environment_files" in preset '
                '"{name}" produced no results.'.format(name=name, file_glob=file_glob)
            )
        filenames += [os.path.realpath(globbed_file) for globbed_file in globbed_files]

    try:
        merged = merge_yamls(filenames)
    except yaml.YAMLError as err:
        six.raise_from(
            DagsterInvariantViolationError(
                'Encountered error attempting to parse yaml. Parsing files {file_set} '
                'loaded by file/patterns {files} on preset "{name}".'.format(
                    file_set=filenames, files=environment_files, name=name
                )
            ),
            err,
        )

    return PresetDefinition(name, merged, solid_selection, mode)

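# A minimal usage sketch for from_files; the YAML paths are hypothetical placeholders, and the
# solid_selection value is taken from the docstring example above:
dev_preset = PresetDefinition.from_files(
    'dev',
    environment_files=[
        'environments/dev_base.yaml',  # explicit path
        'environments/dev_overrides_*.yaml',  # glob pattern, expanded via glob()
    ],
    solid_selection=['*some_solid+', 'other_solid'],
    mode='default',
)
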
def test_execute_on_celery_k8s_retry_pipeline(  # pylint: disable=redefined-outer-name
    dagster_docker_image, dagster_instance, helm_namespace
):
    run_config = merge_dicts(
        merge_yamls([os.path.join(get_test_project_environments_path(), "env_s3.yaml")]),
        get_celery_engine_config(
            dagster_docker_image=dagster_docker_image, job_namespace=helm_namespace
        ),
    )

    pipeline_name = "retry_pipeline"

    run = create_run_for_test(
        dagster_instance,
        pipeline_name=pipeline_name,
        run_config=run_config,
        mode="default",
    )

    with get_test_project_external_pipeline(pipeline_name) as external_pipeline:
        dagster_instance.launch_run(
            run.run_id,
            ReOriginatedExternalPipelineForTest(external_pipeline),
        )

        result = wait_for_job_and_get_raw_logs(
            job_name="dagster-run-%s" % run.run_id, namespace=helm_namespace
        )

        assert "PIPELINE_SUCCESS" in result, "no match, result: {}".format(result)

        stats = dagster_instance.get_run_stats(run.run_id)
        assert stats.steps_succeeded == 1

        # Collect the dagster event types once rather than re-fetching the logs per assertion.
        dagster_event_types = [
            event.dagster_event.event_type
            for event in dagster_instance.all_logs(run.run_id)
            if event.is_dagster_event
        ]
        assert DagsterEventType.STEP_START in dagster_event_types
        assert DagsterEventType.STEP_UP_FOR_RETRY in dagster_event_types
        assert DagsterEventType.STEP_RESTARTED in dagster_event_types
        assert DagsterEventType.STEP_SUCCESS in dagster_event_types

def test_execute_celery_docker():
    docker_image = test_project_docker_image()
    docker_config = {
        'image': docker_image,
        'env_vars': ['AWS_ACCESS_KEY_ID', 'AWS_SECRET_ACCESS_KEY'],
    }

    if IS_BUILDKITE:
        ecr_client = boto3.client('ecr', region_name='us-west-1')
        token = ecr_client.get_authorization_token()
        username, password = (
            base64.b64decode(token['authorizationData'][0]['authorizationToken'])
            .decode()
            .split(':')
        )
        registry = token['authorizationData'][0]['proxyEndpoint']

        docker_config['registry'] = {
            'url': registry,
            'username': username,
            'password': password,
        }
    else:
        try:
            client = docker.from_env()
            client.images.get(docker_image)
            print(  # pylint: disable=print-call
                'Found existing image tagged {image}, skipping image build. To rebuild, first run: '
                'docker rmi {image}'.format(image=docker_image)
            )
        except docker.errors.ImageNotFound:
            build_and_tag_test_image(docker_image)

    with seven.TemporaryDirectory() as temp_dir:
        run_config = merge_dicts(
            merge_yamls(
                [
                    os.path.join(test_project_environments_path(), 'env.yaml'),
                    os.path.join(test_project_environments_path(), 'env_s3.yaml'),
                ]
            ),
            {
                'execution': {
                    'celery-docker': {
                        'config': {
                            'docker': docker_config,
                            'config_source': {'task_always_eager': True},
                        }
                    }
                },
            },
        )

        result = execute_pipeline(
            get_test_project_recon_pipeline('docker_celery_pipeline'),
            run_config=run_config,
            instance=DagsterInstance.local_temp(temp_dir),
        )
        assert result.success

def test_execute_celery_docker(): docker_image = test_project_docker_image() docker_config = { "image": docker_image, "env_vars": ["AWS_ACCESS_KEY_ID", "AWS_SECRET_ACCESS_KEY"], } if IS_BUILDKITE: ecr_client = boto3.client("ecr", region_name="us-west-1") token = ecr_client.get_authorization_token() username, password = (base64.b64decode( token["authorizationData"][0] ["authorizationToken"]).decode().split(":")) registry = token["authorizationData"][0]["proxyEndpoint"] docker_config["registry"] = { "url": registry, "username": username, "password": password, } else: try: client = docker.from_env() client.images.get(docker_image) print( # pylint: disable=print-call "Found existing image tagged {image}, skipping image build. To rebuild, first run: " "docker rmi {image}".format(image=docker_image)) except docker.errors.ImageNotFound: build_and_tag_test_image(docker_image) with seven.TemporaryDirectory() as temp_dir: run_config = merge_dicts( merge_yamls([ os.path.join(test_project_environments_path(), "env.yaml"), os.path.join(test_project_environments_path(), "env_s3.yaml"), ]), { "execution": { "celery-docker": { "config": { "docker": docker_config, "config_source": { "task_always_eager": True }, } } }, }, ) result = execute_pipeline( get_test_project_recon_pipeline("docker_celery_pipeline"), run_config=run_config, instance=DagsterInstance.local_temp(temp_dir), ) assert result.success
def test_terminate_launched_docker_run():
    docker_image = get_test_project_docker_image()
    launcher_config = {
        "env_vars": ["AWS_ACCESS_KEY_ID", "AWS_SECRET_ACCESS_KEY"],
        "network": "container:test-postgres-db-docker",
    }

    if IS_BUILDKITE:
        launcher_config["registry"] = get_buildkite_registry_config()
    else:
        find_local_test_image(docker_image)

    run_config = merge_yamls([os.path.join(get_test_project_environments_path(), "env_s3.yaml")])

    with docker_postgres_instance(
        overrides={
            "run_launcher": {
                "class": "DockerRunLauncher",
                "module": "dagster_docker",
                "config": launcher_config,
            }
        }
    ) as instance:
        recon_pipeline = get_test_project_recon_pipeline("hanging_pipeline", docker_image)
        run = instance.create_run_for_pipeline(
            pipeline_def=recon_pipeline.get_definition(),
            run_config=run_config,
        )

        run_id = run.run_id

        external_pipeline = ReOriginatedExternalPipelineForTest(
            get_test_project_external_pipeline("hanging_pipeline", container_image=docker_image),
            container_image=docker_image,
        )
        instance.launch_run(run_id, external_pipeline)

        poll_for_step_start(instance, run_id)

        assert instance.run_launcher.can_terminate(run_id)
        assert instance.run_launcher.terminate(run_id)

        poll_for_finished_run(instance, run_id, timeout=30)

        terminated_pipeline_run = instance.get_run_by_id(run_id)
        assert terminated_pipeline_run.status == PipelineRunStatus.CANCELED

        run_logs = instance.all_logs(run_id)

        _check_event_log_contains(
            run_logs,
            [
                ("PIPELINE_CANCELING", "Sending pipeline termination request"),
                ("STEP_FAILURE", 'Execution of step "hanging_solid" failed.'),
                ("PIPELINE_CANCELED", 'Execution of pipeline "hanging_pipeline" canceled.'),
                ("ENGINE_EVENT", "Pipeline execution terminated by interrupt"),
                ("ENGINE_EVENT", "Process for pipeline exited"),
            ],
        )

def test_launch_docker_image_on_pipeline_config():
    # Docker image name to use for launch specified as part of the pipeline origin
    # rather than in the run launcher instance config
    docker_image = get_test_project_docker_image()
    launcher_config = {
        "env_vars": ["AWS_ACCESS_KEY_ID", "AWS_SECRET_ACCESS_KEY"],
        "network": "container:test-postgres-db-docker",
        "container_kwargs": {"auto_remove": True},
    }

    if IS_BUILDKITE:
        launcher_config["registry"] = get_buildkite_registry_config()
    else:
        find_local_test_image(docker_image)

    run_config = merge_yamls(
        [
            os.path.join(get_test_project_environments_path(), "env.yaml"),
            os.path.join(get_test_project_environments_path(), "env_s3.yaml"),
        ]
    )

    with docker_postgres_instance(
        overrides={
            "run_launcher": {
                "class": "DockerRunLauncher",
                "module": "dagster_docker",
                "config": launcher_config,
            }
        }
    ) as instance:
        recon_pipeline = get_test_project_recon_pipeline("demo_pipeline", docker_image)
        run = instance.create_run_for_pipeline(
            pipeline_def=recon_pipeline.get_definition(),
            run_config=run_config,
        )

        with get_test_project_external_pipeline(
            "demo_pipeline", container_image=docker_image
        ) as orig_pipeline:
            external_pipeline = ReOriginatedExternalPipelineForTest(
                orig_pipeline,
                container_image=docker_image,
            )
            instance.launch_run(run.run_id, external_pipeline)

            poll_for_finished_run(instance, run.run_id, timeout=60)

            run = instance.get_run_by_id(run.run_id)
            assert run.status == PipelineRunStatus.SUCCESS
            assert run.tags[DOCKER_IMAGE_TAG] == docker_image

def test_memoization_on_celery_k8s(  # pylint: disable=redefined-outer-name
    dagster_docker_image, dagster_instance, helm_namespace, dagit_url
):
    ephemeral_prefix = str(uuid.uuid4())
    run_config = deep_merge_dicts(
        merge_yamls([os.path.join(get_test_project_environments_path(), "env_s3.yaml")]),
        get_celery_engine_config(
            dagster_docker_image=dagster_docker_image, job_namespace=helm_namespace
        ),
    )
    run_config = deep_merge_dicts(
        run_config,
        {"resources": {"io_manager": {"config": {"s3_prefix": ephemeral_prefix}}}},
    )

    try:
        run_ids = []
        for _ in range(2):
            run_id = launch_run_over_graphql(
                dagit_url,
                run_config=run_config,
                pipeline_name="memoization_pipeline",
                mode="celery",
            )

            result = wait_for_job_and_get_raw_logs(
                job_name="dagster-run-%s" % run_id, namespace=helm_namespace
            )

            assert "PIPELINE_SUCCESS" in result, "no match, result: {}".format(result)

            run_ids.append(run_id)

        # Every step should execute on the first run; the second run should be fully memoized
        # and emit no step events.
        unmemoized_run_id = run_ids[0]
        step_events = _get_step_events(dagster_instance.all_logs(unmemoized_run_id))
        assert len(step_events) == 4

        memoized_run_id = run_ids[1]
        step_events = _get_step_events(dagster_instance.all_logs(memoized_run_id))
        assert len(step_events) == 0
    finally:
        cleanup_memoized_results(
            define_memoization_pipeline(), "celery", dagster_instance, run_config
        )

def test_execute_retry_pipeline_on_celery_k8s(  # pylint: disable=redefined-outer-name
    dagster_docker_image, dagster_instance, helm_namespace
):
    run_config = merge_dicts(
        merge_yamls([os.path.join(test_project_environments_path(), 'env_s3.yaml')]),
        get_celery_engine_config(
            dagster_docker_image=dagster_docker_image, helm_namespace=helm_namespace
        ),
    )

    pipeline_name = 'retry_pipeline'

    run = create_run_for_test(
        dagster_instance,
        pipeline_name=pipeline_name,
        run_config=run_config,
        mode='default',
    )

    dagster_instance.launch_run(run.run_id, get_test_project_external_pipeline(pipeline_name))

    result = wait_for_job_and_get_logs(
        job_name='dagster-run-%s' % run.run_id, namespace=helm_namespace
    )

    assert not result.get('errors')
    assert result['data']
    assert (
        result['data']['executeRunInProcess']['__typename'] == 'ExecuteRunInProcessSuccess'
    ), 'no match, result: {}'.format(result)

    stats = dagster_instance.get_run_stats(run.run_id)
    assert stats.steps_succeeded == 1

    # Collect the dagster event types once rather than re-fetching the logs per assertion.
    dagster_event_types = [
        event.dagster_event.event_type
        for event in dagster_instance.all_logs(run.run_id)
        if event.is_dagster_event
    ]
    assert DagsterEventType.STEP_START in dagster_event_types
    assert DagsterEventType.STEP_UP_FOR_RETRY in dagster_event_types
    assert DagsterEventType.STEP_RESTARTED in dagster_event_types
    assert DagsterEventType.STEP_SUCCESS in dagster_event_types

def test_execute_on_celery_k8s_with_hard_failure(  # pylint: disable=redefined-outer-name
    dagster_docker_image, dagster_instance, set_dagster_k8s_pipeline_run_namespace_env, dagit_url
):
    run_config = merge_dicts(
        merge_dicts(
            merge_yamls([os.path.join(get_test_project_environments_path(), "env_s3.yaml")]),
            get_celery_engine_config(
                dagster_docker_image=dagster_docker_image,
                job_namespace={"env": "DAGSTER_K8S_PIPELINE_RUN_NAMESPACE"},
            ),
        ),
        {"solids": {"hard_fail_or_0": {"config": {"fail": True}}}},
    )

    run_id = launch_run_over_graphql(dagit_url, run_config=run_config, pipeline_name="hard_failer")

    # Check that the pipeline run is marked as failed
    pipeline_run_status_failure = False
    start_time = datetime.datetime.now()
    timeout = datetime.timedelta(0, 120)

    while datetime.datetime.now() < start_time + timeout:
        pipeline_run = dagster_instance.get_run_by_id(run_id)
        if pipeline_run.status == PipelineRunStatus.FAILURE:
            pipeline_run_status_failure = True
            break
        time.sleep(5)
    assert pipeline_run_status_failure

    # Check for step failure for hard_fail_or_0.compute
    start_time = datetime.datetime.now()
    step_failure_found = False
    while datetime.datetime.now() < start_time + timeout:
        event_records = dagster_instance.all_logs(run_id)
        for event_record in event_records:
            if event_record.dagster_event:
                if (
                    event_record.dagster_event.event_type == DagsterEventType.STEP_FAILURE
                    and event_record.dagster_event.step_key == "hard_fail_or_0"
                ):
                    step_failure_found = True
                    break
        # Exit the outer polling loop as well once the step failure is seen, rather than
        # spinning until the timeout elapses.
        if step_failure_found:
            break
        time.sleep(5)
    assert step_failure_found

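# The poll-until-timeout pattern above also appears in test_k8s_run_launcher_with_celery_executor_fails.
# A minimal sketch of how it could be factored into a shared helper (hypothetical; assumes the
# datetime and time imports already used by these tests):
def _poll_until(predicate, timeout_seconds=120, interval_seconds=5):
    start_time = datetime.datetime.now()
    timeout = datetime.timedelta(seconds=timeout_seconds)
    while datetime.datetime.now() < start_time + timeout:
        if predicate():
            return True
        time.sleep(interval_seconds)
    return False
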
def test_execute_on_celery_k8s_with_termination(  # pylint: disable=redefined-outer-name
    dagster_docker_image, dagster_instance, helm_namespace
):
    run_config = merge_dicts(
        merge_yamls([os.path.join(get_test_project_environments_path(), "env_s3.yaml")]),
        get_celery_engine_config(
            dagster_docker_image=dagster_docker_image, job_namespace=helm_namespace
        ),
    )

    _test_termination(dagster_instance, run_config)

def test_execute_on_celery_k8s_with_env_var_and_termination(  # pylint: disable=redefined-outer-name
    dagster_docker_image, dagster_instance, set_dagster_k8s_pipeline_run_namespace_env
):
    run_config = merge_dicts(
        merge_yamls([os.path.join(get_test_project_environments_path(), "env_s3.yaml")]),
        get_celery_engine_config(
            dagster_docker_image=dagster_docker_image,
            job_namespace={"env": "DAGSTER_K8S_PIPELINE_RUN_NAMESPACE"},
        ),
    )

    _test_termination(dagster_instance, run_config)

def test_merge_yamls():
    assert merge_yamls(
        [
            file_relative_path(__file__, os.path.join("yamls", "yaml_one.yaml")),
            file_relative_path(__file__, os.path.join("yamls", "yaml_two.yaml")),
        ]
    ) == {"key_one": {"key_one_one": "value_one", "key_one_two": "value_two"}}

    with pytest.raises(
        check.CheckError,
        match=(
            "Expected YAML from file .* to parse to dictionary, "
            'instead got: "this is a valid YAML string but not a dictionary"'
        ),
    ):
        merge_yamls(
            [
                file_relative_path(__file__, os.path.join("yamls", "yaml_one.yaml")),
                file_relative_path(__file__, os.path.join("yamls", "bad", "a_string.yaml")),
            ]
        )

def test_merge_yamls():
    assert merge_yamls(
        [
            file_relative_path(__file__, 'yamls/yaml_one.yaml'),
            file_relative_path(__file__, 'yamls/yaml_two.yaml'),
        ]
    ) == {'key_one': {'key_one_one': 'value_one', 'key_one_two': 'value_two'}}

    with pytest.raises(
        check.CheckError,
        match=(
            'Expected YAML from file .*?/yamls/bad/a_string.yaml to parse to dictionary, '
            'instead got: "this is a valid YAML string but not a dictionary"'
        ),
    ):
        merge_yamls(
            [
                file_relative_path(__file__, 'yamls/yaml_one.yaml'),
                file_relative_path(__file__, 'yamls/bad/a_string.yaml'),
            ]
        )

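# Fixture contents consistent with the assertions above (an assumption; any pair of files that
# deep-merges to the same dict would do). merge_yamls merges each successive file into the
# accumulated result, so the nested keys under 'key_one' are combined:
#
#   yamls/yaml_one.yaml:
#       key_one:
#         key_one_one: value_one
#
#   yamls/yaml_two.yaml:
#       key_one:
#         key_one_two: value_two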