Example #1
def test_execute_on_celery_k8s_default(  # pylint: disable=redefined-outer-name
        dagster_docker_image, dagster_instance, helm_namespace):
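    # Common pattern across these tests: merge the YAML environment files,
    # layer the celery-k8s engine config on top, launch the run, then wait for
    # the Kubernetes job and assert on its raw logs.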
    run_config = merge_dicts(
        merge_yamls([
            os.path.join(get_test_project_environments_path(), "env.yaml"),
            os.path.join(get_test_project_environments_path(), "env_s3.yaml"),
        ]),
        get_celery_engine_config(dagster_docker_image=dagster_docker_image,
                                 job_namespace=helm_namespace),
    )

    pipeline_name = "demo_pipeline_celery"
    run = create_run_for_test(
        dagster_instance,
        pipeline_name=pipeline_name,
        run_config=run_config,
        mode="default",
    )

    with get_test_project_external_pipeline(
            pipeline_name) as external_pipeline:
        dagster_instance.launch_run(
            run.run_id,
            ReOriginatedExternalPipelineForTest(external_pipeline),
        )

        result = wait_for_job_and_get_raw_logs(job_name="dagster-run-%s" %
                                               run.run_id,
                                               namespace=helm_namespace)

        assert "PIPELINE_SUCCESS" in result, "no match, result: {}".format(
            result)

        updated_run = dagster_instance.get_run_by_id(run.run_id)
        assert updated_run.tags[DOCKER_IMAGE_TAG] == dagster_docker_image
Example #2
def test_k8s_run_launcher_celery(dagster_instance, helm_namespace):

    run_config = merge_dicts(
        merge_yamls([
            os.path.join(test_project_environments_path(), 'env.yaml'),
            os.path.join(test_project_environments_path(), 'env_s3.yaml'),
        ]),
        get_celery_engine_config(),
    )

    assert 'celery-k8s' in run_config['execution']

    pipeline_name = 'demo_pipeline_celery'
    tags = {'key': 'value'}
    run = create_run_for_test(
        dagster_instance,
        pipeline_name=pipeline_name,
        run_config=run_config,
        tags=tags,
        mode='default',
    )

    dagster_instance.launch_run(
        run.run_id, get_test_project_external_pipeline(pipeline_name))
    result = wait_for_job_and_get_logs(job_name='dagster-run-%s' % run.run_id,
                                       namespace=helm_namespace)

    assert not result.get('errors')
    assert result['data']
    assert (
        result['data']['executeRunInProcess']['__typename'] ==
        'ExecuteRunInProcessSuccess'), 'no match, result: {}'.format(result)
Example #3
    def environment_dict(self):
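        # Resolve each glob in environment_files, de-duplicate matches via
        # realpath, then merge the resulting YAML files into a single dict.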
        if self.environment_files is None:
            return None

        file_set = set()
        for file_glob in self.environment_files:
            files = glob(file_glob)
            if not files:
                raise DagsterInvalidDefinitionError(
                    'File or glob pattern "{file_glob}" for "environment_files" in preset "{name}" for '
                    'pipeline "{pipeline_name}" produced no results.'.format(
                        name=self.name,
                        file_glob=file_glob,
                        pipeline_name=self.pipeline_name))

            file_set.update(map(os.path.realpath, files))

        try:
            merged = merge_yamls(list(file_set))
        except yaml.YAMLError as err:
            six.raise_from(
                DagsterInvariantViolationError(
                    'Encountered error attempting to parse yaml. Parsing files {file_set} loaded by '
                    'file/patterns {files} on preset "{name}" for pipeline "{pipeline_name}".'
                    .format(
                        file_set=file_set,
                        files=self.environment_files,
                        name=self.name,
                        pipeline_name=self.pipeline_name,
                    )),
                err,
            )

        return merged
Example #4
def test_execute_on_celery(  # pylint: disable=redefined-outer-name
        dagster_docker_image, dagster_instance, helm_namespace):
    run_config = merge_dicts(
        merge_yamls([
            os.path.join(test_project_environments_path(), 'env.yaml'),
            os.path.join(test_project_environments_path(), 'env_s3.yaml'),
        ]),
        get_celery_engine_config(dagster_docker_image=dagster_docker_image,
                                 helm_namespace=helm_namespace),
    )

    pipeline_name = 'demo_pipeline_celery'
    run = create_run_for_test(
        dagster_instance,
        pipeline_name=pipeline_name,
        run_config=run_config,
        mode='default',
    )

    dagster_instance.launch_run(
        run.run_id, get_test_project_external_pipeline(pipeline_name))

    result = wait_for_job_and_get_logs(job_name='dagster-run-%s' % run.run_id,
                                       namespace=helm_namespace)

    assert not result.get('errors')
    assert result['data']
    assert (
        result['data']['executeRunInProcess']['__typename'] ==
        'ExecuteRunInProcessSuccess'), 'no match, result: {}'.format(result)
Example #5
def test_k8s_run_launcher_celery(dagster_instance, helm_namespace):

    environment_dict = merge_dicts(
        merge_yamls([
            os.path.join(test_project_environments_path(), 'env.yaml'),
            os.path.join(test_project_environments_path(), 'env_s3.yaml'),
        ]),
        get_celery_engine_config(),
    )

    assert 'celery-k8s' in environment_dict['execution']

    pipeline_name = 'demo_pipeline_celery'
    tags = {'key': 'value'}
    run = create_run_for_test(
        dagster_instance,
        pipeline_name=pipeline_name,
        environment_dict=environment_dict,
        tags=tags,
        mode='default',
    )

    dagster_instance.launch_run(run.run_id)
    result = wait_for_job_and_get_logs(job_name='dagster-run-%s' % run.run_id,
                                       namespace=helm_namespace)

    assert not result.get('errors')
    assert result['data']
    assert (result['data']['startPipelineExecutionForCreatedRun']['__typename']
            == 'StartPipelineRunSuccess')
Example #6
def test_execute_on_celery_resource_requirements(  # pylint: disable=redefined-outer-name
    dagster_docker_image, dagster_instance, helm_namespace
):
    run_config = merge_dicts(
        merge_yamls([os.path.join(test_project_environments_path(), 'env_s3.yaml'),]),
        {
            'execution': {
                'celery-k8s': {
                    'config': {
                        'job_image': dagster_docker_image,
                        'job_namespace': helm_namespace,
                        'image_pull_policy': 'Always',
                        'env_config_maps': ['dagster-pipeline-env'],
                    }
                }
            },
        },
    )

    pipeline_name = 'resources_limit_pipeline_celery'
    run = create_run_for_test(
        dagster_instance, pipeline_name=pipeline_name, run_config=run_config, mode='default',
    )

    dagster_instance.launch_run(run.run_id, get_test_project_external_pipeline(pipeline_name))

    result = wait_for_job_and_get_logs(
        job_name='dagster-run-%s' % run.run_id, namespace=helm_namespace
    )

    assert not result.get('errors')
    assert result['data']
    assert (
        result['data']['executeRunInProcess']['__typename'] == 'ExecuteRunInProcessSuccess'
    ), 'no match, result: {}'.format(result)
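Note: most of the other examples build the celery-k8s section with a
get_celery_engine_config helper rather than inlining it as in this example. A
minimal sketch of what such a helper plausibly returns, inferred from the
inline config above (the real helper lives in Dagster's test utilities and its
signature varies across versions):

def get_celery_engine_config(dagster_docker_image=None, job_namespace=None):
    # Hypothetical sketch inferred from the inline "execution: celery-k8s"
    # config above; not copied from Dagster itself.
    return {
        'execution': {
            'celery-k8s': {
                'config': {
                    'job_image': dagster_docker_image,
                    'job_namespace': job_namespace,
                    'image_pull_policy': 'Always',
                    'env_config_maps': ['dagster-pipeline-env'],
                }
            }
        },
    }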
Example #7
    def from_files(name, environment_files=None, solid_subset=None, mode=None):
        check.str_param(name, 'name')
        environment_files = check.opt_list_param(environment_files, 'environment_files')
        solid_subset = check.opt_nullable_list_param(solid_subset, 'solid_subset', of_type=str)
        mode = check.opt_str_param(mode, 'mode', DEFAULT_MODE_NAME)

        file_set = set()
        for file_glob in environment_files or []:
            files = glob(file_glob)
            if not files:
                raise DagsterInvalidDefinitionError(
                    'File or glob pattern "{file_glob}" for "environment_files" in preset '
                    '"{name}" produced no results.'.format(name=name, file_glob=file_glob)
                )

            file_set.update(map(os.path.realpath, files))

        try:
            merged = merge_yamls(list(file_set))
        except yaml.YAMLError as err:
            six.raise_from(
                DagsterInvariantViolationError(
                    'Encountered error attempting to parse yaml. Parsing files {file_set} '
                    'loaded by file/patterns {files} on preset "{name}".'.format(
                        file_set=file_set, files=environment_files, name=name
                    )
                ),
                err,
            )

        return PresetDefinition(name, merged, solid_subset, mode)
Example #8
def test_execute_subset_on_celery_k8s(  # pylint: disable=redefined-outer-name
        dagster_docker_image, dagster_instance, helm_namespace):
    run_config = merge_dicts(
        merge_yamls([
            os.path.join(get_test_project_environments_path(),
                         "env_subset.yaml"),
            os.path.join(get_test_project_environments_path(), "env_s3.yaml"),
        ]),
        get_celery_engine_config(dagster_docker_image=dagster_docker_image,
                                 job_namespace=helm_namespace),
    )

    pipeline_name = "demo_pipeline_celery"
    run = create_run_for_test(
        dagster_instance,
        pipeline_name=pipeline_name,
        run_config=run_config,
        mode="default",
        solids_to_execute={"count_letters"},
    )

    dagster_instance.launch_run(
        run.run_id,
        ReOriginatedExternalPipelineForTest(
            get_test_project_external_pipeline(pipeline_name)),
    )

    result = wait_for_job_and_get_raw_logs(job_name="dagster-run-%s" %
                                           run.run_id,
                                           namespace=helm_namespace)

    assert "PIPELINE_SUCCESS" in result, "no match, result: {}".format(result)
Example #9
def test_execute_on_celery_k8s(  # pylint: disable=redefined-outer-name
        dagster_docker_image, dagster_instance, helm_namespace):
    run_config = merge_dicts(
        merge_yamls([
            os.path.join(test_project_environments_path(), 'env.yaml'),
            os.path.join(test_project_environments_path(), 'env_s3.yaml'),
        ]),
        get_celery_engine_config(dagster_docker_image=dagster_docker_image,
                                 job_namespace=helm_namespace),
    )

    pipeline_name = 'demo_pipeline_celery'
    run = create_run_for_test(
        dagster_instance,
        pipeline_name=pipeline_name,
        run_config=run_config,
        mode='default',
    )

    dagster_instance.launch_run(
        run.run_id,
        ReOriginatedExternalPipelineForTest(
            get_test_project_external_pipeline(pipeline_name)),
    )

    result = wait_for_job_and_get_raw_logs(job_name='dagster-run-%s' %
                                           run.run_id,
                                           namespace=helm_namespace)

    assert 'PIPELINE_SUCCESS' in result, 'no match, result: {}'.format(result)
Example #10
def test_execute_on_celery_k8s_default(  # pylint: disable=redefined-outer-name
    dagster_docker_image,
    dagster_instance,
    helm_namespace,
    dagit_url,
):
    run_config = merge_dicts(
        merge_yamls([
            os.path.join(get_test_project_environments_path(), "env.yaml"),
            os.path.join(get_test_project_environments_path(), "env_s3.yaml"),
        ]),
        get_celery_engine_config(dagster_docker_image=dagster_docker_image,
                                 job_namespace=helm_namespace),
    )

    run_id = launch_run_over_graphql(dagit_url,
                                     run_config=run_config,
                                     pipeline_name="demo_pipeline_celery")

    result = wait_for_job_and_get_raw_logs(job_name="dagster-run-%s" % run_id,
                                           namespace=helm_namespace)

    assert "PIPELINE_SUCCESS" in result, "no match, result: {}".format(result)

    updated_run = dagster_instance.get_run_by_id(run_id)
    assert updated_run.tags[DOCKER_IMAGE_TAG] == dagster_docker_image
Example #11
def test_execute_on_celery_k8s_job_api_with_legacy_configmap_set(  # pylint: disable=redefined-outer-name
        dagster_docker_image, dagster_instance, helm_namespace, dagit_url):
    # Originally, jobs needed to include "dagster-pipeline-env" to pick up the config they
    # needed when using the helm chart. It's no longer required, but verify that nothing
    # breaks if it's included.
    run_config = merge_dicts(
        merge_yamls([
            os.path.join(get_test_project_environments_path(), "env.yaml"),
            os.path.join(get_test_project_environments_path(), "env_s3.yaml"),
        ]),
        get_celery_job_engine_config(
            dagster_docker_image=dagster_docker_image,
            job_namespace=helm_namespace,
            include_dagster_pipeline_env=True,
        ),
    )

    run_id = launch_run_over_graphql(dagit_url,
                                     run_config=run_config,
                                     pipeline_name="demo_job_celery")

    result = wait_for_job_and_get_raw_logs(job_name="dagster-run-%s" % run_id,
                                           namespace=helm_namespace)

    assert "PIPELINE_SUCCESS" in result, "no match, result: {}".format(result)

    updated_run = dagster_instance.get_run_by_id(run_id)
    assert updated_run.tags[DOCKER_IMAGE_TAG] == dagster_docker_image
Example #12
def test_run_monitoring_fails_on_interrupt(  # pylint: disable=redefined-outer-name
    dagster_docker_image, dagster_instance, helm_namespace, dagit_url
):
    run_config = merge_dicts(
        merge_yamls(
            [
                os.path.join(get_test_project_environments_path(), "env.yaml"),
                os.path.join(get_test_project_environments_path(), "env_s3.yaml"),
            ]
        ),
        get_celery_job_engine_config(
            dagster_docker_image=dagster_docker_image, job_namespace=helm_namespace
        ),
    )

    pipeline_name = "demo_job_celery"

    try:
        run_id = launch_run_over_graphql(
            dagit_url, run_config=run_config, pipeline_name=pipeline_name
        )
        start_time = time.time()
        while time.time() - start_time < 60:
            run = dagster_instance.get_run_by_id(run_id)
            if run.status == PipelineRunStatus.STARTED:
                break
            assert run.status == PipelineRunStatus.STARTING
            time.sleep(1)

        assert delete_job(get_job_name_from_run_id(run_id), helm_namespace)
        poll_for_finished_run(dagster_instance, run.run_id, timeout=120)
        assert dagster_instance.get_run_by_id(run_id).status == PipelineRunStatus.FAILURE
    finally:
        log_run_events(dagster_instance, run_id)
Example #13
def test_k8s_run_launcher_celery(dagster_instance, helm_namespace):
    environment_dict = merge_dicts(
        merge_yamls([
            os.path.join(test_project_environments_path(), 'env.yaml'),
            os.path.join(test_project_environments_path(),
                         'env_filesystem.yaml'),
        ]),
        get_celery_engine_config(),
    )

    assert 'celery' in environment_dict['execution']

    pipeline_name = 'demo_pipeline_celery'
    tags = {'key': 'value'}
    run = dagster_instance.create_run(pipeline_name=pipeline_name,
                                      environment_dict=environment_dict,
                                      tags=tags,
                                      mode='default')

    dagster_instance.launch_run(run.run_id)
    result = wait_for_job_and_get_logs(job_name='dagster-run-%s' % run.run_id,
                                       namespace=helm_namespace)

    assert not result.get('errors')
    assert result['data']
    # This is not a good test, but it proves that we got celery configured
    # properly; getting it to pass would involve relying on s3 / gcs for storage.
    assert result['data']['startPipelineExecutionForCreatedRun'][
        '__typename'] == 'PythonError'
    assert (
        'Must use S3 or GCS storage with non-local Celery broker: pyamqp://test:test@dagster-rabbitmq:5672// and backend: amqp'
        in result['data']['startPipelineExecutionForCreatedRun']['message'])
Example #14
def test_execute_queued_run_on_celery_k8s(  # pylint: disable=redefined-outer-name
    dagster_docker_image,
    dagster_instance_for_daemon,
    helm_namespace_for_daemon,
    dagit_url_for_daemon,
):
    run_config = merge_dicts(
        merge_yamls([
            os.path.join(get_test_project_environments_path(), "env.yaml"),
            os.path.join(get_test_project_environments_path(), "env_s3.yaml"),
        ]),
        get_celery_engine_config(
            dagster_docker_image=dagster_docker_image,
            job_namespace=helm_namespace_for_daemon,
        ),
    )

    run_id = launch_run_over_graphql(dagit_url_for_daemon,
                                     run_config=run_config,
                                     pipeline_name="demo_pipeline_celery")

    wait_for_job_and_get_raw_logs(job_name="dagster-run-%s" % run_id,
                                  namespace=helm_namespace_for_daemon)

    logs = dagster_instance_for_daemon.all_logs(run_id)
    assert_events_in_order(
        logs,
        [
            "PIPELINE_ENQUEUED", "PIPELINE_DEQUEUED", "PIPELINE_STARTING",
            "PIPELINE_SUCCESS"
        ],
    )
Example #15
def test_k8s_run_launcher_celery(dagster_instance, helm_namespace):

    environment_dict = merge_dicts(
        merge_yamls([
            os.path.join(test_project_environments_path(), 'env.yaml'),
            os.path.join(test_project_environments_path(),
                         'env_filesystem.yaml'),
        ]),
        get_celery_engine_config(),
    )

    assert 'celery' in environment_dict['execution']

    pipeline_name = 'demo_pipeline_celery'
    tags = {'key': 'value'}
    run = dagster_instance.create_run(pipeline_name=pipeline_name,
                                      environment_dict=environment_dict,
                                      tags=tags,
                                      mode='default')

    dagster_instance.launch_run(run.run_id)
    result = wait_for_job_and_get_logs(job_name='dagster-run-%s' % run.run_id,
                                       namespace=helm_namespace)

    assert not result.get('errors')
    assert result['data']
    # This is not an ideal test, but it proves that we got celery configured properly.
    # We detect that by seeing an error that we know occurs only after configuration.
    assert result['data']['startPipelineExecutionForCreatedRun'][
        '__typename'] == 'PythonError'
    assert (
        'When executing count_letters.compute discovered required outputs missing from previous step'
        in result['data']['startPipelineExecutionForCreatedRun']['message'])
Example #16
def test_k8s_run_launcher_celery(dagster_instance):  # pylint: disable=redefined-outer-name
    run_id = uuid.uuid4().hex
    environment_dict = merge_dicts(
        merge_yamls([
            os.path.join(environments_path(), 'env.yaml'),
            os.path.join(environments_path(), 'env_filesystem.yaml'),
        ]),
        get_celery_engine_config(),
    )

    assert 'celery' in environment_dict['execution']

    pipeline_name = 'demo_pipeline_celery'
    tags = {'key': 'value'}
    run = PipelineRun.create_empty_run(pipeline_name, run_id, environment_dict,
                                       tags)

    dagster_instance.launch_run(run)
    success, raw_logs = wait_for_job_success('dagster-job-%s' % run_id)
    result = parse_raw_res(raw_logs.split('\n'))

    assert success
    assert not result.get('errors')
    assert result['data']
    # This is not a good test, but it proves that we got celery configured
    # properly; getting it to pass would involve relying on s3 / gcs for storage.
    assert result['data']['startPipelineExecutionForCreatedRun'][
        '__typename'] == 'PythonError'
    assert (
        'Must use S3 or GCS storage with non-local Celery broker: pyamqp://test:test@dagster-rabbitmq:5672// and backend: amqp'
        in result['data']['startPipelineExecutionForCreatedRun']['message'])
Example #17
def test_k8s_run_launcher_with_celery_executor_fails(
        dagster_docker_image, dagster_instance_for_k8s_run_launcher,
        helm_namespace_for_k8s_run_launcher):
    run_config = merge_dicts(
        merge_yamls([
            os.path.join(get_test_project_environments_path(), "env.yaml"),
            os.path.join(get_test_project_environments_path(), "env_s3.yaml"),
        ]),
        get_celery_engine_config(
            dagster_docker_image=dagster_docker_image,
            job_namespace=helm_namespace_for_k8s_run_launcher,
        ),
    )

    pipeline_name = "demo_pipeline_celery"

    with get_test_project_location_and_external_pipeline(pipeline_name) as (
            location,
            external_pipeline,
    ):
        run = create_run_for_test(
            dagster_instance_for_k8s_run_launcher,
            pipeline_name=pipeline_name,
            run_config=run_config,
            mode="default",
            pipeline_snapshot=external_pipeline.pipeline_snapshot,
            execution_plan_snapshot=location.get_external_execution_plan(
                external_pipeline, run_config, "default", None,
                None).execution_plan_snapshot,
        )
        dagster_instance_for_k8s_run_launcher.launch_run(
            run.run_id,
            ReOriginatedExternalPipelineForTest(external_pipeline),
        )

        timeout = datetime.timedelta(0, 120)

        found_pipeline_failure = False

        start_time = datetime.datetime.now()

        while datetime.datetime.now() < start_time + timeout:
            event_records = dagster_instance_for_k8s_run_launcher.all_logs(
                run.run_id)

            for event_record in event_records:
                if event_record.dagster_event:
                    if (event_record.dagster_event.event_type ==
                            DagsterEventType.PIPELINE_INIT_FAILURE):
                        found_pipeline_failure = True

            if found_pipeline_failure:
                break

            time.sleep(5)

        assert found_pipeline_failure
        assert (dagster_instance_for_k8s_run_launcher.get_run_by_id(
            run.run_id).status == PipelineRunStatus.FAILURE)
Example #18
    def from_files(name,
                   environment_files=None,
                   solid_selection=None,
                   mode=None):
        '''Static constructor for presets from YAML files.

        Args:
            name (str): The name of this preset. Must be unique in the presets defined on a given
                pipeline.
            environment_files (Optional[List[str]]): List of paths or glob patterns for yaml files
                to load and parse as the environment config for this preset.
            solid_selection (Optional[List[str]]): A list of solid subselection (including single
                solid names) to execute with the preset. e.g. ['*some_solid+', 'other_solid']
            mode (Optional[str]): The mode to apply when executing this preset. (default:
                'default')

        Returns:
            PresetDefinition: A PresetDefinition constructed from the provided YAML files.

        Raises:
            DagsterInvariantViolationError: When one of the YAML files is invalid and has a parse
                error.
        '''
        check.str_param(name, 'name')
        environment_files = check.opt_list_param(environment_files,
                                                 'environment_files')
        solid_selection = check.opt_nullable_list_param(solid_selection,
                                                        'solid_selection',
                                                        of_type=str)
        mode = check.opt_str_param(mode, 'mode', DEFAULT_MODE_NAME)

        filenames = []
        for file_glob in environment_files or []:
            globbed_files = glob(file_glob)
            if not globbed_files:
                raise DagsterInvalidDefinitionError(
                    'File or glob pattern "{file_glob}" for "environment_files" in preset '
                    '"{name}" produced no results.'.format(
                        name=name, file_glob=file_glob))

            filenames += [
                os.path.realpath(globbed_file)
                for globbed_file in globbed_files
            ]

        try:
            merged = merge_yamls(filenames)
        except yaml.YAMLError as err:
            six.raise_from(
                DagsterInvariantViolationError(
                    'Encountered error attempting to parse yaml. Parsing files {file_set} '
                    'loaded by file/patterns {files} on preset "{name}".'.
                    format(file_set=filenames,
                           files=environment_files,
                           name=name)),
                err,
            )

        return PresetDefinition(name, merged, solid_selection, mode)
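An illustrative call to the constructor above (the preset name and file paths
are hypothetical):

from dagster import PresetDefinition

preset = PresetDefinition.from_files(
    'local_dev',
    environment_files=['environments/base.yaml', 'environments/local_*.yaml'],
    mode='default',
)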
Example #19
def test_execute_on_celery_k8s_retry_pipeline(  # pylint: disable=redefined-outer-name
        dagster_docker_image, dagster_instance, helm_namespace):
    run_config = merge_dicts(
        merge_yamls([
            os.path.join(get_test_project_environments_path(), "env_s3.yaml")
        ]),
        get_celery_engine_config(dagster_docker_image=dagster_docker_image,
                                 job_namespace=helm_namespace),
    )

    pipeline_name = "retry_pipeline"
    run = create_run_for_test(
        dagster_instance,
        pipeline_name=pipeline_name,
        run_config=run_config,
        mode="default",
    )

    with get_test_project_external_pipeline(
            pipeline_name) as external_pipeline:
        dagster_instance.launch_run(
            run.run_id,
            ReOriginatedExternalPipelineForTest(external_pipeline),
        )

        result = wait_for_job_and_get_raw_logs(job_name="dagster-run-%s" %
                                               run.run_id,
                                               namespace=helm_namespace)

        assert "PIPELINE_SUCCESS" in result, "no match, result: {}".format(
            result)

        stats = dagster_instance.get_run_stats(run.run_id)
        assert stats.steps_succeeded == 1

        assert DagsterEventType.STEP_START in [
            event.dagster_event.event_type
            for event in dagster_instance.all_logs(run.run_id)
            if event.is_dagster_event
        ]

        assert DagsterEventType.STEP_UP_FOR_RETRY in [
            event.dagster_event.event_type
            for event in dagster_instance.all_logs(run.run_id)
            if event.is_dagster_event
        ]

        assert DagsterEventType.STEP_RESTARTED in [
            event.dagster_event.event_type
            for event in dagster_instance.all_logs(run.run_id)
            if event.is_dagster_event
        ]

        assert DagsterEventType.STEP_SUCCESS in [
            event.dagster_event.event_type
            for event in dagster_instance.all_logs(run.run_id)
            if event.is_dagster_event
        ]
Example #20
def test_execute_celery_docker():
    docker_image = test_project_docker_image()
    docker_config = {
        'image': docker_image,
        'env_vars': ['AWS_ACCESS_KEY_ID', 'AWS_SECRET_ACCESS_KEY'],
    }

    if IS_BUILDKITE:
        ecr_client = boto3.client('ecr', region_name='us-west-1')
        token = ecr_client.get_authorization_token()
        username, password = (base64.b64decode(
            token['authorizationData'][0]
            ['authorizationToken']).decode().split(':'))
        registry = token['authorizationData'][0]['proxyEndpoint']

        docker_config['registry'] = {
            'url': registry,
            'username': username,
            'password': password,
        }

    else:
        try:
            client = docker.from_env()
            client.images.get(docker_image)
            print(  # pylint: disable=print-call
                'Found existing image tagged {image}, skipping image build. To rebuild, first run: '
                'docker rmi {image}'.format(image=docker_image))
        except docker.errors.ImageNotFound:
            build_and_tag_test_image(docker_image)

    with seven.TemporaryDirectory() as temp_dir:

        run_config = merge_dicts(
            merge_yamls([
                os.path.join(test_project_environments_path(), 'env.yaml'),
                os.path.join(test_project_environments_path(), 'env_s3.yaml'),
            ]),
            {
                'execution': {
                    'celery-docker': {
                        'config': {
                            'docker': docker_config,
                            'config_source': {
                                'task_always_eager': True
                            },
                        }
                    }
                },
            },
        )

        result = execute_pipeline(
            get_test_project_recon_pipeline('docker_celery_pipeline'),
            run_config=run_config,
            instance=DagsterInstance.local_temp(temp_dir),
        )
        assert result.success
Example #21
def test_execute_celery_docker():
    docker_image = test_project_docker_image()
    docker_config = {
        "image": docker_image,
        "env_vars": ["AWS_ACCESS_KEY_ID", "AWS_SECRET_ACCESS_KEY"],
    }

    if IS_BUILDKITE:
        ecr_client = boto3.client("ecr", region_name="us-west-1")
        token = ecr_client.get_authorization_token()
        username, password = (base64.b64decode(
            token["authorizationData"][0]
            ["authorizationToken"]).decode().split(":"))
        registry = token["authorizationData"][0]["proxyEndpoint"]

        docker_config["registry"] = {
            "url": registry,
            "username": username,
            "password": password,
        }

    else:
        try:
            client = docker.from_env()
            client.images.get(docker_image)
            print(  # pylint: disable=print-call
                "Found existing image tagged {image}, skipping image build. To rebuild, first run: "
                "docker rmi {image}".format(image=docker_image))
        except docker.errors.ImageNotFound:
            build_and_tag_test_image(docker_image)

    with seven.TemporaryDirectory() as temp_dir:

        run_config = merge_dicts(
            merge_yamls([
                os.path.join(test_project_environments_path(), "env.yaml"),
                os.path.join(test_project_environments_path(), "env_s3.yaml"),
            ]),
            {
                "execution": {
                    "celery-docker": {
                        "config": {
                            "docker": docker_config,
                            "config_source": {
                                "task_always_eager": True
                            },
                        }
                    }
                },
            },
        )

        result = execute_pipeline(
            get_test_project_recon_pipeline("docker_celery_pipeline"),
            run_config=run_config,
            instance=DagsterInstance.local_temp(temp_dir),
        )
        assert result.success
Example #22
def test_terminate_launched_docker_run():
    docker_image = get_test_project_docker_image()
    launcher_config = {
        "env_vars": ["AWS_ACCESS_KEY_ID", "AWS_SECRET_ACCESS_KEY",],
        "network": "container:test-postgres-db-docker",
    }

    if IS_BUILDKITE:
        launcher_config["registry"] = get_buildkite_registry_config()
    else:
        find_local_test_image(docker_image)

    run_config = merge_yamls([os.path.join(get_test_project_environments_path(), "env_s3.yaml"),])

    with docker_postgres_instance(
        overrides={
            "run_launcher": {
                "class": "DockerRunLauncher",
                "module": "dagster_docker",
                "config": launcher_config,
            }
        }
    ) as instance:
        recon_pipeline = get_test_project_recon_pipeline("hanging_pipeline", docker_image)
        run = instance.create_run_for_pipeline(
            pipeline_def=recon_pipeline.get_definition(), run_config=run_config,
        )

        run_id = run.run_id

        external_pipeline = ReOriginatedExternalPipelineForTest(
            get_test_project_external_pipeline("hanging_pipeline", container_image=docker_image),
            container_image=docker_image,
        )
        instance.launch_run(run_id, external_pipeline)

        poll_for_step_start(instance, run_id)

        assert instance.run_launcher.can_terminate(run_id)
        assert instance.run_launcher.terminate(run_id)

        terminated_pipeline_run = poll_for_finished_run(instance, run_id, timeout=30)
        terminated_pipeline_run = instance.get_run_by_id(run_id)
        assert terminated_pipeline_run.status == PipelineRunStatus.CANCELED

        run_logs = instance.all_logs(run_id)

        _check_event_log_contains(
            run_logs,
            [
                ("PIPELINE_CANCELING", "Sending pipeline termination request"),
                ("STEP_FAILURE", 'Execution of step "hanging_solid" failed.'),
                ("PIPELINE_CANCELED", 'Execution of pipeline "hanging_pipeline" canceled.'),
                ("ENGINE_EVENT", "Pipeline execution terminated by interrupt"),
                ("ENGINE_EVENT", "Process for pipeline exited"),
            ],
        )
Example #23
def test_launch_docker_image_on_pipeline_config():
    # Docker image name to use for launch specified as part of the pipeline origin
    # rather than in the run launcher instance config

    docker_image = get_test_project_docker_image()
    launcher_config = {
        "env_vars": [
            "AWS_ACCESS_KEY_ID",
            "AWS_SECRET_ACCESS_KEY",
        ],
        "network": "container:test-postgres-db-docker",
        "container_kwargs": {
            "auto_remove": True,
        },
    }

    if IS_BUILDKITE:
        launcher_config["registry"] = get_buildkite_registry_config()
    else:
        find_local_test_image(docker_image)

    run_config = merge_yamls([
        os.path.join(get_test_project_environments_path(), "env.yaml"),
        os.path.join(get_test_project_environments_path(), "env_s3.yaml"),
    ])

    with docker_postgres_instance(
            overrides={
                "run_launcher": {
                    "class": "DockerRunLauncher",
                    "module": "dagster_docker",
                    "config": launcher_config,
                }
            }) as instance:
        recon_pipeline = get_test_project_recon_pipeline(
            "demo_pipeline", docker_image)
        run = instance.create_run_for_pipeline(
            pipeline_def=recon_pipeline.get_definition(),
            run_config=run_config,
        )

        with get_test_project_external_pipeline(
                "demo_pipeline",
                container_image=docker_image) as orig_pipeline:
            external_pipeline = ReOriginatedExternalPipelineForTest(
                orig_pipeline,
                container_image=docker_image,
            )
            instance.launch_run(run.run_id, external_pipeline)

            poll_for_finished_run(instance, run.run_id, timeout=60)

            run = instance.get_run_by_id(run.run_id)

            assert run.status == PipelineRunStatus.SUCCESS

            assert run.tags[DOCKER_IMAGE_TAG] == docker_image
Example #24
def test_memoization_on_celery_k8s(  # pylint: disable=redefined-outer-name
        dagster_docker_image, dagster_instance, helm_namespace, dagit_url):
    ephemeral_prefix = str(uuid.uuid4())
    run_config = deep_merge_dicts(
        merge_yamls([
            os.path.join(get_test_project_environments_path(), "env_s3.yaml")
        ]),
        get_celery_engine_config(dagster_docker_image=dagster_docker_image,
                                 job_namespace=helm_namespace),
    )
    run_config = deep_merge_dicts(
        run_config,
        {
            "resources": {
                "io_manager": {
                    "config": {
                        "s3_prefix": ephemeral_prefix
                    }
                }
            }
        },
    )
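    # Launch the same pipeline twice with a fresh S3 prefix: the first run
    # emits step events, the second should be fully memoized (no step events).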

    try:

        run_ids = []
        for _ in range(2):
            run_id = launch_run_over_graphql(
                dagit_url,
                run_config=run_config,
                pipeline_name="memoization_pipeline",
                mode="celery",
            )

            result = wait_for_job_and_get_raw_logs(job_name="dagster-run-%s" %
                                                   run_id,
                                                   namespace=helm_namespace)

            assert "PIPELINE_SUCCESS" in result, "no match, result: {}".format(
                result)

            run_ids.append(run_id)

        unmemoized_run_id = run_ids[0]
        step_events = _get_step_events(
            dagster_instance.all_logs(unmemoized_run_id))
        assert len(step_events) == 4

        memoized_run_id = run_ids[1]
        step_events = _get_step_events(
            dagster_instance.all_logs(memoized_run_id))
        assert len(step_events) == 0

    finally:
        cleanup_memoized_results(define_memoization_pipeline(), "celery",
                                 dagster_instance, run_config)
Example #25
def test_execute_retry_pipeline_on_celery_k8s(  # pylint: disable=redefined-outer-name
        dagster_docker_image, dagster_instance, helm_namespace):
    run_config = merge_dicts(
        merge_yamls(
            [os.path.join(test_project_environments_path(), 'env_s3.yaml')]),
        get_celery_engine_config(dagster_docker_image=dagster_docker_image,
                                 helm_namespace=helm_namespace),
    )

    pipeline_name = 'retry_pipeline'
    run = create_run_for_test(
        dagster_instance,
        pipeline_name=pipeline_name,
        run_config=run_config,
        mode='default',
    )

    dagster_instance.launch_run(
        run.run_id, get_test_project_external_pipeline(pipeline_name))

    result = wait_for_job_and_get_logs(job_name='dagster-run-%s' % run.run_id,
                                       namespace=helm_namespace)

    assert not result.get('errors')
    assert result['data']
    assert (
        result['data']['executeRunInProcess']['__typename'] ==
        'ExecuteRunInProcessSuccess'), 'no match, result: {}'.format(result)

    stats = dagster_instance.get_run_stats(run.run_id)
    assert stats.steps_succeeded == 1

    assert DagsterEventType.STEP_START in [
        event.dagster_event.event_type
        for event in dagster_instance.all_logs(run.run_id)
        if event.is_dagster_event
    ]

    assert DagsterEventType.STEP_UP_FOR_RETRY in [
        event.dagster_event.event_type
        for event in dagster_instance.all_logs(run.run_id)
        if event.is_dagster_event
    ]

    assert DagsterEventType.STEP_RESTARTED in [
        event.dagster_event.event_type
        for event in dagster_instance.all_logs(run.run_id)
        if event.is_dagster_event
    ]

    assert DagsterEventType.STEP_SUCCESS in [
        event.dagster_event.event_type
        for event in dagster_instance.all_logs(run.run_id)
        if event.is_dagster_event
    ]
Example #26
def test_execute_on_celery_k8s_with_hard_failure(  # pylint: disable=redefined-outer-name
        dagster_docker_image, dagster_instance,
        set_dagster_k8s_pipeline_run_namespace_env, dagit_url):
    run_config = merge_dicts(
        merge_dicts(
            merge_yamls([
                os.path.join(get_test_project_environments_path(),
                             "env_s3.yaml"),
            ]),
            get_celery_engine_config(
                dagster_docker_image=dagster_docker_image,
                job_namespace={"env": "DAGSTER_K8S_PIPELINE_RUN_NAMESPACE"},
            ),
        ),
        {"solids": {
            "hard_fail_or_0": {
                "config": {
                    "fail": True
                }
            }
        }},
    )

    run_id = launch_run_over_graphql(dagit_url,
                                     run_config=run_config,
                                     pipeline_name="hard_failer")

    # Check that pipeline run is marked as failed
    pipeline_run_status_failure = False
    start_time = datetime.datetime.now()
    timeout = datetime.timedelta(0, 120)

    while datetime.datetime.now() < start_time + timeout:
        pipeline_run = dagster_instance.get_run_by_id(run_id)
        if pipeline_run.status == PipelineRunStatus.FAILURE:
            pipeline_run_status_failure = True
            break
        time.sleep(5)
    assert pipeline_run_status_failure

    # Check for step failure for hard_fail_or_0.compute
    start_time = datetime.datetime.now()
    step_failure_found = False
    while datetime.datetime.now() < start_time + timeout:
        event_records = dagster_instance.all_logs(run_id)
        for event_record in event_records:
            if event_record.dagster_event:
                if (event_record.dagster_event.event_type
                        == DagsterEventType.STEP_FAILURE
                        and event_record.dagster_event.step_key
                        == "hard_fail_or_0"):
                    step_failure_found = True
                    break
        time.sleep(5)
    assert step_failure_found
Example #27
def test_execute_on_celery_k8s_with_termination(  # pylint: disable=redefined-outer-name
        dagster_docker_image, dagster_instance, helm_namespace):
    run_config = merge_dicts(
        merge_yamls([
            os.path.join(get_test_project_environments_path(), "env_s3.yaml"),
        ]),
        get_celery_engine_config(dagster_docker_image=dagster_docker_image,
                                 job_namespace=helm_namespace),
    )

    _test_termination(dagster_instance, run_config)
Example #28
def test_execute_on_celery_k8s_with_env_var_and_termination(  # pylint: disable=redefined-outer-name
    dagster_docker_image, dagster_instance, set_dagster_k8s_pipeline_run_namespace_env
):
    run_config = merge_dicts(
        merge_yamls([os.path.join(get_test_project_environments_path(), "env_s3.yaml"),]),
        get_celery_engine_config(
            dagster_docker_image=dagster_docker_image,
            job_namespace={"env": "DAGSTER_K8S_PIPELINE_RUN_NAMESPACE"},
        ),
    )

    _test_termination(dagster_instance, run_config)
Example #29
def test_merge_yamls():
    assert merge_yamls(
        [
            file_relative_path(__file__, os.path.join("yamls", "yaml_one.yaml")),
            file_relative_path(__file__, os.path.join("yamls", "yaml_two.yaml")),
        ]
    ) == {"key_one": {"key_one_one": "value_one", "key_one_two": "value_two"}}

    with pytest.raises(
        check.CheckError,
        match=(
            "Expected YAML from file .* to parse to dictionary, "
            'instead got: "this is a valid YAML string but not a dictionary"'
        ),
    ):
        merge_yamls(
            [
                file_relative_path(__file__, os.path.join("yamls", "yaml_one.yaml")),
                file_relative_path(__file__, os.path.join("yamls", "bad", "a_string.yaml")),
            ]
        )
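For reference, a minimal sketch of the merge semantics this test implies: every
file must parse to a dict, and dicts from later files are merged over earlier
ones, recursing into nested dicts. This is inferred from the assertions above,
not Dagster's actual implementation:

import yaml


def merge_yamls_sketch(file_list):
    # Merge a list of YAML files into one dict; later files win on conflicts.
    merged = {}
    for path in file_list:
        with open(path) as f:
            loaded = yaml.safe_load(f) or {}
        if not isinstance(loaded, dict):
            raise ValueError(
                'Expected YAML from file {} to parse to dictionary'.format(path)
            )
        _deep_merge(merged, loaded)
    return merged


def _deep_merge(onto, from_):
    for key, value in from_.items():
        if isinstance(value, dict) and isinstance(onto.get(key), dict):
            _deep_merge(onto[key], value)
        else:
            onto[key] = value
    return onto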
Example #30
def test_merge_yamls():
    assert merge_yamls([
        file_relative_path(__file__, 'yamls/yaml_one.yaml'),
        file_relative_path(__file__, 'yamls/yaml_two.yaml'),
    ]) == {
        'key_one': {
            'key_one_one': 'value_one',
            'key_one_two': 'value_two'
        }
    }

    with pytest.raises(
            check.CheckError,
            match=
        ('Expected YAML from file .*?/yamls/bad/a_string.yaml to parse to dictionary, '
         'instead got: "this is a valid YAML string but not a dictionary"'),
    ):
        merge_yamls([
            file_relative_path(__file__, 'yamls/yaml_one.yaml'),
            file_relative_path(__file__, 'yamls/bad/a_string.yaml'),
        ])