Example #1
def test_k8s_run_launcher_celery(dagster_instance, helm_namespace):

    environment_dict = merge_dicts(
        merge_yamls([
            os.path.join(test_project_environments_path(), 'env.yaml'),
            os.path.join(test_project_environments_path(), 'env_s3.yaml'),
        ]),
        get_celery_engine_config(),
    )

    assert 'celery-k8s' in environment_dict['execution']

    pipeline_name = 'demo_pipeline_celery'
    tags = {'key': 'value'}
    run = create_run_for_test(
        dagster_instance,
        pipeline_name=pipeline_name,
        environment_dict=environment_dict,
        tags=tags,
        mode='default',
    )

    dagster_instance.launch_run(run.run_id)
    result = wait_for_job_and_get_logs(job_name='dagster-run-%s' % run.run_id,
                                       namespace=helm_namespace)

    assert not result.get('errors')
    assert result['data']
    assert (result['data']['startPipelineExecutionForCreatedRun']['__typename']
            == 'StartPipelineRunSuccess')
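These snippets come from dagster's Celery-on-Kubernetes integration tests and omit their imports. As a minimal sketch, example #1 appears to rely on something like the imports below; the exact module paths moved around between dagster releases, so treat them as assumptions rather than a canonical list:

# Assumed imports for example #1; module paths are best-effort guesses
# and shifted across dagster versions.
import os

from dagster.utils import merge_dicts
from dagster.utils.yaml_utils import merge_yamls
from dagster.core.test_utils import create_run_for_test
from dagster_k8s import get_celery_engine_config
from dagster_k8s.test import wait_for_job_and_get_logs
from dagster_test.test_project import test_project_environments_path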
Example #2
def test_k8s_run_launcher_celery(dagster_instance):  # pylint: disable=redefined-outer-name
    run_id = uuid.uuid4().hex
    environment_dict = merge_dicts(
        merge_yamls([
            os.path.join(environments_path(), 'env.yaml'),
            os.path.join(environments_path(), 'env_filesystem.yaml'),
        ]),
        get_celery_engine_config(),
    )

    assert 'celery' in environment_dict['execution']

    pipeline_name = 'demo_pipeline_celery'
    tags = {'key': 'value'}
    run = PipelineRun.create_empty_run(pipeline_name, run_id, environment_dict,
                                       tags)

    dagster_instance.launch_run(run)
    success, raw_logs = wait_for_job_success('dagster-job-%s' % run_id)
    result = parse_raw_res(raw_logs.split('\n'))

    assert success
    assert not result.get('errors')
    assert result['data']
    # This is not an ideal test, but it proves that Celery is configured
    # properly. Getting it to succeed would require relying on S3 / GCS
    # for storage.
    assert result['data']['startPipelineExecutionForCreatedRun'][
        '__typename'] == 'PythonError'
    assert (
        'Must use S3 or GCS storage with non-local Celery broker: pyamqp://test:test@dagster-rabbitmq:5672// and backend: amqp'
        in result['data']['startPipelineExecutionForCreatedRun']['message'])
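For orientation, the merged environment_dict in this example plausibly has the shape below. The broker and backend values are grounded in the error message the test asserts on; the storage stanza attributed to env_filesystem.yaml is an assumption:

# Assumed shape of the merged environment_dict (illustration only).
environment_dict = {
    'storage': {'filesystem': None},  # from env_filesystem.yaml (assumed)
    'execution': {
        'celery': {
            'config': {
                # grounded in the error message asserted above
                'broker': 'pyamqp://test:test@dagster-rabbitmq:5672//',
                'backend': 'amqp',
            }
        }
    },
    # ... solid config from env.yaml
}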
Example #3
def test_k8s_run_launcher_celery(dagster_instance, helm_namespace):

    run_config = merge_dicts(
        merge_yamls([
            os.path.join(test_project_environments_path(), 'env.yaml'),
            os.path.join(test_project_environments_path(), 'env_s3.yaml'),
        ]),
        get_celery_engine_config(),
    )

    assert 'celery-k8s' in run_config['execution']

    pipeline_name = 'demo_pipeline_celery'
    tags = {'key': 'value'}
    run = create_run_for_test(
        dagster_instance,
        pipeline_name=pipeline_name,
        run_config=run_config,
        tags=tags,
        mode='default',
    )

    dagster_instance.launch_run(
        run.run_id, get_test_project_external_pipeline(pipeline_name))
    result = wait_for_job_and_get_logs(job_name='dagster-run-%s' % run.run_id,
                                       namespace=helm_namespace)

    assert not result.get('errors')
    assert result['data']
    assert (
        result['data']['executeRunInProcess']['__typename'] ==
        'ExecuteRunInProcessSuccess'), 'no match, result: {}'.format(result)
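Example #3 reflects a later dagster API than examples #1, #2, #4, and #5: environment_dict was renamed run_config, launch_run takes an external pipeline representation alongside the run id, and the result comes from the executeRunInProcess GraphQL mutation rather than startPipelineExecutionForCreatedRun.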
Example #4
def test_k8s_run_launcher_celery(dagster_instance, helm_namespace):
    environment_dict = merge_dicts(
        merge_yamls([
            os.path.join(test_project_environments_path(), 'env.yaml'),
            os.path.join(test_project_environments_path(),
                         'env_filesystem.yaml'),
        ]),
        get_celery_engine_config(),
    )

    assert 'celery' in environment_dict['execution']

    pipeline_name = 'demo_pipeline_celery'
    tags = {'key': 'value'}
    run = dagster_instance.create_run(pipeline_name=pipeline_name,
                                      environment_dict=environment_dict,
                                      tags=tags,
                                      mode='default')

    dagster_instance.launch_run(run.run_id)
    result = wait_for_job_and_get_logs(job_name='dagster-run-%s' % run.run_id,
                                       namespace=helm_namespace)

    assert not result.get('errors')
    assert result['data']
    # This is not an ideal test, but it proves that Celery is configured
    # properly. Getting it to succeed would require relying on S3 / GCS
    # for storage.
    assert result['data']['startPipelineExecutionForCreatedRun'][
        '__typename'] == 'PythonError'
    assert (
        'Must use S3 or GCS storage with non-local Celery broker: pyamqp://test:test@dagster-rabbitmq:5672// and backend: amqp'
        in result['data']['startPipelineExecutionForCreatedRun']['message'])
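The asserted failure is the point of this test: filesystem storage cannot be shared by Celery workers behind a non-local broker. A minimal sketch of the fix, in the same dict form example #6 uses below; the bucket name is illustrative:

# Hypothetical fix: swap filesystem storage for S3 so distributed Celery
# workers can share intermediates. The bucket name is illustrative.
environment_dict['storage'] = {
    's3': {'config': {'s3_bucket': 'my-test-bucket'}}
}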
Example #5
def test_k8s_run_launcher_celery(dagster_instance, helm_namespace):

    environment_dict = merge_dicts(
        merge_yamls([
            os.path.join(test_project_environments_path(), 'env.yaml'),
            os.path.join(test_project_environments_path(),
                         'env_filesystem.yaml'),
        ]),
        get_celery_engine_config(),
    )

    assert 'celery' in environment_dict['execution']

    pipeline_name = 'demo_pipeline_celery'
    tags = {'key': 'value'}
    run = dagster_instance.create_run(pipeline_name=pipeline_name,
                                      environment_dict=environment_dict,
                                      tags=tags,
                                      mode='default')

    dagster_instance.launch_run(run.run_id)
    result = wait_for_job_and_get_logs(job_name='dagster-run-%s' % run.run_id,
                                       namespace=helm_namespace)

    assert not result.get('errors')
    assert result['data']
    # This is not an ideal test, but it proves that Celery is configured
    # properly. We detect that by seeing an error that we know occurs
    # after configuration.
    assert result['data']['startPipelineExecutionForCreatedRun'][
        '__typename'] == 'PythonError'
    assert (
        'When executing count_letters.compute discovered required outputs missing from previous step'
        in result['data']['startPipelineExecutionForCreatedRun']['message'])
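Here the run gets further than in examples #2 and #4: execution starts, but count_letters.compute cannot find its upstream outputs because each Celery worker writes intermediates to its own pod-local filesystem. The error is therefore evidence that Celery itself is wired up correctly.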
Example #6
    def frequent_large_pipe(_):
        from dagster_k8s import get_celery_engine_config

        cfg = get_celery_engine_config()
        cfg['storage'] = {
            's3': {
                'config': {
                    's3_bucket': 'dagster-scratch-80542c2'
                }
            }
        }
        return cfg
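frequent_large_pipe has the shape of an environment-dict function for a schedule: it takes the schedule context (ignored here) and returns the run's environment dict. A hedged usage sketch, assuming the ScheduleDefinition API of that dagster era and a hypothetical pipeline name:

# Hedged sketch: wiring the function above into a schedule. The
# ScheduleDefinition signature, cron expression, and pipeline name are
# assumptions, not taken from the original source.
from dagster import ScheduleDefinition

frequent_large_pipe_schedule = ScheduleDefinition(
    name='frequent_large_pipe',
    cron_schedule='*/5 * * * *',
    pipeline_name='large_pipeline_celery',
    environment_dict_fn=frequent_large_pipe,
)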
Example #7
def test_get_validated_celery_k8s_executor_config():
    res = _get_validated_celery_k8s_executor_config(
        {'execution': {CELERY_K8S_CONFIG_KEY: {'config': {'job_image': 'foo'}}}}
    )

    assert res == {
        'backend': 'rpc://',
        'retries': {'enabled': {}},
        'job_image': 'foo',
        'image_pull_policy': 'IfNotPresent',
        'load_incluster_config': True,
        'job_namespace': 'default',
        'repo_location_name': '<<in_process>>',
    }

    with pytest.raises(
        check.CheckError,
        match='Description: celery-k8s execution must be configured in pipeline execution config to'
        ' launch runs with CeleryK8sRunLauncher',
    ):
        _get_validated_celery_k8s_executor_config({})

    with environ(
        {
            'DAGSTER_K8S_PIPELINE_RUN_IMAGE': 'foo',
            'DAGSTER_K8S_PIPELINE_RUN_NAMESPACE': 'default',
            'DAGSTER_K8S_PIPELINE_RUN_IMAGE_PULL_POLICY': 'Always',
            'DAGSTER_K8S_PIPELINE_RUN_ENV_CONFIGMAP': 'config-pipeline-env',
        }
    ):
        cfg = get_celery_engine_config()
        res = _get_validated_celery_k8s_executor_config(cfg)
        assert res == {
            'backend': 'rpc://',
            'retries': {'enabled': {}},
            'job_image': 'foo',
            'image_pull_policy': 'Always',
            'env_config_maps': ['config-pipeline-env'],
            'load_incluster_config': True,
            'job_namespace': 'default',
            'repo_location_name': '<<in_process>>',
        }

    # Test setting all possible config fields
    with environ(
        {
            'TEST_PIPELINE_RUN_NAMESPACE': 'default',
            'TEST_CELERY_BROKER': 'redis://some-redis-host:6379/0',
            'TEST_CELERY_BACKEND': 'redis://some-redis-host:6379/0',
            'TEST_PIPELINE_RUN_IMAGE': 'foo',
            'TEST_PIPELINE_RUN_IMAGE_PULL_POLICY': 'Always',
            'TEST_K8S_PULL_SECRET_1': 'super-secret-1',
            'TEST_K8S_PULL_SECRET_2': 'super-secret-2',
            'TEST_SERVICE_ACCOUNT_NAME': 'my-cool-service-account',
            'TEST_PIPELINE_RUN_ENV_CONFIGMAP': 'config-pipeline-env',
            'TEST_SECRET': 'config-secret-env',
        }
    ):

        cfg = {
            'execution': {
                CELERY_K8S_CONFIG_KEY: {
                    'config': {
                        'repo_location_name': '<<in_process>>',
                        'load_incluster_config': False,
                        'kubeconfig_file': '/some/kubeconfig/file',
                        'job_namespace': {'env': 'TEST_PIPELINE_RUN_NAMESPACE'},
                        'broker': {'env': 'TEST_CELERY_BROKER'},
                        'backend': {'env': 'TEST_CELERY_BACKEND'},
                        'include': ['dagster', 'dagit'],
                        'config_source': {
                            'task_annotations': '''{'*': {'on_failure': my_on_failure}}'''
                        },
                        'retries': {'disabled': {}},
                        'job_image': {'env': 'TEST_PIPELINE_RUN_IMAGE'},
                        'image_pull_policy': {'env': 'TEST_PIPELINE_RUN_IMAGE_PULL_POLICY'},
                        'image_pull_secrets': [
                            {'name': {'env': 'TEST_K8S_PULL_SECRET_1'}},
                            {'name': {'env': 'TEST_K8S_PULL_SECRET_2'}},
                        ],
                        'service_account_name': {'env': 'TEST_SERVICE_ACCOUNT_NAME'},
                        'env_config_maps': [{'env': 'TEST_PIPELINE_RUN_ENV_CONFIGMAP'}],
                        'env_secrets': [{'env': 'TEST_SECRET'}],
                    }
                }
            }
        }

        res = _get_validated_celery_k8s_executor_config(cfg)
        assert res == {
            'repo_location_name': '<<in_process>>',
            'load_incluster_config': False,
            'kubeconfig_file': '/some/kubeconfig/file',
            'job_namespace': 'default',
            'backend': 'redis://some-redis-host:6379/0',
            'broker': 'redis://some-redis-host:6379/0',
            'include': ['dagster', 'dagit'],
            'config_source': {'task_annotations': '''{'*': {'on_failure': my_on_failure}}'''},
            'retries': {'disabled': {}},
            'job_image': 'foo',
            'image_pull_policy': 'Always',
            'image_pull_secrets': [{'name': 'super-secret-1'}, {'name': 'super-secret-2'}],
            'service_account_name': 'my-cool-service-account',
            'env_config_maps': ['config-pipeline-env'],
            'env_secrets': ['config-secret-env'],
        }
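Working backward from these assertions, get_celery_engine_config() plausibly returns a celery-k8s execution stanza whose fields read from the DAGSTER_K8S_PIPELINE_RUN_* environment variables, with backend, retries, load_incluster_config, job_namespace, and repo_location_name filled in as defaults during validation. A hedged reconstruction, not the actual library source:

# Hedged reconstruction of get_celery_engine_config(), inferred from the
# assertions in example #7; the real helper may include each field only
# when the corresponding environment variable is set.
def get_celery_engine_config():
    return {
        'execution': {
            CELERY_K8S_CONFIG_KEY: {
                'config': {
                    'job_image': {'env': 'DAGSTER_K8S_PIPELINE_RUN_IMAGE'},
                    'job_namespace': {'env': 'DAGSTER_K8S_PIPELINE_RUN_NAMESPACE'},
                    'image_pull_policy': {'env': 'DAGSTER_K8S_PIPELINE_RUN_IMAGE_PULL_POLICY'},
                    'env_config_maps': [{'env': 'DAGSTER_K8S_PIPELINE_RUN_ENV_CONFIGMAP'}],
                }
            }
        }
    }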