Example #1
0
def test_execute_on_celery_k8s(  # pylint: disable=redefined-outer-name
    dagster_docker_image, dagster_instance, helm_namespace
):
    """Launch ``demo_pipeline_celery`` and expect ``PIPELINE_SUCCESS`` in the raw job logs."""
    yaml_paths = [
        os.path.join(test_project_environments_path(), yaml_name)
        for yaml_name in ("env.yaml", "env_s3.yaml")
    ]
    base_config = merge_yamls(yaml_paths)
    engine_config = get_celery_engine_config(
        dagster_docker_image=dagster_docker_image, job_namespace=helm_namespace
    )
    run_config = merge_dicts(base_config, engine_config)

    pipeline_name = "demo_pipeline_celery"
    run = create_run_for_test(
        dagster_instance, pipeline_name=pipeline_name, run_config=run_config, mode="default",
    )

    external_pipeline = ReOriginatedExternalPipelineForTest(
        get_test_project_external_pipeline(pipeline_name)
    )
    dagster_instance.launch_run(run.run_id, external_pipeline)

    # The job is looked up by a name derived from the run id.
    job_name = "dagster-run-%s" % run.run_id
    result = wait_for_job_and_get_raw_logs(job_name=job_name, namespace=helm_namespace)

    assert "PIPELINE_SUCCESS" in result, "no match, result: {}".format(result)
Example #2
0
def test_k8s_run_launcher_celery(dagster_instance, helm_namespace):
    """Launch a tagged ``demo_pipeline_celery`` run and expect ``ExecuteRunInProcessSuccess``."""
    yaml_paths = [
        os.path.join(test_project_environments_path(), yaml_name)
        for yaml_name in ('env.yaml', 'env_s3.yaml')
    ]
    run_config = merge_dicts(merge_yamls(yaml_paths), get_celery_engine_config())

    # The merged config must carry a 'celery-k8s' entry under 'execution'.
    assert 'celery-k8s' in run_config['execution']

    pipeline_name = 'demo_pipeline_celery'
    run = create_run_for_test(
        dagster_instance,
        pipeline_name=pipeline_name,
        run_config=run_config,
        tags={'key': 'value'},
        mode='default',
    )

    external_pipeline = get_test_project_external_pipeline(pipeline_name)
    dagster_instance.launch_run(run.run_id, external_pipeline)

    job_name = 'dagster-run-%s' % run.run_id
    result = wait_for_job_and_get_logs(job_name=job_name, namespace=helm_namespace)

    assert not result.get('errors')
    assert result['data']
    typename = result['data']['executeRunInProcess']['__typename']
    assert typename == 'ExecuteRunInProcessSuccess', 'no match, result: {}'.format(result)
Example #3
0
def test_execute_subset_on_celery_k8s(  # pylint: disable=redefined-outer-name
        dagster_docker_image, dagster_instance, helm_namespace):
    """Run only the ``count_letters`` solid of ``demo_pipeline_celery`` and expect success."""
    yaml_paths = [
        os.path.join(test_project_environments_path(), yaml_name)
        for yaml_name in ('env_subset.yaml', 'env_s3.yaml')
    ]
    base_config = merge_yamls(yaml_paths)
    engine_config = get_celery_engine_config(
        dagster_docker_image=dagster_docker_image, job_namespace=helm_namespace)
    run_config = merge_dicts(base_config, engine_config)

    pipeline_name = 'demo_pipeline_celery'
    run = create_run_for_test(
        dagster_instance,
        pipeline_name=pipeline_name,
        run_config=run_config,
        mode='default',
        solids_to_execute={'count_letters'},
    )

    external_pipeline = ReOriginatedExternalPipelineForTest(
        get_test_project_external_pipeline(pipeline_name))
    dagster_instance.launch_run(run.run_id, external_pipeline)

    job_name = 'dagster-run-%s' % run.run_id
    result = wait_for_job_and_get_raw_logs(job_name=job_name, namespace=helm_namespace)

    assert 'PIPELINE_SUCCESS' in result, 'no match, result: {}'.format(result)
Example #4
0
def test_execute_on_celery(  # pylint: disable=redefined-outer-name
        dagster_docker_image, dagster_instance, helm_namespace):
    """Launch ``demo_pipeline_celery`` and expect ``ExecuteRunInProcessSuccess`` in the job logs."""
    yaml_paths = [
        os.path.join(test_project_environments_path(), yaml_name)
        for yaml_name in ('env.yaml', 'env_s3.yaml')
    ]
    base_config = merge_yamls(yaml_paths)
    engine_config = get_celery_engine_config(
        dagster_docker_image=dagster_docker_image, helm_namespace=helm_namespace)
    run_config = merge_dicts(base_config, engine_config)

    pipeline_name = 'demo_pipeline_celery'
    run = create_run_for_test(
        dagster_instance,
        pipeline_name=pipeline_name,
        run_config=run_config,
        mode='default',
    )

    external_pipeline = get_test_project_external_pipeline(pipeline_name)
    dagster_instance.launch_run(run.run_id, external_pipeline)

    job_name = 'dagster-run-%s' % run.run_id
    result = wait_for_job_and_get_logs(job_name=job_name, namespace=helm_namespace)

    assert not result.get('errors')
    assert result['data']
    typename = result['data']['executeRunInProcess']['__typename']
    assert typename == 'ExecuteRunInProcessSuccess', 'no match, result: {}'.format(result)
Example #5
0
def test_k8s_run_launcher_celery(dagster_instance, helm_namespace):
    """Launch a tagged ``demo_pipeline_celery`` run and expect ``StartPipelineRunSuccess``."""
    yaml_paths = [
        os.path.join(test_project_environments_path(), yaml_name)
        for yaml_name in ('env.yaml', 'env_s3.yaml')
    ]
    environment_dict = merge_dicts(merge_yamls(yaml_paths), get_celery_engine_config())

    # The merged config must carry a 'celery-k8s' entry under 'execution'.
    assert 'celery-k8s' in environment_dict['execution']

    run = create_run_for_test(
        dagster_instance,
        pipeline_name='demo_pipeline_celery',
        environment_dict=environment_dict,
        tags={'key': 'value'},
        mode='default',
    )

    dagster_instance.launch_run(run.run_id)

    job_name = 'dagster-run-%s' % run.run_id
    result = wait_for_job_and_get_logs(job_name=job_name, namespace=helm_namespace)

    assert not result.get('errors')
    assert result['data']
    typename = result['data']['startPipelineExecutionForCreatedRun']['__typename']
    assert typename == 'StartPipelineRunSuccess'
Example #6
0
def test_execute_celery_docker():
    """Execute ``docker_celery_pipeline`` with the celery-docker executor and expect success.

    When ``IS_BUILDKITE`` is truthy, registry credentials are taken from an ECR
    authorization token; otherwise the image is looked up through the local
    docker client and built if docker reports ``ImageNotFound``.
    """
    docker_image = test_project_docker_image()
    docker_config = {
        "image": docker_image,
        "env_vars": ["AWS_ACCESS_KEY_ID", "AWS_SECRET_ACCESS_KEY"],
    }

    if not IS_BUILDKITE:
        try:
            docker.from_env().images.get(docker_image)
            print(  # pylint: disable=print-call
                "Found existing image tagged {image}, skipping image build. To rebuild, first run: "
                "docker rmi {image}".format(image=docker_image))
        except docker.errors.ImageNotFound:
            build_and_tag_test_image(docker_image)
    else:
        token = boto3.client("ecr", region_name="us-west-1").get_authorization_token()
        auth_data = token["authorizationData"][0]
        # The decoded token is split on ":" into exactly a username and a password.
        decoded_token = base64.b64decode(auth_data["authorizationToken"]).decode()
        username, password = decoded_token.split(":")

        docker_config["registry"] = {
            "url": auth_data["proxyEndpoint"],
            "username": username,
            "password": password,
        }

    with seven.TemporaryDirectory() as temp_dir:
        yaml_paths = [
            os.path.join(test_project_environments_path(), yaml_name)
            for yaml_name in ("env.yaml", "env_s3.yaml")
        ]
        celery_docker_settings = {
            "docker": docker_config,
            "config_source": {"task_always_eager": True},
        }
        run_config = merge_dicts(
            merge_yamls(yaml_paths),
            {"execution": {"celery-docker": {"config": celery_docker_settings}}},
        )

        result = execute_pipeline(
            get_test_project_recon_pipeline("docker_celery_pipeline"),
            run_config=run_config,
            instance=DagsterInstance.local_temp(temp_dir),
        )
        assert result.success
Example #7
0
def test_execute_celery_docker():
    """Execute ``docker_celery_pipeline`` with the celery-docker executor and expect success.

    When ``IS_BUILDKITE`` is truthy, registry credentials are taken from an ECR
    authorization token; otherwise the image is looked up through the local
    docker client and built if docker reports ``ImageNotFound``.
    """
    docker_image = test_project_docker_image()
    docker_config = {
        'image': docker_image,
        'env_vars': ['AWS_ACCESS_KEY_ID', 'AWS_SECRET_ACCESS_KEY'],
    }

    if not IS_BUILDKITE:
        try:
            docker.from_env().images.get(docker_image)
            print(  # pylint: disable=print-call
                'Found existing image tagged {image}, skipping image build. To rebuild, first run: '
                'docker rmi {image}'.format(image=docker_image))
        except docker.errors.ImageNotFound:
            build_and_tag_test_image(docker_image)
    else:
        token = boto3.client('ecr', region_name='us-west-1').get_authorization_token()
        auth_data = token['authorizationData'][0]
        # The decoded token is split on ':' into exactly a username and a password.
        decoded_token = base64.b64decode(auth_data['authorizationToken']).decode()
        username, password = decoded_token.split(':')

        docker_config['registry'] = {
            'url': auth_data['proxyEndpoint'],
            'username': username,
            'password': password,
        }

    with seven.TemporaryDirectory() as temp_dir:
        yaml_paths = [
            os.path.join(test_project_environments_path(), yaml_name)
            for yaml_name in ('env.yaml', 'env_s3.yaml')
        ]
        celery_docker_settings = {
            'docker': docker_config,
            'config_source': {'task_always_eager': True},
        }
        run_config = merge_dicts(
            merge_yamls(yaml_paths),
            {'execution': {'celery-docker': {'config': celery_docker_settings}}},
        )

        result = execute_pipeline(
            get_test_project_recon_pipeline('docker_celery_pipeline'),
            run_config=run_config,
            instance=DagsterInstance.local_temp(temp_dir),
        )
        assert result.success
Example #8
0
def test_execute_on_celery(  # pylint: disable=redefined-outer-name
    dagster_docker_image, dagster_instance, helm_namespace
):
    """Launch ``demo_pipeline_celery`` with an inline celery-k8s config and expect success."""
    yaml_paths = [
        os.path.join(test_project_environments_path(), yaml_name)
        for yaml_name in ('env.yaml', 'env_s3.yaml')
    ]
    base_config = merge_yamls(yaml_paths)

    celery_k8s_settings = {
        'broker': {'env': 'DAGSTER_K8S_CELERY_BROKER'},
        'backend': {'env': 'DAGSTER_K8S_CELERY_BACKEND'},
        'job_image': dagster_docker_image,
        'job_namespace': helm_namespace,
        'instance_config_map': 'dagster-instance',
        'postgres_password_secret': 'dagster-postgresql-secret',
        'image_pull_policy': 'Always',
        'env_config_maps': ['dagster-pipeline-env'],
    }
    environment_dict = merge_dicts(
        base_config, {'execution': {'celery-k8s': {'config': celery_k8s_settings}}},
    )

    run = create_run_for_test(
        dagster_instance,
        pipeline_name='demo_pipeline_celery',
        environment_dict=environment_dict,
        mode='default',
    )

    dagster_instance.launch_run(run.run_id)

    job_name = 'dagster-run-%s' % run.run_id
    result = wait_for_job_and_get_logs(job_name=job_name, namespace=helm_namespace)

    assert not result.get('errors')
    assert result['data']
    typename = result['data']['startPipelineExecutionForCreatedRun']['__typename']
    assert typename == 'StartPipelineRunSuccess'
Example #9
0
def test_error_dag_k8s(dagster_docker_image, cluster_provider):
    """Expect ``demo_error_pipeline`` to raise ``AirflowException`` with "Unusual error".

    ``cluster_provider`` is requested but not referenced in the body.
    """
    _check_aws_creds_available()

    pipeline_name = "demo_error_pipeline"
    recon_repo = ReconstructableRepository.for_module(
        "dagster_test.test_project.test_pipelines.repo", "define_demo_execution_repo"
    )
    env_yaml_paths = [os.path.join(test_project_environments_path(), "env_s3.yaml")]
    run_config = load_yaml_from_glob_list(env_yaml_paths)

    run_id = make_new_run_id()
    execution_date = timezone.utcnow()

    # Kubeconfig path and AWS credentials are read from the test process environment.
    aws_env_vars = {
        "AWS_ACCESS_KEY_ID": os.environ["AWS_ACCESS_KEY_ID"],
        "AWS_SECRET_ACCESS_KEY": os.environ["AWS_SECRET_ACCESS_KEY"],
    }
    operator_kwargs = {"config_file": os.environ["KUBECONFIG"], "env_vars": aws_env_vars}

    dag, tasks = make_airflow_dag_kubernetized_for_recon_repo(
        recon_repo=recon_repo,
        pipeline_name=pipeline_name,
        image=dagster_docker_image,
        namespace="default",
        run_config=run_config,
        op_kwargs=operator_kwargs,
    )

    with pytest.raises(AirflowException) as exc_info:
        execute_tasks_in_dag(dag, tasks, run_id, execution_date)

    assert "Exception: Unusual error" in str(exc_info.value)
Example #10
0
def test_s3_storage(dagster_airflow_k8s_operator_pipeline,
                    dagster_docker_image, cluster_provider):
    """Run ``demo_pipeline`` through the k8s operator fixture and validate the results.

    ``cluster_provider`` is requested but not referenced in the body.
    """
    _check_aws_creds_available()
    environments_path = test_project_environments_path()

    recon_repo = ReconstructableRepository.for_module(
        "dagster_test.test_project.test_pipelines.repo",
        "define_demo_execution_repo",
    )
    env_yaml_paths = [
        os.path.join(environments_path, yaml_name)
        for yaml_name in ("env.yaml", "env_s3.yaml")
    ]
    # Kubeconfig path and AWS credentials are read from the test process environment.
    operator_kwargs = {
        "config_file": os.environ["KUBECONFIG"],
        "env_vars": {
            "AWS_ACCESS_KEY_ID": os.environ["AWS_ACCESS_KEY_ID"],
            "AWS_SECRET_ACCESS_KEY": os.environ["AWS_SECRET_ACCESS_KEY"],
        },
    }

    results = dagster_airflow_k8s_operator_pipeline(
        pipeline_name="demo_pipeline",
        recon_repo=recon_repo,
        environment_yaml=env_yaml_paths,
        image=dagster_docker_image,
        op_kwargs=operator_kwargs,
    )
    validate_pipeline_execution(results)
Example #11
0
def test_airflow_execution_date_tags():
    """Check the materialized text equals the ISO form of the DAG execution date.

    The value compared is the text of the first metadata entry of the last
    ``STEP_MATERIALIZATION`` event seen across all task results.
    """
    pipeline_name = 'demo_airflow_execution_date_pipeline'
    recon_repo = ReconstructableRepository.for_module(
        'dagster_test.test_project.test_pipelines.repo', pipeline_name)
    env_yaml_paths = [
        os.path.join(test_project_environments_path(), 'env_filesystem.yaml'),
    ]
    run_config = load_yaml_from_glob_list(env_yaml_paths)
    execution_date = timezone.utcnow()

    dag, tasks = make_airflow_dag_for_recon_repo(recon_repo, pipeline_name, run_config)

    results = execute_tasks_in_dag(
        dag, tasks, run_id=make_new_run_id(), execution_date=execution_date)

    materialized_airflow_execution_date = None
    for task_events in results.values():
        for event in task_events:
            if event.event_type_value != 'STEP_MATERIALIZATION':
                continue
            first_entry = event.event_specific_data.materialization.metadata_entries[0]
            materialized_airflow_execution_date = first_entry.entry_data.text

    assert execution_date.isoformat() == materialized_airflow_execution_date
Example #12
0
def test_error_dag_containerized(dagster_docker_image):  # pylint: disable=redefined-outer-name
    """Expect the containerized ``demo_error_pipeline`` DAG to raise with "Unusual error"."""
    pipeline_name = "demo_error_pipeline"
    recon_repo = ReconstructableRepository.for_module(
        "dagster_test.test_project.test_pipelines.repo", "define_demo_execution_repo"
    )
    env_yaml_paths = [os.path.join(test_project_environments_path(), "env_s3.yaml")]
    run_config = load_yaml_from_glob_list(env_yaml_paths)

    run_id = make_new_run_id()
    execution_date = timezone.utcnow()

    with postgres_instance() as instance:
        operator_kwargs = {"network_mode": "container:test-postgres-db-airflow"}
        dag, tasks = make_airflow_dag_containerized_for_recon_repo(
            recon_repo,
            pipeline_name,
            dagster_docker_image,
            run_config,
            instance=instance,
            op_kwargs=operator_kwargs,
        )

        with pytest.raises(AirflowException) as exc_info:
            execute_tasks_in_dag(dag, tasks, run_id, execution_date)

        assert "Exception: Unusual error" in str(exc_info.value)
Example #13
0
def test_s3_storage(dagster_airflow_k8s_operator_pipeline,
                    dagster_docker_image, cluster_provider):
    """Run ``demo_pipeline`` through the k8s operator fixture and validate the results.

    ``cluster_provider`` is requested but not referenced in the body.
    """
    print('--- :airflow: test_kubernetes.test_s3_storage')
    _check_aws_creds_available()
    environments_path = test_project_environments_path()

    recon_repo = ReconstructableRepository.for_module(
        'dagster_test.test_project.test_pipelines.repo',
        'define_demo_execution_repo',
    )
    env_yaml_paths = [
        os.path.join(environments_path, yaml_name)
        for yaml_name in ('env.yaml', 'env_s3.yaml')
    ]
    # Kubeconfig path and AWS credentials are read from the test process environment.
    operator_kwargs = {
        'config_file': os.environ['KUBECONFIG'],
        'env_vars': {
            'AWS_ACCESS_KEY_ID': os.environ['AWS_ACCESS_KEY_ID'],
            'AWS_SECRET_ACCESS_KEY': os.environ['AWS_SECRET_ACCESS_KEY'],
        },
    }

    results = dagster_airflow_k8s_operator_pipeline(
        pipeline_name='demo_pipeline',
        recon_repo=recon_repo,
        environment_yaml=env_yaml_paths,
        image=dagster_docker_image,
        op_kwargs=operator_kwargs,
    )
    validate_pipeline_execution(results)
Example #14
0
def test_valid_job_format(run_launcher):
    """Compare the YAML dump of a constructed run job against ``EXPECTED_JOB_SPEC``."""
    docker_image = test_project_docker_image()

    env_yaml_path = os.path.join(test_project_environments_path(), 'env.yaml')
    run_config = load_yaml_from_path(env_yaml_path)
    run = PipelineRun(pipeline_name='demo_pipeline', run_config=run_config)

    # Job and pod share the same run-id-derived name.
    job_name = 'dagster-run-%s' % run.run_id
    pod_name = 'dagster-run-%s' % run.run_id
    graphql_args = ['-p', 'executeRunInProcess', '-v', seven.json.dumps({'runId': run.run_id})]
    job = construct_dagster_k8s_job(
        job_config=run_launcher.job_config,
        command=['dagster-graphql'],
        args=graphql_args,
        job_name=job_name,
        pod_name=pod_name,
        component='run_coordinator',
    )

    rendered_spec = yaml.dump(remove_none_recursively(job.to_dict()), default_flow_style=False)
    expected_spec = EXPECTED_JOB_SPEC.format(
        run_id=run.run_id,
        job_image=docker_image,
        image_pull_policy=image_pull_policy(),
        dagster_version=dagster_version,
        resources='',
    )
    assert rendered_spec.strip() == expected_spec.strip()
Example #15
0
def test_valid_job_format(run_launcher):
    """Compare the YAML dump of a constructed graphql job against ``EXPECTED_JOB_SPEC``."""
    docker_image = test_project_docker_image()

    env_yaml_path = os.path.join(test_project_environments_path(), 'env.yaml')
    environment_dict = load_yaml_from_path(env_yaml_path)
    run = PipelineRun(pipeline_name='demo_pipeline', environment_dict=environment_dict)

    # Job and pod share the same run-id-derived name.
    job_name = 'dagster-run-%s' % run.run_id
    pod_name = 'dagster-run-%s' % run.run_id
    graphql_args = [
        '-p',
        'startPipelineExecutionForCreatedRun',
        '-v',
        seven.json.dumps({'runId': run.run_id}),
    ]
    job = construct_dagster_graphql_k8s_job(
        run_launcher.job_config,
        args=graphql_args,
        job_name=job_name,
        pod_name=pod_name,
        component='runmaster',
    )

    rendered_spec = yaml.dump(remove_none_recursively(job.to_dict()), default_flow_style=False)
    expected_spec = EXPECTED_JOB_SPEC.format(
        run_id=run.run_id,
        job_image=docker_image,
        image_pull_policy=image_pull_policy(),
        dagster_version=dagster_version,
    )
    assert rendered_spec.strip() == expected_spec.strip()
Example #16
0
def test_airflow_execution_date_tags_containerized(dagster_docker_image, ):  # pylint: disable=redefined-outer-name, unused-argument
    """Check the containerized DAG materializes the ISO form of its execution date.

    The value compared is the text of the first metadata entry of the last
    ``STEP_MATERIALIZATION`` event seen across all task results.
    """
    pipeline_name = "demo_airflow_execution_date_pipeline"
    recon_repo = ReconstructableRepository.for_module(
        "dagster_test.test_project.test_pipelines.repo", "define_demo_execution_repo")
    env_yaml_paths = [
        os.path.join(test_project_environments_path(), "env_s3.yaml"),
    ]
    run_config = load_yaml_from_glob_list(env_yaml_paths)

    execution_date = timezone.utcnow()

    dag, tasks = make_airflow_dag_containerized_for_recon_repo(
        recon_repo, pipeline_name, dagster_docker_image, run_config)

    results = execute_tasks_in_dag(
        dag, tasks, run_id=make_new_run_id(), execution_date=execution_date)

    materialized_airflow_execution_date = None
    for task_events in results.values():
        for event in task_events:
            if event.event_type_value != "STEP_MATERIALIZATION":
                continue
            first_entry = event.event_specific_data.materialization.metadata_entries[0]
            materialized_airflow_execution_date = first_entry.entry_data.text

    assert execution_date.isoformat() == materialized_airflow_execution_date
Example #17
0
def test_error_dag_k8s(dagster_docker_image, cluster_provider):
    """Expect ``demo_error_pipeline`` to raise ``AirflowException`` with "Unusual error".

    ``cluster_provider`` is requested but not referenced in the body.
    """
    print('--- :airflow: test_kubernetes.test_error_dag_k8s')
    _check_aws_creds_available()

    pipeline_name = 'demo_error_pipeline'
    handle = ReconstructableRepository.for_module('test_pipelines.repo', pipeline_name)
    env_yaml_paths = [os.path.join(test_project_environments_path(), 'env_s3.yaml')]
    environment_dict = load_yaml_from_glob_list(env_yaml_paths)

    run_id = make_new_run_id()
    execution_date = timezone.utcnow()

    # Kubeconfig path and AWS credentials are read from the test process environment.
    aws_env_vars = {
        'AWS_ACCESS_KEY_ID': os.environ['AWS_ACCESS_KEY_ID'],
        'AWS_SECRET_ACCESS_KEY': os.environ['AWS_SECRET_ACCESS_KEY'],
    }
    operator_kwargs = {'config_file': os.environ['KUBECONFIG'], 'env_vars': aws_env_vars}

    dag, tasks = make_airflow_dag_kubernetized_for_handle(
        handle=handle,
        pipeline_name=pipeline_name,
        image=dagster_docker_image,
        namespace='default',
        environment_dict=environment_dict,
        op_kwargs=operator_kwargs,
    )

    with pytest.raises(AirflowException) as exc_info:
        execute_tasks_in_dag(dag, tasks, run_id, execution_date)

    assert 'Exception: Unusual error' in str(exc_info.value)
Example #18
0
def test_k8s_run_launcher_default(
    dagster_instance_for_k8s_run_launcher, helm_namespace_for_k8s_run_launcher
):
    """Launch ``demo_pipeline`` via the K8sRunLauncher and expect ``PIPELINE_SUCCESS`` in the logs."""
    instance = dagster_instance_for_k8s_run_launcher
    namespace = helm_namespace_for_k8s_run_launcher

    # sanity check that we have a K8sRunLauncher
    check.inst(instance.run_launcher, K8sRunLauncher)

    # No pod in the namespace may have "celery-workers" in its name.
    core_api = DagsterKubernetesClient.production_client().core_api
    pods = core_api.list_namespaced_pod(namespace=namespace)
    celery_pod_names = []
    for pod in pods.items:
        if "celery-workers" in pod.metadata.name:
            celery_pod_names.append(pod.metadata.name)
    check.invariant(not celery_pod_names)

    env_yaml_path = os.path.join(test_project_environments_path(), "env.yaml")
    run_config = load_yaml_from_path(env_yaml_path)
    pipeline_name = "demo_pipeline"
    run = create_run_for_test(
        instance,
        pipeline_name=pipeline_name,
        run_config=run_config,
        tags={"key": "value"},
        mode="default",
    )

    external_pipeline = ReOriginatedExternalPipelineForTest(
        get_test_project_external_pipeline(pipeline_name)
    )
    instance.launch_run(run.run_id, external_pipeline)

    job_name = "dagster-run-%s" % run.run_id
    result = wait_for_job_and_get_raw_logs(job_name=job_name, namespace=namespace)

    assert "PIPELINE_SUCCESS" in result, "no match, result: {}".format(result)
Example #19
0
def test_valid_job_format(run_launcher):
    """Compare the YAML dump of a constructed run job against ``EXPECTED_JOB_SPEC``."""
    docker_image = test_project_docker_image()

    env_yaml_path = os.path.join(test_project_environments_path(), "env.yaml")
    run_config = load_yaml_from_path(env_yaml_path)
    run = PipelineRun(pipeline_name="demo_pipeline", run_config=run_config)

    # Job and pod share the same run-id-derived name.
    job_name = "dagster-run-%s" % run.run_id
    pod_name = "dagster-run-%s" % run.run_id
    job = construct_dagster_k8s_job(
        job_config=run_launcher.job_config,
        command=["dagster"],
        args=["api", "execute_run_with_structured_logs"],
        job_name=job_name,
        pod_name=pod_name,
        component="run_coordinator",
    )

    rendered_spec = yaml.dump(remove_none_recursively(job.to_dict()), default_flow_style=False)
    expected_spec = EXPECTED_JOB_SPEC.format(
        run_id=run.run_id,
        job_image=docker_image,
        image_pull_policy=image_pull_policy(),
        dagster_version=dagster_version,
        resources="",
    )
    assert rendered_spec.strip() == expected_spec.strip()
Example #20
0
def test_my_custom_operator(
    dagster_airflow_custom_operator_pipeline,
    caplog,
):  # pylint: disable=redefined-outer-name
    """Run ``demo_pipeline`` with ``CustomOperator`` and expect exactly two of its log records."""
    logger_name = 'CustomOperatorLogger'
    caplog.set_level(logging.INFO, logger=logger_name)
    pipeline_name = 'demo_pipeline'

    environments_path = test_project_environments_path()

    handle = ReconstructableRepository.for_module('test_pipelines.repo', pipeline_name)
    env_yaml_paths = [
        os.path.join(environments_path, yaml_name)
        for yaml_name in ('env.yaml', 'env_filesystem_no_explicit_base_dir.yaml')
    ]
    results = dagster_airflow_custom_operator_pipeline(
        pipeline_name=pipeline_name,
        handle=handle,
        operator=CustomOperator,
        environment_yaml=env_yaml_paths,
    )
    validate_pipeline_execution(results)

    # Only records emitted under the custom operator's logger name are checked.
    operator_records = [record for record in caplog.records if record.name == logger_name]
    for record in operator_records:
        assert record.message == 'CustomOperator is called'

    assert len(operator_records) == 2
Example #21
0
def test_execute_on_celery_resource_requirements(  # pylint: disable=redefined-outer-name
    dagster_docker_image, dagster_instance, helm_namespace
):
    """Launch ``resources_limit_pipeline_celery`` and expect ``ExecuteRunInProcessSuccess``."""
    env_yaml_paths = [os.path.join(test_project_environments_path(), 'env_s3.yaml')]
    celery_k8s_settings = {
        'job_image': dagster_docker_image,
        'job_namespace': helm_namespace,
        'image_pull_policy': 'Always',
        'env_config_maps': ['dagster-pipeline-env'],
    }
    run_config = merge_dicts(
        merge_yamls(env_yaml_paths),
        {'execution': {'celery-k8s': {'config': celery_k8s_settings}}},
    )

    pipeline_name = 'resources_limit_pipeline_celery'
    run = create_run_for_test(
        dagster_instance, pipeline_name=pipeline_name, run_config=run_config, mode='default',
    )

    external_pipeline = get_test_project_external_pipeline(pipeline_name)
    dagster_instance.launch_run(run.run_id, external_pipeline)

    job_name = 'dagster-run-%s' % run.run_id
    result = wait_for_job_and_get_logs(job_name=job_name, namespace=helm_namespace)

    assert not result.get('errors')
    assert result['data']
    typename = result['data']['executeRunInProcess']['__typename']
    assert typename == 'ExecuteRunInProcessSuccess', 'no match, result: {}'.format(result)
Example #22
0
def test_execute_on_celery_k8s_retry_pipeline(  # pylint: disable=redefined-outer-name
        dagster_docker_image, dagster_instance, helm_namespace):
    """Launch ``retry_pipeline`` on celery-k8s and verify the retry event sequence.

    The run must log ``PIPELINE_SUCCESS``, report exactly one succeeded step,
    and its event log must contain STEP_START, STEP_UP_FOR_RETRY,
    STEP_RESTARTED and STEP_SUCCESS events.
    """
    run_config = merge_dicts(
        merge_yamls(
            [os.path.join(test_project_environments_path(), 'env_s3.yaml')]),
        get_celery_engine_config(dagster_docker_image=dagster_docker_image,
                                 job_namespace=helm_namespace),
    )

    pipeline_name = 'retry_pipeline'
    run = create_run_for_test(
        dagster_instance,
        pipeline_name=pipeline_name,
        run_config=run_config,
        mode='default',
    )

    dagster_instance.launch_run(
        run.run_id,
        ReOriginatedExternalPipelineForTest(
            get_test_project_external_pipeline(pipeline_name)),
    )

    result = wait_for_job_and_get_raw_logs(job_name='dagster-run-%s' %
                                           run.run_id,
                                           namespace=helm_namespace)

    assert 'PIPELINE_SUCCESS' in result, 'no match, result: {}'.format(result)

    stats = dagster_instance.get_run_stats(run.run_id)
    assert stats.steps_succeeded == 1

    # Fetch the event log once and reuse it for every membership check,
    # rather than re-reading all logs for each expected event type.
    observed_event_types = {
        event.dagster_event.event_type
        for event in dagster_instance.all_logs(run.run_id)
        if event.is_dagster_event
    }

    for expected_event_type in (
            DagsterEventType.STEP_START,
            DagsterEventType.STEP_UP_FOR_RETRY,
            DagsterEventType.STEP_RESTARTED,
            DagsterEventType.STEP_SUCCESS,
    ):
        assert expected_event_type in observed_event_types, (
            'missing event type: {}'.format(expected_event_type))
Example #23
0
def test_execute_on_celery_k8s_with_termination(  # pylint: disable=redefined-outer-name
        dagster_docker_image, dagster_instance, helm_namespace):
    """Build a celery-k8s run config from ``env_s3.yaml`` and hand it to ``_test_termination``."""
    env_yaml_paths = [os.path.join(test_project_environments_path(), 'env_s3.yaml')]
    base_config = merge_yamls(env_yaml_paths)
    engine_config = get_celery_engine_config(
        dagster_docker_image=dagster_docker_image, job_namespace=helm_namespace)

    _test_termination(dagster_instance, merge_dicts(base_config, engine_config))
Example #24
0
def test_valid_job_format_with_backcompat_resources(run_launcher):
    """Check resource tags passed via ``K8S_RESOURCE_REQUIREMENTS_KEY`` appear in the job spec."""
    docker_image = test_project_docker_image()

    env_yaml_path = os.path.join(test_project_environments_path(), "env.yaml")
    run_config = load_yaml_from_path(env_yaml_path)
    run = PipelineRun(pipeline_name="demo_pipeline", run_config=run_config)

    resource_requirements = {
        "requests": {"cpu": "250m", "memory": "64Mi"},
        "limits": {"cpu": "500m", "memory": "2560Mi"},
    }
    tags = validate_tags({K8S_RESOURCE_REQUIREMENTS_KEY: resource_requirements})
    user_defined_k8s_config = get_user_defined_k8s_config(tags)

    # Job and pod share the same run-id-derived name.
    job_name = "dagster-run-%s" % run.run_id
    pod_name = "dagster-run-%s" % run.run_id
    graphql_args = ["-p", "executeRunInProcess", "-v", seven.json.dumps({"runId": run.run_id})]
    job = construct_dagster_k8s_job(
        job_config=run_launcher.job_config,
        command=["dagster-graphql"],
        args=graphql_args,
        job_name=job_name,
        user_defined_k8s_config=user_defined_k8s_config,
        pod_name=pod_name,
        component="run_coordinator",
    )

    # Indentation inside this literal is part of the expected YAML text.
    expected_resources = """
        resources:
          limits:
            cpu: 500m
            memory: 2560Mi
          requests:
            cpu: 250m
            memory: 64Mi"""

    rendered_spec = yaml.dump(remove_none_recursively(job.to_dict()), default_flow_style=False)
    expected_spec = EXPECTED_JOB_SPEC.format(
        run_id=run.run_id,
        job_image=docker_image,
        image_pull_policy=image_pull_policy(),
        dagster_version=dagster_version,
        resources=expected_resources,
    )
    assert rendered_spec.strip() == expected_spec.strip()
Example #25
0
def test_skip_operator(
    dagster_airflow_python_operator_pipeline,
):  # pylint: disable=redefined-outer-name
    """Run ``optional_outputs`` through the python operator fixture and validate skip results."""
    pipeline_name = 'optional_outputs'
    environments_path = test_project_environments_path()

    recon_repo = ReconstructableRepository.for_module('test_pipelines.repo', pipeline_name)
    env_yaml_paths = [os.path.join(environments_path, 'env_filesystem.yaml')]

    results = dagster_airflow_python_operator_pipeline(
        pipeline_name=pipeline_name,
        recon_repo=recon_repo,
        environment_yaml=env_yaml_paths,
    )
    validate_skip_pipeline_execution(results)
Example #26
0
def test_valid_job_format_with_resources(run_launcher):
    """Check the generated k8s job spec when resource requirements come from tags.

    A job is constructed for a ``demo_pipeline`` run whose tags carry CPU and
    memory requests/limits; its YAML dump (with ``None`` values removed) must
    equal ``EXPECTED_JOB_SPEC`` formatted with the matching ``resources`` stanza.
    """
    docker_image = test_project_docker_image()

    env_yaml_path = os.path.join(test_project_environments_path(), 'env.yaml')
    run = PipelineRun(
        pipeline_name='demo_pipeline',
        run_config=load_yaml_from_path(env_yaml_path),
    )

    requirements = {
        'requests': {'cpu': '250m', 'memory': '64Mi'},
        'limits': {'cpu': '500m', 'memory': '2560Mi'},
    }
    resources = get_k8s_resource_requirements(
        validate_tags({K8S_RESOURCE_REQUIREMENTS_KEY: requirements})
    )

    # The job and its pod are given the same run-derived name.
    k8s_object_name = 'dagster-run-%s' % run.run_id
    graphql_args = [
        '-p',
        'executeRunInProcess',
        '-v',
        seven.json.dumps({'runId': run.run_id}),
    ]
    job = construct_dagster_k8s_job(
        job_config=run_launcher.job_config,
        command=['dagster-graphql'],
        args=graphql_args,
        job_name=k8s_object_name,
        resources=resources,
        pod_name=k8s_object_name,
        component='runmaster',
    )

    actual_spec = yaml.dump(
        remove_none_recursively(job.to_dict()),
        default_flow_style=False,
    )
    expected_spec = EXPECTED_JOB_SPEC.format(
        run_id=run.run_id,
        job_image=docker_image,
        image_pull_policy=image_pull_policy(),
        dagster_version=dagster_version,
        resources='''
        resources:
          limits:
            cpu: 500m
            memory: 2560Mi
          requests:
            cpu: 250m
            memory: 64Mi''',
    )
    assert actual_spec.strip() == expected_spec.strip()
Example #27
0
def test_execute_on_celery_k8s_with_hard_failure(  # pylint: disable=redefined-outer-name
    dagster_docker_image, dagster_instance, set_dagster_k8s_pipeline_run_namespace_env
):
    """Launch ``hard_failer`` on celery-k8s and verify the failure is recorded.

    The run config enables ``fail`` on the ``hard_fail_or_0`` solid and takes the
    job namespace from the ``DAGSTER_K8S_PIPELINE_RUN_NAMESPACE`` env var. The test
    then polls (up to 120 seconds per check, every 5 seconds) until:

    1. the pipeline run status becomes ``PipelineRunStatus.FAILURE``, and
    2. a ``STEP_FAILURE`` event for ``hard_fail_or_0.compute`` is in the run's logs.

    ``set_dagster_k8s_pipeline_run_namespace_env`` is not referenced in the body;
    it is requested only as a fixture (presumably to set that env var -- confirm
    against the fixture definition).
    """
    run_config = merge_dicts(
        merge_dicts(
            merge_yamls([os.path.join(test_project_environments_path(), "env_s3.yaml"),]),
            get_celery_engine_config(
                dagster_docker_image=dagster_docker_image,
                job_namespace={"env": "DAGSTER_K8S_PIPELINE_RUN_NAMESPACE"},
            ),
        ),
        {"solids": {"hard_fail_or_0": {"config": {"fail": True}}}},
    )

    pipeline_name = "hard_failer"
    run = create_run_for_test(
        dagster_instance, pipeline_name=pipeline_name, run_config=run_config, mode="default",
    )

    dagster_instance.launch_run(
        run.run_id,
        ReOriginatedExternalPipelineForTest(get_test_project_external_pipeline(pipeline_name)),
    )
    assert isinstance(dagster_instance.run_launcher, CeleryK8sRunLauncher)

    # Check that pipeline run is marked as failed
    pipeline_run_status_failure = False
    start_time = datetime.datetime.now()
    timeout = datetime.timedelta(0, 120)

    while datetime.datetime.now() < start_time + timeout:
        pipeline_run = dagster_instance.get_run_by_id(run.run_id)
        if pipeline_run.status == PipelineRunStatus.FAILURE:
            pipeline_run_status_failure = True
            break
        time.sleep(5)
    assert pipeline_run_status_failure

    # Check for step failure for hard_fail_or_0.compute
    start_time = datetime.datetime.now()
    step_failure_found = False
    while datetime.datetime.now() < start_time + timeout:
        step_failure_found = any(
            event_record.dagster_event
            and event_record.dagster_event.event_type == DagsterEventType.STEP_FAILURE
            and event_record.dagster_event.step_key == "hard_fail_or_0.compute"
            for event_record in dagster_instance.all_logs(run.run_id)
        )
        # Leave the polling loop as soon as the event is seen; previously only the
        # inner scan was exited, so the test kept sleeping until the full timeout.
        if step_failure_found:
            break
        time.sleep(5)
    assert step_failure_found
Example #28
0
def test_execute_on_celery_k8s_with_env_var_and_termination(  # pylint: disable=redefined-outer-name
    dagster_docker_image, dagster_instance, set_dagster_k8s_pipeline_run_namespace_env
):
    """Run the shared termination check with an env-var-sourced job namespace.

    The run config merges ``env_s3.yaml`` with a celery engine config whose
    ``job_namespace`` is ``{"env": "DAGSTER_K8S_PIPELINE_RUN_NAMESPACE"}``, then
    hands it to ``_test_termination``.
    """
    s3_env_config = merge_yamls(
        [os.path.join(test_project_environments_path(), "env_s3.yaml")]
    )
    celery_engine_config = get_celery_engine_config(
        dagster_docker_image=dagster_docker_image,
        job_namespace={"env": "DAGSTER_K8S_PIPELINE_RUN_NAMESPACE"},
    )

    _test_termination(dagster_instance, merge_dicts(s3_env_config, celery_engine_config))
Example #29
0
def test_gcs_storage(dagster_airflow_python_operator_pipeline):  # pylint: disable=redefined-outer-name
    """Execute ``demo_pipeline_gcs`` via the Airflow python operator fixture.

    The pipeline runs with ``env.yaml`` plus ``env_gcs.yaml`` and the results are
    passed to ``validate_pipeline_execution``.
    """
    target_pipeline = 'demo_pipeline_gcs'
    env_dir = test_project_environments_path()
    repo_handle = ReconstructableRepository.for_module('test_pipelines.repo', target_pipeline)
    env_yaml_files = [
        os.path.join(env_dir, yaml_name) for yaml_name in ('env.yaml', 'env_gcs.yaml')
    ]

    validate_pipeline_execution(
        dagster_airflow_python_operator_pipeline(
            pipeline_name=target_pipeline,
            handle=repo_handle,
            environment_yaml=env_yaml_files,
        )
    )
Example #30
0
def test_s3_storage(dagster_airflow_python_operator_pipeline):  # pylint: disable=redefined-outer-name
    """Execute ``demo_pipeline`` via the Airflow python operator fixture.

    The pipeline runs with ``env.yaml`` plus ``env_s3.yaml`` and the results are
    passed to ``validate_pipeline_execution``.
    """
    target_pipeline = "demo_pipeline"
    env_dir = test_project_environments_path()
    recon_repo = ReconstructableRepository.for_module(
        "dagster_test.test_project.test_pipelines.repo", target_pipeline
    )
    env_yaml_files = [
        os.path.join(env_dir, yaml_name) for yaml_name in ("env.yaml", "env_s3.yaml")
    ]

    validate_pipeline_execution(
        dagster_airflow_python_operator_pipeline(
            pipeline_name=target_pipeline,
            recon_repo=recon_repo,
            environment_yaml=env_yaml_files,
        )
    )