Code Example #1
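Fetches ExternalRepositoryData over gRPC for each repository name on the location handle, wrapping each result in an ExternalRepository with a matching RepositoryHandle.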
def sync_get_external_repositories_grpc(api_client,
                                        repository_location_handle):
    check.inst_param(repository_location_handle, 'repository_location_handle',
                     RepositoryLocationHandle)

    repos = []
    for repository_name in repository_location_handle.repository_names:
        external_repository_data = check.inst(
            api_client.external_repository(
                repository_grpc_server_origin=RepositoryGrpcServerOrigin(
                    repository_location_handle.host,
                    repository_location_handle.port,
                    repository_location_handle.socket,
                    repository_name,
                )),
            ExternalRepositoryData,
        )
        repos.append(
            ExternalRepository(
                external_repository_data,
                RepositoryHandle(
                    repository_name=external_repository_data.name,
                    repository_location_handle=repository_location_handle,
                ),
            ))
    return repos
Code Example #2
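Streaming gRPC variant of the same fetch: the serialized repository arrives in chunks that are joined and deserialized before the ExternalRepository is constructed.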
def sync_get_streaming_external_repositories_grpc(api_client, repository_location_handle):
    check.inst_param(
        repository_location_handle, "repository_location_handle", RepositoryLocationHandle
    )

    repos = []
    for repository_name in repository_location_handle.repository_names:
        external_repository_chunks = list(
            api_client.streaming_external_repository(
                external_repository_origin=ExternalRepositoryOrigin(
                    repository_location_handle.origin, repository_name,
                )
            )
        )

        external_repository_data = deserialize_json_to_dagster_namedtuple(
            "".join(
                [
                    chunk["serialized_external_repository_chunk"]
                    for chunk in external_repository_chunks
                ]
            )
        )

        repos.append(
            ExternalRepository(
                external_repository_data,
                RepositoryHandle(
                    repository_name=external_repository_data.name,
                    repository_location_handle=repository_location_handle,
                ),
            )
        )
    return repos
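
The streaming call above reassembles one serialized payload from many chunks before deserializing it. A minimal standalone sketch of that reassembly pattern, using plain json.loads in place of Dagster's deserialize_json_to_dagster_namedtuple and a hypothetical two-chunk payload:

import json

# Hypothetical chunked payload, mimicking the shape of the streaming gRPC
# response above: each chunk carries a fragment of one JSON document.
external_repository_chunks = [
    {"serialized_external_repository_chunk": '{"name": "my_repo", '},
    {"serialized_external_repository_chunk": '"pipelines": []}'},
]

# Join the fragments back into a single serialized blob, then deserialize.
serialized = "".join(
    chunk["serialized_external_repository_chunk"]
    for chunk in external_repository_chunks
)
data = json.loads(serialized)
assert data["name"] == "my_repo"
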
Code Example #3
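CLI-based variant: executes a unary API CLI command per code pointer and keys each RepositoryHandle with the repository key from the pointer dict.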
def sync_get_external_repositories(repository_location_handle):
    check.inst_param(
        repository_location_handle, 'repository_location_handle', PythonEnvRepositoryLocationHandle,
    )

    repos = []

    for key, pointer in repository_location_handle.repository_code_pointer_dict.items():

        external_repository_data = check.inst(
            execute_unary_api_cli_command(
                repository_location_handle.executable_path,
                'repository',
                RepositoryPythonOrigin(repository_location_handle.executable_path, pointer),
            ),
            ExternalRepositoryData,
        )
        repos.append(
            ExternalRepository(
                external_repository_data,
                RepositoryHandle(
                    repository_name=external_repository_data.name,
                    repository_key=key,
                    repository_location_handle=repository_location_handle,
                ),
            )
        )

    return repos
Code Example #4
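Verifies that a K8sRunLauncher created with fail_pod_on_run_failure=True launches a Kubernetes job whose command args carry set_exit_code_on_failure=True.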
def test_raise_on_error(kubeconfig_file):
    # Construct a K8s run launcher in a fake k8s environment.
    mock_k8s_client_batch_api = mock.MagicMock()
    k8s_run_launcher = K8sRunLauncher(
        service_account_name="dagit-admin",
        instance_config_map="dagster-instance",
        postgres_password_secret="dagster-postgresql-secret",
        dagster_home="/opt/dagster/dagster_home",
        job_image="fake_job_image",
        load_incluster_config=False,
        kubeconfig_file=kubeconfig_file,
        k8s_client_batch_api=mock_k8s_client_batch_api,
        fail_pod_on_run_failure=True,
    )
    # Create fake external pipeline.
    recon_pipeline = reconstructable(fake_pipeline)
    recon_repo = recon_pipeline.repository
    repo_def = recon_repo.get_definition()
    loadable_target_origin = LoadableTargetOrigin(python_file=__file__)

    with instance_for_test() as instance:
        with in_process_test_workspace(instance, loadable_target_origin) as workspace:
            location = workspace.get_repository_location(workspace.repository_location_names[0])
            repo_handle = RepositoryHandle(
                repository_name=repo_def.name,
                repository_location=location,
            )
            fake_external_pipeline = external_pipeline_from_recon_pipeline(
                recon_pipeline,
                solid_selection=None,
                repository_handle=repo_handle,
            )

            # Launch the run in a fake Dagster instance.
            pipeline_name = "demo_pipeline"
            run = create_run_for_test(
                instance,
                pipeline_name=pipeline_name,
                external_pipeline_origin=fake_external_pipeline.get_external_origin(),
                pipeline_code_origin=fake_external_pipeline.get_python_origin(),
            )
            k8s_run_launcher.register_instance(instance)
            k8s_run_launcher.launch_run(LaunchRunContext(run, workspace))

        mock_method_calls = mock_k8s_client_batch_api.method_calls
        assert len(mock_method_calls) > 0
        method_name, _args, kwargs = mock_method_calls[0]
        assert method_name == "create_namespaced_job"

        container = kwargs["body"].spec.template.spec.containers[0]
        args = container.args
        assert (
            args
            == ExecuteRunArgs(
                pipeline_origin=run.pipeline_code_origin,
                pipeline_run_id=run.run_id,
                instance_ref=instance.get_ref(),
                set_exit_code_on_failure=True,
            ).get_command_args()
        )
Code Example #5
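Ephemeral-gRPC variant: opens a temporary gRPC client against the location's Python executable and fetches each repository by its RepositoryPythonOrigin.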
def sync_get_external_repositories_grpc(repository_location_handle):
    check.inst_param(
        repository_location_handle, 'repository_location_handle', PythonEnvRepositoryLocationHandle,
    )

    repos = []

    with ephemeral_grpc_api_client(
        python_executable_path=repository_location_handle.executable_path
    ) as api_client:
        for key, pointer in repository_location_handle.repository_code_pointer_dict.items():
            external_repository_data = check.inst(
                api_client.external_repository(
                    repository_python_origin=RepositoryPythonOrigin(
                        repository_location_handle.executable_path, pointer
                    )
                ),
                ExternalRepositoryData,
            )
            repos.append(
                ExternalRepository(
                    external_repository_data,
                    RepositoryHandle(
                        repository_name=external_repository_data.name,
                        repository_key=key,
                        repository_location_handle=repository_location_handle,
                    ),
                )
            )

    return repos
Code Example #6
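Origin-based gRPC variant: identifies each repository with an ExternalRepositoryOrigin built from the location handle's origin and the repository name.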
def sync_get_external_repositories_grpc(api_client, repository_location_handle):
    check.inst_param(
        repository_location_handle, "repository_location_handle", RepositoryLocationHandle
    )

    repos = []
    for repository_name in repository_location_handle.repository_names:
        external_repository_data = check.inst(
            api_client.external_repository(
                external_repository_origin=ExternalRepositoryOrigin(
                    repository_location_handle.origin, repository_name,
                )
            ),
            ExternalRepositoryData,
        )
        repos.append(
            ExternalRepository(
                external_repository_data,
                RepositoryHandle(
                    repository_name=external_repository_data.name,
                    repository_location_handle=repository_location_handle,
                ),
            )
        )
    return repos
Code Example #7
File: test_launcher.py Project: stonebig/dagster
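Verifies that user-defined Kubernetes config in run tags (resource requests and limits) is passed through CeleryK8sRunLauncher to the created Kubernetes job.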
def test_user_defined_k8s_config_in_run_tags(kubeconfig_file):
    # Construct a K8s run launcher in a fake k8s environment.
    mock_k8s_client_batch_api = mock.MagicMock()
    celery_k8s_run_launcher = CeleryK8sRunLauncher(
        instance_config_map="dagster-instance",
        postgres_password_secret="dagster-postgresql-secret",
        dagster_home="/opt/dagster/dagster_home",
        load_incluster_config=False,
        kubeconfig_file=kubeconfig_file,
        k8s_client_batch_api=mock_k8s_client_batch_api,
    )

    # Construct Dagster run tags with user defined k8s config.
    expected_resources = {
        "requests": {"cpu": "250m", "memory": "64Mi"},
        "limits": {"cpu": "500m", "memory": "2560Mi"},
    }
    user_defined_k8s_config = UserDefinedDagsterK8sConfig(
        container_config={"resources": expected_resources},
    )
    user_defined_k8s_config_json = json.dumps(user_defined_k8s_config.to_dict())
    tags = {"dagster-k8s/config": user_defined_k8s_config_json}

    # Create fake external pipeline.
    recon_pipeline = reconstructable(fake_pipeline)
    recon_repo = recon_pipeline.repository
    location_origin = InProcessRepositoryLocationOrigin(recon_repo)
    location_handle = location_origin.create_handle()
    repo_def = recon_repo.get_definition()
    repo_handle = RepositoryHandle(
        repository_name=repo_def.name,
        repository_location_handle=location_handle,
    )
    fake_external_pipeline = external_pipeline_from_recon_pipeline(
        recon_pipeline,
        solid_selection=None,
        repository_handle=repo_handle,
    )

    # Launch the run in a fake Dagster instance.
    with instance_for_test() as instance:
        celery_k8s_run_launcher.initialize(instance)
        pipeline_name = "demo_pipeline"
        run_config = {"execution": {"celery-k8s": {"config": {"job_image": "fake-image-name"}}}}
        run = create_run_for_test(
            instance,
            pipeline_name=pipeline_name,
            run_config=run_config,
            tags=tags,
        )
        celery_k8s_run_launcher.launch_run(instance, run, fake_external_pipeline)

    # Check that user defined k8s config was passed down to the k8s job.
    mock_method_calls = mock_k8s_client_batch_api.method_calls
    assert len(mock_method_calls) > 0
    method_name, _args, kwargs = mock_method_calls[0]
    assert method_name == "create_namespaced_job"
    job_resources = kwargs["body"].spec.template.spec.containers[0].resources
    assert job_resources == expected_resources
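
The test above hinges on the dagster-k8s/config run tag carrying the user-defined config as a JSON string. A minimal standalone sketch of that round-trip, using a plain dict in place of UserDefinedDagsterK8sConfig:

import json

# Serialize user-defined k8s resources into a run tag, then recover them,
# as the launcher does when it reads the tag off the run.
expected_resources = {
    "requests": {"cpu": "250m", "memory": "64Mi"},
    "limits": {"cpu": "500m", "memory": "2560Mi"},
}
tags = {
    "dagster-k8s/config": json.dumps(
        {"container_config": {"resources": expected_resources}}
    )
}
recovered = json.loads(tags["dagster-k8s/config"])
assert recovered["container_config"]["resources"] == expected_resources
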
Code Example #8
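Verifies that a K8sRunLauncher configured without a postgres password secret does not inject DAGSTER_PG_PASSWORD_ENV_VAR into the job container's environment.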
def test_no_postgres(kubeconfig_file):
    # Construct a K8s run launcher in a fake k8s environment.
    mock_k8s_client_batch_api = mock.MagicMock()
    k8s_run_launcher = K8sRunLauncher(
        service_account_name="dagit-admin",
        instance_config_map="dagster-instance",
        dagster_home="/opt/dagster/dagster_home",
        job_image="fake_job_image",
        load_incluster_config=False,
        kubeconfig_file=kubeconfig_file,
        k8s_client_batch_api=mock_k8s_client_batch_api,
    )

    # Create fake external pipeline.
    recon_pipeline = reconstructable(fake_pipeline)
    recon_repo = recon_pipeline.repository
    repo_def = recon_repo.get_definition()

    with instance_for_test() as instance:
        with in_process_test_workspace(instance, recon_repo) as workspace:
            location = workspace.get_repository_location(
                workspace.repository_location_names[0])
            repo_handle = RepositoryHandle(
                repository_name=repo_def.name,
                repository_location=location,
            )
            fake_external_pipeline = external_pipeline_from_recon_pipeline(
                recon_pipeline,
                solid_selection=None,
                repository_handle=repo_handle,
            )

            # Launch the run in a fake Dagster instance.
            pipeline_name = "demo_pipeline"
            run = create_run_for_test(
                instance,
                pipeline_name=pipeline_name,
                external_pipeline_origin=fake_external_pipeline.get_external_origin(),
                pipeline_code_origin=fake_external_pipeline.get_python_origin(),
            )
            k8s_run_launcher.register_instance(instance)
            k8s_run_launcher.launch_run(LaunchRunContext(run, workspace))

            updated_run = instance.get_run_by_id(run.run_id)
            assert updated_run.tags[DOCKER_IMAGE_TAG] == "fake_job_image"

        # Check that user defined k8s config was passed down to the k8s job.
        mock_method_calls = mock_k8s_client_batch_api.method_calls
        assert len(mock_method_calls) > 0
        method_name, _args, kwargs = mock_method_calls[0]
        assert method_name == "create_namespaced_job"
        assert DAGSTER_PG_PASSWORD_ENV_VAR not in [
            env.name
            for env in kwargs["body"].spec.template.spec.containers[0].env
        ]
Code Example #9
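Verifies that check_run_worker_health maps successive V1JobStatus responses to WorkerStatus.RUNNING, WorkerStatus.SUCCESS, and WorkerStatus.FAILED.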
def test_check_run_health(kubeconfig_file):

    labels = {"foo_label_key": "bar_label_value"}

    # Construct a K8s run launcher in a fake k8s environment.
    mock_k8s_client_batch_api = mock.Mock(spec_set=["read_namespaced_job"])
    mock_k8s_client_batch_api.read_namespaced_job.side_effect = [
        V1Job(status=V1JobStatus(failed=0, succeeded=0)),
        V1Job(status=V1JobStatus(failed=0, succeeded=1)),
        V1Job(status=V1JobStatus(failed=1, succeeded=0)),
    ]
    k8s_run_launcher = K8sRunLauncher(
        service_account_name="dagit-admin",
        instance_config_map="dagster-instance",
        postgres_password_secret="dagster-postgresql-secret",
        dagster_home="/opt/dagster/dagster_home",
        job_image="fake_job_image",
        load_incluster_config=False,
        kubeconfig_file=kubeconfig_file,
        k8s_client_batch_api=mock_k8s_client_batch_api,
        labels=labels,
    )

    # Create fake external pipeline.
    recon_pipeline = reconstructable(fake_pipeline)
    recon_repo = recon_pipeline.repository
    repo_def = recon_repo.get_definition()
    loadable_target_origin = LoadableTargetOrigin(python_file=__file__)

    with instance_for_test() as instance:
        with in_process_test_workspace(instance, loadable_target_origin) as workspace:
            location = workspace.get_repository_location(workspace.repository_location_names[0])
            repo_handle = RepositoryHandle(
                repository_name=repo_def.name,
                repository_location=location,
            )
            fake_external_pipeline = external_pipeline_from_recon_pipeline(
                recon_pipeline,
                solid_selection=None,
                repository_handle=repo_handle,
            )

            # Launch the run in a fake Dagster instance.
            pipeline_name = "demo_pipeline"
            run = create_run_for_test(
                instance,
                pipeline_name=pipeline_name,
                external_pipeline_origin=fake_external_pipeline.get_external_origin(),
                pipeline_code_origin=fake_external_pipeline.get_python_origin(),
            )
            k8s_run_launcher.register_instance(instance)

            # same order as side effects
            assert k8s_run_launcher.check_run_worker_health(run).status == WorkerStatus.RUNNING
            assert k8s_run_launcher.check_run_worker_health(run).status == WorkerStatus.SUCCESS
            assert k8s_run_launcher.check_run_worker_health(run).status == WorkerStatus.FAILED
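
The three assertions above pass because mock's side_effect queues one canned return value per call, in order. A minimal standalone sketch of that pattern, with placeholder strings instead of V1Job objects:

from unittest import mock

# spec_set limits the mock to the single method the launcher may call;
# side_effect returns the queued values one per invocation, in order.
batch_api = mock.Mock(spec_set=["read_namespaced_job"])
batch_api.read_namespaced_job.side_effect = ["running", "succeeded", "failed"]

assert batch_api.read_namespaced_job() == "running"
assert batch_api.read_namespaced_job() == "succeeded"
assert batch_api.read_namespaced_job() == "failed"
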
Code Example #10
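Loads a repository definition from YAML and returns its full external pipeline, using a placeholder LocationHandle inside the RepositoryHandle.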
def get_full_external_pipeline(repo_yaml, pipeline_name):
    recon_repo = ReconstructableRepository.from_yaml(repo_yaml)
    repo_def = recon_repo.get_definition()
    external_repo = external_repo_from_def(
        repo_def,
        RepositoryHandle(
            repo_def.name,
            LocationHandle('<<MOCK_FOR_TEST>>', recon_repo.pointer),
        ),
    )
    return external_repo.get_full_external_pipeline(pipeline_name)
Code Example #11
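In-process location constructor: builds a RepositoryLocationHandle from a reconstructable repository and indexes the resulting ExternalRepository by name.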
    def __init__(self, recon_repo):
        self._recon_repo = check.inst_param(recon_repo, 'recon_repo', ReconstructableRepository)
        self._handle = RepositoryLocationHandle.create_in_process_location(recon_repo.pointer)

        repo_def = recon_repo.get_definition()
        def_name = repo_def.name
        self._external_repo = external_repo_from_def(
            repo_def,
            RepositoryHandle(repository_name=def_name, repository_location_handle=self._handle),
        )
        self._repositories = {self._external_repo.name: self._external_repo}
Code Example #12
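Handle-based constructor: recovers the reconstructable repository from an InProcessRepositoryLocationHandle's origin before building the ExternalRepository.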
    def __init__(self, handle):
        self._handle = check.inst_param(handle, "handle", InProcessRepositoryLocationHandle)

        self._recon_repo = self._handle.origin.recon_repo

        repo_def = self._recon_repo.get_definition()
        def_name = repo_def.name
        self._external_repo = external_repo_from_def(
            repo_def,
            RepositoryHandle(repository_name=def_name, repository_location_handle=self._handle),
        )
        self._repositories = {self._external_repo.name: self._external_repo}
Code Example #13
File: context.py Project: punneng/dagster
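Reloader-aware constructor: the same pattern with an optional Reloader attached alongside the LocationHandle.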
    def __init__(self, recon_repo, reloader=None):

        self._recon_repo = check.inst_param(recon_repo, 'recon_repo',
                                            ReconstructableRepository)
        self._handle = LocationHandle(self.name, recon_repo.pointer)

        self._external_repo = external_repo_from_def(
            recon_repo.get_definition(),
            RepositoryHandle(recon_repo.get_definition().name, self._handle),
        )

        self._repositories = {self._external_repo.name: self._external_repo}
        self.reloader = check.opt_inst_param(reloader, 'reloader', Reloader)
Code Example #14
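Subprocess snapshot variant: invokes the dagster api snapshot repository CLI per code pointer, reads the unary response from a temp file, and validates it as ExternalRepositoryData.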
def sync_get_external_repositories(repository_location_handle):
    check.inst_param(
        repository_location_handle, 'repository_location_handle', RepositoryLocationHandle,
    )

    check.param_invariant(
        isinstance(repository_location_handle, PythonEnvRepositoryLocationHandle),
        'repository_location_handle',
    )

    repos = []

    for key, pointer in repository_location_handle.repository_code_pointer_dict.items():
        with get_temp_file_name() as output_file:

            parts = [
                repository_location_handle.executable_path,
                '-m',
                'dagster',
                'api',
                'snapshot',
                'repository',
                output_file,
            ] + xplat_shlex_split(pointer.get_cli_args())

            execute_command_in_subprocess(parts)

            external_repository_data = read_unary_response(output_file)
            check.inst(external_repository_data, ExternalRepositoryData)

            repository_handle = RepositoryHandle(
                repository_name=external_repository_data.name,
                repository_key=key,
                repository_location_handle=repository_location_handle,
            )

            repos.append(ExternalRepository(external_repository_data, repository_handle))

    return repos
Code Example #15
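Single-repository subprocess variant: reads exactly one message from the IPC event stream written by the CLI and wraps it in an ExternalRepository.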
def sync_get_external_repository(location_handle):
    check.inst_param(location_handle, 'location_handle', LocationHandle)

    with get_temp_file_name() as output_file:

        parts = ['dagster', 'api', 'snapshot', 'repository', output_file] + xplat_shlex_split(
            location_handle.pointer.get_cli_args()
        )
        returncode = subprocess.check_call(parts)
        check.invariant(returncode == 0, 'dagster api cli invocation did not complete successfully')

        messages = list(ipc_read_event_stream(output_file))
        check.invariant(len(messages) == 1)

        external_repository_data = messages[0]

        check.inst(external_repository_data, ExternalRepositoryData)

        return ExternalRepository(
            external_repository_data,
            RepositoryHandle(external_repository_data.name, location_handle),
        )
Code Example #16
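Backfill CLI helper: resolves a partition set for the chosen pipeline, fetches partition names and execution params through the RepositoryHandle, and submits one run per partition as a PartitionBackfill job.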
def _execute_backfill_command_at_location(cli_args, print_fn, instance, workspace, repo_location):
    external_repo = get_external_repository_from_repo_location(
        repo_location, cli_args.get("repository")
    )

    external_pipeline = get_external_pipeline_from_external_repo(
        external_repo,
        cli_args.get("pipeline"),
    )

    noprompt = cli_args.get("noprompt")

    pipeline_partition_set_names = {
        external_partition_set.name: external_partition_set
        for external_partition_set in external_repo.get_external_partition_sets()
        if external_partition_set.pipeline_name == external_pipeline.name
    }

    if not pipeline_partition_set_names:
        raise click.UsageError(
            "No partition sets found for pipeline `{}`".format(external_pipeline.name)
        )
    partition_set_name = cli_args.get("partition_set")
    if not partition_set_name:
        if len(pipeline_partition_set_names) == 1:
            partition_set_name = next(iter(pipeline_partition_set_names.keys()))
        elif noprompt:
            raise click.UsageError("No partition set specified (see option `--partition-set`)")
        else:
            partition_set_name = click.prompt(
                "Select a partition set to use for backfill: {}".format(
                    ", ".join(x for x in pipeline_partition_set_names.keys())
                )
            )

    partition_set = pipeline_partition_set_names.get(partition_set_name)

    if not partition_set:
        raise click.UsageError("No partition set found named `{}`".format(partition_set_name))

    run_tags = get_tags_from_args(cli_args)

    repo_handle = RepositoryHandle(
        repository_name=external_repo.name,
        repository_location=repo_location,
    )

    try:
        partition_names_or_error = repo_location.get_external_partition_names(
            repo_handle,
            partition_set_name,
        )
    except Exception:  # pylint: disable=broad-except
        error_info = serializable_error_info_from_exc_info(sys.exc_info())
        raise DagsterBackfillFailedError(
            "Failure fetching partition names for {partition_set_name}: {error_message}".format(
                partition_set_name=partition_set_name,
                error_message=error_info.message,
            ),
            serialized_error_info=error_info,
        )

    partition_names = gen_partition_names_from_args(
        partition_names_or_error.partition_names, cli_args
    )

    # Print backfill info
    print_fn("\n     Pipeline: {}".format(external_pipeline.name))
    print_fn("Partition set: {}".format(partition_set_name))
    print_fn("   Partitions: {}\n".format(print_partition_format(partition_names, indent_level=15)))

    # Confirm and launch
    if noprompt or click.confirm(
        "Do you want to proceed with the backfill ({} partitions)?".format(len(partition_names))
    ):

        print_fn("Launching runs... ")

        backfill_id = make_new_backfill_id()
        backfill_job = PartitionBackfill(
            backfill_id=backfill_id,
            partition_set_origin=partition_set.get_external_origin(),
            status=BulkActionStatus.REQUESTED,
            partition_names=partition_names,
            from_failure=False,
            reexecution_steps=None,
            tags=run_tags,
            backfill_timestamp=pendulum.now("UTC").timestamp(),
        )
        try:
            partition_execution_data = (
                repo_location.get_external_partition_set_execution_param_data(
                    repository_handle=repo_handle,
                    partition_set_name=partition_set_name,
                    partition_names=partition_names,
                )
            )
        except Exception:  # pylint: disable=broad-except
            error_info = serializable_error_info_from_exc_info(sys.exc_info())
            instance.add_backfill(
                backfill_job.with_status(BulkActionStatus.FAILED).with_error(error_info)
            )
            return print_fn("Backfill failed: {}".format(error_info))

        assert isinstance(partition_execution_data, ExternalPartitionSetExecutionParamData)

        for partition_data in partition_execution_data.partition_data:
            pipeline_run = create_backfill_run(
                instance,
                repo_location,
                external_pipeline,
                partition_set,
                backfill_job,
                partition_data,
            )
            if pipeline_run:
                instance.submit_run(pipeline_run.run_id, workspace)

        instance.add_backfill(backfill_job.with_status(BulkActionStatus.COMPLETED))

        print_fn("Launched backfill job `{}`".format(backfill_id))

    else:
        print_fn("Aborted!")
Code Example #17
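Earlier backfill command: fetches run config and tags partition by partition through the RepositoryHandle and launches each run directly, sleeping briefly between launches.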
def execute_backfill_command(cli_args, print_fn, instance=None):
    instance = instance or DagsterInstance.get()
    repo_location = get_repository_location_from_kwargs(cli_args, instance)
    external_repo = get_external_repository_from_repo_location(
        repo_location, cli_args.get('repository'))

    external_pipeline = get_external_pipeline_from_external_repo(
        external_repo,
        cli_args.get('pipeline'),
    )

    noprompt = cli_args.get('noprompt')

    pipeline_partition_set_names = {
        external_partition_set.name: external_partition_set
        for external_partition_set in
        external_repo.get_external_partition_sets()
        if external_partition_set.pipeline_name == external_pipeline.name
    }

    if not pipeline_partition_set_names:
        raise click.UsageError(
            'No partition sets found for pipeline `{}`'.format(
                external_pipeline.name))
    partition_set_name = cli_args.get('partition_set')
    if not partition_set_name:
        if len(pipeline_partition_set_names) == 1:
            partition_set_name = next(iter(
                pipeline_partition_set_names.keys()))
        elif noprompt:
            raise click.UsageError(
                'No partition set specified (see option `--partition-set`)')
        else:
            partition_set_name = click.prompt(
                'Select a partition set to use for backfill: {}'.format(
                    ', '.join(x for x in pipeline_partition_set_names.keys())))

    partition_set = pipeline_partition_set_names.get(partition_set_name)

    if not partition_set:
        raise click.UsageError(
            'No partition set found named `{}`'.format(partition_set_name))

    mode = partition_set.mode
    solid_selection = partition_set.solid_selection

    repo_handle = RepositoryHandle(
        repository_name=external_repo.name,
        repository_location_handle=repo_location.location_handle,
    )

    # Resolve partitions to backfill
    partition_names_or_error = repo_location.get_external_partition_names(
        repo_handle,
        partition_set_name,
    )

    if isinstance(partition_names_or_error,
                  ExternalPartitionExecutionErrorData):
        raise DagsterBackfillFailedError(
            'Failure fetching partition names for {partition_set_name}: {error_message}'
            .format(
                partition_set_name=partition_set_name,
                error_message=partition_names_or_error.error.message,
            ),
            serialized_error_info=partition_names_or_error.error,
        )

    partition_names = gen_partition_names_from_args(
        partition_names_or_error.partition_names, cli_args)

    # Print backfill info
    print_fn('\n     Pipeline: {}'.format(external_pipeline.name))
    print_fn('Partition set: {}'.format(partition_set_name))
    print_fn('   Partitions: {}\n'.format(
        print_partition_format(partition_names, indent_level=15)))

    # Confirm and launch
    if noprompt or click.confirm(
            'Do you want to proceed with the backfill ({} partitions)?'.format(
                len(partition_names))):

        print_fn('Launching runs... ')
        backfill_id = make_new_backfill_id()

        run_tags = merge_dicts(
            PipelineRun.tags_for_backfill_id(backfill_id),
            get_tags_from_args(cli_args),
        )

        for partition_name in partition_names:
            run_config_or_error = repo_location.get_external_partition_config(
                repo_handle, partition_set_name, partition_name)
            if isinstance(run_config_or_error,
                          ExternalPartitionExecutionErrorData):
                raise DagsterBackfillFailedError(
                    'Failure fetching run config for partition {partition_name} in {partition_set_name}: {error_message}'
                    .format(
                        partition_name=partition_name,
                        partition_set_name=partition_set_name,
                        error_message=run_config_or_error.error.message,
                    ),
                    serialized_error_info=run_config_or_error.error,
                )

            tags_or_error = repo_location.get_external_partition_tags(
                repo_handle, partition_set_name, partition_name)
            if isinstance(tags_or_error, ExternalPartitionExecutionErrorData):
                raise DagsterBackfillFailedError(
                    'Failure fetching tags for partition {partition_name} in {partition_set_name}: {error_message}'
                    .format(
                        partition_name=partition_name,
                        partition_set_name=partition_set_name,
                        error_message=tags_or_error.error.message,
                    ),
                    serialized_error_info=tags_or_error.error,
                )
            run = _create_external_pipeline_run(
                instance=instance,
                repo_location=repo_location,
                external_repo=external_repo,
                external_pipeline=external_pipeline,
                run_config=run_config_or_error.run_config,
                mode=mode,
                preset=None,
                tags=merge_dicts(tags_or_error.tags, run_tags),
                solid_selection=frozenset(solid_selection)
                if solid_selection else None,
            )

            instance.launch_run(run.run_id, external_pipeline)
            # Remove once we can handle synchronous execution... currently limited by sqlite
            time.sleep(0.1)

        print_fn('Launched backfill job `{}`'.format(backfill_id))
    else:
        print_fn('Aborted!')
Code Example #18
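GraphQL test: reloads a repository location with the gRPC listing and streaming fetch functions mocked out, asserting that the newly returned repository shows up.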
    def test_out_of_process_reload_location(self, graphql_context):
        result = execute_dagster_graphql(graphql_context,
                                         RELOAD_REPOSITORY_LOCATION_QUERY,
                                         {"repositoryLocationName": "test"})

        assert result
        assert result.data
        assert result.data["reloadRepositoryLocation"]
        assert result.data["reloadRepositoryLocation"][
            "__typename"] == "RepositoryLocation"
        assert result.data["reloadRepositoryLocation"]["name"] == "test"
        assert result.data["reloadRepositoryLocation"]["repositories"] == [{
            "name":
            "test_repo"
        }]
        assert result.data["reloadRepositoryLocation"][
            "isReloadSupported"] is True

        with mock.patch(
            # note that it is where the function is *used* that needs to be mocked,
            # not where it is defined.
            # see https://docs.python.org/3/library/unittest.mock.html#where-to-patch
            "dagster.core.host_representation.handle.sync_list_repositories_grpc"
        ) as cli_command_mock:

            with mock.patch(
                # same rule: patch the name where it is looked up, not where it is defined.
                # see https://docs.python.org/3/library/unittest.mock.html#where-to-patch
                "dagster.core.host_representation.repository_location.sync_get_streaming_external_repositories_grpc"
            ) as external_repository_mock:

                @repository
                def new_repo():
                    return []

                new_repo_data = external_repository_data_from_def(new_repo)

                external_repository_mock.return_value = [
                    ExternalRepository(
                        new_repo_data,
                        RepositoryHandle(
                            "new_repo",
                            graphql_context.repository_locations[0].location_handle,
                        ),
                    )
                ]

                cli_command_mock.return_value = ListRepositoriesResponse(
                    repository_symbols=[],
                    executable_path=sys.executable,
                    repository_code_pointer_dict={
                        "new_repo": CodePointer.from_python_file(__file__, "new_repo", None)
                    },
                )

                result = execute_dagster_graphql(
                    graphql_context,
                    RELOAD_REPOSITORY_LOCATION_QUERY,
                    {"repositoryLocationName": "test"},
                )

                assert cli_command_mock.call_count == 1
                assert external_repository_mock.call_count == 1

                assert result.data["reloadRepositoryLocation"][
                    "repositories"] == [{
                        "name": "new_repo"
                    }]
Code Example #19
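Verifies that env vars from the pipeline's container context are merged with the launcher's own env_vars on the created Kubernetes job container.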
def test_launcher_with_container_context(kubeconfig_file):
    # Construct a K8s run launcher in a fake k8s environment.
    mock_k8s_client_batch_api = mock.MagicMock()
    k8s_run_launcher = K8sRunLauncher(
        service_account_name="dagit-admin",
        instance_config_map="dagster-instance",
        postgres_password_secret="dagster-postgresql-secret",
        dagster_home="/opt/dagster/dagster_home",
        job_image="fake_job_image",
        load_incluster_config=False,
        kubeconfig_file=kubeconfig_file,
        k8s_client_batch_api=mock_k8s_client_batch_api,
        env_vars=["FOO_TEST"],
    )

    container_context_config = {"k8s": {"env_vars": ["BAR_TEST"]}}

    # Create fake external pipeline.
    recon_pipeline = reconstructable(fake_pipeline)
    recon_repo = recon_pipeline.repository
    repo_def = recon_repo.get_definition()

    python_origin = recon_pipeline.get_python_origin()
    python_origin = python_origin._replace(
        repository_origin=python_origin.repository_origin._replace(
            container_context=container_context_config,
        )
    )
    loadable_target_origin = LoadableTargetOrigin(python_file=__file__)

    with instance_for_test() as instance:
        with in_process_test_workspace(instance, loadable_target_origin) as workspace:
            location = workspace.get_repository_location(workspace.repository_location_names[0])
            repo_handle = RepositoryHandle(
                repository_name=repo_def.name,
                repository_location=location,
            )
            fake_external_pipeline = external_pipeline_from_recon_pipeline(
                recon_pipeline,
                solid_selection=None,
                repository_handle=repo_handle,
            )

            # Launch the run in a fake Dagster instance.
            pipeline_name = "demo_pipeline"
            run = create_run_for_test(
                instance,
                pipeline_name=pipeline_name,
                external_pipeline_origin=fake_external_pipeline.get_external_origin(),
                pipeline_code_origin=python_origin,
            )
            k8s_run_launcher.register_instance(instance)
            k8s_run_launcher.launch_run(LaunchRunContext(run, workspace))

            updated_run = instance.get_run_by_id(run.run_id)
            assert updated_run.tags[DOCKER_IMAGE_TAG] == "fake_job_image"

        # Check that user defined k8s config was passed down to the k8s job.
        mock_method_calls = mock_k8s_client_batch_api.method_calls
        assert len(mock_method_calls) > 0
        method_name, _args, kwargs = mock_method_calls[0]
        assert method_name == "create_namespaced_job"

        container = kwargs["body"].spec.template.spec.containers[0]

        env_names = [env.name for env in container.env]

        assert "BAR_TEST" in env_names
        assert "FOO_TEST" in env_names

        args = container.args
        assert (
            args
            == ExecuteRunArgs(
                pipeline_origin=run.pipeline_code_origin,
                pipeline_run_id=run.run_id,
                instance_ref=instance.get_ref(),
                set_exit_code_on_failure=None,
            ).get_command_args()
        )
Code Example #20
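Backfill variant that fetches execution params for all partitions in one get_external_partition_set_execution_param_data call before launching the runs.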
def _execute_backfill_command_at_location(cli_args, print_fn, instance, repo_location):
    external_repo = get_external_repository_from_repo_location(
        repo_location, cli_args.get("repository")
    )

    external_pipeline = get_external_pipeline_from_external_repo(
        external_repo, cli_args.get("pipeline"),
    )

    noprompt = cli_args.get("noprompt")

    pipeline_partition_set_names = {
        external_partition_set.name: external_partition_set
        for external_partition_set in external_repo.get_external_partition_sets()
        if external_partition_set.pipeline_name == external_pipeline.name
    }

    if not pipeline_partition_set_names:
        raise click.UsageError(
            "No partition sets found for pipeline `{}`".format(external_pipeline.name)
        )
    partition_set_name = cli_args.get("partition_set")
    if not partition_set_name:
        if len(pipeline_partition_set_names) == 1:
            partition_set_name = next(iter(pipeline_partition_set_names.keys()))
        elif noprompt:
            raise click.UsageError("No partition set specified (see option `--partition-set`)")
        else:
            partition_set_name = click.prompt(
                "Select a partition set to use for backfill: {}".format(
                    ", ".join(x for x in pipeline_partition_set_names.keys())
                )
            )

    partition_set = pipeline_partition_set_names.get(partition_set_name)

    if not partition_set:
        raise click.UsageError("No partition set found named `{}`".format(partition_set_name))

    mode = partition_set.mode
    solid_selection = partition_set.solid_selection
    run_tags = get_tags_from_args(cli_args)

    repo_handle = RepositoryHandle(
        repository_name=external_repo.name,
        repository_location_handle=repo_location.location_handle,
    )

    # Resolve partitions to backfill
    partition_names_or_error = repo_location.get_external_partition_names(
        repo_handle, partition_set_name,
    )

    if isinstance(partition_names_or_error, ExternalPartitionExecutionErrorData):
        raise DagsterBackfillFailedError(
            "Failure fetching partition names for {partition_set_name}: {error_message}".format(
                partition_set_name=partition_set_name,
                error_message=partition_names_or_error.error.message,
            ),
            serialized_error_info=partition_names_or_error.error,
        )

    partition_names = gen_partition_names_from_args(
        partition_names_or_error.partition_names, cli_args
    )

    # Print backfill info
    print_fn("\n     Pipeline: {}".format(external_pipeline.name))
    print_fn("Partition set: {}".format(partition_set_name))
    print_fn("   Partitions: {}\n".format(print_partition_format(partition_names, indent_level=15)))

    # Confirm and launch
    if noprompt or click.confirm(
        "Do you want to proceed with the backfill ({} partitions)?".format(len(partition_names))
    ):

        print_fn("Launching runs... ")

        backfill_id = make_new_backfill_id()
        backfill_tags = PipelineRun.tags_for_backfill_id(backfill_id)
        partition_execution_data = repo_location.get_external_partition_set_execution_param_data(
            repository_handle=repo_handle,
            partition_set_name=partition_set_name,
            partition_names=partition_names,
        )

        if isinstance(partition_execution_data, ExternalPartitionExecutionErrorData):
            return print_fn("Backfill failed: {}".format(partition_execution_data.error))

        assert isinstance(partition_execution_data, ExternalPartitionSetExecutionParamData)

        for partition_data in partition_execution_data.partition_data:
            run = _create_external_pipeline_run(
                instance=instance,
                repo_location=repo_location,
                external_repo=external_repo,
                external_pipeline=external_pipeline,
                run_config=partition_data.run_config,
                mode=mode,
                preset=None,
                tags=merge_dicts(merge_dicts(partition_data.tags, backfill_tags), run_tags),
                solid_selection=frozenset(solid_selection) if solid_selection else None,
            )

            instance.launch_run(run.run_id, external_pipeline)

        print_fn("Launched backfill job `{}`".format(backfill_id))

    else:
        print_fn("Aborted!")
Code Example #21
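K8sRunLauncher version of the run-tags test: additionally asserts that the postgres password env var is present and that the Docker image tag is recorded on the run.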
def test_user_defined_k8s_config_in_run_tags(kubeconfig_file):
    # Construct a K8s run launcher in a fake k8s environment.
    mock_k8s_client_batch_api = mock.MagicMock()
    k8s_run_launcher = K8sRunLauncher(
        service_account_name="dagit-admin",
        instance_config_map="dagster-instance",
        postgres_password_secret="dagster-postgresql-secret",
        dagster_home="/opt/dagster/dagster_home",
        job_image="fake_job_image",
        load_incluster_config=False,
        kubeconfig_file=kubeconfig_file,
        k8s_client_batch_api=mock_k8s_client_batch_api,
    )

    # Construct Dagster run tags with user defined k8s config.
    expected_resources = {
        "requests": {
            "cpu": "250m",
            "memory": "64Mi"
        },
        "limits": {
            "cpu": "500m",
            "memory": "2560Mi"
        },
    }
    user_defined_k8s_config = UserDefinedDagsterK8sConfig(
        container_config={"resources": expected_resources},
    )
    user_defined_k8s_config_json = json.dumps(user_defined_k8s_config.to_dict())
    tags = {"dagster-k8s/config": user_defined_k8s_config_json}

    # Create fake external pipeline.
    recon_pipeline = reconstructable(fake_pipeline)
    recon_repo = recon_pipeline.repository
    repo_def = recon_repo.get_definition()
    location_origin = InProcessRepositoryLocationOrigin(recon_repo)
    with location_origin.create_location() as location:
        repo_handle = RepositoryHandle(
            repository_name=repo_def.name,
            repository_location=location,
        )
        fake_external_pipeline = external_pipeline_from_recon_pipeline(
            recon_pipeline,
            solid_selection=None,
            repository_handle=repo_handle,
        )

        # Launch the run in a fake Dagster instance.
        with instance_for_test() as instance:
            pipeline_name = "demo_pipeline"
            run = create_run_for_test(instance,
                                      pipeline_name=pipeline_name,
                                      tags=tags)
            k8s_run_launcher.register_instance(instance)
            run = k8s_run_launcher.launch_run(run, fake_external_pipeline)

            updated_run = instance.get_run_by_id(run.run_id)
            assert updated_run.tags[DOCKER_IMAGE_TAG] == "fake_job_image"

        # Check that user defined k8s config was passed down to the k8s job.
        mock_method_calls = mock_k8s_client_batch_api.method_calls
        assert len(mock_method_calls) > 0
        method_name, _args, kwargs = mock_method_calls[0]
        assert method_name == "create_namespaced_job"
        job_resources = kwargs["body"].spec.template.spec.containers[
            0].resources
        assert job_resources == expected_resources
        assert DAGSTER_PG_PASSWORD_ENV_VAR in [
            env.name
            for env in kwargs["body"].spec.template.spec.containers[0].env
        ]
Code Example #22
File: test_launcher.py Project: helloworld/dagster
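CeleryK8sRunLauncher version with custom labels: asserts the created job's resources, labels, and command args.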
def test_user_defined_k8s_config_in_run_tags(kubeconfig_file):

    labels = {"foo_label_key": "bar_label_value"}

    # Construct a K8s run launcher in a fake k8s environment.
    mock_k8s_client_batch_api = mock.MagicMock()
    celery_k8s_run_launcher = CeleryK8sRunLauncher(
        instance_config_map="dagster-instance",
        postgres_password_secret="dagster-postgresql-secret",
        dagster_home="/opt/dagster/dagster_home",
        load_incluster_config=False,
        kubeconfig_file=kubeconfig_file,
        k8s_client_batch_api=mock_k8s_client_batch_api,
        labels=labels,
    )

    # Construct Dagster run tags with user defined k8s config.
    expected_resources = {
        "requests": {
            "cpu": "250m",
            "memory": "64Mi"
        },
        "limits": {
            "cpu": "500m",
            "memory": "2560Mi"
        },
    }
    user_defined_k8s_config = UserDefinedDagsterK8sConfig(
        container_config={"resources": expected_resources},
    )
    user_defined_k8s_config_json = json.dumps(user_defined_k8s_config.to_dict())
    tags = {"dagster-k8s/config": user_defined_k8s_config_json}

    # Create fake external pipeline.
    recon_pipeline = reconstructable(fake_pipeline)
    recon_repo = recon_pipeline.repository
    with instance_for_test() as instance:
        with in_process_test_workspace(instance, recon_repo) as workspace:
            location = workspace.get_repository_location(
                workspace.repository_location_names[0])

            repo_def = recon_repo.get_definition()
            repo_handle = RepositoryHandle(
                repository_name=repo_def.name,
                repository_location=location,
            )
            fake_external_pipeline = external_pipeline_from_recon_pipeline(
                recon_pipeline,
                solid_selection=None,
                repository_handle=repo_handle,
            )

            celery_k8s_run_launcher.register_instance(instance)
            pipeline_name = "demo_pipeline"
            run_config = {
                "execution": {
                    "celery-k8s": {
                        "config": {
                            "job_image": "fake-image-name"
                        }
                    }
                }
            }
            run = create_run_for_test(
                instance,
                pipeline_name=pipeline_name,
                run_config=run_config,
                tags=tags,
                external_pipeline_origin=fake_external_pipeline.get_external_origin(),
                pipeline_code_origin=fake_external_pipeline.get_python_origin(),
            )
            celery_k8s_run_launcher.launch_run(LaunchRunContext(run, workspace))

            updated_run = instance.get_run_by_id(run.run_id)
            assert updated_run.tags[DOCKER_IMAGE_TAG] == "fake-image-name"

            # Check that user defined k8s config was passed down to the k8s job.
            mock_method_calls = mock_k8s_client_batch_api.method_calls
            assert len(mock_method_calls) > 0
            method_name, _args, kwargs = mock_method_calls[0]
            assert method_name == "create_namespaced_job"

            container = kwargs["body"].spec.template.spec.containers[0]

            job_resources = container.resources
            assert job_resources == expected_resources

            labels = kwargs["body"].spec.template.metadata.labels
            assert labels["foo_label_key"] == "bar_label_value"

            args = container.args
            assert (
                args
                == ExecuteRunArgs(
                    pipeline_origin=run.pipeline_code_origin,
                    pipeline_run_id=run.run_id,
                    instance_ref=instance.get_ref(),
                    set_exit_code_on_failure=None,
                ).get_command_args()
            )
Code Example #23
File: test_launcher.py Project: helloworld/dagster
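Verifies that CeleryK8sRunLauncher created with fail_pod_on_run_failure=True produces job command args with set_exit_code_on_failure=True.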
def test_raise_on_error(kubeconfig_file):
    mock_k8s_client_batch_api = mock.MagicMock()
    celery_k8s_run_launcher = CeleryK8sRunLauncher(
        instance_config_map="dagster-instance",
        postgres_password_secret="dagster-postgresql-secret",
        dagster_home="/opt/dagster/dagster_home",
        load_incluster_config=False,
        kubeconfig_file=kubeconfig_file,
        k8s_client_batch_api=mock_k8s_client_batch_api,
        fail_pod_on_run_failure=True,
    )
    # Create fake external pipeline.
    recon_pipeline = reconstructable(fake_pipeline)
    recon_repo = recon_pipeline.repository
    with instance_for_test() as instance:
        with in_process_test_workspace(instance, recon_repo) as workspace:
            location = workspace.get_repository_location(
                workspace.repository_location_names[0])

            repo_def = recon_repo.get_definition()
            repo_handle = RepositoryHandle(
                repository_name=repo_def.name,
                repository_location=location,
            )
            fake_external_pipeline = external_pipeline_from_recon_pipeline(
                recon_pipeline,
                solid_selection=None,
                repository_handle=repo_handle,
            )

            celery_k8s_run_launcher.register_instance(instance)
            pipeline_name = "demo_pipeline"
            run_config = {
                "execution": {
                    "celery-k8s": {
                        "config": {
                            "job_image": "fake-image-name"
                        }
                    }
                }
            }
            run = create_run_for_test(
                instance,
                pipeline_name=pipeline_name,
                run_config=run_config,
                external_pipeline_origin=fake_external_pipeline.get_external_origin(),
                pipeline_code_origin=fake_external_pipeline.get_python_origin(),
            )
            celery_k8s_run_launcher.launch_run(LaunchRunContext(run, workspace))

            # Check that user defined k8s config was passed down to the k8s job.
            mock_method_calls = mock_k8s_client_batch_api.method_calls
            assert len(mock_method_calls) > 0
            method_name, _args, kwargs = mock_method_calls[0]
            assert method_name == "create_namespaced_job"

            container = kwargs["body"].spec.template.spec.containers[0]

            args = container.args
            assert (
                args
                == ExecuteRunArgs(
                    pipeline_origin=run.pipeline_code_origin,
                    pipeline_run_id=run.run_id,
                    instance_ref=instance.get_ref(),
                    set_exit_code_on_failure=True,
                ).get_command_args()
            )