Example #1
    def from_handle(repository_location_handle):
        check.inst_param(repository_location_handle,
                         'repository_location_handle',
                         RepositoryLocationHandle)

        if isinstance(repository_location_handle,
                      InProcessRepositoryLocationHandle):
            check.invariant(
                len(repository_location_handle.repository_code_pointer_dict) == 1
            )
            pointer = next(
                iter(repository_location_handle.repository_code_pointer_dict.values())
            )
            return InProcessRepositoryLocation(
                ReconstructableRepository(pointer))
        elif isinstance(repository_location_handle,
                        PythonEnvRepositoryLocationHandle):
            return PythonEnvRepositoryLocation(repository_location_handle)
        elif isinstance(
                repository_location_handle,
                (GrpcServerRepositoryLocationHandle,
                 ManagedGrpcPythonEnvRepositoryLocationHandle)):
            return GrpcServerRepositoryLocation(repository_location_handle)
        else:
            check.failed(
                'Unsupported handle: {}'.format(repository_location_handle))
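
In the in-process branch above, the handle's single code pointer is wrapped in a ReconstructableRepository. A minimal standalone sketch of that construction, using the import paths that appear in the later examples (the file path and function name are placeholders, and import locations can shift between Dagster versions):

from dagster.core.code_pointer import FileCodePointer
from dagster.core.definitions.reconstructable import ReconstructableRepository

# Placeholder target: a file that defines a repository via a function.
pointer = FileCodePointer(python_file="path/to/repo.py", fn_name="define_my_repo")
recon_repo = ReconstructableRepository(pointer)
repo_def = recon_repo.get_definition()  # loads the RepositoryDefinition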
Example #2
def create_app_from_workspace(workspace, instance):
    check.inst_param(workspace, 'workspace', Workspace)
    check.inst_param(instance, 'instance', DagsterInstance)

    warn_if_compute_logs_disabled()

    print('Loading repository...')

    locations = []
    for repository_location_handle in workspace.repository_location_handles:
        if isinstance(repository_location_handle,
                      InProcessRepositoryLocationHandle):
            # will need to change for multi repo
            check.invariant(
                len(repository_location_handle.repository_code_pointer_dict) == 1
            )
            pointer = next(
                iter(repository_location_handle.repository_code_pointer_dict.values())
            )
            recon_repo = ReconstructableRepository(pointer)
            locations.append(InProcessRepositoryLocation(recon_repo))
        elif isinstance(repository_location_handle,
                        PythonEnvRepositoryLocationHandle):
            locations.append(
                PythonEnvRepositoryLocation(repository_location_handle))
        else:
            check.failed('{} unsupported'.format(repository_location_handle))

    context = DagsterGraphQLContext(instance=instance,
                                    locations=locations,
                                    version=__version__)

    return instantiate_app_with_views(context)
Example #3
File: cli.py Project: cy56/dagster
def host_dagit_ui_with_workspace(workspace,
                                 host,
                                 port,
                                 storage_fallback,
                                 port_lookup=True):
    check.inst_param(workspace, 'workspace', Workspace)

    instance = DagsterInstance.get(storage_fallback)

    if len(workspace.repository_location_handles) == 1:
        repository_location_handle = workspace.repository_location_handles[0]
        if len(repository_location_handle.repository_code_pointer_dict) == 1:
            pointer = next(
                iter(repository_location_handle.repository_code_pointer_dict.values())
            )

            from dagster.core.definitions.reconstructable import ReconstructableRepository

            recon_repo = ReconstructableRepository(pointer)

            log_repo_stats(instance=instance, repo=recon_repo, source='dagit')

    app = create_app_from_workspace(workspace, instance)

    start_server(host, port, app, port_lookup)
Example #4
    def get_external_origin(self):
        """
        Hack! Inject the origin that the k8s images will use. The BK image uses a different
        directory structure (/workdir/python_modules/dagster-test/dagster_test/test_project) than
        the images inside the kind cluster (/dagster_test/test_project). As a result, the normal
        origin won't work, so we need to inject this one.
        """

        return ExternalJobOrigin(
            external_repository_origin=ExternalRepositoryOrigin(
                repository_location_origin=InProcessRepositoryLocationOrigin(
                    recon_repo=ReconstructableRepository(
                        pointer=FileCodePointer(
                            python_file="/dagster_test/test_project/test_pipelines/repo.py",
                            fn_name="define_demo_execution_repo",
                        ),
                        container_image=self._container_image,
                        executable_path="python",
                        entry_point=DEFAULT_DAGSTER_ENTRY_POINT,
                    )),
                repository_name="demo_execution_repo",
            ),
            job_name=self.name,
        )
Example #5
def step_context_to_step_run_ref(
    step_context: StepExecutionContext,
    prior_attempts_count: int,
    package_dir: Optional[str] = None,
) -> StepRunRef:
    """
    Args:
        step_context (StepExecutionContext): The step context.
        prior_attempts_count (int): The number of times this step has been tried before in the same
            pipeline run.
        package_dir (Optional[str]): If set, the pipeline's file code pointer will be converted to
            a module pointer relative to the package root. This enables executing steps in remote
            setups where the package containing the pipeline resides at a different location on the
            filesystem in the remote environment than in the environment executing the plan
            process.

    Returns (StepRunRef):
        A reference to the step.
    """

    check.inst_param(step_context, "step_context", StepExecutionContext)
    check.int_param(prior_attempts_count, "prior_attempts_count")

    retry_mode = step_context.retry_mode

    recon_pipeline = step_context.pipeline
    if package_dir:
        if isinstance(recon_pipeline, ReconstructablePipeline) and isinstance(
                recon_pipeline.repository.pointer, FileCodePointer):
            recon_pipeline = ReconstructablePipeline(
                repository=ReconstructableRepository(
                    pointer=ModuleCodePointer(
                        _module_in_package_dir(
                            recon_pipeline.repository.pointer.python_file,
                            package_dir),
                        recon_pipeline.repository.pointer.fn_name,
                        working_directory=os.getcwd(),
                    ),
                    container_image=recon_pipeline.repository.container_image,
                    executable_path=recon_pipeline.repository.executable_path,
                    entry_point=recon_pipeline.repository.entry_point,
                ),
                pipeline_name=recon_pipeline.pipeline_name,
                solids_to_execute=recon_pipeline.solids_to_execute,
            )

    parent_run_id = step_context.pipeline_run.parent_run_id
    parent_run = (
        step_context.instance.get_run_by_id(parent_run_id) if parent_run_id else None
    )
    return StepRunRef(
        run_config=step_context.run_config,
        pipeline_run=step_context.pipeline_run,
        run_id=step_context.pipeline_run.run_id,
        step_key=step_context.step.key,
        retry_mode=retry_mode,
        recon_pipeline=recon_pipeline,
        prior_attempts_count=prior_attempts_count,
        known_state=step_context.execution_plan.known_state,
        parent_run=parent_run,
    )
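
The package_dir conversion described in the docstring rewrites a FileCodePointer into a ModuleCodePointer. The helper _module_in_package_dir is not shown in this example; the sketch below is a hypothetical stand-in that illustrates the path-to-dotted-module mapping it would need to perform:

import os

def _module_in_package_dir_sketch(python_file, package_dir):
    # Hypothetical illustration only; the real _module_in_package_dir may differ.
    relative = os.path.relpath(os.path.abspath(python_file), os.path.abspath(package_dir))
    without_ext, _ = os.path.splitext(relative)
    return without_ext.replace(os.sep, ".")

# e.g. _module_in_package_dir_sketch("/app/my_pkg/pipelines/repo.py", "/app")
# -> "my_pkg.pipelines.repo"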
Example #6
def test_queued_pipeline_origin_check():

    code_pointer = ModuleCodePointer("fake", "fake")
    fake_pipeline_origin = ExternalPipelineOrigin(
        ExternalRepositoryOrigin(
            InProcessRepositoryLocationOrigin(
                ReconstructableRepository(code_pointer)),
            "foo_repo",
        ),
        "foo",
    )

    fake_code_origin = PipelinePythonOrigin(
        pipeline_name="foo",
        repository_origin=RepositoryPythonOrigin(
            sys.executable,
            code_pointer,
        ),
    )

    PipelineRun(
        status=PipelineRunStatus.QUEUED,
        external_pipeline_origin=fake_pipeline_origin,
        pipeline_code_origin=fake_code_origin,
    )

    with pytest.raises(check.CheckError):
        PipelineRun(status=PipelineRunStatus.QUEUED)

    with pytest.raises(check.CheckError):
        PipelineRun().with_status(PipelineRunStatus.QUEUED)
Example #7
def host_dagit_ui_with_workspace(instance,
                                 workspace,
                                 host,
                                 port,
                                 path_prefix,
                                 port_lookup=True):
    check.inst_param(instance, 'instance', DagsterInstance)
    check.inst_param(workspace, 'workspace', Workspace)

    if len(workspace.repository_location_handles) == 1:
        repository_location_handle = workspace.repository_location_handles[0]

        # Telemetry logic needs to be updated to support multi-repo / gRPC repo locations
        # See https://github.com/dagster-io/dagster/issues/2752
        if (hasattr(repository_location_handle, 'repository_code_pointer_dict')
                and len(repository_location_handle.repository_code_pointer_dict) == 1):
            pointer = next(
                iter(repository_location_handle.repository_code_pointer_dict.values())
            )
            from dagster.core.definitions.reconstructable import ReconstructableRepository

            recon_repo = ReconstructableRepository(pointer)

            log_repo_stats(instance=instance, repo=recon_repo, source='dagit')

    app = create_app_from_workspace(workspace, instance, path_prefix)

    start_server(instance, host, port, path_prefix, app, port_lookup)
Example #8
def recon_repository_from_origin(origin):
    check.inst_param(origin, "origin", RepositoryPythonOrigin)
    return ReconstructableRepository(
        origin.code_pointer,
        origin.container_image,
        origin.executable_path,
        origin.entry_point,
    )
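
A hedged usage sketch for the helper above, following the two-argument RepositoryPythonOrigin construction used in the test in Example #6 (module and function names are placeholders; the origin's optional fields and their defaults differ between Dagster versions):

import sys

origin = RepositoryPythonOrigin(sys.executable, ModuleCodePointer("my_pkg.repo", "define_repo"))
recon_repo = recon_repository_from_origin(origin)
repo_def = recon_repo.get_definition()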
Example #9
    def _recon_repository_from_origin(self, external_repository_origin):
        check.inst_param(
            external_repository_origin, "external_repository_origin", ExternalRepositoryOrigin,
        )

        return ReconstructableRepository(
            self._repository_symbols_and_code_pointers.code_pointers_by_repo_name[
                external_repository_origin.repository_name
            ],
            self._get_current_image(),
        )
Example #10
def step_context_to_step_run_ref(step_context, prior_attempts_count, package_dir=None):
    '''
    Args:
        step_context (SystemStepExecutionContext): The step context.
        prior_attempts_count (int): The number of times this step has been tried before in the same
            pipeline run.
        package_dir (Optional[str]): If set, the pipeline's file code pointer will be converted to
            a module pointer relative to the package root. This enables executing steps in remote
            setups where the package containing the pipeline resides at a different location on the
            filesystem in the remote environment than in the environment executing the plan
            process.

    Returns (StepRunRef):
        A reference to the step.
    '''
    recon_pipeline = step_context.pipeline
    if package_dir:

        if isinstance(recon_pipeline, ReconstructablePipeline) and isinstance(
            recon_pipeline.pointer, FileCodePointer
        ):
            recon_pipeline = ReconstructablePipeline(
                pointer=ModuleCodePointer(
                    _module_in_package_dir(recon_pipeline.pointer.python_file, package_dir),
                    recon_pipeline.pointer.fn_name,
                ),
                frozen_solid_subset=recon_pipeline.frozen_solid_subset,
            )
        elif isinstance(recon_pipeline, ReconstructablePipelineFromRepo) and isinstance(
            recon_pipeline.repository.pointer, FileCodePointer
        ):
            recon_pipeline = ReconstructablePipelineFromRepo(
                repository=ReconstructableRepository(
                    pointer=ModuleCodePointer(
                        _module_in_package_dir(
                            recon_pipeline.repository.pointer.python_file, package_dir
                        ),
                        recon_pipeline.repository.pointer.fn_name,
                    ),
                ),
                pipeline_name=recon_pipeline.pipeline_name,
                frozen_solid_subset=recon_pipeline.frozen_solid_subset,
            )

    return StepRunRef(
        environment_dict=step_context.environment_dict,
        pipeline_run=step_context.pipeline_run,
        run_id=step_context.pipeline_run.run_id,
        step_key=step_context.step.key,
        executor_config=step_context.executor_config,
        recon_pipeline=recon_pipeline,
        prior_attempts_count=prior_attempts_count,
    )
Example #11
    def _recon_repository_from_origin(self, repository_origin):
        check.inst_param(
            repository_origin,
            'repository_origin',
            RepositoryOrigin,
        )

        if isinstance(repository_origin, RepositoryGrpcServerOrigin):
            return ReconstructableRepository(
                self._repository_code_pointer_dict[
                    repository_origin.repository_name])
        return recon_repository_from_origin(repository_origin)
Example #12
    def _recon_repository_from_origin(self, repository_origin):
        check.inst_param(
            repository_origin,
            "repository_origin",
            RepositoryOrigin,
        )

        if isinstance(repository_origin, RepositoryGrpcServerOrigin):
            return ReconstructableRepository(
                self._repository_symbols_and_code_pointers.code_pointers_by_repo_name[
                    repository_origin.repository_name])
        return recon_repository_from_origin(repository_origin)
Example #13
def create_invalid_run(instance, **kwargs):
    create_run_for_test(
        instance,
        external_pipeline_origin=ExternalPipelineOrigin(
            ExternalRepositoryOrigin(
                InProcessRepositoryLocationOrigin(
                    ReconstructableRepository(ModuleCodePointer(
                        "fake", "fake"))),
                "foo",
            ),
            "wrong-pipeline",
        ),
        pipeline_name="wrong-pipeline",
        **kwargs,
    )
Example #14
    def execute_plan(
        self,
        instance,
        external_pipeline,
        run_config,
        pipeline_run,
        step_keys_to_execute,
        retries=None,
    ):
        if (is_repository_location_in_same_python_env(self.location_handle) and
                len(self.location_handle.repository_code_pointer_dict) == 1):
            check.inst_param(instance, 'instance', DagsterInstance)
            check.inst_param(external_pipeline, 'external_pipeline',
                             ExternalPipeline)
            check.dict_param(run_config, 'run_config')
            check.inst_param(pipeline_run, 'pipeline_run', PipelineRun)
            check.opt_list_param(step_keys_to_execute,
                                 'step_keys_to_execute',
                                 of_type=str)

            pointer = next(
                iter(self.location_handle.repository_code_pointer_dict.values())
            )
            recon_repo = ReconstructableRepository(pointer)

            execution_plan = create_execution_plan(
                pipeline=recon_repo.get_reconstructable_pipeline(
                    external_pipeline.name
                ).subset_for_execution_from_existing_pipeline(
                    external_pipeline.solids_to_execute),
                run_config=run_config,
                mode=pipeline_run.mode,
                step_keys_to_execute=step_keys_to_execute,
            )

            execute_plan(
                execution_plan=execution_plan,
                instance=instance,
                pipeline_run=pipeline_run,
                run_config=run_config,
                retries=retries,
            )
        else:
            raise NotImplementedError(
                'execute_plan is currently only supported when the location is a python '
                'environment with the exact same executable and when there is only a single '
                'repository.')
Example #15
def create_app_from_workspace(workspace, instance, path_prefix=''):
    check.inst_param(workspace, 'workspace', Workspace)
    check.inst_param(instance, 'instance', DagsterInstance)
    check.str_param(path_prefix, 'path_prefix')

    if path_prefix:
        if not path_prefix.startswith('/'):
            raise Exception('The path prefix should begin with a leading "/".')
        if path_prefix.endswith('/'):
            raise Exception(
                'The path prefix should not include a trailing "/".')

    warn_if_compute_logs_disabled()

    print('Loading repository...')  # pylint: disable=print-call

    locations = []
    for repository_location_handle in workspace.repository_location_handles:
        if isinstance(repository_location_handle,
                      InProcessRepositoryLocationHandle):
            # will need to change for multi repo
            check.invariant(
                len(repository_location_handle.repository_code_pointer_dict) == 1
            )
            pointer = next(
                iter(repository_location_handle.repository_code_pointer_dict.values())
            )
            recon_repo = ReconstructableRepository(pointer)
            locations.append(InProcessRepositoryLocation(recon_repo))
        elif isinstance(repository_location_handle,
                        PythonEnvRepositoryLocationHandle):
            locations.append(
                PythonEnvRepositoryLocation(repository_location_handle))
        elif isinstance(repository_location_handle,
                        GrpcServerRepositoryLocationHandle):
            locations.append(
                GrpcServerRepositoryLocation(repository_location_handle))
        else:
            check.failed('{} unsupported'.format(repository_location_handle))

    context = DagsterGraphQLContext(instance=instance,
                                    locations=locations,
                                    version=__version__)

    return instantiate_app_with_views(context, path_prefix)
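
As a usage note for the validation above (assuming a workspace and instance as in the function's own parameters), a path_prefix of '/dagit' is accepted, while 'dagit' (missing the leading slash) and '/dagit/' (trailing slash) both raise:

app = create_app_from_workspace(workspace, instance, path_prefix='/dagit')   # ok
create_app_from_workspace(workspace, instance, path_prefix='/dagit/')        # raises Exception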
Example #16
def test_queued_pipeline_origin_check():
    fake_pipeline_origin = ExternalPipelineOrigin(
        ExternalRepositoryOrigin(
            InProcessRepositoryLocationOrigin(
                ReconstructableRepository(ModuleCodePointer("fake", "fake"))),
            "foo_repo",
        ),
        "foo",
    )

    PipelineRun(status=PipelineRunStatus.QUEUED,
                external_pipeline_origin=fake_pipeline_origin)

    with pytest.raises(check.CheckError):
        PipelineRun(status=PipelineRunStatus.QUEUED)

    with pytest.raises(check.CheckError):
        PipelineRun().with_status(PipelineRunStatus.QUEUED)
Example #17
    def __init__(self, loadable_target_origin, entry_point):
        self._loadable_target_origin = loadable_target_origin

        self._code_pointers_by_repo_name = {}
        self._recon_repos_by_name = {}
        self._loadable_repository_symbols = []

        if not loadable_target_origin:
            return

        loadable_targets = get_loadable_targets(
            loadable_target_origin.python_file,
            loadable_target_origin.module_name,
            loadable_target_origin.package_name,
            loadable_target_origin.working_directory,
            loadable_target_origin.attribute,
        )
        for loadable_target in loadable_targets:
            pointer = _get_code_pointer(loadable_target_origin, loadable_target)
            recon_repo = ReconstructableRepository(
                pointer,
                _get_current_image(),
                sys.executable,
                entry_point=entry_point,
            )
            repo_def = recon_repo.get_definition()
            # force load of all lazy constructed jobs/pipelines
            repo_def.get_all_pipelines()

            self._code_pointers_by_repo_name[repo_def.name] = pointer
            self._recon_repos_by_name[repo_def.name] = recon_repo
            self._loadable_repository_symbols.append(
                LoadableRepositorySymbol(
                    attribute=loadable_target.attribute,
                    repository_name=repo_def.name,
                )
            )
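
The pointer construction above goes through _get_code_pointer, which is not shown in this example. A hypothetical sketch of the idea, covering only the file- and module-based cases (the real helper also handles package targets and working directories, and its signature may differ):

from dagster.core.code_pointer import FileCodePointer, ModuleCodePointer

def _get_code_pointer_sketch(loadable_target_origin, loadable_target):
    # Hypothetical stand-in: choose a pointer type based on which field the
    # origin populates, pointing at the discovered repository attribute.
    if loadable_target_origin.python_file:
        return FileCodePointer(
            python_file=loadable_target_origin.python_file,
            fn_name=loadable_target.attribute,
        )
    return ModuleCodePointer(
        loadable_target_origin.module_name,
        loadable_target.attribute,
    )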
Example #18
def recon_repo_from_external_repo(external_repo):
    return ReconstructableRepository(external_repo.get_python_origin().code_pointer)
Example #19
def step_context_to_step_run_ref(step_context,
                                 prior_attempts_count,
                                 package_dir=None):
    '''
    Args:
        step_context (SystemStepExecutionContext): The step context.
        prior_attempts_count (int): The number of times this step has been tried before in the same
            pipeline run.
        package_dir (Optional[str]): If set, the pipeline's file code pointer will be converted to
            a module pointer relative to the package root. This enables executing steps in remote
            setups where the package containing the pipeline resides at a different location on the
            filesystem in the remote environment than in the environment executing the plan
            process.

    Returns (StepRunRef):
        A reference to the step.
    '''

    check.inst_param(step_context, 'step_context', SystemStepExecutionContext)

    # This is a bit goofy right now, as the relationship between
    # step launcher and the executor is a little weird.
    #
    # As far as I can tell, the only goal of interacting with the executor here
    # is for the step launcher to inherit the retry policy. The step launcher
    # actually assumes that a retry policy exists (hence the invariants below).
    # However, this isn't a formal requirement, so it can break. E.g., right
    # now, using a dask executor with the step launcher would fail.
    #
    # Once migration is complete, I think we should add a retries property
    # to the Executor abc and make it optional. Then change this to handle
    # the "no retry" case
    check.param_invariant(
        hasattr(step_context.executor, 'retries'),
        'step_context',
        'Executor must have retries property',
    )
    check.param_invariant(
        isinstance(step_context.executor.retries, Retries),
        'step_context',
        'Executor retries property must be of type Retries. Object was {}'.format(
            step_context.executor),
    )
    check.int_param(prior_attempts_count, 'prior_attempts_count')

    retries = step_context.executor.retries

    recon_pipeline = step_context.pipeline
    if package_dir:
        if isinstance(recon_pipeline, ReconstructablePipeline) and isinstance(
                recon_pipeline.repository.pointer, FileCodePointer):
            recon_pipeline = ReconstructablePipeline(
                repository=ReconstructableRepository(pointer=ModuleCodePointer(
                    _module_in_package_dir(
                        recon_pipeline.repository.pointer.python_file,
                        package_dir),
                    recon_pipeline.repository.pointer.fn_name,
                ), ),
                pipeline_name=recon_pipeline.pipeline_name,
                solids_to_execute=recon_pipeline.solids_to_execute,
            )

    return StepRunRef(
        run_config=step_context.run_config,
        pipeline_run=step_context.pipeline_run,
        run_id=step_context.pipeline_run.run_id,
        step_key=step_context.step.key,
        retries=retries,
        recon_pipeline=recon_pipeline,
        prior_attempts_count=prior_attempts_count,
    )
Example #20
def recon_pipeline_from_origin(origin):
    check.inst_param(origin, "origin", PipelinePythonOrigin)
    recon_repo = ReconstructableRepository(origin.get_repo_pointer())
    return recon_repo.get_reconstructable_pipeline(origin.pipeline_name)
Example #21
def recon_repository_from_origin(origin):
    check.inst_param(origin, "origin", RepositoryOrigin)
    return ReconstructableRepository(origin.code_pointer)
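
Putting Examples #20 and #21 together, a brief end-to-end sketch (the module, function, and pipeline names are placeholders):

from dagster.core.code_pointer import ModuleCodePointer

recon_repo = ReconstructableRepository(ModuleCodePointer("my_pkg.repo", "define_repo"))
recon_pipeline = recon_repo.get_reconstructable_pipeline("my_pipeline")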