Example #1
def _validate_config(config_type, config_value, stack):
    check.inst_param(config_type, 'config_type', ConfigType)
    check.inst_param(stack, 'stack', EvaluationStack)

    if config_type.is_scalar:
        if not config_type.is_config_scalar_valid(config_value):
            yield EvaluationError(
                stack=stack,
                reason=DagsterEvaluationErrorReason.RUNTIME_TYPE_MISMATCH,
                message='Value {value} is not valid for type {type_name}'.format(
                    value=config_value, type_name=config_type.name),
                error_data=RuntimeMismatchErrorData(
                    config_type=config_type, value_rep=repr(config_value)),
            )
        return

    errors = []

    if config_type.is_any:
        # no-op: we're safe
        return
    elif config_type.is_selector:
        errors = validate_selector_config_value(config_type, config_value,
                                                stack)
    elif config_type.is_composite:
        errors = validate_composite_config_value(config_type, config_value,
                                                 stack)
    elif config_type.is_list:
        errors = validate_list_value(config_type, config_value, stack)
    elif config_type.is_nullable:
        errors = ([] if config_value is None else _validate_config(
            config_type.inner_type, config_value, stack))
    elif config_type.is_enum:
        errors = validate_enum_value(config_type, config_value, stack)
    else:
        check.failed('Unsupported type {name}'.format(name=config_type.name))

    for error in errors:
        yield error
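A minimal consumption sketch (the config type, value, and stack names below are hypothetical stand-ins): because _validate_config is a generator, callers typically materialize it to decide whether validation passed.

errors = list(_validate_config(some_config_type, some_config_value, some_stack))
if errors:
    # each item is an EvaluationError carrying stack, reason, message, and error_data
    raise Exception('\n'.join(error.message for error in errors))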
Example #2
    def cp_object(self, src, dst):
        check.invariant(not os.path.exists(dst),
                        'Path already exists {}'.format(dst))

        # Ensure output path exists
        mkdir_p(os.path.dirname(dst))

        if os.path.isfile(src):
            shutil.copy(src, dst)
        elif os.path.isdir(src):
            shutil.copytree(src, dst)
        else:
            check.failed('should not get here')

        return ObjectStoreOperation(
            op=ObjectStoreOperationType.CP_OBJECT,
            key=src,
            dest_key=dst,
            obj=None,
            serialization_strategy_name=None,
            object_store_name=self.name,
        )
Example #3
def construct_run_storage(run_storage_mode):
    '''
    Construct the run storage for this pipeline. Our rules are the following:

    If the RunConfig has a storage_mode provided, we use that.

    Then we fall back to environment config.

    If there is no config, we default to in memory storage. This is mostly so
    that tests default to in-memory.
    '''
    check.inst_param(run_storage_mode, 'run_storage_mode', RunStorageMode)

    if run_storage_mode == RunStorageMode.FILESYSTEM:
        return FileSystemRunStorage()
    elif run_storage_mode == RunStorageMode.IN_MEMORY:
        return InMemoryRunStorage()
    elif run_storage_mode == RunStorageMode.S3:
        # TODO: Revisit whether we want to use S3 run storage
        return FileSystemRunStorage()
    else:
        check.failed('Unexpected enum {}'.format(run_storage_mode))
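A brief usage sketch, assuming the RunStorageMode enum values referenced above are importable in the same context:

# In-memory storage, the default used for tests per the docstring
run_storage = construct_run_storage(RunStorageMode.IN_MEMORY)

# Filesystem storage (also currently returned for RunStorageMode.S3, per the TODO above)
run_storage = construct_run_storage(RunStorageMode.FILESYSTEM)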
Example #4
    def rehydrate(self, **constructor_kwargs):
        from dagster.core.errors import DagsterInvalidConfigError
        from dagster.core.types.evaluator import evaluate_config

        try:
            module = importlib.import_module(self.module_name)
        except seven.ModuleNotFoundError:
            check.failed(
                'Couldn\'t import module {module_name} when attempting to rehydrate the '
                'configurable class {configurable_class}'.format(
                    module_name=self.module_name,
                    configurable_class=self.module_name + '.' +
                    self.class_name,
                ))
        try:
            klass = getattr(module, self.class_name)
        except AttributeError:
            check.failed(
                'Couldn\'t find class {class_name} in module when attempting to rehydrate the '
                'configurable class {configurable_class}'.format(
                    class_name=self.class_name,
                    configurable_class=self.module_name + '.' +
                    self.class_name,
                ))

        if not issubclass(klass, ConfigurableClass):
            raise check.CheckError(
                klass,
                'class {class_name} in module {module_name}'.format(
                    class_name=self.class_name, module_name=self.module_name),
                ConfigurableClass,
            )

        config_dict = yaml.load(self.config_yaml)
        result = evaluate_config(klass.config_type().inst(), config_dict)
        if not result.success:
            raise DagsterInvalidConfigError(None, result.errors, config_dict)
        return klass.from_config_value(self, result.value,
                                       **constructor_kwargs)
Example #5
    def from_current_module(
        resource_defs: Optional[Mapping[str, ResourceDefinition]] = None,
        executor_def: Optional[ExecutorDefinition] = None,
    ) -> "AssetGroup":
        """
        Constructs an AssetGroup that includes all asset definitions and source assets in the module
        where this is called from.

        Args:
            resource_defs (Optional[Mapping[str, ResourceDefinition]]): A dictionary of resource
                definitions to include on the returned asset group.
            executor_def (Optional[ExecutorDefinition]): An executor to include on the returned
                asset group.

        Returns:
            AssetGroup: An asset group with all the assets defined in the module.
        """
        caller = inspect.stack()[1]
        module = inspect.getmodule(caller[0])
        if module is None:
            check.failed("Could not find a module for the caller")
        return AssetGroup.from_modules([module], resource_defs, executor_def)
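A minimal sketch of calling this from a module that defines assets; the @asset decorator, the module contents, and the top-level dagster imports are assumptions about the surrounding Dagster version:

# my_assets.py (hypothetical module)
from dagster import AssetGroup, asset  # assumed importable at this version

@asset
def my_asset():
    return 1

# Collects my_asset (and any source assets) defined in this same module
group = AssetGroup.from_current_module()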
Example #6
def get_upstream_partitions_for_partition_range(
    downstream_assets_def: AssetsDefinition,
    upstream_partitions_def: PartitionsDefinition,
    upstream_asset_key: AssetKey,
    downstream_partition_key_range: PartitionKeyRange,
) -> PartitionKeyRange:
    """Returns the range of partition keys in the upstream asset that include data necessary
    to compute the contents of the given partition key range in the downstream asset.
    """

    if downstream_assets_def.partitions_def is None:
        check.failed("downstream asset is not partitioned")

    if upstream_partitions_def is None:
        check.failed("upstream asset is not partitioned")

    downstream_partition_mapping = downstream_assets_def.get_partition_mapping(upstream_asset_key)
    return downstream_partition_mapping.get_upstream_partitions_for_partition_range(
        downstream_partition_key_range,
        downstream_assets_def.partitions_def,
        upstream_partitions_def,
    )
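A hedged call sketch: the assets definition and partitions definition below are hypothetical placeholders, and the key range assumes daily string partition keys.

upstream_range = get_upstream_partitions_for_partition_range(
    downstream_assets_def=downstream_assets_def,      # hypothetical AssetsDefinition
    upstream_partitions_def=upstream_partitions_def,  # hypothetical PartitionsDefinition
    upstream_asset_key=AssetKey("upstream_asset"),
    downstream_partition_key_range=PartitionKeyRange("2022-01-01", "2022-01-07"),
)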
Example #7
def _recurse_in_to_array(context, config_value):
    check.invariant(context.config_type.kind == ConfigTypeKind.ARRAY,
                    'Unexpected non array type')

    if not config_value:
        return EvaluateValueResult.for_value([])

    if context.config_type.inner_type.kind != ConfigTypeKind.NONEABLE:
        if any((cv is None for cv in config_value)):
            check.failed('Null array member not caught in validation')

    results = [
        _recursively_process_config(context.for_array(idx), item)
        for idx, item in enumerate(config_value)
    ]

    errors = [result.error for result in results if not result.success]
    if errors:
        return EvaluateValueResult.for_errors(errors)

    return EvaluateValueResult.for_value(
        frozenlist([result.value for result in results]))
Example #8
    def _get_solid_deps_and_defs(self,
                                 assets_to_update,
                                 include_nothing_input=False):
        solid_defs = {}
        for asset in assets_to_update:
            if asset.computation:
                solid_defs[asset.path] = self.get_computed_asset_solid_def(
                    asset, assets_to_update, include_nothing_input)
            else:
                check.failed(
                    "All elements of assets_to_update must have computations")

        solid_deps = {
            solid_defs[asset.path].name: {
                solid_defs[dep.asset.path].name:
                DependencyDefinition(solid_defs[dep.asset.path].name)
                for dep in asset.computation.deps.values()
                if dep.asset in assets_to_update
            }
            for asset in assets_to_update if asset.computation
        }
        return solid_defs, solid_deps
Example #9
    def map(self, fn):
        check.is_callable(fn)
        result = fn(InvokedSolidOutputHandle(self.solid_name, self.output_name))
        if isinstance(result, InvokedSolidOutputHandle):
            return InvokedSolidDynamicOutputWrapper(result.solid_name, result.output_name)
        elif isinstance(result, tuple) and all(
            map(lambda item: isinstance(item, InvokedSolidOutputHandle), result)
        ):
            return tuple(
                map(
                    lambda item: InvokedSolidDynamicOutputWrapper(
                        item.solid_name, item.output_name
                    ),
                    result,
                )
            )
        elif result is None:
            return None
        else:
            check.failed(
                f"Could not handle output from map function invoked on "
                f"{self.solid_name}:{self.output_name}, received {result}"
            )
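A hedged sketch of how this map is typically driven from a composition body; fanned_out and process are hypothetical names for a dynamic-output solid invocation and a downstream solid:

# Inside a composition function (hypothetical solids):
wrapper = fanned_out()                            # an InvokedSolidDynamicOutputWrapper
mapped = wrapper.map(lambda item: process(item))  # item is an InvokedSolidOutputHandle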
Example #10
def invoke_executor_on_plan(pipeline_context, execution_plan, step_keys_to_execute=None):
    if step_keys_to_execute:
        for step_key in step_keys_to_execute:
            if not execution_plan.has_step(step_key):
                raise DagsterExecutionStepNotFoundError(step_key=step_key)

    if isinstance(pipeline_context.executor_config, InProcessExecutorConfig):
        step_events_gen = start_inprocess_executor(
            pipeline_context,
            execution_plan,
            pipeline_context.intermediates_manager,
            step_keys_to_execute,
        )
    elif isinstance(pipeline_context.executor_config, MultiprocessExecutorConfig):
        step_events_gen = multiprocess_execute_plan(
            pipeline_context, execution_plan, step_keys_to_execute
        )
    else:
        check.failed('Unsupported config {}'.format(pipeline_context.executor_config))

    for step_event in step_events_gen:
        yield step_event
Example #11
def create_execution_plan_from_steps(pipeline_context, steps):
    check.inst_param(pipeline_context, 'pipeline_context',
                     PipelineExecutionContext)
    check.list_param(steps, 'steps', of_type=ExecutionStep)

    step_dict = {step.key: step for step in steps}
    deps = {step.key: set() for step in steps}

    seen_keys = set()

    for step in steps:
        if step.key in seen_keys:
            keys = [s.key for s in steps]
            check.failed('Duplicated key {key}. Full list: {key_list}.'.format(
                key=step.key, key_list=keys))

        seen_keys.add(step.key)

        for step_input in step.step_inputs:
            deps[step.key].add(step_input.prev_output_handle.step.key)

    return ExecutionPlan(pipeline_context.pipeline_def, step_dict, deps)
Example #12
def worker_start_command(
    name,
    config_yaml,
    background,
    queue,
    includes,
    loglevel,
    additional_args,
):
    loglevel_args = ['--loglevel', loglevel]

    if len(queue) > 4:
        check.failed(
            'Can\'t start a dagster_celery worker that listens on more than four queues, due to a '
            'bug in Celery 4.')
    queue_args = []
    for q in queue:
        queue_args += ['-Q', q]

    includes_args = ['-I', ','.join(includes)] if includes else []

    pythonpath = get_config_dir(config_yaml)
    subprocess_args = ([
        'celery', '-A', 'dagster_celery.tasks', 'worker',
        '--prefetch-multiplier=1'
    ] + loglevel_args + queue_args + includes_args +
                       ['-n', get_worker_name(name)] + list(additional_args))

    env = os.environ.copy()
    if pythonpath is not None:
        env['PYTHONPATH'] = '{existing_pythonpath}{pythonpath}:'.format(
            existing_pythonpath=env.get('PYTHONPATH', ''),
            pythonpath=pythonpath)

    if background:
        launch_background_worker(subprocess_args, env=env)
    else:
        return subprocess.check_call(subprocess_args, env=env)
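For illustration, a hedged trace of the subprocess_args this builds for a single queue (worker name resolution via get_worker_name left symbolic):

# With name='example', queue=('dagster',), includes=(), loglevel='INFO', additional_args=[],
# subprocess_args is assembled, in order, as roughly:
expected_args = [
    'celery', '-A', 'dagster_celery.tasks', 'worker', '--prefetch-multiplier=1',
    '--loglevel', 'INFO',
    '-Q', 'dagster',
    '-n', get_worker_name('example'),
]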
Example #13
def worker_start_command(
    name, config_yaml, background, queue, includes, loglevel, app, additional_args,
):
    check.invariant(app, "App must be specified. E.g. dagster_celery.app or dagster_celery_k8s.app")

    loglevel_args = ["--loglevel", loglevel]

    if len(queue) > 4:
        check.failed(
            "Can't start a dagster_celery worker that listens on more than four queues, due to a "
            "bug in Celery 4."
        )
    queue_args = []
    for q in queue:
        queue_args += ["-Q", q]

    includes_args = ["-I", ",".join(includes)] if includes else []

    pythonpath = get_config_dir(config_yaml)
    subprocess_args = (
        ["celery", "-A", app, "worker", "--prefetch-multiplier=1"]
        + loglevel_args
        + queue_args
        + includes_args
        + ["-n", get_worker_name(name)]
        + list(additional_args)
    )

    env = os.environ.copy()
    if pythonpath is not None:
        env["PYTHONPATH"] = "{existing_pythonpath}{pythonpath}:".format(
            existing_pythonpath=env.get("PYTHONPATH", ""), pythonpath=pythonpath
        )

    if background:
        launch_background_worker(subprocess_args, env=env)
    else:
        return subprocess.check_call(subprocess_args, env=env)
Example #14
    def __new__(cls, name, invocations, output_mapping_dict):

        dep_dict = {}
        solid_def_dict = {}
        input_mappings = []

        for invocation in invocations.values():
            def_name = invocation.solid_def.name
            if def_name in solid_def_dict and solid_def_dict[def_name] is not invocation.solid_def:
                check.failed(
                    'Detected conflicting solid definitions with the same name "{name}"'.format(
                        name=def_name
                    )
                )
            solid_def_dict[def_name] = invocation.solid_def

            deps = {}
            for input_name, node in invocation.input_bindings.items():
                if isinstance(node, InvokedSolidOutputHandle):
                    solid_name, output_name = node
                    deps[input_name] = DependencyDefinition(solid_name, output_name)
                elif isinstance(node, list) and all(
                    map(lambda item: isinstance(item, InvokedSolidOutputHandle), node)
                ):
                    deps[input_name] = MultiDependencyDefinition(
                        [DependencyDefinition(call.solid_name, call.output_name) for call in node]
                    )
                else:
                    check.failed('Unexpected input binding - got {node}'.format(node=node))

            dep_dict[SolidInvocation(invocation.solid_def.name, invocation.solid_name)] = deps

            for input_name, node in invocation.input_mappings.items():
                input_mappings.append(node.input_def.mapping_to(invocation.solid_name, input_name))

        return super(CompleteCompositionContext, cls).__new__(
            cls, name, list(solid_def_dict.values()), dep_dict, input_mappings, output_mapping_dict
        )
Example #15
def _evaluate_config(context):
    check.inst_param(context, 'context', TraversalContext)

    if context.config_type.is_scalar:
        if not context.config_type.is_config_scalar_valid(
                context.config_value):
            return EvaluateValueResult.for_error(create_scalar_error(context))
        return EvaluateValueResult.for_value(context.config_value)

    elif context.config_type.is_any:
        return EvaluateValueResult.for_value(context.config_value)  # Any accepts any value unchanged

    elif context.config_type.is_selector:
        return evaluate_selector_config(context)

    elif context.config_type.is_composite:
        return evaluate_composite_config(context)

    elif context.config_type.is_list:
        return evaluate_list_config(context)

    elif context.config_type.is_set:
        return evaluate_set_config(context)

    elif context.config_type.is_tuple:
        return evaluate_tuple_config(context)

    elif context.config_type.is_nullable:
        if context.config_value is not None:
            return _evaluate_config(context.for_nullable_inner_type())
        return EvaluateValueResult.for_value(None)

    elif context.config_type.is_enum:
        return evaluate_enum_config(context)

    else:
        check.failed(
            'Unsupported type {name}'.format(name=context.config_type.name))
Example #16
def _do_print(config_schema_snapshot, config_type_key, printer, with_lines=True):
    line_break_fn = printer.line if with_lines else lambda string: printer.append(string + " ")

    config_type_snap = config_schema_snapshot.get_config_snap(config_type_key)
    kind = config_type_snap.kind

    if kind == ConfigTypeKind.ARRAY:
        printer.append("[")
        _do_print(config_schema_snapshot, config_type_snap.inner_type_key, printer)
        printer.append("]")
    elif kind == ConfigTypeKind.NONEABLE:
        _do_print(config_schema_snapshot, config_type_snap.inner_type_key, printer)
        printer.append("?")
    elif kind == ConfigTypeKind.SCALAR_UNION:
        printer.append("(")
        _do_print(config_schema_snapshot, config_type_snap.scalar_type_key, printer)
        printer.append(" | ")
        _do_print(config_schema_snapshot, config_type_snap.non_scalar_type_key, printer)
        printer.append(")")
    elif ConfigTypeKind.has_fields(kind):
        line_break_fn("{")
        with printer.with_indent():
            for field_snap in sorted(config_type_snap.fields):
                name = field_snap.name
                if field_snap.is_required:
                    printer.append(name + ": ")
                else:
                    printer.append(name + "?: ")
                _do_print(
                    config_schema_snapshot, field_snap.type_key, printer, with_lines=with_lines,
                )
                line_break_fn("")

        printer.append("}")
    elif config_type_snap.given_name:
        printer.append(config_type_snap.given_name)
    else:
        check.failed("not supported")
Example #17
def single_resource_event_generator(context, resource_name, resource_def):
    try:
        msg_fn = lambda: "Error executing resource_fn on ResourceDefinition {name}".format(
            name=resource_name)
        with user_code_error_boundary(DagsterResourceFunctionError, msg_fn):
            try:
                with time_execution_scope() as timer_result:
                    resource_or_gen = (
                        resource_def.resource_fn(context)
                        if is_context_provided(get_function_params(resource_def.resource_fn))
                        else resource_def.resource_fn()
                    )
                    # Flag for whether resource is generator. This is used to ensure that teardown
                    # occurs when resources are initialized out of execution.
                    is_gen = inspect.isgenerator(resource_or_gen)
                    gen = ensure_gen(resource_or_gen)
                    resource = next(gen)
                resource = InitializedResource(
                    resource, format_duration(timer_result.millis), is_gen)
            except StopIteration:
                check.failed(
                    "Resource generator {name} must yield one item.".format(
                        name=resource_name))

        yield resource

    except DagsterUserCodeExecutionError as dagster_user_error:
        raise dagster_user_error

    with user_code_error_boundary(DagsterResourceFunctionError, msg_fn):
        try:
            next(gen)
        except StopIteration:
            pass
        else:
            check.failed(
                "Resource generator {name} yielded more than one item.".format(
                    name=resource_name))
Example #18
def execute_step_out_of_process(
    multiproc_ctx,
    pipeline,
    step_context,
    step,
    errors,
    term_events,
    retries,
    known_state,
):
    command = MultiprocessExecutorChildProcessCommand(
        run_config=step_context.run_config,
        pipeline_run=step_context.pipeline_run,
        step_key=step.key,
        instance_ref=step_context.instance.get_ref(),
        term_event=term_events[step.key],
        recon_pipeline=pipeline,
        retry_mode=retries,
        known_state=known_state,
    )

    yield DagsterEvent.engine_event(
        step_context,
        "Launching subprocess for {}".format(step.key),
        EngineEventData(marker_start=DELEGATE_MARKER),
        step_handle=step.handle,
    )

    for ret in execute_child_process_command(multiproc_ctx, command):
        if ret is None or isinstance(ret, DagsterEvent):
            yield ret
        elif isinstance(ret, ChildProcessEvent):
            if isinstance(ret, ChildProcessSystemErrorEvent):
                errors[ret.pid] = ret.error_info
        else:
            check.failed(
                "Unexpected return value from child process {}".format(
                    type(ret)))
Example #19
def _deps_by_arg_name(input_assets, fn):
    '''
    Args:
        input_assets (Optional[Union[List[Asset], Dict[str, Asset]]])
        fn (Callable)

    Returns (Dict[str, AssetDependency])
    '''
    kwarg_types = _infer_kwarg_types(fn)
    if isinstance(input_assets, list):
        check.invariant(
            len(kwarg_types) == len(input_assets),
            'For {fn_name}, input_assets length "{input_assets_len}" must match number of '
            'keyword args "{num_kwargs}"'.format(
                fn_name=fn.__name__,
                input_assets_len=len(input_assets),
                num_kwargs=len(kwarg_types),
            ),
        )
        return {
            kwarg: AssetDependency(input_asset, kwarg_types[kwarg])
            for kwarg, input_asset in zip(kwarg_types.keys(), input_assets)
        }
    elif isinstance(input_assets, dict):
        check.invariant(
            kwarg_types.keys() == input_assets.keys(),
            'input_assets keys {kwarg_deps_keys} must match keyword args {kwargs}'
            .format(
                kwarg_deps_keys=input_assets.keys(),
                kwargs=kwarg_types.keys(),
            ),
        )
        return {
            kwarg: AssetDependency(input_assets[kwarg], kwarg_types[kwarg])
            for kwarg in kwarg_types.keys()
        }
    else:
        check.failed('input_assets must be a list or a dict')
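A hedged sketch of the two accepted shapes; the assets and the function below are hypothetical, and _infer_kwarg_types is assumed to read the keyword-argument annotations of fn (annotations elided here):

def combine(orders, customers):  # hypothetical computation fn
    ...

# List form: zipped positionally against the keyword args of `combine`
deps = _deps_by_arg_name([orders_asset, customers_asset], combine)

# Dict form: keys must match the keyword arg names exactly
deps = _deps_by_arg_name({'orders': orders_asset, 'customers': customers_asset}, combine)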
Example #20
    def execute_graphql_subscription(
        self,
        websocket: WebSocket,
        operation_id: str,
        query: str,
        variables: Optional[Dict[str, Any]],
        operation_name: Optional[str],
    ) -> Tuple[Optional[Task], Optional[Dict[str, Any]]]:
        request_context = self.make_request_context(websocket)
        try:
            async_result = self._graphql_schema.execute(
                query,
                variables=variables,
                operation_name=operation_name,
                context=request_context,
                allow_subscriptions=True,
            )
        except GraphQLError as error:
            error_payload = format_graphql_error(error)
            return None, error_payload

        if isinstance(async_result, ExecutionResult):
            if not async_result.errors:
                check.failed(
                    f"Only expect non-async result on error, got {async_result}"
                )
            error_payload = format_graphql_error(
                async_result.errors[0])  # type: ignore
            return None, error_payload

        # in the future we should get back async gen directly, back compat for now
        disposable, async_gen = _disposable_and_async_gen_from_obs(
            async_result, get_event_loop())
        task = get_event_loop().create_task(
            _handle_async_results(async_gen, operation_id, websocket))
        task.add_done_callback(lambda _: disposable.dispose())

        return task, None
Example #21
    def submit_run(self, context: SubmitRunContext) -> PipelineRun:
        pipeline_run = context.pipeline_run
        check.invariant(pipeline_run.status == PipelineRunStatus.NOT_STARTED)

        enqueued_event = DagsterEvent(
            event_type_value=DagsterEventType.PIPELINE_ENQUEUED.value,
            pipeline_name=pipeline_run.pipeline_name,
        )
        event_record = EventLogEntry(
            user_message="",
            level=logging.INFO,
            pipeline_name=pipeline_run.pipeline_name,
            run_id=pipeline_run.run_id,
            error_info=None,
            timestamp=time.time(),
            dagster_event=enqueued_event,
        )
        self._instance.handle_new_event(event_record)

        run = self._instance.get_run_by_id(pipeline_run.run_id)
        if run is None:
            check.failed(f"Failed to reload run {pipeline_run.run_id}")
        return run
Example #22
def get_loadable_targets(python_file, module_name, working_directory,
                         attribute):
    from dagster.cli.workspace.autodiscovery import (
        LoadableTarget,
        loadable_targets_from_python_file,
        loadable_targets_from_python_module,
    )

    if python_file:
        if attribute:
            return [
                LoadableTarget(
                    attribute,
                    load_def_in_python_file(python_file, attribute, working_directory),
                )
            ]
        return loadable_targets_from_python_file(python_file, working_directory)
    elif module_name:
        if attribute:
            return [LoadableTarget(attribute, load_def_in_module(module_name, attribute))]
        return loadable_targets_from_python_module(module_name)
    else:
        check.failed("invalid")
Example #23
    def __new__(
        cls,
        dependencies: List[Union[DependencyDefinition, Type["MappedInputPlaceholder"]]],
    ):
        from .composition import MappedInputPlaceholder

        deps = check.list_param(dependencies, "dependencies")
        seen = {}
        for dep in deps:
            if isinstance(dep, DependencyDefinition):
                key = dep.solid + ":" + dep.output
                if key in seen:
                    raise DagsterInvalidDefinitionError(
                        'Duplicate dependencies on node "{dep.solid}" output "{dep.output}" '
                        "used in the same MultiDependencyDefinition.".format(dep=dep)
                    )
                seen[key] = True
            elif dep is MappedInputPlaceholder:
                pass
            else:
                check.failed("Unexpected dependencies entry {}".format(dep))

        return super(MultiDependencyDefinition, cls).__new__(cls, deps)
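A brief usage sketch, assuming solids named "solid_a" and "solid_b" that each define a "result" output; the top-level dagster imports are assumptions:

from dagster import DependencyDefinition, MultiDependencyDefinition  # assumed exports

multi_dep = MultiDependencyDefinition(
    [
        DependencyDefinition('solid_a', 'result'),
        DependencyDefinition('solid_b', 'result'),
        # MappedInputPlaceholder (the relative import above) may also appear here to mark
        # a fan-in slot that is filled by a composition input mapping.
    ]
)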
Example #24
    def from_results(context, results):
        check.inst_param(context, 'context', RuntimeExecutionContext)
        results = check.list_param(results, 'results', StepResult)
        if results:
            step_results_by_kind = defaultdict(list)

            solid = None
            for result in results:
                if solid is None:
                    solid = result.step.solid
                check.invariant(result.step.solid is solid,
                                'Must all be from same solid')

            for result in results:
                step_results_by_kind[result.kind].append(result)

            return SolidExecutionResult(
                context=context,
                solid=results[0].step.solid,
                step_results_by_kind=dict(step_results_by_kind),
            )
        else:
            check.failed("Cannot create SolidExecutionResult from empty list")
Example #25
    def from_step_events(pipeline_context, step_events):
        check.inst_param(pipeline_context, 'pipeline_context',
                         PipelineExecutionContext)
        step_events = check.list_param(step_events, 'step_events',
                                       ExecutionStepEvent)
        if step_events:
            step_events_by_kind = defaultdict(list)

            solid = None
            for result in step_events:
                if solid is None:
                    solid = result.step.solid
                check.invariant(result.step.solid is solid,
                                'Must all be from same solid')

            for result in step_events:
                step_events_by_kind[result.kind].append(result)

            return SolidExecutionResult(
                solid=step_events[0].step.solid,
                step_events_by_kind=dict(step_events_by_kind))
        else:
            check.failed("Cannot create SolidExecutionResult from empty list")
Example #26
def execute_plan_iterator(
    execution_plan, environment_dict=None, run_config=None, step_keys_to_execute=None, instance=None
):
    check.inst_param(execution_plan, 'execution_plan', ExecutionPlan)
    environment_dict = check.opt_dict_param(environment_dict, 'environment_dict')
    run_config = check_run_config_param(run_config, execution_plan.pipeline_def)
    check.opt_list_param(step_keys_to_execute, 'step_keys_to_execute', of_type=str)
    instance = check.inst_param(instance, 'instance', DagsterInstance)

    step_keys_to_execute = _resolve_step_keys(execution_plan, step_keys_to_execute)

    with scoped_pipeline_context(
        execution_plan.pipeline_def, environment_dict, run_config, instance
    ) as pipeline_context:

        return _steps_execution_iterator(
            pipeline_context,
            execution_plan=execution_plan,
            run_config=run_config,
            step_keys_to_execute=step_keys_to_execute,
        )

    check.failed('Unexpected state, should be unreachable')
Example #27
def resolve_to_runtime_type(dagster_type):
    # circular dep
    from .decorator import is_runtime_type_decorated_klass, get_runtime_type_on_decorated_klass
    from .mapping import remap_python_type

    dagster_type = remap_python_type(dagster_type)

    check_dagster_type_param(dagster_type, 'dagster_type', RuntimeType)

    if dagster_type is None:
        return Any.inst()
    if BuiltinEnum.contains(dagster_type):
        return RuntimeType.from_builtin_enum(dagster_type)
    if isinstance(dagster_type, WrappingListType):
        return resolve_to_runtime_list(dagster_type)
    if isinstance(dagster_type, WrappingNullableType):
        return resolve_to_runtime_nullable(dagster_type)
    if is_runtime_type_decorated_klass(dagster_type):
        return get_runtime_type_on_decorated_klass(dagster_type)
    if issubclass(dagster_type, RuntimeType):
        return dagster_type.inst()

    check.failed('should not reach')
Example #28
    def execute_step_out_of_process(self, step_context, step, errors,
                                    term_events):
        command = InProcessExecutorChildProcessCommand(
            run_config=step_context.run_config,
            pipeline_run=step_context.pipeline_run,
            step_key=step.key,
            instance_ref=step_context.instance.get_ref(),
            term_event=term_events[step.key],
            recon_pipeline=self.pipeline,
            retries=self.retries,
        )

        yield DagsterEvent.engine_event(
            step_context,
            "Launching subprocess for {}".format(step.key),
            EngineEventData(marker_start=DELEGATE_MARKER),
            step_key=step.key,
        )

        for ret in execute_child_process_command(command):
            if ret is None or isinstance(ret, DagsterEvent):
                yield ret
            elif isinstance(ret, ChildProcessEvent):
                if isinstance(ret, ChildProcessSystemErrorEvent):
                    errors[ret.pid] = ret.error_info
            elif isinstance(ret, KeyboardInterrupt):
                yield DagsterEvent.engine_event(
                    step_context,
                    "Multiprocess engine: received KeyboardInterrupt - forwarding to active child processes",
                    EngineEventData.interrupted(list(term_events.keys())),
                )
                for term_event in term_events.values():
                    term_event.set()
            else:
                check.failed(
                    "Unexpected return value from child process {}".format(
                        type(ret)))
Example #29
    def build_pipeline_definition(self, name, assets_to_update):
        solid_defs = {}
        for asset in assets_to_update:
            if asset.computation:
                for mode in self._modes_by_name.values():
                    self.check_has_policy(
                        mode, asset.computation.output_in_memory_type,
                        asset.storage_key)

                for mode in self._modes_by_name.values():
                    for dep in asset.computation.deps.values():
                        self.check_has_policy(mode, dep.in_memory_type,
                                              dep.asset.storage_key)

                solid_defs[asset.path] = self.get_computed_asset_solid_def(
                    asset, assets_to_update)
            else:
                check.failed(
                    'All elements of assets_to_update must have computations')

        solid_deps = {
            solid_defs[asset.path].name: {
                solid_defs[dep.asset.path].name:
                DependencyDefinition(solid_defs[dep.asset.path].name)
                for dep in asset.computation.deps.values()
                if dep.asset in assets_to_update
            }
            for asset in assets_to_update if asset.computation
        }

        return PipelineDefinition(
            name=name,
            solid_defs=list(solid_defs.values()),
            mode_defs=list(self._modes_by_name.values()),
            dependencies=solid_deps,
            preset_defs=list(self._presets_by_name.values()),
        )
Example #30
def created_workspace_load_target(kwargs):
    check.dict_param(kwargs, 'kwargs')
    if are_all_keys_empty(kwargs, WORKSPACE_CLI_ARGS):
        if kwargs.get('empty_workspace'):
            return EmptyWorkspaceTarget()
        if os.path.exists('workspace.yaml'):
            return WorkspaceFileTarget(paths=['workspace.yaml'])
        elif os.path.exists('repository.yaml'):
            warnings.warn(
                'You are automatically loading a "repository.yaml", a deprecated '
                'capability. This capability will be eliminated in 0.9.0.')
            return WorkspaceFileTarget(paths=['repository.yaml'])
        raise click.UsageError(
            'No arguments given and workspace.yaml not found.')
    if kwargs.get('repository_yaml'):
        warnings.warn(
            'You have used -y or --repository-yaml to load a workspace. '
            'This is deprecated and will be eliminated in 0.9.0.')
        _check_cli_arguments_none(kwargs, 'python_file', 'module_name',
                                  'attribute', 'workspace')
        return WorkspaceFileTarget(paths=[kwargs['repository_yaml']])
    if kwargs.get('workspace'):
        _check_cli_arguments_none(kwargs, 'python_file', 'module_name',
                                  'attribute')
        return WorkspaceFileTarget(paths=list(kwargs['workspace']))
    if kwargs.get('python_file'):
        _check_cli_arguments_none(kwargs, 'workspace', 'module_name')
        return PythonFileTarget(
            python_file=kwargs.get('python_file'),
            attribute=kwargs.get('attribute'),
        )
    if kwargs.get('module_name'):
        return ModuleTarget(
            module_name=kwargs.get('module_name'),
            attribute=kwargs.get('attribute'),
        )
    check.failed('invalid')
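A hedged sketch of the kwargs dict this expects from the CLI layer; keys not shown are assumed to default to empty values via kwargs.get:

# Load from an explicit workspace file
target = created_workspace_load_target({'workspace': ('workspace.yaml',)})

# Load a single python file, optionally selecting a repository attribute
target = created_workspace_load_target({'python_file': 'repo.py', 'attribute': 'my_repo'})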