Exemple #1
0
def _yield_transform_results(compute_context, inputs, compute_fn):
    '''
    Invoke compute_fn and re-emit the events it produces, validating each one.

    This is a generator, so nothing below (including the parameter check) runs
    until the caller starts iterating.

    Args:
        compute_context: Must be a SystemComputeExecutionContext; it is wrapped
            in a ComputeExecutionContext before being handed to compute_fn.
        inputs: Passed through to compute_fn unchanged.
        compute_fn: Callable invoked as compute_fn(context, inputs). It may
            return None (nothing is emitted) or an iterable of events.

    Yields:
        A fresh Output (same output_name and value) for every Output produced,
        and each Materialization or ExpectationResult unchanged.

    Raises:
        DagsterInvariantViolationError: If compute_fn returns an Output instead
            of yielding it, or yields any other kind of object.
    '''
    check.inst_param(compute_context, 'compute_context',
                     SystemComputeExecutionContext)
    step = compute_context.step
    gen = compute_fn(ComputeExecutionContext(compute_context), inputs)

    # A bare Output return value means the author wrote `return` instead of `yield`.
    if isinstance(gen, Output):
        raise DagsterInvariantViolationError((
            'Transform for solid {solid_name} returned a Output rather than '
            'yielding it. The compute_fn of the core SolidDefinition must yield '
            'its results').format(solid_name=str(step.solid_handle)))

    if gen is None:
        return

    for result in gen:
        if isinstance(result, Output):
            value_repr = repr(result.value)
            # Keep log lines bounded: long reprs are cut to 200 characters.
            if len(value_repr) > 200:
                value_repr = value_repr[:200] + '...'
            compute_context.log.info(
                'Solid {solid} emitted output "{output}" value {value}'.format(
                    solid=str(step.solid_handle),
                    output=result.output_name,
                    value=value_repr,
                ))
            yield Output(output_name=result.output_name, value=result.value)

        elif isinstance(result, (Materialization, ExpectationResult)):
            yield result

        else:
            # Message wording fixed: previously read "rather an an instance".
            raise DagsterInvariantViolationError(
                ('Transform for solid {solid_name} yielded {result} rather than '
                 'an instance of the Output or Materialization class.').format(
                     result=repr(result), solid_name=str(step.solid_handle)))
Exemple #2
0
    def _do_expectation(expectation_context, inputs):
        '''
        Run the expectation function on the EXPECTATION_INPUT value.

        On success, yields the ExpectationResult followed by an Output that
        forwards the input value under internal_output_name. On failure, logs
        at debug level and raises DagsterIOExpectationFailedError. Raises
        DagsterInvariantViolationError if the expectation function returns
        anything other than an ExpectationResult.
        '''
        check.inst_param(expectation_context, 'step_context', SystemStepExecutionContext)
        checked_value = inputs[EXPECTATION_INPUT]
        scoped_context = expectation_context.for_expectation(inout_def, expectation_def)
        user_facing_context = ExpectationExecutionContext(scoped_context)
        outcome = expectation_def.expectation_fn(user_facing_context, checked_value)

        if not isinstance(outcome, ExpectationResult):
            bad_return_template = (
                'Expectation for solid {solid_name} on {desc_key} {inout_name} '
                'did not return an ExpectationResult'
            )
            raise DagsterInvariantViolationError(
                bad_return_template.format(
                    solid_name=solid.name,
                    desc_key=inout_def.descriptive_key,
                    inout_name=inout_def.name,
                )
            )

        # Handle the failure path first so the success path reads straight through.
        if not outcome.success:
            failure_note = 'Expectation {key} failed on {value}.'.format(
                key=scoped_context.step.key, value=checked_value
            )
            scoped_context.log.debug(failure_note)
            raise DagsterIOExpectationFailedError(scoped_context, checked_value)

        yield outcome
        yield Output(output_name=internal_output_name, value=inputs[EXPECTATION_INPUT])
Exemple #3
0
def _step_output_error_checked_user_event_sequence(step_context, user_event_sequence):
    '''
    Process the event sequence to check for invariant violations in the event
    sequence related to Output events emitted from the compute_fn.

    This consumes and emits an event sequence.

    Non-Output events pass through untouched. Each Output must name an output
    declared on the step and may be emitted at most once. After the input
    sequence is exhausted, every non-optional output that was never emitted is
    either filled in with an implicit ``Output(value=None)`` (when its runtime
    type is Nothing) or reported as an error.

    Args:
        step_context: Must be a SystemStepExecutionContext.
        user_event_sequence: Generator of events produced by user code.

    Yields:
        The incoming events, followed by any implicit Nothing outputs.

    Raises:
        DagsterInvariantViolationError: An Output names an undeclared output,
            or the same output is emitted more than once.
        DagsterStepOutputNotFoundError: A non-optional, non-Nothing output was
            never emitted.
    '''
    check.inst_param(step_context, 'step_context', SystemStepExecutionContext)
    check.generator_param(user_event_sequence, 'user_event_sequence')

    step = step_context.step
    output_names = [output_def.name for output_def in step.step_outputs]
    seen_outputs = set()

    for user_event in user_event_sequence:
        if not isinstance(user_event, Output):
            yield user_event
            continue

        # do additional processing on Outputs
        output = user_event
        if not step.has_step_output(output.output_name):
            raise DagsterInvariantViolationError(
                'Core compute for solid "{handle}" returned an output '
                '"{output.output_name}" that does not exist. The available '
                'outputs are {output_names}'.format(
                    handle=str(step.solid_handle), output=output, output_names=output_names
                )
            )

        if output.output_name in seen_outputs:
            raise DagsterInvariantViolationError(
                'Core compute for solid "{handle}" returned an output '
                '"{output.output_name}" multiple times'.format(
                    handle=str(step.solid_handle), output=output
                )
            )

        yield output
        seen_outputs.add(output.output_name)

    for step_output_def in step.step_outputs:
        if step_output_def.name not in seen_outputs and not step_output_def.optional:
            if step_output_def.runtime_type.is_nothing:
                # Pass the handle as a plain string. The previous code wrapped it
                # in braces, building a one-element set that was logged as {'name'}.
                step_context.log.info(
                    'Emitting implicit Nothing for output "{output}" on solid {solid}'.format(
                        output=step_output_def.name, solid=str(step.solid_handle)
                    )
                )
                yield Output(output_name=step_output_def.name, value=None)
            else:
                raise DagsterStepOutputNotFoundError(
                    'Core compute for solid "{handle}" did not return an output '
                    'for non-optional output "{step_output_def.name}"'.format(
                        handle=str(step.solid_handle), step_output_def=step_output_def
                    ),
                    step_key=step.key,
                    output_name=step_output_def.name,
                )
Exemple #4
0
def _yield_compute_results(compute_context, inputs, compute_fn):
    '''
    Call compute_fn and relay its events, logging and validating each one.

    Output events are logged (value repr capped at 200 characters) and
    re-wrapped in a new Output; Materialization and ExpectationResult events
    are logged and forwarded as-is. A compute_fn that returns None emits
    nothing. Returning an Output directly, or yielding any other object,
    raises DagsterInvariantViolationError.
    '''
    check.inst_param(compute_context, 'compute_context',
                     SystemComputeExecutionContext)
    step = compute_context.step
    user_events = compute_fn(ComputeExecutionContext(compute_context), inputs)

    if isinstance(user_events, Output):
        returned_not_yielded = (
            'Compute function for solid {solid_name} returned a Output rather than '
            'yielding it. The compute_fn of the core SolidDefinition must yield '
            'its results')
        raise DagsterInvariantViolationError(
            returned_not_yielded.format(solid_name=str(step.solid_handle)))

    if user_events is None:
        return

    for event in user_events:
        if isinstance(event, Output):
            rendered = repr(event.value)
            if len(rendered) > 200:
                rendered = rendered[:200] + '...'
            compute_context.log.info(
                "Solid '{solid}' emitted output '{output}' value {value}".format(
                    solid=str(step.solid_handle),
                    output=event.output_name,
                    value=rendered,
                ))
            yield Output(output_name=event.output_name, value=event.value)
            continue

        if isinstance(event, Materialization):
            compute_context.log.info(
                "Solid '{solid}' materialized '{label}'".format(
                    solid=str(step.solid_handle), label=event.label))
            yield event
            continue

        if isinstance(event, ExpectationResult):
            if event.success:
                verdict = 'passed'
            else:
                verdict = 'failed'
            compute_context.log.info(
                "Solid '{solid}' {status} expectation '{label}'".format(
                    solid=str(step.solid_handle),
                    status=verdict,
                    label=event.label,
                ))
            yield event
            continue

        # Anything else is not an event type this function accepts.
        unexpected_event = (
            'Compute function for solid {solid_name} yielded {result} rather than '
            'an instance of the Output or Materialization class.')
        raise DagsterInvariantViolationError(
            unexpected_event.format(
                result=repr(event), solid_name=str(step.solid_handle)))
Exemple #5
0
def __empty_join(_context, _inputs):
    '''Yield one Output named JOIN_OUTPUT whose value is None; arguments are unused.'''
    empty_result = Output(output_name=JOIN_OUTPUT, value=None)
    yield empty_result
Exemple #6
0
def __merge_join(_context, inputs):
    '''Yield one Output named JOIN_OUTPUT holding all values of inputs as a list.'''
    merged_values = list(inputs.values())
    yield Output(output_name=JOIN_OUTPUT, value=merged_values)
def _validate_and_coerce_solid_result_to_iterator(result, context,
                                                  output_defs):
    """
    Turn whatever a compute function returned into a stream of events.

    Handling, in order:
      * a returned AssetMaterialization / Materialization / ExpectationResult
        is rejected with DagsterInvariantViolationError (they must be yielded);
      * a generator is drained and its events re-yielded;
      * an Output is yielded as-is;
      * with exactly one output definition, the value is wrapped in an Output
        (a warning is logged when the value is None and the output is
        non-required);
      * with several output definitions and a tuple result, each element is
        paired with its definition (length mismatch fails via check.failed);
      * any other non-None result raises DagsterInvariantViolationError;
      * None in the remaining cases yields nothing.
    """
    returned_event_types = (AssetMaterialization, Materialization, ExpectationResult)
    if isinstance(result, returned_event_types):
        raise DagsterInvariantViolationError((
            "Error in {described_op}: If you are returning an AssetMaterialization "
            "or an ExpectationResult from {node_type} you must yield them to avoid "
            "ambiguity with an implied result from returning a value.".format(
                described_op=context.describe_op(),
                node_type=context.solid_def.node_type_str,
            )))

    if inspect.isgenerator(result):
        # The user explicitly returned a generator from the solid; relay it.
        for relayed in result:
            yield relayed
        return

    if isinstance(result, Output):
        yield result
        return

    if len(output_defs) == 1:
        if result is None and output_defs[0].is_required is False:
            none_warning = (
                'Value "None" returned for non-required output "{output_name}" of {described_op}. '
                "This value will be passed to downstream {node_type}s. For conditional execution use\n"
                '  yield Output(value, "{output_name}")\n'
                "when you want the downstream {node_type}s to execute, "
                "and do not yield it when you want downstream solids to skip."
            )
            context.log.warn(
                none_warning.format(
                    output_name=output_defs[0].name,
                    described_op=context.describe_op(),
                    node_type=context.solid_def.node_type_str,
                ))
        yield Output(value=result, output_name=output_defs[0].name)
        return

    if len(output_defs) > 1 and isinstance(result, tuple):
        if len(result) != len(output_defs):
            check.failed(
                f"Solid '{context.solid_name}' has {len(output_defs)} output definitions, but "
                f"returned a tuple with {len(result)} elements")

        # Positional pairing: the i-th tuple element goes to the i-th definition.
        for single_def, element in zip(output_defs, result):
            yield Output(output_name=single_def.name, value=element)
        return

    if result is None:
        return

    if not output_defs:
        no_outputs_template = (
            "Error in {described_op}: Unexpectedly returned output {result} "
            "of type {type_}. {node_type} is explicitly defined to return no "
            "results.")
        raise DagsterInvariantViolationError(
            no_outputs_template.format(
                described_op=context.describe_op(),
                result=result,
                type_=type(result),
                node_type=context.solid_def.node_type_str.capitalize(),
            ))

    wrong_shape_template = (
        "Error in {described_op}: {node_type} unexpectedly returned "
        "output {result} of type {type_}. Should "
        "be a generator, containing or yielding "
        "{n_results} results: {{{expected_results}}}.")
    raise DagsterInvariantViolationError(
        wrong_shape_template.format(
            described_op=context.describe_op(),
            node_type=context.solid_def.node_type_str,
            result=result,
            type_=type(result),
            n_results=len(output_defs),
            expected_results=", ".join([
                "'{result_name}': {dagster_type}".format(
                    result_name=output_def.name,
                    dagster_type=output_def.dagster_type,
                ) for output_def in output_defs
            ]),
        ))
Exemple #8
0
def _step_output_error_checked_user_event_sequence(
    step_context: SystemStepExecutionContext,
    user_event_sequence: Iterator[SolidOutputUnion]
) -> Iterator[SolidOutputUnion]:
    """
    Process the event sequence to check for invariant violations in the event
    sequence related to Output events emitted from the compute_fn.

    This consumes and emits an event sequence.

    Events that are neither Output nor DynamicOutput pass through untouched.
    An Output must target a declared, non-dynamic output and may appear at most
    once per output name. A DynamicOutput must target a dynamic output
    definition, and its mapping_key must be unique for that output name. After
    the input sequence is exhausted, every non-optional output that was never
    emitted is either filled in with an implicit ``Output(value=None)`` (when
    its dagster type kind is NOTHING) or reported as an error.

    Args:
        step_context: Must be a SystemStepExecutionContext.
        user_event_sequence: Generator of events produced by user code.

    Yields:
        The incoming events, followed by any implicit Nothing outputs.

    Raises:
        DagsterInvariantViolationError: An event names an undeclared output,
            repeats an output / mapping key, or uses the wrong event class for
            the output definition.
        DagsterStepOutputNotFoundError: A non-optional, non-Nothing output was
            never emitted.
    """
    check.inst_param(step_context, "step_context", SystemStepExecutionContext)
    check.generator_param(user_event_sequence, "user_event_sequence")

    step = step_context.step
    output_names = [output_def.name for output_def in step.step_outputs]
    seen_outputs: Set[str] = set()
    seen_mapping_keys: Dict[str, Set[str]] = defaultdict(set)

    for user_event in user_event_sequence:
        if not isinstance(user_event, (Output, DynamicOutput)):
            yield user_event
            continue

        # do additional processing on Outputs
        output = user_event
        if not step.has_step_output(output.output_name):
            raise DagsterInvariantViolationError(
                'Core compute for solid "{handle}" returned an output '
                '"{output.output_name}" that does not exist. The available '
                "outputs are {output_names}".format(
                    handle=str(step.solid_handle),
                    output=output,
                    output_names=output_names))

        step_output = step.step_output_named(output.output_name)
        output_def = step_context.pipeline_def.get_solid(
            step_output.solid_handle).output_def_named(step_output.name)

        if isinstance(output, Output):
            if output.output_name in seen_outputs:
                raise DagsterInvariantViolationError(
                    'Compute for solid "{handle}" returned an output '
                    '"{output.output_name}" multiple times'.format(
                        handle=str(step.solid_handle), output=output))

            if output_def.is_dynamic:
                raise DagsterInvariantViolationError(
                    f'Compute for solid "{step.solid_handle}" for output "{output.output_name}" '
                    "defined as dynamic must yield DynamicOutput, got Output.")
        else:
            if not output_def.is_dynamic:
                raise DagsterInvariantViolationError(
                    f'Compute for solid "{step.solid_handle}" yielded a DynamicOutput, '
                    "but did not use DynamicOutputDefinition.")
            if output.mapping_key in seen_mapping_keys[output.output_name]:
                raise DagsterInvariantViolationError(
                    f'Compute for solid "{step.solid_handle}" yielded a DynamicOutput with '
                    f'mapping_key "{output.mapping_key}" multiple times.')
            seen_mapping_keys[output.output_name].add(output.mapping_key)

        yield output
        seen_outputs.add(output.output_name)

    for step_output in step.step_outputs:
        step_output_def = step_context.solid_def.output_def_named(
            step_output.name)
        if step_output_def.name not in seen_outputs and not step_output_def.optional:
            if step_output_def.dagster_type.kind == DagsterTypeKind.NOTHING:
                # Pass the handle as a plain string. The previous code wrapped it
                # in braces, building a one-element set that was logged as {'name'}.
                step_context.log.info(
                    'Emitting implicit Nothing for output "{output}" on solid {solid}'
                    .format(output=step_output_def.name,
                            solid=str(step.solid_handle)))
                yield Output(output_name=step_output_def.name, value=None)
            else:
                raise DagsterStepOutputNotFoundError(
                    'Core compute for solid "{handle}" did not return an output '
                    'for non-optional output "{step_output_def.name}"'.format(
                        handle=str(step.solid_handle),
                        step_output_def=step_output_def),
                    step_key=step.key,
                    output_name=step_output_def.name,
                )
Exemple #9
0
def _step_output_error_checked_user_event_sequence(
    step_context: StepExecutionContext, user_event_sequence: Iterator[SolidOutputUnion]
) -> Iterator[SolidOutputUnion]:
    """
    Validate Output / DynamicOutput events coming from user compute code.

    Other event types are forwarded unchanged. Each Output or DynamicOutput is
    checked against the step's declared outputs, recorded on the step context
    via observe_output, and re-created with the metadata returned by
    get_output_metadata appended to its metadata entries. Once the incoming
    sequence ends, unseen non-optional outputs either get an implicit
    ``Output(value=None)`` (dagster type kind NOTHING) or, when they are not
    dynamic, raise DagsterStepOutputNotFoundError.
    """
    check.inst_param(step_context, "step_context", StepExecutionContext)
    check.generator_param(user_event_sequence, "user_event_sequence")

    step = step_context.step
    op_label = step_context.describe_op()
    output_names = [declared.name for declared in step.step_outputs]

    for event in user_event_sequence:
        if not isinstance(event, (Output, DynamicOutput)):
            yield event
            continue

        # Everything below applies only to Output / DynamicOutput events.
        if not step.has_step_output(cast(str, event.output_name)):
            raise DagsterInvariantViolationError(
                f'Core compute for {op_label} returned an output "{event.output_name}" that does '
                f"not exist. The available outputs are {output_names}"
            )

        matched_step_output = step.step_output_named(cast(str, event.output_name))
        owning_solid = step_context.pipeline_def.get_solid(matched_step_output.solid_handle)
        output_def = owning_solid.output_def_named(matched_step_output.name)

        if not isinstance(event, Output):
            # DynamicOutput branch.
            if not output_def.is_dynamic:
                raise DagsterInvariantViolationError(
                    f"Compute for {op_label} yielded a DynamicOutput, but did not use "
                    "DynamicOutputDefinition."
                )
            if step_context.has_seen_output(event.output_name, event.mapping_key):
                raise DagsterInvariantViolationError(
                    f"Compute for {op_label} yielded a DynamicOutput with mapping_key "
                    f'"{event.mapping_key}" multiple times.'
                )
            step_context.observe_output(event.output_name, event.mapping_key)
            extra_metadata = step_context.get_output_metadata(
                event.output_name, mapping_key=event.mapping_key
            )
            yield DynamicOutput(
                value=event.value,
                output_name=event.output_name,
                metadata_entries=event.metadata_entries
                + normalize_metadata(cast(Dict[str, Any], extra_metadata), []),
                mapping_key=event.mapping_key,
            )
            continue

        # Plain Output branch.
        if step_context.has_seen_output(event.output_name):
            raise DagsterInvariantViolationError(
                f'Compute for {op_label} returned an output "{event.output_name}" multiple times'
            )

        if output_def.is_dynamic:
            raise DagsterInvariantViolationError(
                f'Compute for {op_label} for output "{event.output_name}" defined as dynamic '
                "must yield DynamicOutput, got Output."
            )

        step_context.observe_output(event.output_name)

        extra_metadata = step_context.get_output_metadata(event.output_name)
        yield Output(
            value=event.value,
            output_name=event.output_name,
            metadata_entries=event.metadata_entries
            + normalize_metadata(cast(Dict[str, Any], extra_metadata), []),
        )

    for declared_output in step.step_outputs:
        step_output_def = step_context.solid_def.output_def_named(declared_output.name)
        if step_context.has_seen_output(step_output_def.name) or step_output_def.optional:
            continue

        if step_output_def.dagster_type.kind == DagsterTypeKind.NOTHING:
            step_context.log.info(
                f'Emitting implicit Nothing for output "{step_output_def.name}" on {op_label}'
            )
            yield Output(output_name=step_output_def.name, value=None)
        elif not step_output_def.is_dynamic:
            raise DagsterStepOutputNotFoundError(
                (
                    f"Core compute for {op_label} did not return an output for non-optional "
                    f'output "{step_output_def.name}"'
                ),
                step_key=step.key,
                output_name=step_output_def.name,
            )
Exemple #10
0
def __select_first_join(_context, inputs):
    '''Yield one Output named JOIN_OUTPUT holding the first value of inputs.'''
    candidate_values = list(inputs.values())
    yield Output(output_name=JOIN_OUTPUT, value=candidate_values[0])