Exemple #1
0
def _error_check_step_output_values(step, step_output_values):
    '''
    Pass the output values produced for ``step`` through, validating each one.

    A value is yielded unchanged once it has passed both checks below.

    Raises:
        DagsterInvariantViolationError: If a value names an output that
            ``step.has_step_output`` rejects, or names an output that has
            already been yielded earlier in the sequence.
    '''
    check.inst_param(step, 'step', ExecutionStep)
    check.generator_param(step_output_values, 'step_output_values')

    emitted_names = set()
    for value in step_output_values:
        name = value.output_name

        if not step.has_step_output(name):
            # Only needed for the error message, so build it lazily.
            known_names = [
                definition.name
                for definition in step.solid.definition.output_defs
            ]
            message = (
                'Core transform for {step.solid.name} returned an output '
                '{step_output_value.output_name} that does not exist. The available '
                'outputs are {output_names}'
            ).format(
                step=step,
                step_output_value=value,
                output_names=known_names,
            )
            raise DagsterInvariantViolationError(message)

        if name in emitted_names:
            message = (
                'Core transform for {step.solid.name} returned an output '
                '{step_output_value.output_name} multiple times'
            ).format(step=step, step_output_value=value)
            raise DagsterInvariantViolationError(message)

        yield value
        emitted_names.add(name)
Exemple #2
0
def _step_output_error_checked_user_event_sequence(step_context, user_event_sequence):
    '''
    Process the event sequence to check for invariant violations in the event
    sequence related to Output events emitted from the compute_fn.

    This consumes and emits an event sequence. Events that are not ``Output``
    instances are passed through untouched.

    Args:
        step_context (SystemStepExecutionContext): Context whose ``step``
            declares the expected outputs and whose ``log`` receives the
            implicit-Nothing message.
        user_event_sequence (generator): Events produced by the user code.

    Yields:
        Every incoming event, in order, followed by an ``Output`` with value
        ``None`` for each non-optional output whose runtime type is Nothing
        and that was never emitted.

    Raises:
        DagsterInvariantViolationError: An ``Output`` names an output the step
            does not have, or the same output name is emitted twice.
        DagsterStepOutputNotFoundError: A non-optional output whose runtime
            type is not Nothing was never emitted.
    '''
    check.inst_param(step_context, 'step_context', SystemStepExecutionContext)
    check.generator_param(user_event_sequence, 'user_event_sequence')

    step = step_context.step
    output_names = [output_def.name for output_def in step.step_outputs]
    seen_outputs = set()

    for user_event in user_event_sequence:
        if not isinstance(user_event, Output):
            yield user_event
            continue

        # do additional processing on Outputs
        output = user_event
        if not step.has_step_output(output.output_name):
            raise DagsterInvariantViolationError(
                'Core compute for solid "{handle}" returned an output '
                '"{output.output_name}" that does not exist. The available '
                'outputs are {output_names}'.format(
                    handle=str(step.solid_handle), output=output, output_names=output_names
                )
            )

        if output.output_name in seen_outputs:
            raise DagsterInvariantViolationError(
                'Core compute for solid "{handle}" returned an output '
                '"{output.output_name}" multiple times'.format(
                    handle=str(step.solid_handle), output=output
                )
            )

        yield output
        seen_outputs.add(output.output_name)

    # The user code is done; account for every declared output it skipped.
    for step_output_def in step.step_outputs:
        if step_output_def.name in seen_outputs or step_output_def.optional:
            continue

        if step_output_def.runtime_type.is_nothing:
            # Pass the handle as a plain string. Wrapping it in braces would
            # build a set and the log line would show the set's repr.
            step_context.log.info(
                'Emitting implicit Nothing for output "{output}" on solid {solid}'.format(
                    output=step_output_def.name, solid=str(step.solid_handle)
                )
            )
            yield Output(output_name=step_output_def.name, value=None)
        else:
            raise DagsterStepOutputNotFoundError(
                'Core compute for solid "{handle}" did not return an output '
                'for non-optional output "{step_output_def.name}"'.format(
                    handle=str(step.solid_handle), step_output_def=step_output_def
                ),
                step_key=step.key,
                output_name=step_output_def.name,
            )
def _error_check_step_outputs(step_context, step_output_iter):
    '''
    Validate the ``StepOutputValue`` items in ``step_output_iter`` while
    passing every item through.

    Items that are not ``StepOutputValue`` instances are yielded untouched.

    Args:
        step_context (SystemStepExecutionContext): Context whose ``step``
            declares the expected outputs and whose ``log`` receives the
            implicit-Nothing message.
        step_output_iter (generator): Items produced by the step's compute.

    Yields:
        Every incoming item, in order, followed by a ``StepOutputValue`` with
        value ``None`` for each non-optional output whose runtime type is
        Nothing and that was never emitted.

    Raises:
        DagsterInvariantViolationError: A ``StepOutputValue`` names an output
            the step does not have, or the same output name appears twice.
        DagsterStepOutputNotFoundError: A non-optional output whose runtime
            type is not Nothing was never emitted.
    '''
    check.inst_param(step_context, 'step_context', SystemStepExecutionContext)
    check.generator_param(step_output_iter, 'step_output_iter')

    step = step_context.step
    output_names = [output_def.name for output_def in step.step_outputs]
    seen_outputs = set()

    for step_output in step_output_iter:
        if not isinstance(step_output, StepOutputValue):
            yield step_output
            continue

        # do additional processing on StepOutputValues
        step_output_value = step_output
        if not step.has_step_output(step_output_value.output_name):
            raise DagsterInvariantViolationError(
                'Core compute for solid "{handle}" returned an output '
                '"{step_output_value.output_name}" that does not exist. The available '
                'outputs are {output_names}'.format(
                    handle=str(step.solid_handle),
                    step_output_value=step_output_value,
                    output_names=output_names,
                ))

        if step_output_value.output_name in seen_outputs:
            raise DagsterInvariantViolationError(
                'Core compute for solid "{handle}" returned an output '
                '"{step_output_value.output_name}" multiple times'.format(
                    handle=str(step.solid_handle),
                    step_output_value=step_output_value))

        yield step_output_value
        seen_outputs.add(step_output_value.output_name)

    # The compute is done; account for every declared output it skipped.
    for step_output_def in step.step_outputs:
        if step_output_def.name in seen_outputs or step_output_def.optional:
            continue

        if step_output_def.runtime_type.is_nothing:
            # Pass the handle as a plain string. Wrapping it in braces would
            # build a set and the log line would show the set's repr.
            step_context.log.info(
                'Emitting implicit Nothing for output "{output}" on solid {solid}'
                .format(output=step_output_def.name,
                        solid=str(step.solid_handle)))
            yield StepOutputValue(output_name=step_output_def.name,
                                  value=None)
        else:
            raise DagsterStepOutputNotFoundError(
                'Core compute for solid "{handle}" did not return an output '
                'for non-optional output "{step_output_def.name}"'.format(
                    handle=str(step.solid_handle),
                    step_output_def=step_output_def),
                step_key=step.key,
                output_name=step_output_def.name,
            )
Exemple #4
0
def backoff(
    fn,
    retry_on,
    args=None,
    kwargs=None,
    max_retries=BACKOFF_MAX_RETRIES,
    delay_generator=None,
):
    """Straightforward backoff implementation.

    Note that this doesn't implement any jitter on the delays, so probably won't be appropriate for very
    parallel situations.

    Args:
        fn (Callable): The function to wrap in a backoff/retry loop.
        retry_on (Tuple[Exception, ...]): The exception classes on which to retry. Note that we don't (yet)
            have any support for matching the exception messages.
        args (Optional[List[Any]]): Positional args to pass to the callable.
        kwargs (Optional[Dict[str, Any]]): Keyword args to pass to the callable.
        max_retries (Optional[Int]): The maximum number of times to retry a failed fn call. Set to 0 for no backoff.
            Default: ``BACKOFF_MAX_RETRIES``
        delay_generator (Optional[Generator[float, None, None]]): Generates the successive delays between
            retry attempts. When omitted (or ``None``), a fresh ``backoff_delay_generator()`` is created
            for this call.

    Returns:
        Whatever ``fn(*args, **kwargs)`` returns on its first successful call.

    Raises:
        The last exception matching ``retry_on`` once the initial attempt and all ``max_retries`` retries
        have failed. Exceptions not matching ``retry_on`` propagate immediately.
    """
    check.callable_param(fn, "fn")
    retry_on = check.tuple_param(retry_on, "retry_on")
    args = check.opt_list_param(args, "args")
    kwargs = check.opt_dict_param(kwargs, "kwargs", key_type=str)
    check.int_param(max_retries, "max_retries")

    # A generator used as a default argument is created once, when the function is defined, and would
    # then be shared (and progressively consumed) by every call. Build a new one per call instead.
    if delay_generator is None:
        delay_generator = backoff_delay_generator()
    check.generator_param(delay_generator, "delay_generator")

    # Initial attempt: no delay before it, and it does not count as a retry.
    try:
        return fn(*args, **kwargs)
    except retry_on as exc:
        to_raise = exc

    for _ in range(max_retries):
        time.sleep(six.next(delay_generator))
        try:
            return fn(*args, **kwargs)
        except retry_on as exc:
            # Keep the most recent failure so it can be surfaced if we run out of retries.
            to_raise = exc

    raise to_raise
Exemple #5
0
def _error_check_step_outputs(step, step_output_iter):
    '''
    Pass the items of ``step_output_iter`` through, validating every
    ``StepOutputValue`` against the outputs declared on ``step``.

    Items that are not ``StepOutputValue`` instances are yielded untouched.

    Raises:
        DagsterInvariantViolationError: A ``StepOutputValue`` names an output
            the step does not have, or the same output name appears twice.
        DagsterStepOutputNotFoundError: After the iterator is exhausted, a
            non-optional declared output was never emitted.
    '''
    check.inst_param(step, 'step', ExecutionStep)
    check.generator_param(step_output_iter, 'step_output_iter')

    declared_names = [declared.name for declared in step.step_outputs]
    emitted_names = set()

    for item in step_output_iter:
        if not isinstance(item, StepOutputValue):
            yield item
            continue

        # Only StepOutputValues are subject to the checks below.
        name = item.output_name

        if not step.has_step_output(name):
            message = (
                'Core transform for solid "{step.solid.name}" returned an output '
                '"{step_output_value.output_name}" that does not exist. The available '
                'outputs are {output_names}'
            ).format(step=step, step_output_value=item, output_names=declared_names)
            raise DagsterInvariantViolationError(message)

        if name in emitted_names:
            message = (
                'Core transform for solid "{step.solid.name}" returned an output '
                '"{step_output_value.output_name}" multiple times'
            ).format(step=step, step_output_value=item)
            raise DagsterInvariantViolationError(message)

        yield item
        emitted_names.add(name)

    # Everything has been consumed; any required output still missing is an error.
    for declared in step.step_outputs:
        if declared.name in emitted_names or declared.optional:
            continue

        message = (
            'Core transform for solid "{step.solid.name}" did not return an output '
            'for non-optional output "{step_output_def.name}"'
        ).format(step=step, step_output_def=declared)
        raise DagsterStepOutputNotFoundError(
            message,
            step_key=step.key,
            output_name=declared.name,
        )
Exemple #6
0
def test_generator_param():
    """Exercise ``check.generator_param`` / ``check.generator`` with generators and non-generators."""

    # A generator *function*; calling it yields a fresh generator object.
    def _single_value():
        yield 1

    assert check.generator_param(_single_value(), "gen")

    stream = _single_value()
    assert check.generator(stream)
    assert list(stream) == [1]
    # The generator is now exhausted, and the check is expected to accept it still.
    assert check.generator(stream)
    assert list(stream) == []

    # A list, None, and the un-called generator function must all be rejected.
    for not_a_generator in (list(stream), None, _single_value):
        with pytest.raises(ParameterCheckError):
            assert check.generator_param(not_a_generator, "gen")
Exemple #7
0
def _step_output_error_checked_user_event_sequence(
    step_context: SystemStepExecutionContext,
    user_event_sequence: Iterator[SolidOutputUnion]
) -> Iterator[SolidOutputUnion]:
    """
    Process the event sequence to check for invariant violations in the event
    sequence related to Output events emitted from the compute_fn.

    This consumes and emits an event sequence. Events that are neither
    ``Output`` nor ``DynamicOutput`` are passed through untouched.

    Args:
        step_context: Context providing the step, the pipeline definition used
            to look up output definitions, and the logger.
        user_event_sequence: Generator of events produced by the user code.

    Yields:
        Every incoming event, in order, followed by an ``Output`` with value
        ``None`` for each non-optional output of kind NOTHING that was never
        emitted.

    Raises:
        DagsterInvariantViolationError: An output names a step output that does
            not exist; an ``Output`` is repeated or targets a dynamic output
            definition; a ``DynamicOutput`` targets a non-dynamic definition or
            repeats a mapping key for the same output.
        DagsterStepOutputNotFoundError: A non-optional output that is not of
            kind NOTHING was never emitted.
    """
    check.inst_param(step_context, "step_context", SystemStepExecutionContext)
    check.generator_param(user_event_sequence, "user_event_sequence")

    step = step_context.step
    output_names = [output_def.name for output_def in step.step_outputs]
    seen_outputs: Set[str] = set()
    # Mapping keys already yielded, tracked separately for each output name.
    seen_mapping_keys: Dict[str, Set[str]] = defaultdict(set)

    for user_event in user_event_sequence:
        if not isinstance(user_event, (Output, DynamicOutput)):
            yield user_event
            continue

        # do additional processing on Outputs
        output = user_event
        if not step.has_step_output(output.output_name):
            raise DagsterInvariantViolationError(
                'Core compute for solid "{handle}" returned an output '
                '"{output.output_name}" that does not exist. The available '
                "outputs are {output_names}".format(
                    handle=str(step.solid_handle),
                    output=output,
                    output_names=output_names))

        step_output = step.step_output_named(output.output_name)
        output_def = step_context.pipeline_def.get_solid(
            step_output.solid_handle).output_def_named(step_output.name)

        if isinstance(output, Output):
            if output.output_name in seen_outputs:
                raise DagsterInvariantViolationError(
                    'Compute for solid "{handle}" returned an output '
                    '"{output.output_name}" multiple times'.format(
                        handle=str(step.solid_handle), output=output))

            if output_def.is_dynamic:
                raise DagsterInvariantViolationError(
                    f'Compute for solid "{step.solid_handle}" for output "{output.output_name}" '
                    "defined as dynamic must yield DynamicOutput, got Output.")
        else:
            if not output_def.is_dynamic:
                raise DagsterInvariantViolationError(
                    f'Compute for solid "{step.solid_handle}" yielded a DynamicOutput, '
                    "but did not use DynamicOutputDefinition.")
            if output.mapping_key in seen_mapping_keys[output.output_name]:
                raise DagsterInvariantViolationError(
                    f'Compute for solid "{step.solid_handle}" yielded a DynamicOutput with '
                    f'mapping_key "{output.mapping_key}" multiple times.')
            seen_mapping_keys[output.output_name].add(output.mapping_key)

        yield output
        seen_outputs.add(output.output_name)

    # The user code is done; account for every declared output it skipped.
    for step_output in step.step_outputs:
        step_output_def = step_context.solid_def.output_def_named(
            step_output.name)
        if step_output_def.name in seen_outputs or step_output_def.optional:
            continue

        if step_output_def.dagster_type.kind == DagsterTypeKind.NOTHING:
            # Pass the handle as a plain string. Wrapping it in braces would
            # build a set and the log line would show the set's repr.
            step_context.log.info(
                'Emitting implicit Nothing for output "{output}" on solid {solid}'
                .format(output=step_output_def.name,
                        solid=str(step.solid_handle)))
            yield Output(output_name=step_output_def.name, value=None)
        else:
            raise DagsterStepOutputNotFoundError(
                'Core compute for solid "{handle}" did not return an output '
                'for non-optional output "{step_output_def.name}"'.format(
                    handle=str(step.solid_handle),
                    step_output_def=step_output_def),
                step_key=step.key,
                output_name=step_output_def.name,
            )
Exemple #8
0
def _step_output_error_checked_user_event_sequence(
    step_context: StepExecutionContext, user_event_sequence: Iterator[SolidOutputUnion]
) -> Iterator[SolidOutputUnion]:
    """
    Validate the Output / DynamicOutput events coming out of the compute_fn
    while streaming the whole event sequence through.

    Events of any other type are yielded unchanged. Each validated output is
    recorded on ``step_context`` via ``observe_output`` and re-created with the
    metadata from ``step_context.get_output_metadata`` normalized and appended
    to its existing ``metadata_entries``.

    After the sequence is exhausted, every declared output that is neither
    seen nor optional is handled: an output of kind NOTHING is emitted
    implicitly with value ``None``; a missing non-dynamic output raises
    ``DagsterStepOutputNotFoundError``; a missing dynamic output is left alone.

    Raises:
        DagsterInvariantViolationError: An output names a step output that does
            not exist; an ``Output`` is repeated or targets a dynamic output
            definition; a ``DynamicOutput`` targets a non-dynamic definition or
            repeats an (output name, mapping key) pair.
        DagsterStepOutputNotFoundError: See above.
    """
    check.inst_param(step_context, "step_context", StepExecutionContext)
    check.generator_param(user_event_sequence, "user_event_sequence")

    step = step_context.step
    op_label = step_context.describe_op()
    output_names = [declared.name for declared in step.step_outputs]

    for event in user_event_sequence:
        if not isinstance(event, (Output, DynamicOutput)):
            yield event
            continue

        # Everything below applies to Output / DynamicOutput events only.
        output = event
        if not step.has_step_output(cast(str, output.output_name)):
            raise DagsterInvariantViolationError(
                f'Core compute for {op_label} returned an output "{output.output_name}" that does '
                f"not exist. The available outputs are {output_names}"
            )

        declared_output = step.step_output_named(cast(str, output.output_name))
        owning_solid = step_context.pipeline_def.get_solid(declared_output.solid_handle)
        output_def = owning_solid.output_def_named(declared_output.name)

        if isinstance(output, Output):
            if step_context.has_seen_output(output.output_name):
                raise DagsterInvariantViolationError(
                    f'Compute for {op_label} returned an output "{output.output_name}" multiple '
                    "times"
                )

            if output_def.is_dynamic:
                raise DagsterInvariantViolationError(
                    f'Compute for {op_label} for output "{output.output_name}" defined as dynamic '
                    "must yield DynamicOutput, got Output."
                )

            step_context.observe_output(output.output_name)

            extra_metadata = cast(
                Dict[str, Any], step_context.get_output_metadata(output.output_name)
            )
            output = Output(
                value=output.value,
                output_name=output.output_name,
                metadata_entries=output.metadata_entries
                + normalize_metadata(extra_metadata, []),
            )
        else:
            if not output_def.is_dynamic:
                raise DagsterInvariantViolationError(
                    f"Compute for {op_label} yielded a DynamicOutput, but did not use "
                    "DynamicOutputDefinition."
                )

            if step_context.has_seen_output(output.output_name, output.mapping_key):
                raise DagsterInvariantViolationError(
                    f"Compute for {op_label} yielded a DynamicOutput with mapping_key "
                    f'"{output.mapping_key}" multiple times.'
                )

            step_context.observe_output(output.output_name, output.mapping_key)

            extra_metadata = cast(
                Dict[str, Any],
                step_context.get_output_metadata(
                    output.output_name, mapping_key=output.mapping_key
                ),
            )
            output = DynamicOutput(
                value=output.value,
                output_name=output.output_name,
                metadata_entries=output.metadata_entries
                + normalize_metadata(extra_metadata, []),
                mapping_key=output.mapping_key,
            )

        yield output

    # The user code is done; account for every declared output it skipped.
    for declared_output in step.step_outputs:
        step_output_def = step_context.solid_def.output_def_named(declared_output.name)
        if step_context.has_seen_output(step_output_def.name) or step_output_def.optional:
            continue

        if step_output_def.dagster_type.kind == DagsterTypeKind.NOTHING:
            step_context.log.info(
                f'Emitting implicit Nothing for output "{step_output_def.name}" on {op_label}'
            )
            yield Output(output_name=step_output_def.name, value=None)
        elif not step_output_def.is_dynamic:
            raise DagsterStepOutputNotFoundError(
                (
                    f"Core compute for {op_label} did not return an output for non-optional "
                    f'output "{step_output_def.name}"'
                ),
                step_key=step.key,
                output_name=step_output_def.name,
            )