예제 #1
0
def test_event_log_storage_store_with_multiple_runs(
        event_storage_factory_cm_fn):
    '''
    Store one STEP_SUCCESS record for each of several run ids and check that
    logs and stats are tracked per run, and that wipe() clears all of them.
    '''
    run_ids = ['foo', 'bar', 'baz']

    with event_storage_factory_cm_fn() as storage:
        for run_id in run_ids:
            # Every run starts out with no stored logs.
            assert len(storage.get_logs_for_run(run_id)) == 0

            success_event = DagsterEvent(
                DagsterEventType.STEP_SUCCESS.value,
                'nonce',
                event_specific_data=StepSuccessData(duration_ms=100.0),
            )
            record = DagsterEventRecord(
                None,
                'Message2',
                'debug',
                '',
                run_id,
                time.time(),
                dagster_event=success_event,
            )
            storage.store_event(record)

        # Exactly one record (and one succeeded step) per run.
        for run_id in run_ids:
            assert len(storage.get_logs_for_run(run_id)) == 1
            run_stats = storage.get_stats_for_run(run_id)
            assert run_stats.steps_succeeded == 1

        # Wiping the storage removes the logs of every run.
        storage.wipe()
        for run_id in run_ids:
            assert len(storage.get_logs_for_run(run_id)) == 0
예제 #2
0
def _execute_steps_core_loop(step_context, inputs, intermediates_manager):
    '''
    Run one step and yield the DagsterEvents it produces.

    Events are yielded in this order: a step start event, one event per
    step output / materialization / expectation result, and finally a step
    success event carrying the measured duration in milliseconds.

    Args:
        step_context (SystemStepExecutionContext): context of the step to run.
        inputs (dict): raw input values keyed by input name (str).
        intermediates_manager (IntermediatesManager): passed through to
            _create_step_output_event for every step output.
    '''
    check.inst_param(step_context, 'step_context', SystemStepExecutionContext)
    check.dict_param(inputs, 'inputs', key_type=str)
    check.inst_param(intermediates_manager, 'intermediates_manager',
                     IntermediatesManager)

    evaluated_inputs = {}
    # do runtime type checks of inputs versus step inputs
    for input_name, input_value in inputs.items():
        evaluated_inputs[input_name] = _get_evaluated_input(
            step_context.step, input_name, input_value)
    yield DagsterEvent.step_start_event(step_context)

    with time_execution_scope() as timer_result:
        step_output_iterator = check.generator(
            _iterate_step_outputs_within_boundary(step_context,
                                                  evaluated_inputs))

        # It is important for this loop to be indented within the timer
        # block above: the generators are lazy, so the step's work only
        # happens while they are consumed here. Consuming them outside the
        # block would record only the time needed to create the generators.
        for step_output in check.generator(
                _error_check_step_outputs(step_context,
                                          step_output_iterator)):

            if isinstance(step_output, StepOutputValue):
                yield _create_step_output_event(step_context, step_output,
                                                intermediates_manager)
            elif isinstance(step_output, Materialization):
                yield DagsterEvent.step_materialization(
                    step_context, step_output)
            elif isinstance(step_output, ExpectationResult):
                yield DagsterEvent.step_expectation_result(
                    step_context, step_output)
            else:
                check.failed(
                    'Unexpected step_output {step_output}, should have been caught earlier'
                    .format(step_output=step_output))

    yield DagsterEvent.step_success_event(
        step_context, StepSuccessData(duration_ms=timer_result.millis))
예제 #3
0
    def test_event_log_storage_store_with_multiple_runs(self, storage):
        """Store one STEP_SUCCESS record per run id and verify logs, stats and wipe.

        The wipe assertions only run when ``self.can_wipe()`` is truthy.
        """
        run_ids = ["foo", "bar", "baz"]

        for run_id in run_ids:
            # Every run starts out with no stored logs.
            assert len(storage.get_logs_for_run(run_id)) == 0

            success_event = DagsterEvent(
                DagsterEventType.STEP_SUCCESS.value,
                "nonce",
                event_specific_data=StepSuccessData(duration_ms=100.0),
            )
            record = EventRecord(
                None,
                "Message2",
                "debug",
                "",
                run_id,
                time.time(),
                dagster_event=success_event,
            )
            storage.store_event(record)

        # Exactly one record (and one succeeded step) per run.
        for run_id in run_ids:
            assert len(storage.get_logs_for_run(run_id)) == 1
            run_stats = storage.get_stats_for_run(run_id)
            assert run_stats.steps_succeeded == 1

        if not self.can_wipe():
            return

        # Wiping the storage removes the logs of every run.
        storage.wipe()
        for run_id in run_ids:
            assert len(storage.get_logs_for_run(run_id)) == 0
예제 #4
0
파일: util.py 프로젝트: syrusakbary/dagster
def dagster_event_from_dict(event_dict, pipeline_name):
    '''
    Build a DagsterEvent from a dict describing a step event.

    The dict is keyed by camelCase names and carries a '__typename' entry
    (presumably a GraphQL response payload -- confirm against the caller).

    Args:
        event_dict (dict): event description with str keys; must contain
            '__typename' and a 'step' dict with 'key', 'solidHandleID' and
            'kind'.
        pipeline_name (str): pipeline name to stamp on the event.

    Raises:
        Exception: if '__typename' is not one of the handled event types.
    '''
    check.dict_param(event_dict, 'event_dict', key_type=str)
    check.str_param(pipeline_name, 'pipeline_name')

    # A missing or empty entry falls back to {} so the .get() calls below
    # simply return None.
    materialization_data = event_dict.get('intermediateMaterialization') or {}

    # Resolve the event type from the typename.
    typename = event_dict['__typename']
    event_type = _handled_events().get(typename)
    if not event_type:
        raise Exception('unhandled event type %s' % typename)

    # Build the event-specific payload; it stays None for event types that
    # have no branch below.
    event_specific_data = None
    if event_type == DagsterEventType.STEP_OUTPUT:
        output_handle = StepOutputHandle(event_dict['step']['key'],
                                         event_dict['outputName'])
        event_specific_data = StepOutputData(
            step_output_handle=output_handle,
            value_repr=event_dict['valueRepr'],
            intermediate_materialization=Materialization(
                path=materialization_data.get('path'),
                description=materialization_data.get('description'),
            ),
        )

    elif event_type == DagsterEventType.STEP_SUCCESS:
        # The duration is not read from event_dict; 0.0 is used instead.
        event_specific_data = StepSuccessData(0.0)

    elif event_type == DagsterEventType.STEP_MATERIALIZATION:
        step_materialization = Materialization(
            path=materialization_data.get('path'),
            description=materialization_data.get('description'),
        )
        event_specific_data = StepMaterializationData(
            materialization=step_materialization)

    elif event_type == DagsterEventType.STEP_EXPECTATION_RESULT:
        expectation = event_dict['expectationResult']
        metadata_json = expectation['resultMetadataJsonString']
        event_specific_data = StepExpectationResultData(
            ExpectationResult(
                expectation['success'],
                expectation['name'],
                expectation['message'],
                json.loads(metadata_json) if metadata_json else None,
            ))

    elif event_type == DagsterEventType.STEP_FAILURE:
        # Only the error message is carried over; stack and class name are
        # left as None.
        failure_info = SerializableErrorInfo(event_dict['error']['message'],
                                             stack=None,
                                             cls_name=None)
        event_specific_data = StepFailureData(failure_info)

    step = event_dict['step']
    return DagsterEvent(
        event_type_value=event_type.value,
        pipeline_name=pipeline_name,
        step_key=step['key'],
        solid_handle=SolidHandle(step['solidHandleID'], None, None),
        step_kind_value=step['kind'],
        logging_tags=None,
        event_specific_data=event_specific_data,
    )
예제 #5
0
def _core_dagster_event_sequence_for_step(step_context):
    '''
    Yield the DagsterEvents for one step, in order: step start, an object
    store operation event for every input loaded as an ObjectStoreOperation,
    the input type-check events, the events derived from the user compute
    function, and finally step success with the measured duration.

    Exceptions raised while computing the step are not caught here.
    '''
    check.inst_param(step_context, 'step_context', SystemStepExecutionContext)

    yield DagsterEvent.step_start_event(step_context)

    loaded_values = _input_values_from_intermediates_manager(step_context)
    inputs = {}
    for input_name, input_value in loaded_values.items():
        if not isinstance(input_value, ObjectStoreOperation):
            inputs[input_name] = input_value
            continue

        # Report the store operation, then unwrap the loaded object.
        serializable_op = ObjectStoreOperation.serializable(
            input_value, value_name=input_name)
        yield DagsterEvent.object_store_operation(step_context,
                                                  serializable_op)
        inputs[input_name] = input_value.obj

    for input_name, input_value in inputs.items():
        type_check_events = check.generator(
            _type_checked_event_sequence_for_input(step_context, input_name,
                                                   input_value))
        for type_check_event in type_check_events:
            yield type_check_event

    with time_execution_scope() as timer_result:
        user_event_sequence = check.generator(
            _user_event_sequence_for_step_compute_fn(step_context, inputs))
        checked_user_events = check.generator(
            _step_output_error_checked_user_event_sequence(
                step_context, user_event_sequence))

        # It is important for this loop to be indented within the
        # timer block above in order for time to be recorded accurately.
        for user_event in checked_user_events:
            if isinstance(user_event, Output):
                output_events = _create_step_events_for_output(
                    step_context, user_event)
                for output_event in output_events:
                    yield output_event
            elif isinstance(user_event, Materialization):
                yield DagsterEvent.step_materialization(
                    step_context, user_event)
            elif isinstance(user_event, ExpectationResult):
                yield DagsterEvent.step_expectation_result(
                    step_context, user_event)
            else:
                check.failed(
                    'Unexpected event {event}, should have been caught earlier'
                    .format(event=user_event))

    success_data = StepSuccessData(duration_ms=timer_result.millis)
    yield DagsterEvent.step_success_event(step_context, success_data)
예제 #6
0
파일: test_active.py 프로젝트: keyz/dagster
def test_recover_with_step_in_flight():
    """Rebuild an active execution from events after a step had already started.

    The first execution is closed with a started-but-unfinished step, which
    is expected to raise. A second execution is then rebuilt from the
    STEP_START event and must report that step as possibly in flight.
    """
    foo_job = define_foo_job()

    expected_failure = pytest.raises(
        DagsterInvariantViolationError,
        match="Execution finished without completing the execution plan",
    )
    with expected_failure:
        with create_execution_plan(foo_job).start(RetryMode.DISABLED) as active_execution:
            pending_steps = active_execution.get_steps_to_execute()
            assert len(pending_steps) == 1
            step_1 = pending_steps[0]
            assert step_1.key == "foo_op"

            start_event = DagsterEvent(
                DagsterEventType.STEP_START.value,
                pipeline_name=foo_job.name,
                step_key=step_1.key,
            )
            active_execution.handle_event(start_event)

    # CRASH! - the active execution has been closed. Recover by spinning up a new one.

    with create_execution_plan(foo_job).start(RetryMode.DISABLED) as active_execution:
        replayed_events = [
            DagsterEvent(
                DagsterEventType.STEP_START.value,
                pipeline_name=foo_job.name,
                step_key=step_1.key,
            )
        ]
        possibly_in_flight_steps = active_execution.rebuild_from_events(replayed_events)
        assert possibly_in_flight_steps == [step_1]

        # The in-flight step must not be handed out for execution again.
        assert not active_execution.get_steps_to_execute()

        success_event = DagsterEvent(
            DagsterEventType.STEP_SUCCESS.value,
            pipeline_name=foo_job.name,
            event_specific_data=StepSuccessData(duration_ms=10.0),
            step_key=step_1.key,
        )
        active_execution.handle_event(success_event)
예제 #7
0
def _core_dagster_event_sequence_for_step(step_context, inputs, intermediates_manager):
    '''
    Yield the DagsterEvents for one step: step start, one event per result /
    materialization / expectation result produced by the compute function,
    and finally step success with the measured duration.

    Inputs are evaluated against the step's inputs before the start event is
    yielded. Exceptions raised while computing the step are not caught here.
    '''
    check.inst_param(step_context, 'step_context', SystemStepExecutionContext)
    check.dict_param(inputs, 'inputs', key_type=str)
    check.inst_param(intermediates_manager, 'intermediates_manager', IntermediatesManager)

    # do runtime type checks of inputs versus step inputs
    evaluated_inputs = {
        input_name: _get_evaluated_input(step_context.step, input_name, input_value)
        for input_name, input_value in inputs.items()
    }
    yield DagsterEvent.step_start_event(step_context)

    with time_execution_scope() as timer_result:
        compute_events = check.generator(
            _event_sequence_for_step_compute_fn(step_context, evaluated_inputs)
        )
        checked_events = check.generator(
            _step_output_error_checked_event_sequence(step_context, compute_events)
        )

        # It is important for this loop to be indented within the
        # timer block above in order for time to be recorded accurately.
        for event in checked_events:
            if isinstance(event, Result):
                yield _create_step_output_event(step_context, event, intermediates_manager)
            elif isinstance(event, Materialization):
                yield DagsterEvent.step_materialization(step_context, event)
            elif isinstance(event, ExpectationResult):
                yield DagsterEvent.step_expectation_result(step_context, event)
            else:
                check.failed(
                    'Unexpected event {event}, should have been caught earlier'.format(event=event)
                )

    success_data = StepSuccessData(duration_ms=timer_result.millis)
    yield DagsterEvent.step_success_event(step_context, success_data)
예제 #8
0
def core_dagster_event_sequence_for_step(step_context, prior_attempt_count):
    """
    Yield the DagsterEvents for one step attempt.

    The first event is a step restarted event when prior_attempt_count is
    positive and a step start event otherwise. It is followed by object store
    operation events for loaded inputs, input type-check events, the events
    derived from the user compute function, and finally step success with the
    measured duration.

    Exceptions raised while computing the step are not caught here.
    """
    check.inst_param(step_context, "step_context", SystemStepExecutionContext)
    check.int_param(prior_attempt_count, "prior_attempt_count")

    if prior_attempt_count <= 0:
        yield DagsterEvent.step_start_event(step_context)
    else:
        yield DagsterEvent.step_restarted_event(step_context,
                                                prior_attempt_count)

    inputs = {}
    loaded_inputs = _input_values_from_intermediate_storage(step_context)
    for input_name, input_value in loaded_inputs:
        if isinstance(input_value, ObjectStoreOperation):
            # Single store operation: report it, then unwrap the object.
            serializable_op = ObjectStoreOperation.serializable(
                input_value, value_name=input_name)
            yield DagsterEvent.object_store_operation(step_context,
                                                      serializable_op)
            inputs[input_name] = input_value.obj
        elif isinstance(input_value, MultipleStepOutputsListWrapper):
            # Several store operations feeding one input: report each one,
            # then collect the unwrapped objects into a list.
            for store_op in input_value:
                serializable_op = ObjectStoreOperation.serializable(
                    store_op, value_name=input_name)
                yield DagsterEvent.object_store_operation(
                    step_context, serializable_op)
            inputs[input_name] = [store_op.obj for store_op in input_value]
        else:
            inputs[input_name] = input_value

    for input_name, input_value in inputs.items():
        type_check_events = check.generator(
            _type_checked_event_sequence_for_input(step_context, input_name,
                                                   input_value))
        for type_check_event in type_check_events:
            yield type_check_event

    with time_execution_scope() as timer_result:
        user_event_sequence = check.generator(
            _user_event_sequence_for_step_compute_fn(step_context, inputs))
        checked_user_events = check.generator(
            _step_output_error_checked_user_event_sequence(
                step_context, user_event_sequence))

        # It is important for this loop to be indented within the
        # timer block above in order for time to be recorded accurately.
        for user_event in checked_user_events:
            if isinstance(user_event, Output):
                output_events = _create_step_events_for_output(
                    step_context, user_event)
                for output_event in output_events:
                    yield output_event
            elif isinstance(user_event,
                            (AssetMaterialization, Materialization)):
                yield DagsterEvent.step_materialization(
                    step_context, user_event)
            elif isinstance(user_event, ExpectationResult):
                yield DagsterEvent.step_expectation_result(
                    step_context, user_event)
            else:
                check.failed(
                    "Unexpected event {event}, should have been caught earlier"
                    .format(event=user_event))

    # We only want to log exactly one step success event or failure event if possible,
    # so wait to handle any interrupts (that normally log a failure event) until the success
    # event has finished
    with delay_interrupts():
        success_data = StepSuccessData(duration_ms=timer_result.millis)
        yield DagsterEvent.step_success_event(step_context, success_data)
예제 #9
0
파일: util.py 프로젝트: cy56/dagster
def dagster_event_from_dict(event_dict, pipeline_name):
    '''
    Build a DagsterEvent from a dict describing an event.

    Args:
        event_dict (dict): event description with str keys; must contain
            '__typename'. A non-empty 'step' entry supplies the step key,
            step kind and the dotted solid handle id.
        pipeline_name (str): pipeline name to stamp on the event.

    Raises:
        Exception: if '__typename' is not present in HANDLED_EVENTS.
    '''
    check.dict_param(event_dict, 'event_dict', key_type=str)
    check.str_param(pipeline_name, 'pipeline_name')

    def metadata_entries():
        # Falls back to an empty list when no entries are produced.
        return list(
            event_metadata_entries(event_dict.get('metadataEntries')) or [])

    def type_check_data():
        # Shared by the STEP_OUTPUT and STEP_INPUT branches.
        type_check = event_dict['typeCheck']
        return TypeCheckData(
            success=type_check['success'],
            label=type_check['label'],
            description=event_dict.get('description'),
            metadata_entries=metadata_entries(),
        )

    # Get event_type
    typename = event_dict['__typename']
    event_type = HANDLED_EVENTS.get(typename)
    if not event_type:
        raise Exception('unhandled event type %s' % typename)

    # Get event_specific_data; it stays None for event types without a
    # branch below.
    event_specific_data = None
    if event_type == DagsterEventType.STEP_OUTPUT:
        output_handle = StepOutputHandle(event_dict['step']['key'],
                                         event_dict['outputName'])
        event_specific_data = StepOutputData(
            step_output_handle=output_handle,
            type_check_data=type_check_data(),
        )

    elif event_type == DagsterEventType.STEP_INPUT:
        event_specific_data = StepInputData(
            input_name=event_dict['inputName'],
            type_check_data=type_check_data(),
        )

    elif event_type == DagsterEventType.STEP_SUCCESS:
        # The duration is not read from event_dict; 0.0 is used instead.
        event_specific_data = StepSuccessData(0.0)

    elif event_type == DagsterEventType.STEP_UP_FOR_RETRY:
        event_specific_data = StepRetryData(
            error=error_from_data(event_dict['retryError']),
            seconds_to_wait=event_dict['secondsToWait'],
        )

    elif event_type == DagsterEventType.STEP_MATERIALIZATION:
        event_specific_data = StepMaterializationData(
            materialization=materialization_from_data(
                event_dict['materialization']))

    elif event_type == DagsterEventType.STEP_EXPECTATION_RESULT:
        event_specific_data = StepExpectationResultData(
            expectation_result_from_data(event_dict['expectationResult']))

    elif event_type == DagsterEventType.STEP_FAILURE:
        failure_error = error_from_data(event_dict['error'])
        if event_dict.get('failureMetadata'):
            failure_metadata = event_dict['failureMetadata']
            user_failure_data = UserFailureData(
                label=failure_metadata['label'],
                description=failure_metadata['description'],
                metadata_entries=metadata_entries(),
            )
        else:
            user_failure_data = None
        event_specific_data = StepFailureData(failure_error,
                                              user_failure_data)

    elif event_type == DagsterEventType.ENGINE_EVENT:
        # Unlike the other branches, the metadata entries are passed to
        # list() here without an empty-list fallback.
        engine_entries = list(
            event_metadata_entries(event_dict.get('metadataEntries')))
        marker_start = event_dict.get('markerStart')
        marker_end = event_dict.get('markerEnd')
        if event_dict.get('engineError'):
            engine_error = error_from_data(event_dict['engineError'])
        else:
            engine_error = None
        event_specific_data = EngineEventData(
            metadata_entries=engine_entries,
            marker_start=marker_start,
            marker_end=marker_end,
            error=engine_error,
        )

    # We should update the GraphQL response so that clients don't need to do this handle parsing.
    # See: https://github.com/dagster-io/dagster/issues/1559
    handle = None
    step_key = None
    step_kind_value = None
    step = event_dict.get('step')
    if step:
        step_key = step['key']
        step_kind_value = step['kind']
        # Rebuild the nested handle from the outermost solid name inwards.
        for solid_name in step['solidHandleID'].split('.'):
            handle = SolidHandle(solid_name, parent=handle)

    return DagsterEvent(
        event_type_value=event_type.value,
        pipeline_name=pipeline_name,
        step_key=step_key,
        solid_handle=handle,
        step_kind_value=step_kind_value,
        logging_tags=None,
        event_specific_data=event_specific_data,
    )
예제 #10
0
def _stats_records(run_id):
    """Return a fixed sequence of event records for steps A-D of ``run_id``.

    Timestamps are expressed as offsets (in seconds) back from the current
    time. Step A succeeds, step B fails, step C is skipped, and step D emits
    three asset materializations and two expectation results before
    succeeding.
    """
    now = time.time()

    def record(step_key, seconds_ago, *event_args):
        # Thin wrapper that fills in the run id and turns the offset into an
        # absolute timestamp.
        return _event_record(run_id, step_key, now - seconds_ago, *event_args)

    def materialization(asset_key):
        return StepMaterializationData(
            AssetMaterialization(asset_key=asset_key))

    def expectation(success, label):
        return StepExpectationResultData(
            ExpectationResult(success=success, label=label))

    return [
        # Step A: starts and succeeds.
        record("A", 325, DagsterEventType.STEP_START),
        record("A", 225, DagsterEventType.STEP_SUCCESS,
               StepSuccessData(duration_ms=100000.0)),
        # Step B: starts and fails.
        record("B", 225, DagsterEventType.STEP_START),
        record("B", 175, DagsterEventType.STEP_FAILURE,
               StepFailureData(error=None, user_failure_data=None)),
        # Step C: starts and is skipped.
        record("C", 175, DagsterEventType.STEP_START),
        record("C", 150, DagsterEventType.STEP_SKIPPED),
        # Step D: materializations interleaved with expectation results.
        record("D", 150, DagsterEventType.STEP_START),
        record("D", 125, DagsterEventType.ASSET_MATERIALIZATION,
               materialization("mat_1")),
        record("D", 100, DagsterEventType.STEP_EXPECTATION_RESULT,
               expectation(True, "exp 1")),
        record("D", 75, DagsterEventType.ASSET_MATERIALIZATION,
               materialization("mat_2")),
        record("D", 50, DagsterEventType.STEP_EXPECTATION_RESULT,
               expectation(False, "exp 2")),
        record("D", 25, DagsterEventType.ASSET_MATERIALIZATION,
               materialization("mat_3")),
        _event_record(
            run_id,
            "D",
            now,
            DagsterEventType.STEP_SUCCESS,
            StepSuccessData(duration_ms=150000.0),
        ),
    ]
예제 #11
0
파일: util.py 프로젝트: sd2k/dagster
def dagster_event_from_dict(event_dict, pipeline_name):
    """Build a DagsterEvent from a dict describing an event.

    Args:
        event_dict (dict): event description with str keys; must contain
            "__typename". The optional "stepKey" and "solidHandleID" entries
            supply the step key and solid handle.
        pipeline_name (str): pipeline name to stamp on the event.

    Raises:
        Exception: if "__typename" is not present in HANDLED_EVENTS.
    """
    check.dict_param(event_dict, "event_dict", key_type=str)
    check.str_param(pipeline_name, "pipeline_name")

    # Get event_type
    typename = event_dict["__typename"]
    event_type = HANDLED_EVENTS.get(typename)
    if not event_type:
        raise Exception("unhandled event type %s" % typename)

    # Get event_specific_data; it stays None for event types without a
    # branch below.
    event_specific_data = None
    if event_type == DagsterEventType.STEP_OUTPUT:
        output_handle = StepOutputHandle(event_dict["stepKey"],
                                         event_dict["outputName"])
        output_type_check = event_dict["typeCheck"]
        event_specific_data = StepOutputData(
            step_output_handle=output_handle,
            type_check_data=TypeCheckData(
                success=output_type_check["success"],
                label=output_type_check["label"],
                description=event_dict.get("description"),
                metadata_entries=list(
                    event_metadata_entries(event_dict.get("metadataEntries"))
                    or []),
            ),
        )

    elif event_type == DagsterEventType.STEP_INPUT:
        input_name = event_dict["inputName"]
        input_type_check = event_dict["typeCheck"]
        event_specific_data = StepInputData(
            input_name=input_name,
            type_check_data=TypeCheckData(
                success=input_type_check["success"],
                label=input_type_check["label"],
                description=event_dict.get("description"),
                metadata_entries=list(
                    event_metadata_entries(event_dict.get("metadataEntries"))
                    or []),
            ),
        )

    elif event_type == DagsterEventType.STEP_SUCCESS:
        # The duration is not read from event_dict; 0.0 is used instead.
        event_specific_data = StepSuccessData(0.0)

    elif event_type == DagsterEventType.STEP_UP_FOR_RETRY:
        retry_error = error_from_data(event_dict["retryError"])
        event_specific_data = StepRetryData(
            error=retry_error,
            seconds_to_wait=event_dict["secondsToWait"],
        )

    elif event_type == DagsterEventType.STEP_MATERIALIZATION:
        event_specific_data = StepMaterializationData(
            materialization=materialization_from_data(
                event_dict["materialization"]))

    elif event_type == DagsterEventType.STEP_EXPECTATION_RESULT:
        event_specific_data = StepExpectationResultData(
            expectation_result_from_data(event_dict["expectationResult"]))

    elif event_type == DagsterEventType.STEP_FAILURE:
        failure_error = error_from_data(event_dict["error"])
        if event_dict.get("failureMetadata"):
            failure_metadata = event_dict["failureMetadata"]
            user_failure_data = UserFailureData(
                label=failure_metadata["label"],
                description=failure_metadata["description"],
                metadata_entries=list(
                    event_metadata_entries(event_dict.get("metadataEntries"))
                    or []),
            )
        else:
            user_failure_data = None
        event_specific_data = StepFailureData(failure_error,
                                              user_failure_data)

    elif event_type == DagsterEventType.ENGINE_EVENT:
        # Unlike the other branches, the metadata entries are passed to
        # list() here without an empty-list fallback.
        engine_entries = list(
            event_metadata_entries(event_dict.get("metadataEntries")))
        marker_start = event_dict.get("markerStart")
        marker_end = event_dict.get("markerEnd")
        if event_dict.get("engineError"):
            engine_error = error_from_data(event_dict["engineError"])
        else:
            engine_error = None
        event_specific_data = EngineEventData(
            metadata_entries=engine_entries,
            marker_start=marker_start,
            marker_end=marker_end,
            error=engine_error,
        )

    step_key = event_dict.get("stepKey")
    handle_id = event_dict.get("solidHandleID")
    solid_handle = SolidHandle.from_string(handle_id) if handle_id else None

    # at the time of writing this:
    # * 'COMPUTE` is the only step kind
    # * this code should get deleted in the near future as we move away from
    #   dagster-graphql CLI as what we invoke in dask/k8s/etc.
    step_kind_value = "COMPUTE" if step_key else None

    return DagsterEvent(
        event_type_value=event_type.value,
        pipeline_name=pipeline_name,
        step_key=step_key,
        solid_handle=solid_handle,
        step_kind_value=step_kind_value,
        logging_tags=None,
        event_specific_data=event_specific_data,
    )
예제 #12
0
def dagster_event_from_dict(event_dict, pipeline_name):
    '''
    Build a DagsterEvent from a dict describing a step event.

    Args:
        event_dict (dict): event description with str keys; must contain
            '__typename' and a 'step' dict with 'key', 'kind' and a dotted
            'solidHandleID'.
        pipeline_name (str): pipeline name to stamp on the event.

    Raises:
        Exception: if '__typename' is not one of the handled event types.
    '''
    check.dict_param(event_dict, 'event_dict', key_type=str)
    check.str_param(pipeline_name, 'pipeline_name')

    def metadata_entries():
        # Falls back to an empty list when no entries are produced.
        return list(
            event_metadata_entries(event_dict.get('metadataEntries')) or [])

    def type_check_data():
        # Shared by the STEP_OUTPUT and STEP_INPUT branches.
        type_check = event_dict['typeCheck']
        return TypeCheckData(
            success=type_check['success'],
            label=type_check['label'],
            description=event_dict.get('description'),
            metadata_entries=metadata_entries(),
        )

    # Get event_type
    typename = event_dict['__typename']
    event_type = _handled_events().get(typename)
    if not event_type:
        raise Exception('unhandled event type %s' % typename)

    # Get event_specific_data; it stays None for event types without a
    # branch below.
    event_specific_data = None
    if event_type == DagsterEventType.STEP_OUTPUT:
        output_handle = StepOutputHandle(event_dict['step']['key'],
                                         event_dict['outputName'])
        event_specific_data = StepOutputData(
            step_output_handle=output_handle,
            type_check_data=type_check_data(),
        )

    elif event_type == DagsterEventType.STEP_INPUT:
        event_specific_data = StepInputData(
            input_name=event_dict['inputName'],
            type_check_data=type_check_data(),
        )

    elif event_type == DagsterEventType.STEP_SUCCESS:
        # The duration is not read from event_dict; 0.0 is used instead.
        event_specific_data = StepSuccessData(0.0)

    elif event_type == DagsterEventType.STEP_MATERIALIZATION:
        event_specific_data = StepMaterializationData(
            materialization=materialization_from_data(
                event_dict['materialization']))

    elif event_type == DagsterEventType.STEP_EXPECTATION_RESULT:
        event_specific_data = StepExpectationResultData(
            expectation_result_from_data(event_dict['expectationResult']))

    elif event_type == DagsterEventType.STEP_FAILURE:
        # Only the error message is carried over; stack and class name are
        # left as None.
        error_info = SerializableErrorInfo(event_dict['error']['message'],
                                           stack=None,
                                           cls_name=None)
        if event_dict.get('failureMetadata'):
            failure_metadata = event_dict['failureMetadata']
            user_failure_data = UserFailureData(
                label=failure_metadata['label'],
                description=failure_metadata['description'],
                metadata_entries=metadata_entries(),
            )
        else:
            user_failure_data = None
        event_specific_data = StepFailureData(error_info, user_failure_data)

    # We should update the GraphQL response so that clients don't need to do this handle parsing.
    # See: https://github.com/dagster-io/dagster/issues/1559
    # Rebuild the nested handle from the outermost solid name inwards.
    handle = None
    for solid_name in event_dict['step']['solidHandleID'].split('.'):
        handle = SolidHandle(solid_name, definition_name=None, parent=handle)

    step = event_dict['step']
    return DagsterEvent(
        event_type_value=event_type.value,
        pipeline_name=pipeline_name,
        step_key=step['key'],
        solid_handle=handle,
        step_kind_value=step['kind'],
        logging_tags=None,
        event_specific_data=event_specific_data,
    )
예제 #13
0
def core_dagster_event_sequence_for_step(step_context, prior_attempt_count):
    """
    Run one step and stream the DagsterEvents it produces.

    Events are yielded in this order: a start event (or a restart event when
    ``prior_attempt_count`` is positive), any store-operation events emitted
    while loading inputs, the events produced by
    ``_type_checked_event_sequence_for_input`` for each loaded input, the
    events derived from whatever the step's compute function yields, and
    finally a step-success event carrying the measured duration.

    Exceptions raised while the step is computing are not caught here; they
    propagate to the caller.
    """
    check.inst_param(step_context, "step_context", SystemStepExecutionContext)
    check.int_param(prior_attempt_count, "prior_attempt_count")

    if prior_attempt_count <= 0:
        yield DagsterEvent.step_start_event(step_context)
    else:
        yield DagsterEvent.step_restarted_event(step_context,
                                                prior_attempt_count)

    resolved_inputs = {}
    for name, loaded in _load_input_values(step_context):
        # TODO yuhan retire ObjectStoreOperation https://github.com/dagster-io/dagster/issues/3043
        if isinstance(loaded, ObjectStoreOperation):
            serialized_op = ObjectStoreOperation.serializable(loaded,
                                                              value_name=name)
            yield DagsterEvent.object_store_operation(step_context,
                                                      serialized_op)
            resolved_inputs[name] = loaded.obj
        elif isinstance(loaded, FanInStepInputValuesWrapper):
            # Fan-in: every member is unwrapped independently and the input
            # becomes the list of unwrapped members, in iteration order.
            members = []
            for member in loaded:
                # TODO yuhan retire ObjectStoreOperation https://github.com/dagster-io/dagster/issues/3043
                if isinstance(member, ObjectStoreOperation):
                    serialized_op = ObjectStoreOperation.serializable(
                        member, value_name=name)
                    yield DagsterEvent.object_store_operation(
                        step_context, serialized_op)
                    members.append(member.obj)
                elif isinstance(member, AssetStoreOperation):
                    serialized_op = AssetStoreOperation.serializable(member)
                    yield DagsterEvent.asset_store_operation(
                        step_context, serialized_op)
                    members.append(member.obj)
                else:
                    # Not a store operation: the member is the value itself.
                    members.append(member)

            resolved_inputs[name] = members
        elif isinstance(loaded, AssetStoreOperation):
            serialized_op = AssetStoreOperation.serializable(loaded)
            yield DagsterEvent.asset_store_operation(step_context,
                                                     serialized_op)
            resolved_inputs[name] = loaded.obj
        else:
            resolved_inputs[name] = loaded

    for name, value in resolved_inputs.items():
        type_check_events = check.generator(
            _type_checked_event_sequence_for_input(step_context, name, value))
        for event in type_check_events:
            yield event

    with time_execution_scope() as timer_result:
        compute_events = check.generator(
            _user_event_sequence_for_step_compute_fn(step_context,
                                                     resolved_inputs))
        checked_events = check.generator(
            _step_output_error_checked_user_event_sequence(
                step_context, compute_events))

        # The generator is lazy, so it must be drained inside the timer
        # block for the recorded duration to cover the user's computation.
        for item in checked_events:
            if isinstance(item, (Output, DynamicOutput)):
                for event in _create_step_events_for_output(
                        step_context, item):
                    yield event
            elif isinstance(item, (AssetMaterialization, Materialization)):
                yield DagsterEvent.step_materialization(step_context, item)
            elif isinstance(item, ExpectationResult):
                yield DagsterEvent.step_expectation_result(step_context, item)
            else:
                check.failed(
                    "Unexpected event {event}, should have been caught earlier"
                    .format(event=item))

    success_data = StepSuccessData(duration_ms=timer_result.millis)
    yield DagsterEvent.step_success_event(step_context, success_data)
예제 #14
0
파일: util.py 프로젝트: varokas/dagster-1
def dagster_event_from_dict(event_dict, pipeline_name):
    """Build a DagsterEvent from a dict describing a single event.

    The dict's ``'__typename'`` entry is looked up in ``HANDLED_EVENTS`` to
    find the event type; the matching event-specific payload (if any) is then
    assembled from the remaining entries.

    Args:
        event_dict: dict with string keys; must contain ``'__typename'`` plus
            whatever keys the corresponding event type reads below.
        pipeline_name: name of the pipeline, stored on the returned event.

    Returns:
        A DagsterEvent. ``event_specific_data`` is None for event types that
        have no branch below.

    Raises:
        Exception: if ``HANDLED_EVENTS`` has no (truthy) entry for the
            typename.
    """
    check.dict_param(event_dict, 'event_dict', key_type=str)
    check.str_param(pipeline_name, 'pipeline_name')

    typename = event_dict['__typename']
    event_type = HANDLED_EVENTS.get(typename)
    if not event_type:
        raise Exception('unhandled event type %s' % typename)

    def _metadata_entries():
        # Shared by the type-check and failure payloads.
        return list(event_metadata_entries(event_dict.get('metadataEntries')) or [])

    def _type_check_data():
        # Only called from branches whose events carry a 'typeCheck' entry.
        type_check = event_dict['typeCheck']
        return TypeCheckData(
            success=type_check['success'],
            label=type_check['label'],
            description=event_dict.get('description'),
            metadata_entries=_metadata_entries(),
        )

    event_specific_data = None

    if event_type == DagsterEventType.STEP_OUTPUT:
        output_handle = StepOutputHandle(event_dict['stepKey'], event_dict['outputName'])
        event_specific_data = StepOutputData(
            step_output_handle=output_handle,
            type_check_data=_type_check_data(),
        )

    elif event_type == DagsterEventType.STEP_INPUT:
        input_name = event_dict['inputName']
        event_specific_data = StepInputData(
            input_name=input_name,
            type_check_data=_type_check_data(),
        )

    elif event_type == DagsterEventType.STEP_SUCCESS:
        # The duration is not read from the dict; 0.0 is always used.
        event_specific_data = StepSuccessData(0.0)

    elif event_type == DagsterEventType.STEP_UP_FOR_RETRY:
        retry_error = error_from_data(event_dict['retryError'])
        event_specific_data = StepRetryData(
            error=retry_error,
            seconds_to_wait=event_dict['secondsToWait'],
        )

    elif event_type == DagsterEventType.STEP_MATERIALIZATION:
        event_specific_data = StepMaterializationData(
            materialization=materialization_from_data(event_dict['materialization'])
        )

    elif event_type == DagsterEventType.STEP_EXPECTATION_RESULT:
        event_specific_data = StepExpectationResultData(
            expectation_result_from_data(event_dict['expectationResult'])
        )

    elif event_type == DagsterEventType.STEP_FAILURE:
        error = error_from_data(event_dict['error'])
        failure_metadata = event_dict.get('failureMetadata')
        if failure_metadata:
            user_failure_data = UserFailureData(
                label=failure_metadata['label'],
                description=failure_metadata['description'],
                metadata_entries=_metadata_entries(),
            )
        else:
            user_failure_data = None
        event_specific_data = StepFailureData(error, user_failure_data)

    elif event_type == DagsterEventType.ENGINE_EVENT:
        # Unlike the branches above, this one has no `or []` fallback around
        # the metadata entries.
        entries = list(event_metadata_entries(event_dict.get('metadataEntries')))
        marker_start = event_dict.get('markerStart')
        marker_end = event_dict.get('markerEnd')
        engine_error = event_dict.get('engineError')
        event_specific_data = EngineEventData(
            metadata_entries=entries,
            marker_start=marker_start,
            marker_end=marker_end,
            error=error_from_data(engine_error) if engine_error else None,
        )

    event_type_value = event_type.value
    step_key = event_dict.get('stepKey')
    handle_id = event_dict.get('solidHandleID')
    solid_handle = SolidHandle.from_string(handle_id) if handle_id else None

    # at the time of writing this:
    # * 'COMPUTE` is the only step kind
    # * this code should get deleted in the near future as we move away from
    #   dagster-graphql CLI as what we invoke in dask/k8s/etc.
    step_kind_value = 'COMPUTE' if step_key else None

    return DagsterEvent(
        event_type_value=event_type_value,
        pipeline_name=pipeline_name,
        step_key=step_key,
        solid_handle=solid_handle,
        step_kind_value=step_kind_value,
        logging_tags=None,
        event_specific_data=event_specific_data,
    )
예제 #15
0
def core_dagster_event_sequence_for_step(
    step_context: StepExecutionContext,
) -> Iterator[DagsterEvent]:
    """
    Execute the step within the step_context argument given the in-memory
    events. This function yields a sequence of DagsterEvents, but without
    catching any exceptions that have bubbled up during the computation
    of the step.

    Events are yielded in this order:

    1. a restart event if ``step_context.previous_attempt_count`` is positive,
       otherwise a start event;
    2. any DagsterEvents produced while loading each step input;
    3. the events from ``_type_checked_event_sequence_for_input`` for every
       loaded input;
    4. events derived from what the compute function yields (outputs,
       materializations, observations, expectation results, or DagsterEvents
       passed through unchanged);
    5. a step-success event carrying the measured duration in milliseconds.

    Args:
        step_context: execution context of the step to run.

    Yields:
        DagsterEvent instances as described above.
    """
    check.inst_param(step_context, "step_context", StepExecutionContext)

    if step_context.previous_attempt_count > 0:
        yield DagsterEvent.step_restarted_event(step_context, step_context.previous_attempt_count)
    else:
        yield DagsterEvent.step_start_event(step_context)

    inputs = {}

    for step_input in step_context.step.step_inputs:
        input_def = step_input.source.get_input_def(step_context.pipeline_def)
        dagster_type = input_def.dagster_type

        # Inputs whose type kind is NOTHING are skipped: no value is loaded
        # for them and they are not passed to the compute function.
        if dagster_type.kind == DagsterTypeKind.NOTHING:
            continue
        # Loading may interleave DagsterEvents with the loaded value; events
        # are forwarded and the (single) non-event item becomes the input.
        for event_or_input_value in ensure_gen(step_input.source.load_input_object(step_context)):
            if isinstance(event_or_input_value, DagsterEvent):
                yield event_or_input_value
            else:
                check.invariant(step_input.name not in inputs)
                inputs[step_input.name] = event_or_input_value

    for input_name, input_value in inputs.items():
        yield from check.generator(
            _type_checked_event_sequence_for_input(step_context, input_name, input_value)
        )

    input_lineage = step_context.get_input_lineage()

    # The core execution loop expects a compute generator in a specific format: a generator that
    # takes a context and dictionary of inputs as input, yields output events. If a solid definition
    # was generated from the @solid or @lambda_solid decorator, then compute_fn needs to be coerced
    # into this format. If the solid definition was created directly, then it is expected that the
    # compute_fn is already in this format.
    if isinstance(step_context.solid_def.compute_fn, DecoratedSolidFunction):
        core_gen = create_solid_compute_wrapper(step_context.solid_def)
    else:
        core_gen = step_context.solid_def.compute_fn

    with time_execution_scope() as timer_result:
        user_event_sequence = check.generator(
            execute_core_compute(
                step_context,
                inputs,
                core_gen,
            )
        )

        # It is important for this loop to be indented within the
        # timer block above in order for time to be recorded accurately.
        for user_event in check.generator(
            _step_output_error_checked_user_event_sequence(step_context, user_event_sequence)
        ):
            if isinstance(user_event, DagsterEvent):
                yield user_event
            elif isinstance(user_event, (Output, DynamicOutput)):
                yield from _type_check_and_store_output(step_context, user_event, input_lineage)
            # for now, I'm ignoring AssetMaterializations yielded manually, but we might want
            # to do something with these in the above path eventually
            elif isinstance(user_event, (AssetMaterialization, Materialization)):
                yield DagsterEvent.asset_materialization(step_context, user_event, input_lineage)
            elif isinstance(user_event, AssetObservation):
                yield DagsterEvent.asset_observation(step_context, user_event)
            elif isinstance(user_event, ExpectationResult):
                yield DagsterEvent.step_expectation_result(step_context, user_event)
            else:
                check.failed(
                    "Unexpected event {event}, should have been caught earlier".format(
                        event=user_event
                    )
                )

    yield DagsterEvent.step_success_event(
        step_context, StepSuccessData(duration_ms=timer_result.millis)
    )
예제 #16
0
파일: test_active.py 프로젝트: keyz/dagster
def test_recover_in_between_steps():
    """Recover an execution that stopped after its first step finished.

    The first active execution handles the start/output/success events for
    "foo_op" and is then closed early, which is expected to raise because the
    plan was not completed. A second active execution is rebuilt from those
    same events and must report "bar_op" as the only possibly-in-flight step,
    with nothing further to launch; "bar_op" is then driven to success.
    """
    two_op_job = define_two_op_job()

    foo_started = DagsterEvent(
        DagsterEventType.STEP_START.value,
        pipeline_name=two_op_job.name,
        step_key="foo_op",
    )
    foo_output = DagsterEvent(
        DagsterEventType.STEP_OUTPUT.value,
        pipeline_name=two_op_job.name,
        event_specific_data=StepOutputData(
            StepOutputHandle(step_key="foo_op", output_name="result")
        ),
        step_key="foo_op",
    )
    foo_succeeded = DagsterEvent(
        DagsterEventType.STEP_SUCCESS.value,
        pipeline_name=two_op_job.name,
        event_specific_data=StepSuccessData(duration_ms=10.0),
        step_key="foo_op",
    )
    events = [foo_started, foo_output, foo_succeeded]

    with pytest.raises(
        DagsterInvariantViolationError,
        match="Execution finished without completing the execution plan",
    ):
        with create_execution_plan(two_op_job).start(RetryMode.DISABLED) as active_execution:
            steps = active_execution.get_steps_to_execute()
            assert len(steps) == 1
            step_1 = steps[0]
            assert step_1.key == "foo_op"

            for recorded_event in events:
                active_execution.handle_event(recorded_event)

    # CRASH!- we've closed the active execution. Now we recover, spinning up a new one

    with create_execution_plan(two_op_job).start(RetryMode.DISABLED) as active_execution:
        possibly_in_flight_steps = active_execution.rebuild_from_events(events)
        assert len(possibly_in_flight_steps) == 1
        step_2 = possibly_in_flight_steps[0]
        assert step_2.key == "bar_op"

        assert not active_execution.get_steps_to_execute()

        bar_started = DagsterEvent(
            DagsterEventType.STEP_START.value,
            pipeline_name=two_op_job.name,
            step_key="bar_op",
        )
        active_execution.handle_event(bar_started)

        bar_succeeded = DagsterEvent(
            DagsterEventType.STEP_SUCCESS.value,
            pipeline_name=two_op_job.name,
            event_specific_data=StepSuccessData(duration_ms=10.0),
            step_key="bar_op",
        )
        active_execution.handle_event(bar_succeeded)
예제 #17
0
def _stats_records(run_id):
    """Return a fixed list of event records for ``run_id`` covering steps A-D.

    Timestamps are expressed relative to the current time: the first record
    is 325 seconds in the past and the last one is "now". The sequence is:
    step A starts and succeeds, step B starts and fails, step C starts and is
    skipped, and step D starts, emits three materializations interleaved with
    two expectation results (one passing, one failing), then succeeds.
    """
    now = time.time()

    def record(step_key, seconds_ago, event_type, *event_data):
        # Forward any payload positionally so that records without a payload
        # reach _event_record with exactly four arguments.
        return _event_record(run_id, step_key, now - seconds_ago, event_type, *event_data)

    return [
        # Step A: runs and succeeds.
        record('A', 325, DagsterEventType.STEP_START),
        record('A', 225, DagsterEventType.STEP_SUCCESS, StepSuccessData(duration_ms=100000.0)),
        # Step B: runs and fails.
        record('B', 225, DagsterEventType.STEP_START),
        record(
            'B',
            175,
            DagsterEventType.STEP_FAILURE,
            StepFailureData(error=None, user_failure_data=None),
        ),
        # Step C: starts and is skipped.
        record('C', 175, DagsterEventType.STEP_START),
        record('C', 150, DagsterEventType.STEP_SKIPPED),
        # Step D: materializations and expectations, then success.
        record('D', 150, DagsterEventType.STEP_START),
        record(
            'D',
            125,
            DagsterEventType.STEP_MATERIALIZATION,
            StepMaterializationData(Materialization(label='mat 1')),
        ),
        record(
            'D',
            100,
            DagsterEventType.STEP_EXPECTATION_RESULT,
            StepExpectationResultData(ExpectationResult(success=True, label='exp 1')),
        ),
        record(
            'D',
            75,
            DagsterEventType.STEP_MATERIALIZATION,
            StepMaterializationData(Materialization(label='mat 2')),
        ),
        record(
            'D',
            50,
            DagsterEventType.STEP_EXPECTATION_RESULT,
            StepExpectationResultData(ExpectationResult(success=False, label='exp 2')),
        ),
        record(
            'D',
            25,
            DagsterEventType.STEP_MATERIALIZATION,
            StepMaterializationData(Materialization(label='mat 3')),
        ),
        record('D', 0, DagsterEventType.STEP_SUCCESS, StepSuccessData(duration_ms=150000.0)),
    ]
예제 #18
0
def core_dagster_event_sequence_for_step(
        step_context: SystemStepExecutionContext,
        prior_attempt_count: int) -> Iterator[DagsterEvent]:
    """
    Execute the step within the step_context argument given the in-memory
    events. This function yields a sequence of DagsterEvents, but without
    catching any exceptions that have bubbled up during the computation
    of the step.

    Events are yielded in this order:

    1. a restart event if ``prior_attempt_count`` is positive, otherwise a
       start event;
    2. any DagsterEvents produced while loading each step input;
    3. the events from ``_type_checked_event_sequence_for_input`` for every
       loaded input;
    4. events derived from what the compute function yields (outputs,
       materializations, expectation results);
    5. a step-success event carrying the measured duration in milliseconds.

    Args:
        step_context: execution context of the step to run.
        prior_attempt_count: number of earlier attempts at this step.

    Yields:
        DagsterEvent instances as described above.
    """
    check.inst_param(step_context, "step_context", SystemStepExecutionContext)
    check.int_param(prior_attempt_count, "prior_attempt_count")
    if prior_attempt_count > 0:
        yield DagsterEvent.step_restarted_event(step_context,
                                                prior_attempt_count)
    else:
        yield DagsterEvent.step_start_event(step_context)

    inputs = {}

    for step_input in step_context.step.step_inputs:
        input_def = step_input.source.get_input_def(step_context.pipeline_def)
        dagster_type = input_def.dagster_type

        # Inputs whose type kind is NOTHING are skipped: no value is loaded
        # for them and they are not passed to the compute function.
        if dagster_type.kind == DagsterTypeKind.NOTHING:
            continue

        # Loading may interleave DagsterEvents with the loaded value; events
        # are forwarded and the (single) non-event item becomes the input.
        for event_or_input_value in ensure_gen(
                step_input.source.load_input_object(step_context)):
            if isinstance(event_or_input_value, DagsterEvent):
                yield event_or_input_value
            else:
                check.invariant(step_input.name not in inputs)
                inputs[step_input.name] = event_or_input_value

    for input_name, input_value in inputs.items():
        yield from check.generator(
            _type_checked_event_sequence_for_input(step_context, input_name,
                                                   input_value))

    with time_execution_scope() as timer_result:
        user_event_sequence = check.generator(
            _user_event_sequence_for_step_compute_fn(step_context, inputs))

        # It is important for this loop to be indented within the
        # timer block above in order for time to be recorded accurately.
        for user_event in check.generator(
                _step_output_error_checked_user_event_sequence(
                    step_context, user_event_sequence)):

            if isinstance(user_event, (Output, DynamicOutput)):
                yield from _type_check_and_store_output(
                    step_context, user_event)
            elif isinstance(user_event,
                            (AssetMaterialization, Materialization)):
                yield DagsterEvent.step_materialization(
                    step_context, user_event)
            elif isinstance(user_event, ExpectationResult):
                yield DagsterEvent.step_expectation_result(
                    step_context, user_event)
            else:
                check.failed(
                    "Unexpected event {event}, should have been caught earlier"
                    .format(event=user_event))

    yield DagsterEvent.step_success_event(
        step_context, StepSuccessData(duration_ms=timer_result.millis))
예제 #19
0
def dagster_event_from_dict(event_dict, pipeline_name):
    """Build a DagsterEvent from a dict describing a single step event.

    The dict's ``'__typename'`` entry is looked up in the mapping returned by
    ``_handled_events()`` to find the event type; the matching event-specific
    payload (if any) is then assembled from the remaining entries. Step
    information is read from the nested ``event_dict['step']`` dict.

    Args:
        event_dict: dict with string keys; must contain ``'__typename'``,
            ``'step'`` and whatever keys the corresponding event type reads.
        pipeline_name: name of the pipeline, stored on the returned event.

    Returns:
        A DagsterEvent. ``event_specific_data`` is None for event types that
        have no branch below.

    Raises:
        Exception: if there is no (truthy) handled event type for the
            typename.
    """
    check.dict_param(event_dict, 'event_dict', key_type=str)
    check.str_param(pipeline_name, 'pipeline_name')

    handled = _handled_events()
    typename = event_dict['__typename']
    event_type = handled.get(typename)
    if not event_type:
        raise Exception('unhandled event type %s' % typename)

    def _metadata_entries():
        # Shared by the type-check and failure payloads.
        return list(
            event_metadata_entries(event_dict.get('metadataEntries')) or [])

    def _type_check_data():
        # Only called from branches whose events carry a 'typeCheck' entry.
        type_check = event_dict['typeCheck']
        return TypeCheckData(
            success=type_check['success'],
            label=type_check['label'],
            description=event_dict.get('description'),
            metadata_entries=_metadata_entries(),
        )

    event_specific_data = None

    if event_type == DagsterEventType.STEP_OUTPUT:
        output_handle = StepOutputHandle(event_dict['step']['key'],
                                         event_dict['outputName'])
        value_repr = event_dict['valueRepr']
        event_specific_data = StepOutputData(
            step_output_handle=output_handle,
            value_repr=value_repr,
            type_check_data=_type_check_data(),
        )

    elif event_type == DagsterEventType.STEP_INPUT:
        input_name = event_dict['inputName']
        value_repr = event_dict['valueRepr']
        event_specific_data = StepInputData(
            input_name=input_name,
            value_repr=value_repr,
            type_check_data=_type_check_data(),
        )

    elif event_type == DagsterEventType.STEP_SUCCESS:
        # The duration is not read from the dict; 0.0 is always used.
        event_specific_data = StepSuccessData(0.0)

    elif event_type == DagsterEventType.STEP_MATERIALIZATION:
        event_specific_data = StepMaterializationData(
            materialization=materialization_from_data(
                event_dict['materialization']))

    elif event_type == DagsterEventType.STEP_EXPECTATION_RESULT:
        event_specific_data = StepExpectationResultData(
            expectation_result_from_data(event_dict['expectationResult']))

    elif event_type == DagsterEventType.STEP_FAILURE:
        # Only the error message is carried over; stack and class name are
        # set to None.
        error_info = SerializableErrorInfo(event_dict['error']['message'],
                                           stack=None,
                                           cls_name=None)
        failure_metadata = event_dict.get('failureMetadata')
        if failure_metadata:
            user_failure_data = UserFailureData(
                label=failure_metadata['label'],
                description=failure_metadata['description'],
                metadata_entries=_metadata_entries(),
            )
        else:
            user_failure_data = None
        event_specific_data = StepFailureData(error_info, user_failure_data)

    event_type_value = event_type.value
    step_info = event_dict['step']
    step_key = step_info['key']
    # The handle id string is used as the handle's first argument as-is.
    solid_handle = SolidHandle(step_info['solidHandleID'], None, None)
    step_kind_value = step_info['kind']

    return DagsterEvent(
        event_type_value=event_type_value,
        pipeline_name=pipeline_name,
        step_key=step_key,
        solid_handle=solid_handle,
        step_kind_value=step_kind_value,
        logging_tags=None,
        event_specific_data=event_specific_data,
    )