Example #1
    def test_event_log_get_stats_for_run(self, storage):
        import math

        enqueued_time = time.time()
        launched_time = enqueued_time + 20
        start_time = launched_time + 50
        storage.store_event(
            EventRecord(
                None,
                "message",
                "debug",
                "",
                DEFAULT_RUN_ID,
                enqueued_time,
                dagster_event=DagsterEvent(
                    DagsterEventType.PIPELINE_ENQUEUED.value,
                    "nonce",
                ),
            ))
        storage.store_event(
            EventRecord(
                None,
                "message",
                "debug",
                "",
                DEFAULT_RUN_ID,
                launched_time,
                dagster_event=DagsterEvent(
                    DagsterEventType.PIPELINE_STARTING.value,
                    "nonce",
                ),
            ))
        storage.store_event(
            EventRecord(
                None,
                "message",
                "debug",
                "",
                DEFAULT_RUN_ID,
                start_time,
                dagster_event=DagsterEvent(
                    DagsterEventType.PIPELINE_START.value,
                    "nonce",
                ),
            ))
        assert math.isclose(
            storage.get_stats_for_run(DEFAULT_RUN_ID).enqueued_time,
            enqueued_time)
        assert math.isclose(
            storage.get_stats_for_run(DEFAULT_RUN_ID).launch_time,
            launched_time)
        assert math.isclose(
            storage.get_stats_for_run(DEFAULT_RUN_ID).start_time, start_time)
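The positional arguments in these EventRecord calls appear to correspond to the keyword form used in Examples #14 and #15 below. A minimal sketch of the keyword-style equivalent of the first store_event call, assuming that field order (error_info, message, level, user_message, run_id, timestamp):

    # Hypothetical keyword-style equivalent of the first store_event call above;
    # the parameter names are taken from the keyword-based examples further down.
    record = EventRecord(
        error_info=None,
        message="message",
        level="debug",  # other examples pass a logging constant such as logging.INFO
        user_message="",
        run_id=DEFAULT_RUN_ID,
        timestamp=enqueued_time,
        dagster_event=DagsterEvent(DagsterEventType.PIPELINE_ENQUEUED.value, "nonce"),
    )
    storage.store_event(record)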
Example #2
def test_event_log_get_stats_for_run(event_storage_factory_cm_fn):
    import math

    with event_storage_factory_cm_fn() as storage:
        enqueued_time = time.time()
        launched_time = enqueued_time + 20
        start_time = launched_time + 50
        storage.store_event(
            EventRecord(
                None,
                "message",
                "debug",
                "",
                "foo",
                enqueued_time,
                dagster_event=DagsterEvent(
                    DagsterEventType.PIPELINE_ENQUEUED.value,
                    "nonce",
                ),
            ))
        storage.store_event(
            EventRecord(
                None,
                "message",
                "debug",
                "",
                "foo",
                launched_time,
                dagster_event=DagsterEvent(
                    DagsterEventType.PIPELINE_STARTING.value,
                    "nonce",
                ),
            ))
        storage.store_event(
            EventRecord(
                None,
                "message",
                "debug",
                "",
                "foo",
                start_time,
                dagster_event=DagsterEvent(
                    DagsterEventType.PIPELINE_START.value,
                    "nonce",
                ),
            ))
        assert math.isclose(
            storage.get_stats_for_run("foo").enqueued_time, enqueued_time)
        assert math.isclose(
            storage.get_stats_for_run("foo").launch_time, launched_time)
        assert math.isclose(
            storage.get_stats_for_run("foo").start_time, start_time)
Example #3
    def test_correct_timezone(self, storage):
        curr_time = time.time()

        event = EventRecord(
            None,
            "Message2",
            "debug",
            "",
            "foo",
            curr_time,
            dagster_event=DagsterEvent(
                DagsterEventType.PIPELINE_START.value,
                "nonce",
                event_specific_data=EngineEventData.in_process(999),
            ),
        )

        storage.store_event(event)

        logs = storage.get_logs_for_run("foo")

        assert len(logs) == 1

        log = logs[0]

        stats = storage.get_stats_for_run("foo")

        assert int(log.timestamp) == int(stats.start_time)
        assert int(log.timestamp) == int(curr_time)
Example #4
    def test_event_log_storage_store_with_multiple_runs(self, storage):
        runs = ["foo", "bar", "baz"]
        for run_id in runs:
            assert len(storage.get_logs_for_run(run_id)) == 0
            storage.store_event(
                EventRecord(
                    None,
                    "Message2",
                    "debug",
                    "",
                    run_id,
                    time.time(),
                    dagster_event=DagsterEvent(
                        DagsterEventType.STEP_SUCCESS.value,
                        "nonce",
                        event_specific_data=StepSuccessData(duration_ms=100.0),
                    ),
                ))

        for run_id in runs:
            assert len(storage.get_logs_for_run(run_id)) == 1
            assert storage.get_stats_for_run(run_id).steps_succeeded == 1

        if self.can_wipe():
            storage.wipe()
            for run_id in runs:
                assert len(storage.get_logs_for_run(run_id)) == 0
Example #5
def _event_record(run_id,
                  solid_name,
                  timestamp,
                  event_type,
                  event_specific_data=None):
    pipeline_name = "pipeline_name"
    solid_handle = SolidHandle(solid_name, None)
    step_handle = StepHandle(solid_handle)
    return EventRecord(
        None,
        "",
        "debug",
        "",
        run_id,
        timestamp,
        step_key=step_handle.to_key(),
        pipeline_name=pipeline_name,
        dagster_event=DagsterEvent(
            event_type.value,
            pipeline_name,
            solid_handle=solid_handle,
            step_handle=step_handle,
            event_specific_data=event_specific_data,
        ),
    )
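A hypothetical usage of this helper, assuming an event log storage fixture; the run id and solid name are illustrative, and StepSuccessData is used as in Example #4:

    # Store a synthetic step-success event and confirm it is counted in the run stats.
    storage.store_event(
        _event_record(
            "foo_run",
            "my_solid",
            time.time(),
            DagsterEventType.STEP_SUCCESS,
            event_specific_data=StepSuccessData(duration_ms=100.0),
        ))
    assert storage.get_stats_for_run("foo_run").steps_succeeded == 1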
Example #6
    def _dequeue_run(self, instance, run, location_manager):
        external_pipeline = location_manager.get_external_pipeline_from_run(
            run)
        # double check that the run is still queued before dequeuing
        reloaded_run = instance.get_run_by_id(run.run_id)

        if reloaded_run.status != PipelineRunStatus.QUEUED:
            self._logger.info(
                "Run {run_id} is now {status} instead of QUEUED, skipping".
                format(run_id=reloaded_run.run_id, status=reloaded_run.status))
            return

        dequeued_event = DagsterEvent(
            event_type_value=DagsterEventType.PIPELINE_DEQUEUED.value,
            pipeline_name=run.pipeline_name,
        )
        event_record = EventRecord(
            message="",
            user_message="",
            level=logging.INFO,
            pipeline_name=run.pipeline_name,
            run_id=run.run_id,
            error_info=None,
            timestamp=time.time(),
            dagster_event=dequeued_event,
        )
        instance.handle_new_event(event_record)

        instance.launch_run(run.run_id, external_pipeline)
Example #7
def test_event_sink_serialization():
    event_records = []

    class TestEventSink(EventSink):
        def on_dagster_event(self, dagster_event):
            event_records.append(dagster_event)

        def on_log_message(self, log_message):
            event_records.append(log_message)

    @lambda_solid
    def no():
        raise Exception('no')

    @pipeline
    def fails():
        no()

    sink = TestEventSink()

    # basic success
    execute_pipeline(define_simple(), run_config=RunConfig(event_sink=sink))
    # basic failure
    execute_pipeline(
        fails,
        run_config=RunConfig(event_sink=sink),
        environment_dict={
            'execution': {
                'in_process': {
                    'config': {
                        'raise_on_error': False
                    }
                }
            }
        },
    )
    # multiproc
    execute_pipeline(
        ExecutionTargetHandle.for_pipeline_fn(
            define_simple).build_pipeline_definition(),
        run_config=RunConfig(event_sink=sink),
        environment_dict={
            'storage': {
                'filesystem': {}
            },
            'execution': {
                'multiprocess': {}
            }
        },
    )
    # kitchen sink
    execute_pipeline(many_events, run_config=RunConfig(event_sink=sink))

    for dagster_event in event_records:
        payload = dagster_event.to_json()
        clone = EventRecord.from_json(payload)
        assert clone == dagster_event
Example #8
def create_test_event_log_record(message: str, run_id: str = DEFAULT_RUN_ID):
    return EventRecord(
        None,
        message,
        "debug",
        "",
        run_id,
        time.time(),
        dagster_event=DagsterEvent(
            DagsterEventType.ENGINE_EVENT.value,
            "nonce",
            event_specific_data=EngineEventData.in_process(999),
        ),
    )
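Factory helpers like this one are typically used to seed the event log in tests; a hypothetical usage, assuming a storage fixture:

    # Store a few engine events for the default run and read them back.
    for i in range(3):
        storage.store_event(create_test_event_log_record(str(i)))
    assert len(storage.get_logs_for_run(DEFAULT_RUN_ID)) == 3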
Example #9
def evt(name):
    return EventRecord(
        None,
        name,
        "debug",
        "",
        "foo",
        time.time(),
        dagster_event=DagsterEvent(
            DagsterEventType.ENGINE_EVENT.value,
            "nonce",
            event_specific_data=EngineEventData.in_process(999),
        ),
    )
Example #10
def create_event(count: int, run_id: str = RUN_ID):
    return EventRecord(
        None,
        str(count),
        "debug",
        "",
        run_id,
        time.time(),
        dagster_event=DagsterEvent(
            DagsterEventType.ENGINE_EVENT.value,
            "nonce",
            event_specific_data=EngineEventData.in_process(999),
        ),
    )
Example #11
def _materialization_event_record(run_id, asset_key):
    return EventRecord(
        None,
        "",
        "debug",
        "",
        run_id,
        time.time() - 25,
        step_key="my_step_key",
        pipeline_name="my_pipeline",
        dagster_event=DagsterEvent(
            DagsterEventType.ASSET_MATERIALIZATION.value,
            "my_pipeline",
            step_key="my_step_key",
            event_specific_data=StepMaterializationData(AssetMaterialization(asset_key=asset_key)),
        ),
    )
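A hypothetical usage, assuming a storage fixture and that AssetKey is imported from dagster:

    # Record materializations for two distinct assets under the same run.
    storage.store_event(_materialization_event_record("run1", AssetKey(["asset_1"])))
    storage.store_event(_materialization_event_record("run1", AssetKey(["asset_2"])))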
Example #12
    def test_event_log_storage_store_events_and_wipe(self, storage):
        assert len(storage.get_logs_for_run(DEFAULT_RUN_ID)) == 0
        storage.store_event(
            EventRecord(
                None,
                "Message2",
                "debug",
                "",
                DEFAULT_RUN_ID,
                time.time(),
                dagster_event=DagsterEvent(
                    DagsterEventType.ENGINE_EVENT.value,
                    "nonce",
                    event_specific_data=EngineEventData.in_process(999),
                ),
            ))
        assert len(storage.get_logs_for_run(DEFAULT_RUN_ID)) == 1
        assert storage.get_stats_for_run(DEFAULT_RUN_ID)
        storage.wipe()
        assert len(storage.get_logs_for_run(DEFAULT_RUN_ID)) == 0
Example #13
def test_event_log_storage_store_events_and_wipe(event_storage_factory_cm_fn):
    with event_storage_factory_cm_fn() as storage:
        assert len(storage.get_logs_for_run("foo")) == 0
        storage.store_event(
            EventRecord(
                None,
                "Message2",
                "debug",
                "",
                "foo",
                time.time(),
                dagster_event=DagsterEvent(
                    DagsterEventType.ENGINE_EVENT.value,
                    "nonce",
                    event_specific_data=EngineEventData.in_process(999),
                ),
            ))
        assert len(storage.get_logs_for_run("foo")) == 1
        assert storage.get_stats_for_run("foo")
        storage.wipe()
        assert len(storage.get_logs_for_run("foo")) == 0
Example #14
    def submit_run(self, pipeline_run, external_pipeline):
        check.inst_param(pipeline_run, "pipeline_run", PipelineRun)
        check.inst_param(external_pipeline, "external_pipeline", ExternalPipeline)
        check.invariant(pipeline_run.status == PipelineRunStatus.NOT_STARTED)

        enqueued_event = DagsterEvent(
            event_type_value=DagsterEventType.PIPELINE_ENQUEUED.value,
            pipeline_name=pipeline_run.pipeline_name,
        )
        event_record = EventRecord(
            message="",
            user_message="",
            level=logging.INFO,
            pipeline_name=pipeline_run.pipeline_name,
            run_id=pipeline_run.run_id,
            error_info=None,
            timestamp=time.time(),
            dagster_event=enqueued_event,
        )
        self._instance.handle_new_event(event_record)

        return self._instance.get_run_by_id(pipeline_run.run_id)
Example #15
def construct_step_failure_event_and_handle(pipeline_run, step_key, err, instance):
    step_failure_event = DagsterEvent(
        event_type_value=DagsterEventType.STEP_FAILURE.value,
        pipeline_name=pipeline_run.pipeline_name,
        step_key=step_key,
        event_specific_data=StepFailureData(
            error=serializable_error_info_from_exc_info(sys.exc_info()),
            user_failure_data=UserFailureData(label="K8sError"),
        ),
    )
    event_record = EventRecord(
        message=str(err),
        user_message=str(err),
        level=logging.ERROR,
        pipeline_name=pipeline_run.pipeline_name,
        run_id=pipeline_run.run_id,
        error_info=None,
        step_key=step_key,
        timestamp=time.time(),
        dagster_event=step_failure_event,
    )
    instance.handle_new_event(event_record)
    return step_failure_event
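Because the event's error payload is built from sys.exc_info(), this function is presumably meant to be called while an exception is being handled; a hypothetical call site (run_step is illustrative, not part of the example above):

    try:
        run_step(step_key)  # hypothetical step execution
    except Exception as err:
        # sys.exc_info() is still populated here, so the StepFailureData
        # constructed inside the helper captures the active exception.
        construct_step_failure_event_and_handle(pipeline_run, step_key, err, instance)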