Example #1
def test_ecs_run_launcher_inits():
    DagsterInstance.local_temp(
        overrides={
            "run_launcher": {
                "module": "dagster_aws.ecs.launcher",
                "class": "ECSRunLauncher"
            }
        })
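
A possible follow-up, sketched here as an assumption rather than taken from the source: the overrides dict mirrors the run_launcher block of a dagster.yaml file, and the configured launcher can be inspected on the resulting instance.

def test_ecs_run_launcher_is_configured_sketch():
    # Sketch only: same overrides as above; the concrete launcher class lives in
    # dagster_aws.ecs.launcher and is resolved from the "module"/"class" pair.
    instance = DagsterInstance.local_temp(
        overrides={
            "run_launcher": {
                "module": "dagster_aws.ecs.launcher",
                "class": "ECSRunLauncher",
            }
        }
    )
    # Accessing run_launcher forces the configured launcher to be constructed.
    assert instance.run_launcher is not None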
Example #2
def test_workspace_yamls():
    with load_workspace_process_context_from_yaml_paths(
        DagsterInstance.ephemeral(),
        [
            file_relative_path(
                __file__,
                "../../../docs_snippets/concepts/repositories_workspaces/workspace.yaml",
            )
        ],
    ) as workspace_process_context:
        assert workspace_process_context.repository_locations_count == 1

    with load_workspace_process_context_from_yaml_paths(
        DagsterInstance.ephemeral(),
        [
            file_relative_path(
                __file__,
                "../../../docs_snippets/concepts/repositories_workspaces/workspace_working_directory.yaml",
            )
        ],
    ) as workspace_process_context:
        assert workspace_process_context.repository_locations_count == 2

    with load_workspace_process_context_from_yaml_paths(
        DagsterInstance.ephemeral(),
        [
            file_relative_path(
                __file__,
                "../../../docs_snippets/concepts/repositories_workspaces/workspace_one_repository.yaml",
            )
        ],
    ) as workspace_process_context:
        assert workspace_process_context.repository_locations_count == 1

    with load_workspace_process_context_from_yaml_paths(
        DagsterInstance.ephemeral(),
        [
            file_relative_path(
                __file__,
                "../../../docs_snippets/concepts/repositories_workspaces/workspace_python_package.yaml",
            )
        ],
    ) as workspace_process_context:
        assert workspace_process_context.repository_locations_count == 1

    with load_workspace_process_context_from_yaml_paths(
        DagsterInstance.ephemeral(),
        [
            file_relative_path(
                __file__,
                "../../../docs_snippets/concepts/repositories_workspaces/workspace_grpc.yaml",
            )
        ],
    ) as workspace_process_context:
        assert workspace_process_context.repository_locations_count == 1
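
Since load_workspace_process_context_from_yaml_paths takes a list of paths, a hedged variation (an assumption, not from the source) is to load two of the single-location files together and expect their locations to be combined:

def test_workspace_yamls_combined_sketch():
    # Sketch only: assumes the two files declare differently named repository locations,
    # so loading them together yields two locations.
    with load_workspace_process_context_from_yaml_paths(
        DagsterInstance.ephemeral(),
        [
            file_relative_path(
                __file__,
                "../../../docs_snippets/concepts/repositories_workspaces/workspace.yaml",
            ),
            file_relative_path(
                __file__,
                "../../../docs_snippets/concepts/repositories_workspaces/workspace_grpc.yaml",
            ),
        ],
    ) as workspace_process_context:
        assert workspace_process_context.repository_locations_count == 2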
Example #3
def monitor_starting_run(instance: DagsterInstance, run, logger):
    check.invariant(run.status == PipelineRunStatus.STARTING)
    run_stats = instance.get_run_stats(run.run_id)

    check.invariant(run_stats.launch_time is not None,
                    "Run in status STARTING doesn't have a launch time.")
    if time.time() - run_stats.launch_time >= instance.run_monitoring_start_timeout_seconds:
        msg = (
            f"Run {run.run_id} has been running for {time.time() - run_stats.launch_time} seconds, "
            f"which is longer than the timeout of {instance.run_monitoring_start_timeout_seconds} seconds to start. "
            "Marking run failed")
        logger.info(msg)
        instance.report_run_failed(run, msg)
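
monitor_starting_run only needs get_run_stats, run_monitoring_start_timeout_seconds, and report_run_failed from the instance, so it can be exercised with mocks. The test below is a sketch under those assumptions, not part of the source.

def test_monitor_starting_run_marks_timed_out_run_failed_sketch():
    # Sketch only: a run that entered STARTING 60 seconds ago with a 30-second
    # timeout should be reported as failed.
    instance = mock.MagicMock()
    instance.run_monitoring_start_timeout_seconds = 30
    instance.get_run_stats.return_value = mock.MagicMock(launch_time=time.time() - 60)

    run = mock.MagicMock(status=PipelineRunStatus.STARTING, run_id="my_run_id")
    logger = mock.MagicMock()

    monitor_starting_run(instance, run, logger)

    instance.report_run_failed.assert_called_once()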
def test_run_status_sensor():
    @run_status_sensor(pipeline_run_status=DagsterRunStatus.SUCCESS)
    def status_sensor(context):
        assert context.dagster_event.event_type_value == "PIPELINE_SUCCESS"

    @op
    def succeeds():
        return 1

    @job
    def my_job_2():
        succeeds()

    instance = DagsterInstance.ephemeral()
    result = my_job_2.execute_in_process(instance=instance, raise_on_error=False)

    dagster_run = result.dagster_run
    dagster_event = result.get_job_success_event()

    context = build_run_status_sensor_context(
        sensor_name="status_sensor",
        dagster_instance=instance,
        dagster_run=dagster_run,
        dagster_event=dagster_event,
    )

    status_sensor(context)
Example #5
def dagit_debug_command(input_files, port, asgi):
    debug_payloads = []
    for input_file in input_files:
        click.echo("Loading {} ...".format(input_file))
        with GzipFile(input_file, "rb") as file:
            blob = file.read().decode("utf-8")
            debug_payload = deserialize_json_to_dagster_namedtuple(blob)

            check.invariant(isinstance(debug_payload, DebugRunPayload))

            click.echo("\trun_id: {} \n\tdagster version: {}".format(
                debug_payload.pipeline_run.run_id, debug_payload.version))
            debug_payloads.append(debug_payload)

    instance = DagsterInstance.ephemeral(preload=debug_payloads)

    if asgi:
        uvicorn.run(
            DagitWebserver(
                WorkspaceProcessContext(
                    instance, None,
                    version=__version__)).create_asgi_app(debug=True),
            port=port,
        )
    else:
        host_dagit_ui_with_workspace_process_context(
            workspace_process_context=WorkspaceProcessContext(
                instance, None, version=__version__),
            port=port,
            port_lookup=True,
            host=DEFAULT_DAGIT_HOST,
            path_prefix="",
        )
Example #6
def test_yielded_results_config_pandas(snapshot):
    run_config = {
        "resources": {
            "ge_data_context": {
                "config": {
                    "ge_root_dir":
                    file_relative_path(__file__, "./great_expectations")
                }
            }
        }
    }
    result = execute_pipeline(
        reconstructable(hello_world_pandas_pipeline),
        run_config=run_config,
        mode="basic",
        instance=DagsterInstance.local_temp(),
    )
    assert result.result_for_solid(
        "reyielder").output_value()[0]["success_percent"] == 100
    expectations = result.result_for_solid(
        "ge_validation_solid").expectation_results_during_compute
    assert len(expectations) == 1
    mainexpect = expectations[0]
    assert mainexpect.success
    # purge system-specific metadata for testing
    metadata = mainexpect.metadata_entries[0].entry_data.md_str.split(
        "### Info")[0]
    snapshot.assert_match(metadata)
Example #7
def test_yielded_results_config():
    run_config = {
        'resources': {
            'ge_data_context': {
                'config': {
                    'ge_root_dir':
                    file_relative_path(__file__, "./great_expectations")
                }
            }
        }
    }
    result = execute_pipeline(
        reconstructable(hello_world_pipeline),
        run_config=run_config,
        mode='basic',
        instance=DagsterInstance.local_temp(),
    )
    assert result.result_for_solid(
        "reyielder").output_value()[0]["success_percent"] == 100
    expectations = result.result_for_solid(
        "ge_validation_solid").expectation_results_during_compute
    assert len(expectations) == 1
    mainexpect = expectations[0]
    assert mainexpect.success
    metadata = mainexpect.metadata_entries[0].entry_data.data
    assert metadata['overall'] == {
        'evaluated_expectations': 11,
        'success_percent': 100.0,
        'successful_expectations': 11,
        'unsuccessful_expectations': 0,
    }
Example #8
def import_command(input_files: Tuple[str, ...]):
    debug_payloads = []
    for input_file in input_files:
        with GzipFile(input_file, "rb") as file:
            blob = file.read().decode("utf-8")
            debug_payload = deserialize_as(blob, DebugRunPayload)
            debug_payloads.append(debug_payload)

    with DagsterInstance.get() as instance:
        for debug_payload in debug_payloads:
            run = debug_payload.pipeline_run
            click.echo(f"Importing run {run.run_id} (Dagster: {debug_payload.version})")
            if not instance.has_snapshot(run.execution_plan_snapshot_id):
                instance.add_snapshot(
                    debug_payload.execution_plan_snapshot,
                    run.execution_plan_snapshot_id,
                )
            if not instance.has_snapshot(run.pipeline_snapshot_id):
                instance.add_snapshot(
                    debug_payload.pipeline_snapshot,
                    run.pipeline_snapshot_id,
                )

            if not instance.has_run(run.run_id):
                instance.add_run(run)

                for event in tqdm(debug_payload.event_list):
                    instance.store_event(event)
Example #9
def define_test_snapshot_context():
    return DagsterSnapshotGraphQLContext(
        instance=DagsterInstance.ephemeral(),
        execution_manager=SynchronousExecutionManager(),
        repository_snapshot=RepositorySnapshot.from_repository_definition(
            define_repository()),
    )
Example #10
def test_execute_pipeline_iterator():
    records = []

    def event_callback(record):
        assert isinstance(record, EventRecord)
        records.append(record)

    pipeline = PipelineDefinition(
        name='basic_resource_pipeline',
        solid_defs=[resource_solid],
        mode_defs=[
            ModeDefinition(
                resource_defs={'a': resource_a, 'b': resource_b},
                logger_defs={'callback': construct_event_logger(event_callback)},
            )
        ],
    )
    iterator = execute_pipeline_iterator(
        pipeline,
        environment_dict={'loggers': {'callback': {}}},
        instance=DagsterInstance.local_temp(),
    )

    event_type = None
    while event_type != 'STEP_START':
        event = next(iterator)
        event_type = event.event_type_value

    iterator.close()
    events = [record.dagster_event for record in records if record.is_dagster_event]
    messages = [record.user_message for record in records if not record.is_dagster_event]
    assert len([event for event in events if event.is_pipeline_failure]) > 0
    assert len([message for message in messages if message == 'CLEANING A']) > 0
    assert len([message for message in messages if message == 'CLEANING B']) > 0
Example #11
def test_multiple_local_cluster():
    cluster_configs = [
        {
            "n_workers": 1,
            "threads_per_worker": 2,
            "dashboard_address": None,
        },
        {
            "n_workers": 2,
            "threads_per_worker": 1,
            "dashboard_address": None,
        },
    ]

    for cluster_config in cluster_configs:
        run_config = {
            "resources": {
                "dask": {
                    "config": {
                        "cluster": {
                            "local": cluster_config
                        }
                    }
                }
            }
        }
        result = execute_pipeline(
            scheduler_info_pipeline,
            run_config=run_config,
            instance=DagsterInstance.local_temp(),
        )
        _assert_scheduler_info_result(result, cluster_config)
def test_multiple_outputs_only_emit_one_multiproc():
    pipe = ExecutionTargetHandle.for_pipeline_python_file(
        __file__, 'define_multi_out'
    ).build_pipeline_definition()
    result = execute_pipeline(
        pipe,
        environment_dict={'storage': {'filesystem': {}}, 'execution': {'multiprocess': {}}},
        instance=DagsterInstance.local_temp(),
    )
    assert result.success

    solid_result = result.result_for_solid('multiple_outputs')
    assert set(solid_result.output_values.keys()) == set(['output_one'])

    with pytest.raises(
        DagsterInvariantViolationError,
        match="Output 'not_defined' not defined in solid 'multiple_outputs'",
    ):
        solid_result.output_value('not_defined')

    with pytest.raises(DagsterInvariantViolationError, match='Did not find result output_two'):
        solid_result.output_value('output_two')

    with pytest.raises(
        DagsterInvariantViolationError,
        match=(
            'Tried to get result for solid not_present in multiple_outputs_only_emit_one_pipeline. '
            'No such top level solid.'
        ),
    ):
        result.result_for_solid('not_present')

    assert result.result_for_solid('downstream_two').skipped
def test_input_manager_with_failure():
    @root_input_manager
    def should_fail(_):
        raise Failure(
            description="Foolure",
            metadata_entries=[
                EventMetadataEntry.text(label="label", text="text", description="description")
            ],
        )

    @solid(input_defs=[InputDefinition("_fail_input", root_manager_key="should_fail")])
    def fail_on_input(_, _fail_input):
        assert False, "should not be called"

    @pipeline(mode_defs=[ModeDefinition(resource_defs={"should_fail": should_fail})])
    def simple():
        fail_on_input()

    with tempfile.TemporaryDirectory() as tmpdir_path:

        instance = DagsterInstance.from_ref(InstanceRef.from_dir(tmpdir_path))

        result = execute_pipeline(simple, instance=instance, raise_on_error=False)

        assert not result.success

        failure_data = result.result_for_solid("fail_on_input").failure_data

        assert failure_data.error.cls_name == "Failure"

        assert failure_data.user_failure_data.description == "Foolure"
        assert failure_data.user_failure_data.metadata_entries[0].label == "label"
        assert failure_data.user_failure_data.metadata_entries[0].entry_data.text == "text"
        assert failure_data.user_failure_data.metadata_entries[0].description == "description"
def test_run_failure_sensor():
    @run_failure_sensor
    def failure_sensor(context):
        assert context.dagster_event.event_type_value == "PIPELINE_FAILURE"

    @op
    def will_fail():
        raise Exception("failure")

    @job
    def my_job():
        will_fail()

    instance = DagsterInstance.ephemeral()
    result = my_job.execute_in_process(instance=instance, raise_on_error=False)

    dagster_run = result.dagster_run
    dagster_event = result.get_job_failure_event()

    context = build_run_status_sensor_context(
        sensor_name="failure_sensor",
        dagster_instance=instance,
        dagster_run=dagster_run,
        dagster_event=dagster_event,
    ).for_run_failure()

    failure_sensor(context)
Example #15
def dagit_debug_command(input_files, port):
    debug_payloads = []
    for input_file in input_files:
        click.echo("Loading {} ...".format(input_file))
        with GzipFile(input_file, "rb") as file:
            blob = file.read().decode()
            debug_payload = deserialize_json_to_dagster_namedtuple(blob)

            check.invariant(isinstance(debug_payload, DebugRunPayload))

            click.echo(
                "\trun_id: {} \n\tdagster version: {}".format(
                    debug_payload.pipeline_run.run_id, debug_payload.version
                )
            )
            debug_payloads.append(debug_payload)

    instance = DagsterInstance.ephemeral(preload=debug_payloads)
    host_dagit_ui_with_workspace(
        workspace=Workspace([]),
        instance=instance,
        port=port,
        port_lookup=True,
        host=DEFAULT_DAGIT_HOST,
        path_prefix="",
    )
Example #16
def test_multiprocess_executor():
    result = execute_pipeline(
        run_config={
            # This section controls how the run will be executed.
            # The multiprocess executor runs each step in its own subprocess.
            "execution": {
                "multiprocess": {}
            },
            # This section controls how values are passed from one solid to the next.
            # The default is in-memory, so here we set it to filesystem so that the
            # separate subprocesses can read each other's values.
            "intermediate_storage": {
                "filesystem": {}
            },
        },
        # The default instance for this API is an in-memory ephemeral one.
        # To allow the multiple processes to coordinate, we use one here
        # backed by a temporary directory.
        instance=DagsterInstance.local_temp(),
        # A ReconstructablePipeline is necessary to load the pipeline in child processes.
        # reconstructable() is a utility function that captures where the
        # PipelineDefinition came from.
        pipeline=reconstructable(predict_color),
    )
    assert result.success
def test_resolve_memoized_execution_plan_yes_stored_results():
    speculative_execution_plan = create_execution_plan(versioned_pipeline)
    step_output_handle = StepOutputHandle("versioned_solid_no_input.compute", "result")

    instance = DagsterInstance.ephemeral()
    instance.get_addresses_for_step_output_versions = mock.MagicMock(
        return_value={(versioned_pipeline.name, step_output_handle): "some_address"}
    )

    memoized_execution_plan = instance.resolve_memoized_execution_plan(
        speculative_execution_plan, run_config={}, mode="default"
    )

    assert memoized_execution_plan.step_keys_to_execute == ["versioned_solid_takes_input.compute"]

    expected_handle = StepOutputHandle(
        step_key="versioned_solid_no_input.compute", output_name="result"
    )

    assert (
        memoized_execution_plan.step_dict["versioned_solid_takes_input.compute"]
        .step_input_dict["intput"]
        .source.step_output_handle
        == expected_handle
    )
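
The no-stored-results counterpart follows the same pattern. The version below is a sketch based on the code above, not copied from the source: with an empty address mapping, memoization should leave both versioned steps in the plan.

def test_resolve_memoized_execution_plan_no_stored_results_sketch():
    speculative_execution_plan = create_execution_plan(versioned_pipeline)

    instance = DagsterInstance.ephemeral()
    # No outputs have been stored yet, so nothing can be skipped.
    instance.get_addresses_for_step_output_versions = mock.MagicMock(return_value={})

    memoized_execution_plan = instance.resolve_memoized_execution_plan(
        speculative_execution_plan, run_config={}, mode="default"
    )

    assert set(memoized_execution_plan.step_keys_to_execute) == {
        "versioned_solid_no_input.compute",
        "versioned_solid_takes_input.compute",
    }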
Example #18
    def run_daemon_loop(
        self,
        instance_ref,
        daemon_uuid,
        daemon_shutdown_event,
        gen_workspace,
        heartbeat_interval_seconds,
        error_interval_seconds,
        until=None,
    ):
        from dagster.core.telemetry_upload import uploading_logging_thread

        # Each loop runs in its own thread with its own instance and IWorkspace
        with DagsterInstance.from_ref(instance_ref) as instance:
            with uploading_logging_thread():
                with gen_workspace(instance) as workspace:
                    check.inst_param(workspace, "workspace", IWorkspace)

                    daemon_generator = self.core_loop(instance, workspace)

                    try:
                        while (not daemon_shutdown_event.is_set()) and (
                            not until or pendulum.now("UTC") < until
                        ):
                            try:
                                result = check.opt_inst(
                                    next(daemon_generator), SerializableErrorInfo
                                )
                                if result:
                                    self._errors.appendleft((result, pendulum.now("UTC")))
                            except StopIteration:
                                self._logger.error(
                                    "Daemon loop finished without raising an error - daemon loops should run forever until they are interrupted."
                                )
                                break
                            except Exception:
                                error_info = serializable_error_info_from_exc_info(sys.exc_info())
                                self._logger.error(
                                    "Caught error, daemon loop will restart:\n{}".format(error_info)
                                )
                                self._errors.appendleft((error_info, pendulum.now("UTC")))
                                daemon_generator.close()
                                daemon_generator = self.core_loop(instance, workspace)
                            finally:
                                try:
                                    self._check_add_heartbeat(
                                        instance,
                                        daemon_uuid,
                                        heartbeat_interval_seconds,
                                        error_interval_seconds,
                                    )
                                except Exception:
                                    self._logger.error(
                                        "Failed to add heartbeat: \n{}".format(
                                            serializable_error_info_from_exc_info(sys.exc_info())
                                        )
                                    )
                    finally:
                        # clean up the generator if it was stopped part-way through
                        daemon_generator.close()
Example #19
def temp_instance():
    with seven.TemporaryDirectory() as temp_dir:
        instance = DagsterInstance.local_temp(temp_dir)
        try:
            yield instance
        finally:
            instance.run_launcher.join()
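
temp_instance is generator-shaped, so it is presumably wrapped as a pytest fixture (or a contextlib context manager) in the source. The usage below is an assumption illustrating the intended lifecycle, not code from the source.

@pytest.fixture
def instance():
    with seven.TemporaryDirectory() as temp_dir:
        inst = DagsterInstance.local_temp(temp_dir)
        try:
            yield inst
        finally:
            inst.run_launcher.join()


def test_fresh_temp_instance_has_no_runs_sketch(instance):
    # Sketch only: a brand new temporary instance should have an empty run storage.
    assert len(instance.get_runs()) == 0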
Example #20
def test_custom_path_asset_store():
    with seven.TemporaryDirectory() as tmpdir_path:

        instance = DagsterInstance.ephemeral()

        run_config = {
            "resources": {"fs_asset_store": {"config": {"base_dir": tmpdir_path}}},
        }

        result = execute_pipeline(
            custom_path_pipeline, run_config=run_config, mode="test", instance=instance
        )

        assert result.success

        filepath_call_api = os.path.join(tmpdir_path, "call_api_output")
        assert os.path.isfile(filepath_call_api)
        with open(filepath_call_api, "rb") as read_obj:
            assert pickle.load(read_obj) == [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]

        filepath_parse_df = os.path.join(tmpdir_path, "parse_df_output")
        assert os.path.isfile(filepath_parse_df)
        with open(filepath_parse_df, "rb") as read_obj:
            assert pickle.load(read_obj) == [1, 2, 3, 4, 5]

        assert reexecute_pipeline(
            custom_path_pipeline,
            result.run_id,
            run_config=run_config,
            mode="test",
            instance=instance,
            step_selection=["parse_df.compute*"],
        ).success
Example #21
def define_test_snapshot_context():
    return DagsterGraphQLOutOfProcessRepositoryContext(
        instance=DagsterInstance.ephemeral(),
        execution_manager=SynchronousExecutionManager(),
        external_repository=ExternalRepository.from_repository_def(
            define_repository()),
    )
def execute_pipeline_with_steps(pipeline_def, step_keys_to_execute=None):
    plan = create_execution_plan(pipeline_def, step_keys_to_execute=step_keys_to_execute)
    with DagsterInstance.ephemeral() as instance:
        pipeline_run = instance.create_run_for_pipeline(
            pipeline_def=pipeline_def, step_keys_to_execute=step_keys_to_execute,
        )
        return execute_plan(plan, instance, pipeline_run)
Example #23
def test_builtin_pipeline():
    with TemporaryDirectory() as tmpdir_path:
        instance = DagsterInstance.ephemeral()

        run_config = {
            "resources": {
                "object_manager": {
                    "config": {
                        "base_dir": tmpdir_path
                    }
                }
            },
        }

        result = execute_pipeline(asset_store_pipeline,
                                  run_config=run_config,
                                  mode="test",
                                  instance=instance)

        assert result.success

        filepath_call_api = os.path.join(tmpdir_path, result.run_id,
                                         "call_api", "result")
        assert os.path.isfile(filepath_call_api)
        with open(filepath_call_api, "rb") as read_obj:
            assert pickle.load(read_obj) == [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]

        filepath_parse_df = os.path.join(tmpdir_path, result.run_id,
                                         "parse_df", "result")
        assert os.path.isfile(filepath_parse_df)
        with open(filepath_parse_df, "rb") as read_obj:
            assert pickle.load(read_obj) == [1, 2, 3, 4, 5]
Example #24
    def run_loop(
        self,
        daemon_uuid,
        daemon_shutdown_event,
        gen_workspace,
        heartbeat_interval_seconds,
        error_interval_seconds,
        until=None,
    ):
        # Each loop runs in its own thread with its own instance and IWorkspace
        with DagsterInstance.get() as instance:
            with gen_workspace(instance) as workspace:
                check.inst_param(workspace, "workspace", IWorkspace)

                while not daemon_shutdown_event.is_set() and (
                    not until or pendulum.now("UTC") < until
                ):
                    curr_time = pendulum.now("UTC")
                    if (
                        not self._last_iteration_time
                        or (curr_time - self._last_iteration_time).total_seconds()
                        >= self.interval_seconds
                    ):
                        self._last_iteration_time = curr_time
                        self._run_iteration(
                            instance,
                            daemon_uuid,
                            daemon_shutdown_event,
                            workspace,
                            heartbeat_interval_seconds,
                            error_interval_seconds,
                            until,
                        )

                    self._check_add_heartbeat(instance, daemon_uuid,
                                              heartbeat_interval_seconds,
                                              error_interval_seconds)
                    daemon_shutdown_event.wait(0.5)
Example #25
def test_default_object_manager_reexecution():
    with seven.TemporaryDirectory() as tmpdir_path:
        default_asset_store = fs_object_manager.configured(
            {"base_dir": tmpdir_path})
        pipeline_def = define_pipeline(default_asset_store, {})
        instance = DagsterInstance.ephemeral()

        result = execute_pipeline(pipeline_def, instance=instance)
        assert result.success

        re_result = reexecute_pipeline(
            pipeline_def,
            result.run_id,
            instance=instance,
            step_selection=["solid_b"],
        )

        # re-execution should yield asset_store_operation events instead of intermediate events
        get_asset_events = list(
            filter(
                lambda evt: evt.is_asset_store_operation
                and AssetStoreOperationType(evt.event_specific_data.op)
                == AssetStoreOperationType.GET_ASSET,
                re_result.event_list,
            )
        )
        assert len(get_asset_events) == 1
        assert get_asset_events[0].event_specific_data.step_key == "solid_a"
Example #26
def test_shameful_workaround():
    graphql_context = define_test_out_of_process_context(
        DagsterInstance.ephemeral())

    pipeline_run = graphql_context.instance.create_run_for_pipeline(
        pipeline_def=csv_hello_world,
        run_config=csv_hello_world_solids_config())

    result = execute_dagster_graphql(
        graphql_context,
        EXECUTE_RUN_IN_PROCESS_MUTATION,
        variables={
            'runId': pipeline_run.run_id,
            # the in-process name represents launching from the user process
            'repositoryLocationName': IN_PROCESS_NAME,
            'repositoryName': main_repo_name(),
        },
    )
    assert result.data
    assert result.data['executeRunInProcess']['__typename'] == 'ExecuteRunInProcessSuccess'

    result = execute_dagster_graphql(
        graphql_context,
        EXECUTE_RUN_IN_PROCESS_MUTATION,
        variables={
            'runId': pipeline_run.run_id,
            # but we don't apply workaround to other names
            'repositoryLocationName': 'some_other_name',
            'repositoryName': main_repo_name(),
        },
    )
    assert result.data
    assert result.data['executeRunInProcess']['__typename'] == 'PipelineNotFoundError'
def test_pipeline_run_creation_race():
    with seven.TemporaryDirectory() as tempdir:
        instance = DagsterInstance.local_temp(tempdir)
        run_id = 'run_id'

        # Spy on the result of add_run
        add_run_spy = Spy(instance._run_storage.add_run)  # pylint: disable=protected-access
        add_run_mock = mock.MagicMock(side_effect=add_run_spy)
        instance._run_storage.add_run = add_run_mock  # pylint: disable=protected-access

        # This invocation should successfully add the run to run storage
        pipeline_run = register_managed_run_for_test(instance, run_id=run_id)
        assert len(add_run_mock.call_args_list) == 1
        assert instance.has_run(run_id)

        # Check that add_run did not raise DagsterRunAlreadyExists and that it
        # returned successfully
        assert add_run_spy.exceptions == []
        assert len(add_run_spy.return_values) == 1

        # (*) Simulate a race where the second invocation sees has_run() return False
        fetched_pipeline_run = ''
        with mock.patch.object(instance, 'has_run', mock.MagicMock(return_value=False)):
            fetched_pipeline_run = register_managed_run_for_test(instance, run_id=run_id)

        # Check that add_run raised DagsterRunAlreadyExists and did not record a return value
        assert len(add_run_mock.call_args_list) == 2
        assert add_run_spy.exceptions == [DagsterRunAlreadyExists]
        assert len(add_run_spy.return_values) == 1

        assert pipeline_run == fetched_pipeline_run
        assert instance.has_run(run_id)
        assert len(instance.get_runs()) == 1
Example #28
def test_multiproc_markers():
    pipe = ExecutionTargetHandle.for_pipeline_python_file(
        __file__, 'define_pipeline'
    ).build_pipeline_definition()
    instance = DagsterInstance.local_temp()
    result = execute_pipeline(
        pipe,
        instance=instance,
        environment_dict={'execution': {'multiprocess': {}}, 'storage': {'filesystem': {}}},
    )
    assert result.success
    events = instance.all_logs(result.run_id)
    start_markers = {}
    end_markers = {}
    for event in events:
        dagster_event = event.dagster_event
        if dagster_event.is_engine_event:
            if dagster_event.engine_event_data.marker_start:
                key = '{step}.{marker}'.format(
                    step=event.step_key, marker=dagster_event.engine_event_data.marker_start
                )
                start_markers[key] = event.timestamp
            if dagster_event.engine_event_data.marker_end:
                key = '{step}.{marker}'.format(
                    step=event.step_key, marker=dagster_event.engine_event_data.marker_end
                )
                end_markers[key] = event.timestamp

    seen = set()
    assert set(start_markers.keys()) == set(end_markers.keys())
    for key in end_markers:
        assert end_markers[key] - start_markers[key] > 0
        seen.add(key)

    assert 'ping.compute.multiprocess_subprocess_init' in end_markers
Example #29
def test_sync_run_launcher_run():
    with seven.TemporaryDirectory() as temp_dir:
        instance = DagsterInstance.local_temp(
            temp_dir,
            overrides={
                "run_launcher": {
                    "module":
                    "dagster.core.launcher.sync_in_memory_run_launcher",
                    "class": "SyncInMemoryRunLauncher",
                }
            },
        )

        external_repo = get_main_external_repo(instance)
        external_pipeline = external_repo.get_full_external_pipeline(
            "noop_pipeline")

        run = create_run_for_test(instance=instance,
                                  pipeline_name=external_pipeline.name)

        run = instance.run_launcher.launch_run(
            instance=instance, run=run, external_pipeline=external_pipeline)

        completed_run = instance.get_run_by_id(run.run_id)
        assert completed_run.is_success
Example #30
def test_materialized_assets():
    instance = DagsterInstance.ephemeral()
    res = execute_pipeline(materialization_pipeline, instance=instance)
    assert res.success
    asset_keys = instance.all_asset_keys()
    assert len(asset_keys) == 1
    assert asset_keys[0] == AssetKey(["dashboards", "analytics_dashboard"])