Example 1
def test_multi_python_environment_workspace():
    workspace = load_workspace_from_config(
        yaml.safe_load(_get_multi_location_workspace_yaml()),
        # fake out as if it were loaded by a yaml file in this directory
        file_relative_path(__file__, 'not_a_real.yaml'),
        UserProcessApi.CLI,
    )
    assert isinstance(workspace, Workspace)
    assert len(workspace.repository_location_handles) == 6
    assert workspace.has_repository_location_handle('loaded_from_file')
    assert workspace.has_repository_location_handle('loaded_from_module')
    assert workspace.has_repository_location_handle('named_loaded_from_file')
    assert workspace.has_repository_location_handle('named_loaded_from_module')

    loaded_from_file_handle = workspace.get_repository_location_handle(
        'loaded_from_file')
    assert set(
        loaded_from_file_handle.repository_code_pointer_dict.keys()) == {
            'hello_world_repository'
        }
    assert isinstance(loaded_from_file_handle,
                      PythonEnvRepositoryLocationHandle)

    loaded_from_module_handle = workspace.get_repository_location_handle(
        'loaded_from_module')
    assert set(
        loaded_from_module_handle.repository_code_pointer_dict.keys()) == {
            'hello_world_repository'
        }
    assert isinstance(loaded_from_module_handle,
                      PythonEnvRepositoryLocationHandle)

    named_loaded_from_file_handle = workspace.get_repository_location_handle(
        'named_loaded_from_file')
    assert set(
        named_loaded_from_file_handle.repository_code_pointer_dict.keys()) == {
            'hello_world_repository_name'
        }
    assert isinstance(named_loaded_from_file_handle,
                      PythonEnvRepositoryLocationHandle)

    named_loaded_from_module_handle = workspace.get_repository_location_handle(
        'named_loaded_from_module')
    assert set(named_loaded_from_module_handle.repository_code_pointer_dict.
               keys()) == {'hello_world_repository_name'}
    assert isinstance(named_loaded_from_module_handle,
                      PythonEnvRepositoryLocationHandle)

    named_loaded_from_module_attribute_handle = workspace.get_repository_location_handle(
        'named_loaded_from_module_attribute')
    assert set(
        named_loaded_from_module_attribute_handle.repository_code_pointer_dict.
        keys()) == {'hello_world_repository_name'}
    assert isinstance(named_loaded_from_module_attribute_handle,
                      PythonEnvRepositoryLocationHandle)

    named_loaded_from_file_attribute_handle = workspace.get_repository_location_handle(
        'named_loaded_from_file_attribute')
    assert set(
        named_loaded_from_file_attribute_handle.repository_code_pointer_dict.
        keys()) == {'hello_world_repository_name'}
    assert isinstance(named_loaded_from_file_attribute_handle,
                      PythonEnvRepositoryLocationHandle)
Example 2
def test_safe_isfile():
    assert safe_isfile(file_relative_path(__file__, "test_file_utils.py"))
    assert not safe_isfile(file_relative_path(__file__, "not_a_file.py"))
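For context on the helper these examples exercise: file_relative_path(dunderfile, relative_path) resolves relative_path against the directory containing the given file, so lookups do not depend on the process's current working directory. A minimal sketch of the behavior (dagster's utility is essentially this one-liner, plus argument checks):

import os

def file_relative_path(dunderfile, relative_path):
    # resolve relative_path against the directory of the calling file
    return os.path.join(os.path.dirname(dunderfile), relative_path)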
Example 3
# pylint: disable=unused-argument, no-value-for-parameter, no-member

# start_marker
import os
from typing import List

from dagster import DynamicOut, DynamicOutput, Field, job, op
from dagster.utils import file_relative_path


@op(
    config_schema={
        "path": Field(str,
                      default_value=file_relative_path(__file__, "sample"))
    },
    out=DynamicOut(str),
)
def files_in_directory(context):
    path = context.op_config["path"]
    dirname, _, filenames = next(os.walk(path))
    for file in filenames:
        yield DynamicOutput(
            value=os.path.join(dirname, file),
            # create a mapping key from the file name
            mapping_key=file.replace(".", "_").replace("-", "_"),
        )


@op
def process_file(path: str) -> int:
    # simple example of calculating size
    return os.path.getsize(path)
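A hedged sketch of how these two ops are typically composed with dagster's dynamic-output mapping API, reusing the imports and definitions above (the job name process_directory is illustrative):

@job
def process_directory():
    # fan out: invoke process_file once per path yielded by files_in_directory
    files_in_directory().map(process_file)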
Example 4
"""Compile the proto definitions into Python.

This tooling should be invoked to regenerate the Python grpc artifacts by running:

    python -m dagster.grpc.compile
"""
import os
import shutil
import subprocess
import sys

from dagster.utils import file_relative_path, safe_tempfile_path

PROTOS_DIR = file_relative_path(__file__, "protos")

PROTOS_PATH = os.path.join(PROTOS_DIR, "api.proto")

GENERATED_DIR = file_relative_path(__file__, "__generated__")

GENERATED_PB2_PATH = os.path.join(GENERATED_DIR, "api_pb2.py")

GENERATED_GRPC_PATH = os.path.join(GENERATED_DIR, "api_pb2_grpc.py")

ISORT_SETTINGS_PATH = file_relative_path(__file__, "../../../../")

GENERATED_HEADER = [
    ("# @" + "generated\n"),  # This is to avoid matching the phab rule
    "\n",
    "# This file was generated by running `python -m dagster.grpc.compile`\n",
    "# Do not edit this file directly, and do not attempt to recompile it using\n",
    "# grpc_tools.protoc directly, as several changes must be made to the raw output\n",
Example 5
import sys
import time

from dagster.serdes.ipc import (
    interrupt_ipc_subprocess,
    open_ipc_subprocess,
    setup_interrupt_support,
)
from dagster.utils import file_relative_path

if __name__ == '__main__':
    setup_interrupt_support()
    (
        child_opened_sentinel,
        parent_interrupt_sentinel,
        child_started_sentinel,
        child_interrupt_sentinel,
    ) = (sys.argv[1], sys.argv[2], sys.argv[3], sys.argv[4])
    child_process = open_ipc_subprocess([
        sys.executable,
        file_relative_path(__file__, 'subprocess_with_interrupt_support.py'),
        child_started_sentinel,
        child_interrupt_sentinel,
    ])
    with open(child_opened_sentinel, 'w') as fd:
        fd.write('opened_ipc_subprocess')
    try:
        while True:
            time.sleep(0.1)
    except KeyboardInterrupt:
        interrupt_ipc_subprocess(child_process)
        with open(parent_interrupt_sentinel, 'w') as fd:
            fd.write('parent_received_keyboard_interrupt')
Example 6
def test_import_module_from_path():
    foo_module = seven.import_module_from_path(
        "foo_module", file_relative_path(__file__, "foo_module.py"))
    assert foo_module.FOO == 7
Example 7
import pandas as pd
from dagster import EventMetadata
from dagster.core.asset_defs import build_assets_job
from dagster.utils import file_relative_path
from dagster_dbt import dbt_cli_resource
from dagster_dbt.asset_defs import load_assets_from_dbt_manifest
from dagster_pyspark import pyspark_resource
from hacker_news_assets.pipelines.download_pipeline import S3_SPARK_CONF
from hacker_news_assets.resources.snowflake_io_manager import (
    SHARED_SNOWFLAKE_CONF,
    connect_snowflake,
    snowflake_io_manager_dev,
    snowflake_io_manager_prod,
)

DBT_PROJECT_DIR = file_relative_path(__file__, "../../hacker_news_dbt")
DBT_PROFILES_DIR = DBT_PROJECT_DIR + "/config"

# We define two sets of resources: one for prod mode, which writes to production schemas,
# and one for dev mode, which writes to alternate schemas.
PROD_RESOURCES = {
    "dbt":
    dbt_cli_resource.configured({
        "profiles_dir": DBT_PROFILES_DIR,
        "project_dir": DBT_PROJECT_DIR,
        "target": "prod"
    }),
    "warehouse_io_manager":
    snowflake_io_manager_prod,
    # "parquet_io_manager": parquet_io_manager.configured({"base_path": get_system_temp_directory()}),
    "pyspark":
Example 8
def test_start_mock_worker_config_from_yaml(worker_patch):
    args = ["-y", file_relative_path(__file__, "engine_config.yaml")]
    start_worker("dagster_test_worker", args=args)
    assert_called(worker_patch)
Example 9
                "field_six_nullable_int_list":
                Field([Noneable(int)], is_required=False),
            },
        },
    )
    def a_solid_with_multilayered_config(_):
        return None

    return a_solid_with_multilayered_config()


@pipeline(preset_defs=[
    PresetDefinition.from_files(
        name="prod",
        config_files=[
            file_relative_path(__file__,
                               "../environments/csv_hello_world_prod.yaml")
        ],
    ),
    PresetDefinition.from_files(
        name="test",
        config_files=[
            file_relative_path(__file__,
                               "../environments/csv_hello_world_test.yaml")
        ],
    ),
    PresetDefinition(
        name="test_inline",
        run_config={
            "solids": {
                "sum_solid": {
                    "inputs": {
Example 10
def test_list_command_cli():
    with instance_for_test():

        runner = CliRunner()

        result = runner.invoke(
            pipeline_list_command,
            ["-f", file_relative_path(__file__, "test_cli_commands.py"), "-a", "bar"],
        )
        assert_correct_bar_repository_output(result)

        result = runner.invoke(
            pipeline_list_command,
            [
                "-f",
                file_relative_path(__file__, "test_cli_commands.py"),
                "-a",
                "bar",
                "-d",
                os.path.dirname(__file__),
            ],
        )
        assert_correct_bar_repository_output(result)

        result = runner.invoke(
            pipeline_list_command,
            ["-m", "dagster_tests.cli_tests.command_tests.test_cli_commands", "-a", "bar"],
        )
        assert_correct_bar_repository_output(result)

        result = runner.invoke(
            pipeline_list_command, ["-w", file_relative_path(__file__, "workspace.yaml")]
        )
        assert_correct_bar_repository_output(result)

        result = runner.invoke(
            pipeline_list_command,
            [
                "-w",
                file_relative_path(__file__, "workspace.yaml"),
                "-w",
                file_relative_path(__file__, "override.yaml"),
            ],
        )
        assert_correct_extra_repository_output(result)

        result = runner.invoke(
            pipeline_list_command,
            [
                "-f",
                "foo.py",
                "-m",
                "dagster_tests.cli_tests.command_tests.test_cli_commands",
                "-a",
                "bar",
            ],
        )
        assert result.exit_code == 2

        result = runner.invoke(
            pipeline_list_command,
            ["-m", "dagster_tests.cli_tests.command_tests.test_cli_commands"],
        )
        assert_correct_bar_repository_output(result)

        result = runner.invoke(
            pipeline_list_command, ["-f", file_relative_path(__file__, "test_cli_commands.py")]
        )
        assert_correct_bar_repository_output(result)
Example 11
def test_start_worker_config_from_yaml(rabbitmq):
    args = ["-y", file_relative_path(__file__, "engine_config.yaml")]

    with cleanup_worker("dagster_test_worker", args=args):
        start_worker("dagster_test_worker", args=args)
        assert check_for_worker("dagster_test_worker", args=args)
Example 12
def test_interrupt_compute_log_tail_grandchild(
        windows_legacy_stdio_env,  # pylint: disable=redefined-outer-name, unused-argument
):
    with ExitStack() as context_stack:
        (
            child_opened_sentinel,
            parent_interrupt_sentinel,
            child_started_sentinel,
            stdout_pids_file,
            stderr_pids_file,
            child_interrupt_sentinel,
        ) = [
            context_stack.enter_context(safe_tempfile_path()) for _ in range(6)
        ]

        parent_process = open_ipc_subprocess([
            sys.executable,
            file_relative_path(__file__, "parent_compute_log_subprocess.py"),
            child_opened_sentinel,
            parent_interrupt_sentinel,
            child_started_sentinel,
            stdout_pids_file,
            stderr_pids_file,
            child_interrupt_sentinel,
        ])

        wait_for_file(child_opened_sentinel)
        wait_for_file(child_started_sentinel)

        wait_for_file(stdout_pids_file)
        with open(stdout_pids_file, "r") as stdout_pids_fd:
            stdout_pids_str = stdout_pids_fd.read()
            assert stdout_pids_str.startswith("stdout pids:")
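            # tokens after "stdout pids:" look like "(123," or "None"; strip the
            # punctuation and convert each one to an int (or None)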
            stdout_pids = list(
                map(
                    lambda x: int(x) if x != "None" else None,
                    [x.strip("(),") for x in stdout_pids_str.split(" ")[2:]],
                ))

        wait_for_file(stderr_pids_file)
        with open(stderr_pids_file, "r") as stderr_pids_fd:
            stderr_pids_str = stderr_pids_fd.read()
            assert stderr_pids_str.startswith("stderr pids:")
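            # same parsing for the pids of the stderr tail processes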
            stderr_pids = list(
                map(
                    lambda x: int(x) if x != "None" else None,
                    [x.strip("(),") for x in stderr_pids_str.split(" ")[2:]],
                ))

        interrupt_ipc_subprocess(parent_process)

        wait_for_file(child_interrupt_sentinel)
        with open(child_interrupt_sentinel, "r") as fd:
            assert fd.read().startswith("compute_log_subprocess_interrupt")

        wait_for_file(parent_interrupt_sentinel)
        with open(parent_interrupt_sentinel, "r") as fd:
            assert fd.read().startswith("parent_received_keyboard_interrupt")

        for stdout_pid in stdout_pids:
            if stdout_pid is not None:
                wait_for_process(stdout_pid)

        for stderr_pid in stderr_pids:
            if stderr_pid is not None:
                wait_for_process(stderr_pid)
Example 13
def test_create_app_with_workspace():
    with load_workspace_from_yaml_paths(
            [file_relative_path(__file__, "./workspace.yaml")]) as workspace:
        assert create_app_from_workspace(workspace,
                                         DagsterInstance.ephemeral())
Example 14
def test_grpc_multi_location_workspace():

    workspace = load_workspace_from_config(
        yaml.safe_load(_get_multi_location_workspace_yaml()),
        # fake out as if it were loaded by a yaml file in this directory
        file_relative_path(__file__, 'not_a_real.yaml'),
        UserProcessApi.GRPC,
    )

    assert isinstance(workspace, Workspace)
    assert len(workspace.repository_location_handles) == 6
    assert workspace.has_repository_location_handle('loaded_from_file')
    assert workspace.has_repository_location_handle('loaded_from_module')

    loaded_from_file_handle = workspace.get_repository_location_handle(
        'loaded_from_file')
    assert isinstance(loaded_from_file_handle,
                      ManagedGrpcPythonEnvRepositoryLocationHandle)

    assert loaded_from_file_handle.repository_names == {
        'hello_world_repository'
    }

    loaded_from_module_handle = workspace.get_repository_location_handle(
        'loaded_from_module')
    assert isinstance(loaded_from_module_handle,
                      ManagedGrpcPythonEnvRepositoryLocationHandle)

    assert loaded_from_module_handle.repository_names == {
        'hello_world_repository'
    }

    named_loaded_from_file_handle = workspace.get_repository_location_handle(
        'named_loaded_from_file')
    assert named_loaded_from_file_handle.repository_names == {
        'hello_world_repository_name'
    }
    assert isinstance(named_loaded_from_file_handle,
                      ManagedGrpcPythonEnvRepositoryLocationHandle)

    named_loaded_from_module_handle = workspace.get_repository_location_handle(
        'named_loaded_from_module')
    assert named_loaded_from_module_handle.repository_names == {
        'hello_world_repository_name'
    }
    assert isinstance(named_loaded_from_module_handle,
                      ManagedGrpcPythonEnvRepositoryLocationHandle)

    named_loaded_from_module_attribute_handle = workspace.get_repository_location_handle(
        'named_loaded_from_module_attribute')
    assert named_loaded_from_module_attribute_handle.repository_names == {
        'hello_world_repository_name'
    }
    assert isinstance(named_loaded_from_module_attribute_handle,
                      ManagedGrpcPythonEnvRepositoryLocationHandle)

    named_loaded_from_file_attribute_handle = workspace.get_repository_location_handle(
        'named_loaded_from_file_attribute')
    assert named_loaded_from_file_attribute_handle.repository_names == {
        'hello_world_repository_name'
    }
    assert isinstance(named_loaded_from_file_attribute_handle,
                      ManagedGrpcPythonEnvRepositoryLocationHandle)
Example 15
def test_0_7_6_postgres_pre_add_pipeline_snapshot(hostname, conn_string):
    engine = create_engine(conn_string)
    engine.execute("drop schema public cascade;")
    engine.execute("create schema public;")

    env = os.environ.copy()
    env["PGPASSWORD"] = "******"
    subprocess.check_call(
        [
            "psql",
            "-h",
            hostname,
            "-p",
            "5432",
            "-U",
            "test",
            "-f",
            file_relative_path(
                __file__, "snapshot_0_7_6_pre_add_pipeline_snapshot/postgres/pg_dump.txt"
            ),
        ],
        env=env,
    )

    run_id = "d5f89349-7477-4fab-913e-0925cef0a959"

    with seven.TemporaryDirectory() as tempdir:
        with open(file_relative_path(__file__, "dagster.yaml"), "r") as template_fd:
            with open(os.path.join(tempdir, "dagster.yaml"), "w") as target_fd:
                template = template_fd.read().format(hostname=hostname)
                target_fd.write(template)

        instance = DagsterInstance.from_config(tempdir)

        @solid
        def noop_solid(_):
            pass

        @pipeline
        def noop_pipeline():
            noop_solid()

        with pytest.raises(
            DagsterInstanceMigrationRequired, match=_migration_regex("run", current_revision=None)
        ):
            execute_pipeline(noop_pipeline, instance=instance)

        # ensure migration is run
        instance.upgrade()

        runs = instance.get_runs()

        assert len(runs) == 1

        assert runs[0].run_id == run_id

        run = instance.get_run_by_id(run_id)

        assert run.run_id == run_id
        assert run.pipeline_snapshot_id is None
        result = execute_pipeline(noop_pipeline, instance=instance)

        assert result.success

        runs = instance.get_runs()
        assert len(runs) == 2

        new_run_id = result.run_id

        new_run = instance.get_run_by_id(new_run_id)

        assert new_run.pipeline_snapshot_id
Example 16
def test_presets():
    @solid(config_schema={'error': Bool})
    def can_fail(context):
        if context.solid_config['error']:
            raise Exception('I did an error')
        return 'cool'

    @lambda_solid
    def always_fail():
        raise Exception('I always do this')

    pipe = PipelineDefinition(
        name='simple',
        solid_defs=[can_fail, always_fail],
        preset_defs=[
            PresetDefinition.from_files(
                'passing',
                config_files=[file_relative_path(__file__, 'pass_env.yaml')],
                solid_selection=['can_fail'],
            ),
            PresetDefinition.from_files(
                'passing_override_to_fail',
                config_files=[file_relative_path(__file__, 'pass_env.yaml')],
                solid_selection=['can_fail'],
            ).with_additional_config({'solids': {'can_fail': {'config': {'error': True}}}}),
            PresetDefinition(
                'passing_direct_dict',
                run_config={'solids': {'can_fail': {'config': {'error': False}}}},
                solid_selection=['can_fail'],
            ),
            PresetDefinition.from_files(
                'failing_1',
                config_files=[file_relative_path(__file__, 'fail_env.yaml')],
                solid_selection=['can_fail'],
            ),
            PresetDefinition.from_files(
                'failing_2', config_files=[file_relative_path(__file__, 'pass_env.yaml')]
            ),
            PresetDefinition('subset', solid_selection=['can_fail'],),
        ],
    )

    with pytest.raises(DagsterInvariantViolationError):
        PresetDefinition.from_files(
            'invalid_1', config_files=[file_relative_path(__file__, 'not_a_file.yaml')]
        )

    with pytest.raises(DagsterInvariantViolationError):
        PresetDefinition.from_files(
            'invalid_2',
            config_files=[file_relative_path(__file__, 'test_repository_definition.py')],
        )

    assert execute_pipeline(pipe, preset='passing').success

    assert execute_pipeline(pipe, preset='passing_direct_dict').success
    assert not execute_pipeline(pipe, preset='failing_1', raise_on_error=False).success

    assert not execute_pipeline(pipe, preset='failing_2', raise_on_error=False).success

    with pytest.raises(DagsterInvariantViolationError, match='Could not find preset'):
        execute_pipeline(pipe, preset='not_failing', raise_on_error=False)

    assert not execute_pipeline(
        pipe, preset='passing_override_to_fail', raise_on_error=False
    ).success

    assert execute_pipeline(
        pipe, preset='passing', run_config={'solids': {'can_fail': {'config': {'error': False}}}},
    ).success

    with pytest.raises(
        check.CheckError,
        match=re.escape(
            'The environment set in preset \'passing\' does not agree with the environment passed '
            'in the `run_config` argument.'
        ),
    ):
        execute_pipeline(
            pipe,
            preset='passing',
            run_config={'solids': {'can_fail': {'config': {'error': True}}}},
        )

    assert execute_pipeline(
        pipe, preset='subset', run_config={'solids': {'can_fail': {'config': {'error': False}}}},
    ).success
Example 17
def test_0_7_6_postgres_pre_event_log_migration(hostname, conn_string):
    engine = create_engine(conn_string)
    engine.execute("drop schema public cascade;")
    engine.execute("create schema public;")

    env = os.environ.copy()
    env["PGPASSWORD"] = "******"
    subprocess.check_call(
        [
            "psql",
            "-h",
            hostname,
            "-p",
            "5432",
            "-U",
            "test",
            "-f",
            file_relative_path(
                __file__, "snapshot_0_7_6_pre_event_log_migration/postgres/pg_dump.txt"
            ),
        ],
        env=env,
    )

    run_id = "ca7f1e33-526d-4f75-9bc5-3e98da41ab97"

    with seven.TemporaryDirectory() as tempdir:
        with open(file_relative_path(__file__, "dagster.yaml"), "r") as template_fd:
            with open(os.path.join(tempdir, "dagster.yaml"), "w") as target_fd:
                template = template_fd.read().format(hostname=hostname)
                target_fd.write(template)

        instance = DagsterInstance.from_config(tempdir)

        # Runs will appear in DB, but event logs need migration
        runs = instance.get_runs()
        assert len(runs) == 1
        assert instance.get_run_by_id(run_id)

        # Make sure the schema is migrated
        instance.upgrade()

        assert isinstance(instance._event_storage, SqlEventLogStorage)
        events_by_id = instance._event_storage.get_logs_for_run_by_log_id(run_id)
        assert len(events_by_id) == 40

        step_key_records = []
        for record_id, _event in events_by_id.items():
            row_data = instance._event_storage.get_event_log_table_data(run_id, record_id)
            if row_data.step_key is not None:
                step_key_records.append(row_data)
        assert len(step_key_records) == 0

        # run the event_log data migration
        migrate_event_log_data(instance=instance)

        step_key_records = []
        for record_id, _event in events_by_id.items():
            row_data = instance._event_storage.get_event_log_table_data(run_id, record_id)
            if row_data.step_key is not None:
                step_key_records.append(row_data)
        assert len(step_key_records) > 0
Example 18
def test_basic_execute_plan_with_materialization():
    with get_temp_file_name() as out_csv_path:

        environment_dict = {
            'solids': {
                'sum_solid': {
                    'inputs': {
                        'num': file_relative_path(__file__, '../data/num.csv')
                    },
                    'outputs': [{
                        'result': out_csv_path
                    }],
                }
            }
        }

        instance = DagsterInstance.ephemeral()

        result = execute_dagster_graphql(
            define_context(instance=instance),
            EXECUTION_PLAN_QUERY,
            variables={
                'pipeline': {
                    'name': 'csv_hello_world'
                },
                'environmentConfigData': environment_dict,
                'mode': 'default',
            },
        )

        steps_data = result.data['executionPlan']['steps']

        assert [step_data['key'] for step_data in steps_data] == [
            'sum_solid.compute',
            'sum_sq_solid.compute',
        ]

        run_id = str(uuid.uuid4())
        instance.create_empty_run(run_id, 'csv_hello_world')

        result = execute_dagster_graphql(
            define_context(instance=instance),
            EXECUTE_PLAN_QUERY,
            variables={
                'executionParams': {
                    'selector': {
                        'name': 'csv_hello_world'
                    },
                    'environmentConfigData': environment_dict,
                    'stepKeys': ['sum_solid.compute', 'sum_sq_solid.compute'],
                    'executionMetadata': {
                        'runId': run_id
                    },
                    'mode': 'default',
                }
            },
        )

        assert result.data

        step_mat_event = None

        for message in result.data['executePlan']['stepEvents']:
            if message['__typename'] == 'StepMaterializationEvent':
                # ensure only one event
                assert step_mat_event is None
                step_mat_event = message

        # ensure an event was captured
        assert step_mat_event
        assert step_mat_event['materialization']
        assert len(step_mat_event['materialization']['metadataEntries']) == 1
        metadata_entry = step_mat_event['materialization']['metadataEntries'][
            0]
        assert metadata_entry['path'] == out_csv_path
Example 19
def get_hello_world_path():
    return file_relative_path(__file__, "hello_world_repository.py")
Example 20
    svd.fit(user_story_matrix.matrix)

    total_explained_variance = svd.explained_variance_ratio_.sum()

    yield Output(
        svd,
        metadata={
            "Total explained variance ratio": total_explained_variance,
            "Number of components": n_components,
        },
    )


model_perf_notebook = define_dagstermill_solid(
    "recommender_model_perf",
    notebook_path=file_relative_path(
        __file__, "../notebooks/recommender_model_perf.ipynb"),
    input_defs=[
        InputDefinition(dagster_type=TruncatedSVD, name="recommender_model")
    ],
    output_notebook_name="perf_notebook",
)


@op(
    ins={
        "story_titles":
        In(
            root_manager_key="warehouse_loader",
            metadata={
                "table": "hackernews.stories",
                "columns": ["id", "title"],
Example 21
    datasource_name="getest", suite_name="basic.warning"
)
# end_ge_demo_marker_1


@pipeline(
    mode_defs=[ModeDefinition("basic", resource_defs={"ge_data_context": ge_data_context})],
    preset_defs=[
        PresetDefinition(
            "sample_preset_success",
            mode="basic",
            run_config={
                "resources": {
                    "ge_data_context": {
                        "config": {
                            "ge_root_dir": file_relative_path(__file__, "./great_expectations")
                        }
                    }
                },
                "solids": {
                    "read_in_datafile": {
                        "inputs": {
                            "csv_path": {"value": file_relative_path(__file__, "./succeed.csv")}
                        }
                    }
                },
            },
        ),
        PresetDefinition(
            "sample_preset_fail",
            mode="basic",
Example 22
def emit_num_csv_local_file(_):
    return LocalFileHandle(file_relative_path(__file__, "../num.csv"))
Example 23
def get_test_project_recon_pipeline(pipeline_name):
    return ReOriginatedReconstructablePipelineForTest(
        ReconstructableRepository.for_file(
            file_relative_path(__file__, "test_pipelines/repo.py"),
            "define_demo_execution_repo",
        ).get_reconstructable_pipeline(pipeline_name))
Example 24
def emit_num_special_csv_local_file(_):
    return LocalFileHandle(
        file_relative_path(__file__, "../num_with_special_chars.csv"))
Example 25
def test_double_repository():
    loadable_repos = loadable_targets_from_python_file(
        file_relative_path(__file__, "double_repository.py"))

    assert set([lr.target_definition.name
                for lr in loadable_repos]) == {"repo_one", "repo_two"}
Example 26
def test_list_command():
    runner = CliRunner()

    execute_list_command(
        {
            'repository_yaml': None,
            'python_file': file_relative_path(__file__,
                                              'test_cli_commands.py'),
            'module_name': None,
            'fn_name': 'define_bar_repo',
        },
        no_print,
    )

    result = runner.invoke(
        pipeline_list_command,
        [
            '-f',
            file_relative_path(__file__, 'test_cli_commands.py'), '-n',
            'define_bar_repo'
        ],
    )

    assert result.exit_code == 0
    assert result.output == ('Repository bar\n'
                             '**************\n'
                             'Pipeline: baz\n'
                             'Description:\n'
                             'Not much tbh\n'
                             'Solids: (Execution Order)\n'
                             '    do_input\n'
                             '*************\n'
                             'Pipeline: foo\n'
                             'Solids: (Execution Order)\n'
                             '    do_something\n'
                             '    do_input\n')

    execute_list_command(
        {
            'repository_yaml': None,
            'python_file': None,
            'module_name': 'dagster_examples.intro_tutorial.repos',
            'fn_name': 'define_repo',
        },
        no_print,
    )

    result = runner.invoke(
        pipeline_list_command,
        ['-m', 'dagster_examples.intro_tutorial.repos', '-n', 'define_repo'])
    assert result.exit_code == 0
    assert result.output == ('Repository hello_cereal_repository\n'
                             '**********************************\n'
                             'Pipeline: complex_pipeline\n'
                             'Solids: (Execution Order)\n'
                             '    load_cereals\n'
                             '    sort_by_calories\n'
                             '    sort_by_protein\n'
                             '    display_results\n'
                             '*******************************\n'
                             'Pipeline: hello_cereal_pipeline\n'
                             'Solids: (Execution Order)\n'
                             '    hello_cereal\n')

    execute_list_command(
        {
            'repository_yaml':
            file_relative_path(__file__, 'repository_module.yaml'),
            'python_file':
            None,
            'module_name':
            None,
            'fn_name':
            None,
        },
        no_print,
    )

    result = runner.invoke(
        pipeline_list_command,
        ['-y', file_relative_path(__file__, 'repository_module.yaml')])
    assert result.exit_code == 0
    assert result.output == ('Repository hello_cereal_repository\n'
                             '**********************************\n'
                             'Pipeline: complex_pipeline\n'
                             'Solids: (Execution Order)\n'
                             '    load_cereals\n'
                             '    sort_by_calories\n'
                             '    sort_by_protein\n'
                             '    display_results\n'
                             '*******************************\n'
                             'Pipeline: hello_cereal_pipeline\n'
                             'Solids: (Execution Order)\n'
                             '    hello_cereal\n')

    with pytest.raises(UsageError):
        execute_list_command(
            {
                'repository_yaml': None,
                'python_file': 'foo.py',
                'module_name': 'dagster_examples.intro_tutorial.repos',
                'fn_name': 'define_repo',
            },
            no_print,
        )

    result = runner.invoke(
        pipeline_list_command,
        [
            '-f', 'foo.py', '-m', 'dagster_examples.intro_tutorial.repos',
            '-n', 'define_repo'
        ],
    )
    assert result.exit_code == 2

    with pytest.raises(UsageError):
        execute_list_command(
            {
                'repository_yaml': None,
                'python_file': None,
                'module_name': 'dagster_examples.intro_tutorial.repos',
                'fn_name': None,
            },
            no_print,
        )

    result = runner.invoke(pipeline_list_command,
                           ['-m', 'dagster_examples.intro_tutorial.repos'])
    assert result.exit_code == 2

    with pytest.raises(UsageError):
        execute_list_command(
            {
                'repository_yaml': None,
                'python_file': file_relative_path(__file__,
                                                  'test_cli_commands.py'),
                'module_name': None,
                'fn_name': None,
            },
            no_print,
        )

    result = runner.invoke(
        pipeline_list_command,
        ['-f', file_relative_path(__file__, 'test_cli_commands.py')])
    assert result.exit_code == 2
Example 27
def test_script_relative_path_file_relative_path_equiv():
    assert file_relative_path(__file__,
                              "foo") == file_relative_path(__file__, "foo")
Example 28
def test_tags_pipeline():
    runner = CliRunner()
    with mocked_instance() as instance:
        result = runner.invoke(
            pipeline_execute_command,
            [
                '-y',
                file_relative_path(__file__, 'repository_module.yaml'),
                '--tags',
                '{ "foo": "bar" }',
                'hello_cereal_pipeline',
            ],
        )
        assert result.exit_code == 0
        runs = instance.get_runs()
        assert len(runs) == 1
        run = runs[0]
        assert len(run.tags) == 1
        assert run.tags.get('foo') == 'bar'

    with mocked_instance() as instance:
        result = runner.invoke(
            pipeline_execute_command,
            [
                '-y',
                file_relative_path(__file__, '../repository.yaml'),
                '-p',
                'add',
                '--tags',
                '{ "foo": "bar" }',
                'multi_mode_with_resources',  # pipeline name
            ],
        )
        assert result.exit_code == 0
        runs = instance.get_runs()
        assert len(runs) == 1
        run = runs[0]
        assert len(run.tags) == 1
        assert run.tags.get('foo') == 'bar'

    with mocked_instance() as instance:
        result = runner.invoke(
            pipeline_backfill_command,
            [
                '-y',
                file_relative_path(__file__, 'repository_file.yaml'),
                '--noprompt',
                '--partition-set',
                'baz_partitions',
                '--partitions',
                'c',
                '--tags',
                '{ "foo": "bar" }',
                'baz',
            ],
        )
        assert result.exit_code == 0
        runs = instance.run_launcher.queue()
        assert len(runs) == 1
        run = runs[0]
        assert len(run.tags) >= 1
        assert run.tags.get('foo') == 'bar'
Example 29
def test_load_with_empty_working_directory(capfd):
    port = find_free_port()
    # File that will fail if working directory isn't set to default
    python_file = file_relative_path(__file__,
                                     "grpc_repo_with_local_import.py")

    subprocess_args = [
        "dagster",
        "api",
        "grpc",
        "--port",
        str(port),
        "--python-file",
        python_file,
    ]

    with new_cwd(os.path.dirname(__file__)):
        process = subprocess.Popen(
            subprocess_args,
            stdout=subprocess.PIPE,
        )

        try:
            wait_for_grpc_server(
                process, DagsterGrpcClient(port=port, host="localhost"),
                subprocess_args)
            assert DagsterGrpcClient(port=port).ping("foobar") == "foobar"
        finally:
            process.terminate()

        # specifying an empty working directory should cause the load to fail

        port = find_free_port()
        subprocess_args = [
            "dagster",
            "api",
            "grpc",
            "--port",
            str(port),
            "--python-file",
            python_file,
            "--empty-working-directory",
        ]

        process = subprocess.Popen(
            subprocess_args,
            stdout=subprocess.PIPE,
        )
        try:
            with pytest.raises(Exception):
                wait_for_grpc_server(
                    process, DagsterGrpcClient(port=port, host="localhost"),
                    subprocess_args)

            process.wait()

            _, err = capfd.readouterr()
            assert "No module named" in err
        finally:
            if process.poll() is None:
                process.terminate()
Example 30
def test_workspace():
    with load_workspace_from_yaml_paths(
        [file_relative_path(__file__, "../workspace.yaml")]) as workspace:
        assert len(workspace.repository_location_handles) == 2