Example #1
def test_reconstructable_cli_args():
    recon_file = ReconstructableRepository.for_file(
        "foo_file", "bar_function", "/path/to/working_dir"
    )
    assert recon_file.get_cli_args() == "-f {foo_file} -a bar_function -d {working_directory}".format(
        foo_file=os.path.abspath(os.path.expanduser("foo_file")),
        working_directory=os.path.abspath(os.path.expanduser("/path/to/working_dir")),
    )
    recon_file = ReconstructableRepository.for_file("foo_file", "bar_function")
    assert recon_file.get_cli_args() == "-f {foo_file} -a bar_function -d {working_dir}".format(
        foo_file=os.path.abspath(os.path.expanduser("foo_file")), working_dir=os.getcwd()
    )
    recon_module = ReconstructableRepository.for_module("foo_module", "bar_function")
    assert recon_module.get_cli_args() == "-m foo_module -a bar_function"
Example #2
def test_reconstructable_cli_args():
    recon_file = ReconstructableRepository.for_file('foo_file', 'bar_function',
                                                    '/path/to/working_dir')
    assert recon_file.get_cli_args() == '-f {foo_file} -a bar_function -d /path/to/working_dir'.format(
        foo_file=os.path.abspath(os.path.expanduser('foo_file')))
    recon_file = ReconstructableRepository.for_file('foo_file', 'bar_function')
    assert recon_file.get_cli_args() == '-f {foo_file} -a bar_function -d {working_dir}'.format(
        foo_file=os.path.abspath(os.path.expanduser('foo_file')),
        working_dir=os.getcwd())
    recon_module = ReconstructableRepository.for_module(
        'foo_module', 'bar_function')
    assert recon_module.get_cli_args() == '-m foo_module -a bar_function'
Example #3
def test_bad_should_execute():
    with seven.TemporaryDirectory() as temp_dir:
        with environ({'DAGSTER_HOME': temp_dir}):
            instance = DagsterInstance.get()

            recon_repo = ReconstructableRepository.for_file(
                __file__, 'the_repo')
            bad_should_execute = recon_repo.get_reconstructable_schedule(
                'bad_should_execute_schedule')

            result = sync_launch_scheduled_execution(
                bad_should_execute.get_origin())
            assert isinstance(result, ScheduledExecutionFailed)
            assert (
                'Error occurred during the execution of should_execute for schedule bad_should_execute_schedule'
                in result.errors[0].to_string())

            assert not result.run_id

            ticks = instance.get_schedule_ticks(
                bad_should_execute.get_origin_id())
            assert ticks[0].status == ScheduleTickStatus.FAILURE
            assert (
                'Error occurred during the execution of should_execute for schedule bad_should_execute_schedule'
                in ticks[0].error.message)
Example #4
    def test_run_finished(self, graphql_context):
        instance = graphql_context.instance

        pipeline = ReconstructableRepository.for_file(
            file_relative_path(__file__, "setup.py"), "test_repo",
        ).get_reconstructable_pipeline("noop_pipeline")

        pipeline_result = execute_pipeline(pipeline, instance=instance)
        assert pipeline_result.success
        assert pipeline_result.run_id

        time.sleep(0.05)  # wait briefly so the execution can finish

        result = execute_dagster_graphql(
            graphql_context, RUN_CANCELLATION_QUERY, variables={"runId": pipeline_result.run_id}
        )

        assert (
            result.data["terminatePipelineExecution"]["__typename"]
            == "TerminatePipelineExecutionFailure"
        )
        assert (
            "is not in a started state. Current status is SUCCESS"
            in result.data["terminatePipelineExecution"]["message"]
        )
Example #5
def cli_api_schedule_origin(schedule_name):
    with seven.TemporaryDirectory() as temp_dir:
        with environ({'DAGSTER_HOME': temp_dir}):
            recon_repo = ReconstructableRepository.for_file(
                __file__, 'the_repo')
            schedule = recon_repo.get_reconstructable_schedule(schedule_name)
            yield schedule.get_origin()
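Since cli_api_schedule_origin yields from inside two context managers, it is presumably wrapped with contextlib.contextmanager (or registered as a pytest fixture) at its definition site. A minimal usage sketch under that assumption:

from contextlib import contextmanager

# Assumption: wrap the generator above so callers can use `with`. The yielded
# origin is only valid while DAGSTER_HOME still points at the temp dir, and
# `the_repo` must be defined in this file for the lookup to succeed.
schedule_origin = contextmanager(cli_api_schedule_origin)

with schedule_origin('simple_schedule') as origin:
    print(origin.get_id())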
Example #6
def recon_repo_for_cli_args(kwargs):
    """Builds a ReconstructableRepository for CLI arguments, which can be any of the combinations
    for repo loading above.
    """
    check.dict_param(kwargs, "kwargs")
    _cli_load_invariant(kwargs.get("pipeline_name") is None)

    if kwargs.get("workspace"):
        check.not_implemented(
            "Workspace not supported yet in this cli command")

    elif kwargs.get("module_name") and kwargs.get("fn_name"):
        _cli_load_invariant(kwargs.get("repository_yaml") is None)
        _cli_load_invariant(kwargs.get("python_file") is None)
        return ReconstructableRepository.for_module(
            kwargs["module_name"],
            kwargs["fn_name"],
            get_working_directory_from_kwargs(kwargs),
        )

    elif kwargs.get("python_file") and kwargs.get("fn_name"):
        _cli_load_invariant(kwargs.get("repository_yaml") is None)
        _cli_load_invariant(kwargs.get("module_name") is None)
        return ReconstructableRepository.for_file(
            os.path.abspath(kwargs["python_file"]),
            kwargs["fn_name"],
            get_working_directory_from_kwargs(kwargs),
        )
    else:
        _cli_load_invariant(False)
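For reference, a hedged sketch of invoking this loader directly; the kwargs keys mirror the CLI flags the function checks, and every concrete name below is illustrative:

# Hypothetical kwargs, as the CLI layer might pass them for
# `-m my_pkg.repo -a define_repo`; all names here are made up.
kwargs = {
    "pipeline_name": None,
    "workspace": None,
    "repository_yaml": None,
    "python_file": None,
    "module_name": "my_pkg.repo",
    "fn_name": "define_repo",
    "working_directory": None,
}
recon_repo = recon_repo_for_cli_args(kwargs)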
Example #7
def recon_repo_for_cli_args(kwargs):
    '''Builds a ReconstructableRepository from CLI arguments, which may be any of the
    repo-loading combinations described above.
    '''
    check.dict_param(kwargs, 'kwargs')
    _cli_load_invariant(kwargs.get('pipeline_name') is None)

    if kwargs.get('repository_yaml') or all_none(kwargs):
        _cli_load_invariant(kwargs.get('module_name') is None)
        _cli_load_invariant(kwargs.get('python_file') is None)
        _cli_load_invariant(kwargs.get('fn_name') is None)
        repo_yaml = (
            os.path.abspath(kwargs.get('repository_yaml'))
            if kwargs.get('repository_yaml')
            else DEFAULT_REPOSITORY_YAML_FILENAME
        )
        _cli_load_invariant(
            os.path.exists(repo_yaml),
            'Expected to use file "{}" to load repository but it does not exist. '
            'Verify your current working directory or CLI arguments.'.format(repo_yaml),
        )
        return ReconstructableRepository.from_yaml(repo_yaml)
    elif kwargs.get('module_name') and kwargs.get('fn_name'):
        _cli_load_invariant(kwargs.get('repository_yaml') is None)
        _cli_load_invariant(kwargs.get('python_file') is None)
        return ReconstructableRepository.for_module(kwargs['module_name'], kwargs['fn_name'])

    elif kwargs.get('python_file') and kwargs.get('fn_name'):
        _cli_load_invariant(kwargs.get('repository_yaml') is None)
        _cli_load_invariant(kwargs.get('module_name') is None)
        return ReconstructableRepository.for_file(
            os.path.abspath(kwargs['python_file']), kwargs['fn_name']
        )
    else:
        _cli_load_invariant(False)
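This older variant also falls back to a repository.yaml file when no targeting flags are given. A hedged sketch of that branch (assumes a repository.yaml exists in the current working directory):

# Hypothetical: with no file/module/yaml flags set, the loader falls back to
# DEFAULT_REPOSITORY_YAML_FILENAME in the current working directory.
kwargs = {'pipeline_name': None, 'repository_yaml': None,
          'module_name': None, 'python_file': None, 'fn_name': None}
recon_repo = recon_repo_for_cli_args(kwargs)  # loads ./repository.yaml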
Example #8
def get_test_project_external_pipeline(pipeline_name):
    return (
        InProcessRepositoryLocation(
            ReconstructableRepository.for_file(
                file_relative_path(__file__, 'test_pipelines/repo.py'),
                'define_demo_execution_repo',
            )
        )
        .get_repository('demo_execution_repo')
        .get_full_external_pipeline(pipeline_name)
    )
Example #9
def test_origin_ids_stable(monkeypatch):
    # This test asserts fixed schedule origin IDs to prevent any changes from
    # accidentally shifting these ids that are persisted to ScheduleStorage

    # stable exe path for test
    monkeypatch.setattr(sys, 'executable', '/fake/python')

    file_repo = ReconstructableRepository.for_file('/path/to/file', 'the_repo')

    # ensure monkeypatch worked
    assert file_repo.get_origin().executable_path == '/fake/python'

    assert file_repo.get_origin_id() == '9ce1e0f6f059725bb6f85deeaea0322734bd77d6'
    schedule = file_repo.get_reconstructable_schedule('simple_schedule')
    assert schedule.get_origin_id() == 'f1a21671fccf4c986d20f40cac0730e47daa0183'

    module_repo = ReconstructableRepository.for_module('dummy_module', 'the_repo')
    assert module_repo.get_origin_id() == '86503fc349d4ecf44bd22ca1de64c10f8ffcebbd'
    module_schedule = module_repo.get_reconstructable_schedule('simple_schedule')
    assert module_schedule.get_origin_id() == 'e4c7131b74ad600969876d8fa461f215ced9631a'
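The 40-hex-character IDs are consistent with a SHA-1 content hash over the serialized origin, which is why the test pins sys.executable first: the interpreter path is part of the hashed identity. An illustrative sketch of the idea only, not dagster's actual derivation:

import hashlib

def illustrative_origin_id(*fields):
    # Illustrative only: derive a stable ID from the fields identifying an
    # origin, so the ID shifts whenever any field (executable path included)
    # changes.
    return hashlib.sha1('|'.join(fields).encode('utf-8')).hexdigest()

assert len(illustrative_origin_id('/fake/python', '/path/to/file', 'the_repo')) == 40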
Example #10
def test_reconstructable_cli_args():
    recon_file = ReconstructableRepository.for_file('foo_file', 'bar_function')
    assert recon_file.get_cli_args() == '-f {foo_file} -a bar_function'.format(
        foo_file=os.path.abspath(os.path.expanduser('foo_file')))
    recon_module = ReconstructableRepository.for_module(
        'foo_module', 'bar_function')
    assert recon_module.get_cli_args() == '-m foo_module -a bar_function'
Example #11
def get_test_project_recon_pipeline(pipeline_name, container_image=None):
    return ReOriginatedReconstructablePipelineForTest(
        ReconstructableRepository.for_file(
            file_relative_path(__file__, "test_pipelines/repo.py"),
            "define_demo_execution_repo",
            container_image=container_image,
        ).get_reconstructable_pipeline(pipeline_name))
Example #12
def get_test_project_external_pipeline(pipeline_name):
    return (
        InProcessRepositoryLocationOrigin(
            ReconstructableRepository.for_file(
                file_relative_path(__file__, "test_pipelines/repo.py"),
                "define_demo_execution_repo",
            )
        )
        .create_handle()
        .create_location()
        .get_repository("demo_execution_repo")
        .get_full_external_pipeline(pipeline_name)
    )
Example #13
def define_subprocess_context_for_file(python_file, fn_name, instance):
    check.inst_param(instance, 'instance', DagsterInstance)
    return DagsterGraphQLInProcessRepositoryContext(
        recon_repo=ReconstructableRepository.for_file(python_file, fn_name),
        instance=instance,
        execution_manager=SubprocessExecutionManager(instance),
    )
Example #14
def test_origin_ids_stable(monkeypatch):
    # This test asserts fixed schedule origin IDs to prevent any changes from
    # accidentally shifting these ids that are persisted to ScheduleStorage

    # stable exe path for test
    monkeypatch.setattr(sys, "executable", "/fake/python")

    file_repo = ReconstructableRepository.for_file("/path/to/file", "the_repo",
                                                   "/path/to/working_dir")

    # ensure monkeypatch worked
    assert file_repo.get_origin().executable_path == "/fake/python"

    assert file_repo.get_origin_id() == "3766b1c554fd961b88b9301756250febff3d0ffa"
    schedule = file_repo.get_reconstructable_schedule("simple_schedule")
    assert schedule.get_origin_id() == "7c60d01588673ffcaea16b6fd59d998dc63ed3c3"

    module_repo = ReconstructableRepository.for_module("dummy_module", "the_repo")
    assert module_repo.get_origin_id() == "86503fc349d4ecf44bd22ca1de64c10f8ffcebbd"
    module_schedule = module_repo.get_reconstructable_schedule("simple_schedule")
    assert module_schedule.get_origin_id() == "e4c7131b74ad600969876d8fa461f215ced9631a"
Example #15
def _get_unloadable_sensor_origin():
    working_directory = os.path.dirname(__file__)
    recon_repo = ReconstructableRepository.for_file(__file__, "doesnt_exist",
                                                    working_directory)
    return ExternalRepositoryOrigin(
        InProcessRepositoryLocationOrigin(recon_repo),
        "fake_repository").get_job_origin("doesnt_exist")
Example #16
def get_test_project_external_repo(container_image=None):
    with InProcessRepositoryLocationOrigin(
            ReconstructableRepository.for_file(
                file_relative_path(__file__, "test_pipelines/repo.py"),
                "define_demo_execution_repo",
                container_image=container_image,
            )).create_location() as location:
        yield location.get_repository("demo_execution_repo")
Example #17
def define_context_for_file(python_file, fn_name, instance):
    check.inst_param(instance, 'instance', DagsterInstance)
    return DagsterGraphQLContext(
        locations=[
            InProcessRepositoryLocation(ReconstructableRepository.for_file(python_file, fn_name))
        ],
        instance=instance,
    )
Example #18
def workspace(instance, image):
    with in_process_test_workspace(
            instance,
            ReconstructableRepository.for_file(repo.__file__,
                                               repo.repository.__name__,
                                               container_image=image),
    ) as workspace:
        yield workspace
Example #19
def test_run_always_finishes():  # pylint: disable=redefined-outer-name
    with seven.TemporaryDirectory() as temp_dir:
        instance = DagsterInstance.local_temp(
            temp_dir,
            overrides={
                "run_launcher": {
                    "module": "dagster.core.launcher.grpc_run_launcher",
                    "class": "GrpcRunLauncher",
                }
            },
        )

        pipeline_run = instance.create_run_for_pipeline(
            pipeline_def=slow_pipeline, run_config=None)
        run_id = pipeline_run.run_id

        recon_repo = ReconstructableRepository.for_file(__file__, "nope")
        loadable_target_origin = LoadableTargetOrigin.from_python_origin(
            recon_repo.get_origin())

        server_process = GrpcServerProcess(
            loadable_target_origin=loadable_target_origin, max_workers=4)
        with server_process.create_ephemeral_client() as api_client:
            repository_location = GrpcServerRepositoryLocation(
                RepositoryLocationHandle.create_grpc_server_location(
                    location_name="test",
                    port=api_client.port,
                    socket=api_client.socket,
                    host=api_client.host,
                ))

            external_pipeline = repository_location.get_repository(
                "nope").get_full_external_pipeline("slow_pipeline")

            assert instance.get_run_by_id(run_id).status == PipelineRunStatus.NOT_STARTED

            launcher = instance.run_launcher
            launcher.launch_run(instance=instance,
                                run=pipeline_run,
                                external_pipeline=external_pipeline)

        # Server process now receives shutdown event, run has not finished yet
        pipeline_run = instance.get_run_by_id(run_id)
        assert not pipeline_run.is_finished
        assert server_process.server_process.poll() is None

        # Server should wait until run finishes, then shutdown
        pipeline_run = poll_for_run(instance, run_id)
        assert pipeline_run.status == PipelineRunStatus.SUCCESS

        start_time = time.time()
        while server_process.server_process.poll() is None:
            time.sleep(0.05)
            # Verify server process cleans up eventually
            assert time.time() - start_time < 5

        server_process.wait()
Example #20
def get_reconstructable_repository_from_origin_kwargs(kwargs):
    if kwargs.get('python_file'):
        _check_cli_arguments_none(kwargs, 'module_name')
        return ReconstructableRepository.for_file(kwargs.get('python_file'),
                                                  kwargs.get('attribute'))
    if kwargs.get('module_name'):
        return ReconstructableRepository.for_module(kwargs.get('module_name'),
                                                    kwargs.get('attribute'))
    check.failed('invalid')
Example #21
def get_test_project_external_repo(container_image=None):
    return RepositoryLocation.from_handle(
        RepositoryLocationHandle.create_from_repository_location_origin(
            InProcessRepositoryLocationOrigin(
                ReconstructableRepository.for_file(
                    file_relative_path(__file__, "test_pipelines/repo.py"),
                    "define_demo_execution_repo",
                    container_image=container_image,
                )
            )
        )
    ).get_repository("demo_execution_repo")
Example #22
def define_in_process_context(python_file, fn_name, instance):
    check.inst_param(instance, "instance", DagsterInstance)

    return WorkspaceProcessContext(
        workspace=Workspace([
            InProcessRepositoryLocationOrigin(
                ReconstructableRepository.for_file(python_file, fn_name))
        ]),
        instance=instance,
    ).create_request_context()
Example #23
def get_test_project_workspace(instance, container_image=None):
    with in_process_test_workspace(
            instance,
            recon_repo=ReconstructableRepository.for_file(
                file_relative_path(__file__, "test_pipelines/repo.py"),
                "define_demo_execution_repo",
                container_image=container_image,
            ),
    ) as workspace:
        yield workspace
Example #24
def test_dev_pipelines():
    with instance_for_test() as instance:
        recon_repo = ReconstructableRepository.for_file(
            __file__, "graph_job_dev_repo")
        dev_pipelines = graph_job_dev_repo.get_all_pipelines()
        assert len(dev_pipelines) == 5
        for pipeline in dev_pipelines:
            execute_pipeline(
                recon_repo.get_reconstructable_pipeline(pipeline.name),
                instance=instance,
            )
Example #25
def define_subprocess_context_for_file(python_file, fn_name, instance):
    check.inst_param(instance, 'instance', DagsterInstance)
    return DagsterGraphQLContext(
        environments=[
            InProcessDagsterEnvironment(
                ReconstructableRepository.for_file(python_file, fn_name),
                execution_manager=SubprocessExecutionManager(instance),
            )
        ],
        instance=instance,
    )
Example #26
def define_test_snapshot_context():
    return DagsterGraphQLContext(
        instance=DagsterInstance.ephemeral(),
        environments=[
            InProcessDagsterEnvironment(
                ReconstructableRepository.for_file(__file__,
                                                   'define_repository'),
                execution_manager=SynchronousExecutionManager(),
            )
        ],
    )
Example #27
def test_skip():
    with seven.TemporaryDirectory() as temp_dir:
        with environ({'DAGSTER_HOME': temp_dir}):
            instance = DagsterInstance.get()

            recon_repo = ReconstructableRepository.for_file(
                __file__, 'the_repo')
            skip = recon_repo.get_reconstructable_schedule('skip_schedule')
            result = sync_launch_scheduled_execution(skip.get_origin())

            assert isinstance(result, ScheduledExecutionSkipped)

            ticks = instance.get_schedule_ticks(skip.get_origin_id())
            assert ticks[0].status == ScheduleTickStatus.SKIPPED
Example #28
def test_execute_hammer_through_dagit():
    recon_repo = ReconstructableRepository.for_file(
        file_relative_path(__file__, '../../../../examples/dagster_examples/toys/hammer.py'),
        'hammer_pipeline',
    )
    instance = DagsterInstance.local_temp()

    context = DagsterGraphQLContext(
        locations=[InProcessRepositoryLocation(recon_repo)], instance=instance,
    )

    selector = get_legacy_pipeline_selector(context, 'hammer_pipeline')

    executor = SyncExecutor()

    variables = {
        'executionParams': {
            'runConfigData': {
                'storage': {'filesystem': {}},
                'execution': {'dask': {'config': {'cluster': {'local': {}}}}},
            },
            'selector': selector,
            'mode': 'default',
        }
    }

    start_pipeline_result = graphql(
        request_string=START_PIPELINE_EXECUTION_MUTATION,
        schema=create_schema(),
        context=context,
        variables=variables,
        executor=executor,
    )

    if start_pipeline_result.errors:
        raise Exception('{}'.format(start_pipeline_result.errors))

    run_id = start_pipeline_result.data['startPipelineExecution']['run']['runId']

    context.drain_outstanding_executions()

    subscription = execute_dagster_graphql(context, SUBSCRIPTION_QUERY, variables={'runId': run_id})

    subscribe_results = []
    subscription.subscribe(subscribe_results.append)

    messages = [x['__typename'] for x in subscribe_results[0].data['pipelineRunLogs']['messages']]

    assert 'PipelineStartEvent' in messages
    assert 'PipelineSuccessEvent' in messages
Example #29
def test_bad_load():
    with _default_instance() as instance:

        working_directory = os.path.dirname(__file__)
        recon_repo = ReconstructableRepository.for_file(__file__, "doesnt_exist", working_directory)
        schedule = recon_repo.get_reconstructable_schedule("also_doesnt_exist")

        result = sync_launch_scheduled_execution(schedule.get_origin())
        assert isinstance(result, ScheduledExecutionFailed)
        assert "doesnt_exist not found at module scope in file" in result.errors[0].to_string()

        ticks = instance.get_schedule_ticks(schedule.get_origin_id())
        assert ticks[0].status == ScheduleTickStatus.FAILURE
        assert "doesnt_exist not found at module scope in file" in ticks[0].error.message
Example #30
def test_bad_load():
    with schedule_instance() as instance:
        working_directory = os.path.dirname(__file__)
        recon_repo = ReconstructableRepository.for_file(__file__, "doesnt_exist", working_directory)
        schedule = recon_repo.get_reconstructable_schedule("also_doesnt_exist")
        fake_origin = schedule.get_origin()

        initial_datetime = datetime(
            year=2019, month=2, day=27, hour=23, minute=59, second=59, tzinfo=get_utc_timezone(),
        )
        with freeze_time(initial_datetime) as frozen_datetime:
            schedule_state = ScheduleState(
                fake_origin,
                ScheduleStatus.RUNNING,
                "0 0 * * *",
                get_timestamp_from_utc_datetime(get_current_datetime_in_utc()),
            )
            instance.add_schedule_state(schedule_state)

            frozen_datetime.tick(delta=timedelta(seconds=1))

            launch_scheduled_runs(instance, get_current_datetime_in_utc())

            assert instance.get_runs_count() == 0

            ticks = instance.get_schedule_ticks(fake_origin.get_id())

            assert len(ticks) == 1
            assert ticks[0].status == ScheduleTickStatus.FAILURE
            assert ticks[0].timestamp == get_timestamp_from_utc_datetime(
                get_current_datetime_in_utc()
            )
            assert "doesnt_exist not found at module scope in file" in ticks[0].error.message

            frozen_datetime.tick(delta=timedelta(days=1))

            launch_scheduled_runs(instance, get_current_datetime_in_utc())

            assert instance.get_runs_count() == 0

            ticks = instance.get_schedule_ticks(fake_origin.get_id())

            assert len(ticks) == 2
            assert ticks[0].status == ScheduleTickStatus.FAILURE
            assert ticks[0].timestamp == get_timestamp_from_utc_datetime(
                get_current_datetime_in_utc()
            )
            assert "doesnt_exist not found at module scope in file" in ticks[0].error.message