Example #1
def test_failing():
    run_id = make_new_run_id()
    handle = ExecutionTargetHandle.for_pipeline_python_file(
        __file__, 'failing_pipeline')
    env_config = {
        'solids': {
            'sum_solid': {
                'inputs': {
                    'num': script_relative_path('data/num.csv')
                }
            }
        }
    }
    selector = ExecutionSelector('csv_hello_world')
    run_storage = InMemoryRunStorage()
    pipeline_run = run_storage.create_run(
        pipeline_name=failing_pipeline.name,
        pipeline_name=failing_pipeline.name,
        run_id=run_id,
        selector=selector,
        env_config=env_config,
        mode='default',
        reexecution_config=None,
        step_keys_to_execute=None,
    )
    execution_manager = MultiprocessingExecutionManager()
    execution_manager.execute_pipeline(handle,
                                       failing_pipeline,
                                       pipeline_run,
                                       raise_on_error=False)
    execution_manager.join()
    assert pipeline_run.status == PipelineRunStatus.FAILURE
    assert pipeline_run.all_logs()
Example #2
def construct_run_storage(self):
    if self.storage_mode == 'filesystem':
        return FileSystemRunStorage()
    elif self.storage_mode == 'in_memory':
        return InMemoryRunStorage()
    elif self.storage_mode == 's3':
        # TODO: Revisit whether we want to use S3 run storage
        return FileSystemRunStorage()
    elif self.storage_mode is None:
        return InMemoryRunStorage()
    else:
        raise DagsterInvariantViolationError(
            'Invalid storage specified {}'.format(self.storage_mode))
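A minimal sketch of driving the dispatch above (the enclosing context class is not shown, so a SimpleNamespace stands in for self): any object exposing a storage_mode attribute selects the matching run storage.

from types import SimpleNamespace

# both 'in_memory' and None fall through to InMemoryRunStorage
ctx = SimpleNamespace(storage_mode='in_memory')
assert isinstance(construct_run_storage(ctx), InMemoryRunStorage)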
Example #3
def create_test_pipeline_execution_context(
    logger_defs=None, scoped_resources_builder=None, tags=None, run_config_loggers=None
):
    run_id = str(uuid.uuid4())
    loggers = check.opt_dict_param(
        logger_defs, 'logger_defs', key_type=str, value_type=LoggerDefinition
    )
    mode_def = ModeDefinition(logger_defs=loggers)
    pipeline_def = PipelineDefinition(
        name='test_legacy_context', solid_defs=[], mode_defs=[mode_def]
    )
    run_config_loggers = check.opt_list_param(
        run_config_loggers, 'run_config_loggers', of_type=logging.Logger
    )
    run_config = RunConfig(run_id, tags=tags, loggers=run_config_loggers)
    environment_dict = {'loggers': {key: {} for key in loggers}}
    creation_data = create_context_creation_data(pipeline_def, environment_dict, run_config)
    log_manager = create_log_manager(creation_data)

    scoped_resources_builder = check.opt_inst_param(
        scoped_resources_builder,
        'scoped_resources_builder',
        ScopedResourcesBuilder,
        default=ScopedResourcesBuilder(),
    )
    return construct_pipeline_execution_context(
        context_creation_data=creation_data,
        scoped_resources_builder=scoped_resources_builder,
        system_storage_data=SystemStorageData(
            run_storage=InMemoryRunStorage(),
            intermediates_manager=InMemoryIntermediatesManager(),
            file_manager=LocalFileManager.for_run_id(run_id),
        ),
        log_manager=log_manager,
    )
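A minimal sketch of the common call: every parameter above is optional, so tests can grab a fully in-memory execution context with no arguments.

context = create_test_pipeline_execution_context()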
Example #4
def execute_query(
    handle,
    query,
    variables=None,
    pipeline_run_storage=None,
    scheduler=None,
    raise_on_error=False,
    use_sync_executor=False,
):
    check.inst_param(handle, 'handle', ExecutionTargetHandle)
    check.str_param(query, 'query')
    check.opt_dict_param(variables, 'variables')
    # We allow external creation of the pipeline_run_storage to support testing contexts where we
    # need access to the underlying run storage
    check.opt_inst_param(pipeline_run_storage, 'pipeline_run_storage', RunStorage)
    check.opt_inst_param(scheduler, 'scheduler', Scheduler)
    check.bool_param(raise_on_error, 'raise_on_error')
    check.bool_param(use_sync_executor, 'use_sync_executor')

    query = query.strip('\'" \n\t')

    execution_manager = SynchronousExecutionManager()

    pipeline_run_storage = pipeline_run_storage or InMemoryRunStorage()

    context = DagsterGraphQLContext(
        handle=handle,
        pipeline_runs=pipeline_run_storage,
        scheduler=scheduler,
        execution_manager=execution_manager,
        raise_on_error=raise_on_error,
        version=__version__,
    )

    executor = SyncExecutor() if use_sync_executor else GeventExecutor()

    result = graphql(
        request_string=query,
        schema=create_schema(),
        context=context,
        variables=variables,
        executor=executor,
    )

    result_dict = result.to_dict()

    # Detect whether this is an error response. If so, iterate over result_dict
    # alongside the original result, which contains GraphQLError objects. When a
    # GraphQL error carries an original_error property (typically the exception
    # the resolver raised), serialize that exception's stack trace into the
    # 'stack_trace' property of the corresponding error dict to ease debugging.

    if 'errors' in result_dict:
        check.invariant(len(result_dict['errors']) == len(result.errors))
        for python_error, error_dict in zip(result.errors, result_dict['errors']):
            if hasattr(python_error, 'original_error') and python_error.original_error:
                error_dict['stack_trace'] = get_stack_trace_array(python_error.original_error)

    return result_dict
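As the comment above notes, callers may supply their own run storage; a minimal sketch (handle and QUERY stand in for values built as in the surrounding examples):

run_storage = InMemoryRunStorage()
result = execute_query(handle, QUERY, pipeline_run_storage=run_storage)
# the test can now inspect run_storage directly for runs created by the query
assert 'errors' not in result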
Example #5
def test_get_schedule():
    with seven.TemporaryDirectory() as temp_dir:
        instance = DagsterInstance(
            instance_type=InstanceType.EPHEMERAL,
            local_artifact_storage=LocalArtifactStorage(temp_dir),
            run_storage=InMemoryRunStorage(),
            event_storage=InMemoryEventLogStorage(),
            compute_log_manager=NoOpComputeLogManager(),
            schedule_storage=SqliteScheduleStorage.from_local(temp_dir),
            scheduler=FilesystemTestScheduler(temp_dir),
            run_launcher=SyncInMemoryRunLauncher(),
        )

        context = define_test_context(instance)
        # Initialize scheduler
        repository = context.legacy_get_repository_definition()
        instance.reconcile_scheduler_state(
            repository=repository,
            python_path='/path/to/python',
            repository_path='/path/to/repository',
        )

        result = execute_dagster_graphql(
            context,
            GET_SCHEDULE,
            variables={'scheduleName': 'partition_based_multi_mode_decorator'},
        )

        assert result.data
        assert result.data['scheduleOrError']['__typename'] == 'RunningSchedule'
        assert result.data['scheduleOrError']['scheduleDefinition']['partitionSet']
Example #6
def create_test_pipeline_execution_context(
    loggers=None, resources=None, tags=None, run_config_loggers=None
):
    run_id = str(uuid.uuid4())
    loggers = check.opt_dict_param(loggers, 'loggers', key_type=str, value_type=LoggerDefinition)
    mode_def = ModeDefinition(loggers=loggers)
    pipeline_def = PipelineDefinition(
        name='test_legacy_context', solids=[], mode_definitions=[mode_def]
    )
    run_config_loggers = check.opt_list_param(
        run_config_loggers, 'run_config_loggers', of_type=logging.Logger
    )
    run_config = RunConfig(run_id, tags=tags, loggers=run_config_loggers)
    environment_config = create_environment_config(
        pipeline_def, {'loggers': {key: {} for key in loggers}}
    )
    loggers = _create_loggers(environment_config, run_config, pipeline_def, mode_def)
    log_manager = DagsterLogManager(run_config.run_id, {}, loggers)

    return construct_pipeline_execution_context(
        run_config=run_config,
        pipeline_def=pipeline_def,
        resources=resources,
        environment_config=environment_config,
        run_storage=InMemoryRunStorage(),
        intermediates_manager=InMemoryIntermediatesManager(),
        log_manager=log_manager,
    )
Example #7
def create_mem_system_storage_data(init_context):
    return SystemStorageData(
        run_storage=InMemoryRunStorage(),
        intermediates_manager=InMemoryIntermediatesManager(),
        file_manager=LocalFileManager.for_run_id(
            init_context.run_config.run_id),
    )
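A minimal sketch of wiring this factory up, assuming the @system_storage decorator available in this era of the dagster API:

from dagster import system_storage

@system_storage(name='in_memory', is_persistent=False)
def mem_system_storage(init_context):
    # delegate to the factory above; nothing survives the process
    return create_mem_system_storage_data(init_context)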
Example #8
def test_run_launcher():
    test_queue = InMemoryRunLauncher()

    with seven.TemporaryDirectory() as temp_dir:
        instance = DagsterInstance(
            instance_type=InstanceType.EPHEMERAL,
            local_artifact_storage=LocalArtifactStorage(temp_dir),
            run_storage=InMemoryRunStorage(),
            event_storage=InMemoryEventLogStorage(),
            compute_log_manager=NoOpComputeLogManager(temp_dir),
            run_launcher=test_queue,
        )

        # everything below stays inside the with block so temp_dir outlives the run
        context = define_context_for_repository_yaml(
            path=script_relative_path('../repository.yaml'), instance=instance
        )

        result = execute_dagster_graphql(
            context=context,
            query=LAUNCH_PIPELINE_EXECUTION_MUTATION,
            variables={
                'executionParams': {'selector': {'name': 'no_config_pipeline'}, 'mode': 'default'}
            },
        )

        assert (
            result.data['launchPipelineExecution']['__typename']
            == 'LaunchPipelineExecutionSuccess'
        )
        assert result.data['launchPipelineExecution']['run']['status'] == 'NOT_STARTED'

        run_id = result.data['launchPipelineExecution']['run']['runId']

        test_queue.run_one(instance)

        result = execute_dagster_graphql(
            context=context, query=RUN_QUERY, variables={'runId': run_id}
        )
        assert result.data['pipelineRunOrError']['__typename'] == 'PipelineRun'
        assert result.data['pipelineRunOrError']['status'] == 'SUCCESS'
Example #9
def run_test_backfill(execution_args, expected_count=None, error_message=None):
    runner = CliRunner()
    run_launcher = InMemoryRunLauncher()
    with seven.TemporaryDirectory() as temp_dir:
        instance = DagsterInstance(
            instance_type=InstanceType.EPHEMERAL,
            local_artifact_storage=LocalArtifactStorage(temp_dir),
            run_storage=InMemoryRunStorage(),
            event_storage=InMemoryEventLogStorage(),
            compute_log_manager=NoOpComputeLogManager(),
            run_launcher=run_launcher,
        )
        with mock.patch(
                'dagster.core.instance.DagsterInstance.get') as _instance:
            _instance.return_value = instance

            if error_message:
                with pytest.raises(UsageError) as error_info:
                    execute_backfill_command(
                        backfill_execute_args(execution_args), no_print)
                assert error_info and error_message in error_info.value.message

            result = runner.invoke(pipeline_backfill_command,
                                   backfill_cli_runner_args(execution_args))
            if error_message:
                assert result.exit_code == 2
            else:
                assert result.exit_code == 0
                if expected_count:
                    assert len(run_launcher.queue()) == expected_count
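A plausible sketch of the no_print callback passed to execute_backfill_command above (its real definition lives elsewhere in the test module): a print function that swallows all output.

def no_print(_):
    return None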
Example #10
def test_create_app():
    handle = ExecutionTargetHandle.for_repo_yaml(
        script_relative_path('./repository.yaml'))
    pipeline_run_storage = InMemoryRunStorage()
    assert create_app(handle,
                      pipeline_run_storage,
                      use_synchronous_execution_manager=True)
Example #11
def test_pipelines_python_error():
    ctx = DagsterGraphQLContext(
        handle=ExecutionTargetHandle.for_repo_fn(define_error_pipeline_repo),
        pipeline_runs=InMemoryRunStorage(),
        execution_manager=SynchronousExecutionManager(),
    )
    result = execute_dagster_graphql(ctx, PIPELINES)
    assert result.data['pipelinesOrError']['__typename'] == "PythonError"
Example #12
def define_context(raise_on_error=True, log_dir=None, schedule_dir=None):
    return DagsterGraphQLContext(
        handle=ExecutionTargetHandle.for_repo_fn(define_repository),
        pipeline_runs=FilesystemRunStorage(base_dir=log_dir) if log_dir else InMemoryRunStorage(),
        scheduler=TestSystemCronScheduler(schedule_dir) if schedule_dir else None,
        execution_manager=SynchronousExecutionManager(),
        raise_on_error=raise_on_error,
    )
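A minimal sketch of the two storage branches (the path is hypothetical): omitting log_dir keeps runs in memory, while passing a directory swaps in filesystem-backed run storage.

in_memory_context = define_context(raise_on_error=False)
filesystem_context = define_context(log_dir='/tmp/dagster-logs')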
Example #13
def get_instance(temp_dir):
    return DagsterInstance(
        instance_type=InstanceType.EPHEMERAL,
        local_artifact_storage=LocalArtifactStorage(temp_dir),
        run_storage=InMemoryRunStorage(),
        event_storage=InMemoryEventLogStorage(),
        compute_log_manager=NoOpComputeLogManager(temp_dir),
    )
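For tests that need no on-disk artifacts at all, DagsterInstance.ephemeral() builds a comparable throwaway instance backed by in-memory storages in a single call:

from dagster import DagsterInstance

instance = DagsterInstance.ephemeral()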
Example #14
def define_examples_context(raise_on_error=True):
    return DagsterGraphQLContext(
        handle=ExecutionTargetHandle.for_repo_module('dagster_examples',
                                                     'define_demo_repo'),
        pipeline_runs=InMemoryRunStorage(),
        execution_manager=SynchronousExecutionManager(),
        raise_on_error=raise_on_error,
    )
Example #15
def test_get_all_schedules():

    with seven.TemporaryDirectory() as temp_dir:
        instance = DagsterInstance(
            instance_type=InstanceType.EPHEMERAL,
            local_artifact_storage=LocalArtifactStorage(temp_dir),
            run_storage=InMemoryRunStorage(),
            event_storage=InMemoryEventLogStorage(),
            compute_log_manager=NoOpComputeLogManager(temp_dir),
            schedule_storage=SqliteScheduleStorage.from_local(temp_dir),
            scheduler=FilesystemTestScheduler(temp_dir),
            run_launcher=SyncInMemoryRunLauncher(),
        )

        context = define_context_for_repository_yaml(
            path=file_relative_path(__file__, '../repository.yaml'), instance=instance
        )

        # Initialize scheduler
        repository = context.legacy_get_repository_definition()
        reconcile_scheduler_state(
            python_path=sys.executable,
            repository_path="",
            repository=repository,
            instance=instance,
        )

        # Start schedule
        schedule = instance.start_schedule(
            repository.name, "no_config_pipeline_hourly_schedule")

        # Query Scheduler + all Schedules
        scheduler_result = execute_dagster_graphql(context, GET_SCHEDULES_QUERY)

        # These schedules are defined in dagster_graphql_tests/graphql/setup_scheduler.py
        # If you add a schedule there, be sure to update the number of schedules below
        assert scheduler_result.data
        assert scheduler_result.data['scheduler']
        assert scheduler_result.data['scheduler']['runningSchedules']
        assert len(scheduler_result.data['scheduler']['runningSchedules']) == 18

        for schedule in scheduler_result.data['scheduler']['runningSchedules']:
            if schedule['scheduleDefinition']['name'] == 'no_config_pipeline_hourly_schedule':
                assert schedule['status'] == 'RUNNING'

            if schedule['scheduleDefinition']['name'] == 'environment_dict_error_schedule':
                assert schedule['scheduleDefinition']['runConfigYaml'] is None
            elif schedule['scheduleDefinition']['name'] == 'invalid_config_schedule':
                assert (
                    schedule['scheduleDefinition']['runConfigYaml']
                    == 'solids:\n  takes_an_enum:\n    config: invalid\n'
                )
            else:
                assert (
                    schedule['scheduleDefinition']['runConfigYaml']
                    == 'storage:\n  filesystem: {}\n'
                )
Example #16
def get_instance(temp_dir, event_log_storage):
    return DagsterInstance(
        instance_type=InstanceType.EPHEMERAL,
        local_artifact_storage=LocalArtifactStorage(temp_dir),
        run_storage=InMemoryRunStorage(),
        event_storage=event_log_storage,
        compute_log_manager=NoOpComputeLogManager(),
        run_coordinator=DefaultRunCoordinator(),
        run_launcher=SyncInMemoryRunLauncher(),
    )
Example #17
def get_instance(temp_dir):
    return DagsterInstance(
        instance_type=InstanceType.EPHEMERAL,
        local_artifact_storage=LocalArtifactStorage(temp_dir),
        run_storage=InMemoryRunStorage(),
        event_storage=InMemoryEventLogStorage(),
        scheduler=FilesystemTestScheduler(temp_dir),
        schedule_storage=SqliteScheduleStorage.from_local(temp_dir),
        compute_log_manager=NoOpComputeLogManager(temp_dir),
    )
Example #18
def test_index_view():
    with create_app(
            ExecutionTargetHandle.for_repo_yaml(
                script_relative_path('./repository.yaml')),
            InMemoryRunStorage(),
    ).test_client() as client:
        res = client.get('/')

    assert res.status_code == 200, res.data
    assert b'You need to enable JavaScript to run this app' in res.data
Example #19
def define_scheduler_instance(tempdir):
    return DagsterInstance(
        instance_type=InstanceType.EPHEMERAL,
        local_artifact_storage=LocalArtifactStorage(tempdir),
        run_storage=InMemoryRunStorage(),
        event_storage=InMemoryEventLogStorage(),
        compute_log_manager=NoOpComputeLogManager(tempdir),
        schedule_storage=SqliteScheduleStorage.from_local(os.path.join(tempdir, 'schedules')),
        scheduler=SystemCronScheduler(os.path.join(tempdir, 'schedules')),
    )
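A minimal sketch of calling this helper: it writes schedule state under the directory it is given, so tests pair it with a temporary directory whose lifetime covers the assertions.

with seven.TemporaryDirectory() as tempdir:
    instance = define_scheduler_instance(tempdir)
    # ... exercise schedules while tempdir still exists ...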
Example #20
def _readonly_in_memory_instance():
    with seven.TemporaryDirectory() as temp_dir:
        yield DagsterInstance(
            instance_type=InstanceType.EPHEMERAL,
            local_artifact_storage=LocalArtifactStorage(temp_dir),
            run_storage=InMemoryRunStorage(),
            event_storage=InMemoryEventLogStorage(),
            compute_log_manager=LocalComputeLogManager(temp_dir),
            run_launcher=ExplodingRunLauncher(),
            schedule_storage=SqliteScheduleStorage.from_local(temp_dir),
        )
Example #21
def _sqlite_asset_instance():
    with seven.TemporaryDirectory() as temp_dir:
        instance = DagsterInstance(
            instance_type=InstanceType.EPHEMERAL,
            local_artifact_storage=LocalArtifactStorage(temp_dir),
            run_storage=InMemoryRunStorage(),
            event_storage=ConsolidatedSqliteEventLogStorage(temp_dir),
            compute_log_manager=LocalComputeLogManager(temp_dir),
            run_launcher=SyncInMemoryRunLauncher(),
        )
        yield instance
Example #22
def define_scheduler_instance():
    with seven.TemporaryDirectory() as temp_dir:
        yield DagsterInstance(
            instance_type=InstanceType.EPHEMERAL,
            local_artifact_storage=LocalArtifactStorage(temp_dir),
            run_storage=InMemoryRunStorage(),
            event_storage=InMemoryEventLogStorage(),
            schedule_storage=SqliteScheduleStorage.from_local(temp_dir),
            scheduler=FilesytemTestScheduler(temp_dir),
            compute_log_manager=NoOpComputeLogManager(temp_dir),
        )
Example #23
def test_running():
    run_id = make_new_run_id()
    handle = ExecutionTargetHandle.for_pipeline_python_file(
        __file__, 'passing_pipeline')
    env_config = {
        'solids': {
            'sum_solid': {
                'inputs': {
                    'num': script_relative_path('data/num.csv')
                }
            }
        }
    }
    selector = ExecutionSelector('csv_hello_world')
    run_storage = InMemoryRunStorage()
    pipeline_run = run_storage.create_run(
        pipeline_name=passing_pipeline.name,
        run_id=run_id,
        selector=selector,
        env_config=env_config,
        mode='default',
        reexecution_config=None,
        step_keys_to_execute=None,
    )
    execution_manager = MultiprocessingExecutionManager()
    execution_manager.execute_pipeline(handle,
                                       passing_pipeline,
                                       pipeline_run,
                                       raise_on_error=False)
    execution_manager.join()
    assert pipeline_run.status == PipelineRunStatus.SUCCESS
    events = pipeline_run.all_logs()
    assert events

    process_start_events = get_events_of_type(
        events, DagsterEventType.PIPELINE_PROCESS_START)
    assert len(process_start_events) == 1

    process_started_events = get_events_of_type(
        events, DagsterEventType.PIPELINE_PROCESS_STARTED)
    assert len(process_started_events) == 1
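A plausible sketch of the get_events_of_type helper used above (its real definition lives elsewhere in the test module): filter the run's structured log records down to dagster events of a single type.

def get_events_of_type(logs, event_type):
    return [
        record.dagster_event
        for record in logs
        if record.is_dagster_event and record.dagster_event.event_type == event_type
    ]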
Example #24
def get_instance_with_launcher(temp_dir):
    test_queue = InMemoryRunLauncher()

    return DagsterInstance(
        instance_type=InstanceType.EPHEMERAL,
        local_artifact_storage=LocalArtifactStorage(temp_dir),
        run_storage=InMemoryRunStorage(),
        event_storage=InMemoryEventLogStorage(),
        schedule_storage=SqliteScheduleStorage.from_local(temp_dir),
        compute_log_manager=NoOpComputeLogManager(temp_dir),
        run_launcher=test_queue,
    )
Example #25
def _in_memory_instance():
    with tempfile.TemporaryDirectory() as temp_dir:
        yield DagsterInstance(
            instance_type=InstanceType.EPHEMERAL,
            local_artifact_storage=LocalArtifactStorage(temp_dir),
            run_storage=InMemoryRunStorage(),
            event_storage=InMemoryEventLogStorage(),
            compute_log_manager=LocalComputeLogManager(temp_dir),
            run_launcher=SyncInMemoryRunLauncher(),
            run_coordinator=DefaultRunCoordinator(),
            schedule_storage=SqliteScheduleStorage.from_local(temp_dir),
        )
Example #26
def mocked_instance():
    with seven.TemporaryDirectory() as temp_dir:
        instance = DagsterInstance(
            instance_type=InstanceType.EPHEMERAL,
            local_artifact_storage=LocalArtifactStorage(temp_dir),
            run_storage=InMemoryRunStorage(),
            event_storage=InMemoryEventLogStorage(),
            compute_log_manager=NoOpComputeLogManager(),
            run_launcher=InMemoryRunLauncher(),
        )
        with mock.patch('dagster.core.instance.DagsterInstance.get') as _instance:
            _instance.return_value = instance
            yield instance
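Because mocked_instance is a generator function, it drops straight into a pytest fixture; a minimal sketch:

import pytest

@pytest.fixture
def instance():
    yield from mocked_instance()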
Example #27
def test_notebook_view():
    notebook_path = script_relative_path('render_uuid_notebook.ipynb')

    with create_app(
            ExecutionTargetHandle.for_repo_yaml(
                script_relative_path('./repository.yaml')),
            InMemoryRunStorage(),
    ).test_client() as client:
        res = client.get('/dagit/notebook?path={}'.format(notebook_path))

    assert res.status_code == 200
    # This magic guid is hardcoded in the notebook
    assert b'6cac0c38-2c97-49ca-887c-4ac43f141213' in res.data
Example #28
def define_scheduler_instance(tempdir):
    with pytest.warns(UserWarning, match="`SystemCronScheduler` is deprecated"):
        return DagsterInstance(
            instance_type=InstanceType.EPHEMERAL,
            local_artifact_storage=LocalArtifactStorage(tempdir),
            run_storage=InMemoryRunStorage(),
            event_storage=InMemoryEventLogStorage(),
            compute_log_manager=NoOpComputeLogManager(),
            schedule_storage=SqliteScheduleStorage.from_local(os.path.join(tempdir, "schedules")),
            scheduler=SystemCronScheduler(),
            run_coordinator=DefaultRunCoordinator(),
            run_launcher=SyncInMemoryRunLauncher(),
        )
Example #29
def _sqlite_asset_instance():
    with tempfile.TemporaryDirectory() as temp_dir:
        instance = DagsterInstance(
            instance_type=InstanceType.EPHEMERAL,
            local_artifact_storage=LocalArtifactStorage(temp_dir),
            run_storage=InMemoryRunStorage(),
            event_storage=ConsolidatedSqliteEventLogStorage(temp_dir),
            compute_log_manager=LocalComputeLogManager(temp_dir),
            run_coordinator=DefaultRunCoordinator(),
            run_launcher=SyncInMemoryRunLauncher(),
            scheduler=FilesystemTestScheduler(temp_dir),
        )
        yield instance
Example #30
def test_pipelines_or_error_invalid():
    context = DagsterGraphQLContext(
        handle=ExecutionTargetHandle.for_repo_fn(define_test_repository),
        pipeline_runs=InMemoryRunStorage(),
        execution_manager=SynchronousExecutionManager(),
    )

    result = execute_dagster_graphql(
        context,
        '{ pipelinesOrError { ... on InvalidDefinitionError { message } } }')
    msg = result.data['pipelinesOrError']['message']
    assert 'circular reference detected in solid "csolid"' in msg