def test_failing():
    """Execute ``failing_pipeline`` via the multiprocessing manager and expect FAILURE.

    The run is created in an in-memory run storage, executed with
    ``raise_on_error=False``, joined, and then checked for a FAILURE status
    and a non-empty log.
    """
    new_run_id = make_new_run_id()
    target_handle = ExecutionTargetHandle.for_pipeline_python_file(
        __file__, 'failing_pipeline'
    )
    num_csv_path = script_relative_path('data/num.csv')
    environment = {'solids': {'sum_solid': {'inputs': {'num': num_csv_path}}}}
    run_selector = ExecutionSelector('csv_hello_world')
    storage = InMemoryRunStorage()

    pipeline_run = storage.create_run(
        run_storage=storage,
        pipeline_name=failing_pipeline.name,
        run_id=new_run_id,
        selector=run_selector,
        env_config=environment,
        mode='default',
        reexecution_config=None,
        step_keys_to_execute=None,
    )

    manager = MultiprocessingExecutionManager()
    manager.execute_pipeline(
        target_handle, failing_pipeline, pipeline_run, raise_on_error=False
    )
    # Block until the manager has finished before inspecting the run.
    manager.join()

    assert pipeline_run.status == PipelineRunStatus.FAILURE
    assert pipeline_run.all_logs()
def construct_run_storage(self):
    """Return a run storage instance matching ``self.storage_mode``.

    ``'filesystem'`` and ``'s3'`` yield a ``FileSystemRunStorage``;
    ``'in_memory'`` and ``None`` yield an ``InMemoryRunStorage``.

    Raises:
        DagsterInvariantViolationError: for any other storage mode.
    """
    mode = self.storage_mode

    if mode == 'filesystem':
        return FileSystemRunStorage()

    if mode == 'in_memory':
        return InMemoryRunStorage()

    if mode == 's3':
        # TODO: Revisit whether we want to use S3 run storage
        return FileSystemRunStorage()

    if mode is None:
        return InMemoryRunStorage()

    raise DagsterInvariantViolationError('Invalid storage specified {}'.format(mode))
def create_test_pipeline_execution_context(
    logger_defs=None, scoped_resources_builder=None, tags=None, run_config_loggers=None
):
    """Build a pipeline execution context for tests around an empty pipeline.

    Args:
        logger_defs: optional dict mapping str names to ``LoggerDefinition``.
        scoped_resources_builder: optional ``ScopedResourcesBuilder``; a fresh
            one is used when omitted.
        tags: passed through to ``RunConfig``.
        run_config_loggers: optional list of ``logging.Logger`` for ``RunConfig``.

    Returns:
        The result of ``construct_pipeline_execution_context`` using in-memory
        run storage and intermediates, and a local file manager for the new run id.
    """
    run_id = str(uuid.uuid4())

    checked_logger_defs = check.opt_dict_param(
        logger_defs, 'logger_defs', key_type=str, value_type=LoggerDefinition
    )
    mode_def = ModeDefinition(logger_defs=checked_logger_defs)
    pipeline_def = PipelineDefinition(
        name='test_legacy_context', solid_defs=[], mode_defs=[mode_def]
    )

    python_loggers = check.opt_list_param(
        run_config_loggers, 'run_config_loggers', of_type=logging.Logger
    )
    run_config = RunConfig(run_id, tags=tags, loggers=python_loggers)

    # Every declared logger gets an empty config entry.
    logger_config = dict((key, {}) for key in checked_logger_defs)
    environment_dict = {'loggers': logger_config}

    creation_data = create_context_creation_data(pipeline_def, environment_dict, run_config)
    log_manager = create_log_manager(creation_data)

    resources_builder = check.opt_inst_param(
        scoped_resources_builder,
        'scoped_resources_builder',
        ScopedResourcesBuilder,
        default=ScopedResourcesBuilder(),
    )

    storage_data = SystemStorageData(
        run_storage=InMemoryRunStorage(),
        intermediates_manager=InMemoryIntermediatesManager(),
        file_manager=LocalFileManager.for_run_id(run_id),
    )

    return construct_pipeline_execution_context(
        context_creation_data=creation_data,
        scoped_resources_builder=resources_builder,
        system_storage_data=storage_data,
        log_manager=log_manager,
    )
def execute_query(
    handle,
    query,
    variables=None,
    pipeline_run_storage=None,
    scheduler=None,
    raise_on_error=False,
    use_sync_executor=False,
):
    """Execute a GraphQL query against the dagster schema and return the result dict.

    Args:
        handle (ExecutionTargetHandle): target the query runs against.
        query (str): GraphQL request string; surrounding quotes and whitespace
            are stripped before execution.
        variables (Optional[dict]): GraphQL variables.
        pipeline_run_storage (Optional[RunStorage]): externally created run
            storage; a new ``InMemoryRunStorage`` is used only when this is None.
        scheduler (Optional[Scheduler]): scheduler placed on the context.
        raise_on_error (bool): forwarded to the GraphQL context.
        use_sync_executor (bool): use ``SyncExecutor`` instead of ``GeventExecutor``.

    Returns:
        dict: ``result.to_dict()``; when errors are present, each error whose
        GraphQL error carries an ``original_error`` also gets a ``stack_trace`` entry.
    """
    check.inst_param(handle, 'handle', ExecutionTargetHandle)
    check.str_param(query, 'query')
    check.opt_dict_param(variables, 'variables')
    # We allow external creation of the pipeline_run_storage to support testing contexts where we
    # need access to the underlying run storage
    check.opt_inst_param(pipeline_run_storage, 'pipeline_run_storage', RunStorage)
    check.opt_inst_param(scheduler, 'scheduler', Scheduler)
    check.bool_param(raise_on_error, 'raise_on_error')
    check.bool_param(use_sync_executor, 'use_sync_executor')

    query = query.strip('\'" \n\t')

    execution_manager = SynchronousExecutionManager()

    # Compare against None explicitly: a caller-supplied storage that happens
    # to evaluate falsy must still be used, otherwise the caller loses access
    # to the runs recorded during this query.
    if pipeline_run_storage is None:
        pipeline_run_storage = InMemoryRunStorage()

    context = DagsterGraphQLContext(
        handle=handle,
        pipeline_runs=pipeline_run_storage,
        scheduler=scheduler,
        execution_manager=execution_manager,
        raise_on_error=raise_on_error,
        version=__version__,
    )

    executor = SyncExecutor() if use_sync_executor else GeventExecutor()

    result = graphql(
        request_string=query,
        schema=create_schema(),
        context=context,
        variables=variables,
        executor=executor,
    )

    result_dict = result.to_dict()

    # Here we detect if this is in fact an error response
    # If so, we iterate over the result_dict and the original result
    # which contains a GraphQLError. If that GraphQL error contains
    # an original_error property (which is the exception the resolver
    # has thrown, typically) we serialize the stack trace of that exception
    # in the 'stack_trace' property of each error to ease debugging
    if 'errors' in result_dict:
        check.invariant(len(result_dict['errors']) == len(result.errors))
        for python_error, error_dict in zip(result.errors, result_dict['errors']):
            if hasattr(python_error, 'original_error') and python_error.original_error:
                error_dict['stack_trace'] = get_stack_trace_array(python_error.original_error)

    return result_dict
def test_get_schedule():
    """Query a single schedule by name and expect a RunningSchedule with a partition set."""
    with seven.TemporaryDirectory() as temp_dir:
        instance_kwargs = {
            'instance_type': InstanceType.EPHEMERAL,
            'local_artifact_storage': LocalArtifactStorage(temp_dir),
            'run_storage': InMemoryRunStorage(),
            'event_storage': InMemoryEventLogStorage(),
            'compute_log_manager': NoOpComputeLogManager(),
            'schedule_storage': SqliteScheduleStorage.from_local(temp_dir),
            'scheduler': FilesystemTestScheduler(temp_dir),
            'run_launcher': SyncInMemoryRunLauncher(),
        }
        instance = DagsterInstance(**instance_kwargs)

        context = define_test_context(instance)

        # Initialize scheduler
        repository = context.legacy_get_repository_definition()
        instance.reconcile_scheduler_state(
            repository=repository,
            python_path='/path/to/python',
            repository_path='/path/to/repository',
        )

        query_variables = {'scheduleName': 'partition_based_multi_mode_decorator'}
        result = execute_dagster_graphql(context, GET_SCHEDULE, variables=query_variables)

        assert result.data
        schedule_or_error = result.data['scheduleOrError']
        assert schedule_or_error['__typename'] == 'RunningSchedule'
        assert schedule_or_error['scheduleDefinition']['partitionSet']
def create_test_pipeline_execution_context(
    loggers=None, resources=None, tags=None, run_config_loggers=None
):
    """Build a pipeline execution context for tests around an empty pipeline.

    Args:
        loggers: optional dict mapping str names to ``LoggerDefinition``.
        resources: passed through to ``construct_pipeline_execution_context``.
        tags: passed through to ``RunConfig``.
        run_config_loggers: optional list of ``logging.Logger`` for ``RunConfig``.

    Returns:
        The result of ``construct_pipeline_execution_context`` using in-memory
        run storage and intermediates manager.
    """
    run_id = str(uuid.uuid4())

    logger_defs = check.opt_dict_param(
        loggers, 'loggers', key_type=str, value_type=LoggerDefinition
    )
    mode_def = ModeDefinition(loggers=logger_defs)
    pipeline_def = PipelineDefinition(
        name='test_legacy_context', solids=[], mode_definitions=[mode_def]
    )

    python_loggers = check.opt_list_param(
        run_config_loggers, 'run_config_loggers', of_type=logging.Logger
    )
    run_config = RunConfig(run_id, tags=tags, loggers=python_loggers)

    # Every declared logger gets an empty config entry.
    logger_config = dict((key, {}) for key in logger_defs)
    environment_config = create_environment_config(pipeline_def, {'loggers': logger_config})

    built_loggers = _create_loggers(environment_config, run_config, pipeline_def, mode_def)
    log_manager = DagsterLogManager(run_config.run_id, {}, built_loggers)

    return construct_pipeline_execution_context(
        run_config=run_config,
        pipeline_def=pipeline_def,
        resources=resources,
        environment_config=environment_config,
        run_storage=InMemoryRunStorage(),
        intermediates_manager=InMemoryIntermediatesManager(),
        log_manager=log_manager,
    )
def create_mem_system_storage_data(init_context):
    """Return ``SystemStorageData`` backed by in-memory run storage and intermediates.

    The file manager is a ``LocalFileManager`` for the run id found on
    ``init_context.run_config``.
    """
    run_storage = InMemoryRunStorage()
    intermediates_manager = InMemoryIntermediatesManager()
    file_manager = LocalFileManager.for_run_id(init_context.run_config.run_id)
    return SystemStorageData(
        run_storage=run_storage,
        intermediates_manager=intermediates_manager,
        file_manager=file_manager,
    )
def test_run_launcher():
    """Launch ``no_config_pipeline`` through an in-memory launcher and check its status.

    The launch mutation must report NOT_STARTED; after ``run_one`` is called on
    the launcher, a run query must report SUCCESS.
    """
    launcher = InMemoryRunLauncher()

    with seven.TemporaryDirectory() as temp_dir:
        instance = DagsterInstance(
            instance_type=InstanceType.EPHEMERAL,
            local_artifact_storage=LocalArtifactStorage(temp_dir),
            run_storage=InMemoryRunStorage(),
            event_storage=InMemoryEventLogStorage(),
            compute_log_manager=NoOpComputeLogManager(temp_dir),
            run_launcher=launcher,
        )

        context = define_context_for_repository_yaml(
            path=script_relative_path('../repository.yaml'), instance=instance
        )

        execution_params = {'selector': {'name': 'no_config_pipeline'}, 'mode': 'default'}
        result = execute_dagster_graphql(
            context=context,
            query=LAUNCH_PIPELINE_EXECUTION_MUTATION,
            variables={'executionParams': execution_params},
        )

        launch_payload = result.data['launchPipelineExecution']
        assert launch_payload['__typename'] == 'LaunchPipelineExecutionSuccess'
        assert launch_payload['run']['status'] == 'NOT_STARTED'

        run_id = launch_payload['run']['runId']

        # Ask the launcher to execute one launched run.
        launcher.run_one(instance)

        result = execute_dagster_graphql(
            context=context, query=RUN_QUERY, variables={'runId': run_id}
        )
        run_payload = result.data['pipelineRunOrError']
        assert run_payload['__typename'] == 'PipelineRun'
        assert run_payload['status'] == 'SUCCESS'
def run_test_backfill(execution_args, expected_count=None, error_message=None):
    """Run the backfill command against an ephemeral instance and check the outcome.

    Args:
        execution_args: arguments handed to ``backfill_execute_args`` and
            ``backfill_cli_runner_args``.
        expected_count: when truthy, the number of runs expected in the
            launcher queue after a successful invocation.
        error_message: when truthy, a ``UsageError`` containing this text is
            expected and the CLI must exit with code 2.
    """
    cli_runner = CliRunner()
    launcher = InMemoryRunLauncher()

    with seven.TemporaryDirectory() as temp_dir:
        instance = DagsterInstance(
            instance_type=InstanceType.EPHEMERAL,
            local_artifact_storage=LocalArtifactStorage(temp_dir),
            run_storage=InMemoryRunStorage(),
            event_storage=InMemoryEventLogStorage(),
            compute_log_manager=NoOpComputeLogManager(),
            run_launcher=launcher,
        )

        # Make DagsterInstance.get() return the ephemeral instance built above.
        with mock.patch('dagster.core.instance.DagsterInstance.get') as _instance:
            _instance.return_value = instance

            if error_message:
                with pytest.raises(UsageError) as error_info:
                    execute_backfill_command(backfill_execute_args(execution_args), no_print)
                assert error_info and error_message in error_info.value.message

            result = cli_runner.invoke(
                pipeline_backfill_command, backfill_cli_runner_args(execution_args)
            )

            if error_message:
                assert result.exit_code == 2
                return

            assert result.exit_code == 0
            if expected_count:
                assert len(launcher.queue()) == expected_count
def test_create_app():
    """``create_app`` returns a truthy app for the repository yaml with in-memory run storage."""
    repo_yaml = script_relative_path('./repository.yaml')
    handle = ExecutionTargetHandle.for_repo_yaml(repo_yaml)
    storage = InMemoryRunStorage()
    app = create_app(handle, storage, use_synchronous_execution_manager=True)
    assert app
def test_pipelines_python_error():
    """Querying pipelines on the error-pipeline repo yields a ``PythonError`` typename."""
    error_repo_handle = ExecutionTargetHandle.for_repo_fn(define_error_pipeline_repo)
    ctx = DagsterGraphQLContext(
        handle=error_repo_handle,
        pipeline_runs=InMemoryRunStorage(),
        execution_manager=SynchronousExecutionManager(),
    )
    result = execute_dagster_graphql(ctx, PIPELINES)
    typename = result.data['pipelinesOrError']['__typename']
    assert typename == "PythonError"
def define_context(raise_on_error=True, log_dir=None, schedule_dir=None):
    """Create a ``DagsterGraphQLContext`` for the repository from ``define_repository``.

    Args:
        raise_on_error: forwarded to the context.
        log_dir: when truthy, runs are stored with ``FilesystemRunStorage`` under
            this directory; otherwise an ``InMemoryRunStorage`` is used.
        schedule_dir: when truthy, a ``TestSystemCronScheduler`` for this
            directory is attached; otherwise no scheduler is set.
    """
    handle = ExecutionTargetHandle.for_repo_fn(define_repository)

    if log_dir:
        pipeline_runs = FilesystemRunStorage(base_dir=log_dir)
    else:
        pipeline_runs = InMemoryRunStorage()

    if schedule_dir:
        scheduler = TestSystemCronScheduler(schedule_dir)
    else:
        scheduler = None

    return DagsterGraphQLContext(
        handle=handle,
        pipeline_runs=pipeline_runs,
        scheduler=scheduler,
        execution_manager=SynchronousExecutionManager(),
        raise_on_error=raise_on_error,
    )
def get_instance(temp_dir):
    """Return an ephemeral ``DagsterInstance`` rooted at ``temp_dir``.

    Run and event storage are in-memory; compute logs use ``NoOpComputeLogManager``.
    """
    artifact_storage = LocalArtifactStorage(temp_dir)
    run_storage = InMemoryRunStorage()
    event_storage = InMemoryEventLogStorage()
    compute_logs = NoOpComputeLogManager(temp_dir)
    return DagsterInstance(
        instance_type=InstanceType.EPHEMERAL,
        local_artifact_storage=artifact_storage,
        run_storage=run_storage,
        event_storage=event_storage,
        compute_log_manager=compute_logs,
    )
def define_examples_context(raise_on_error=True):
    """Create a ``DagsterGraphQLContext`` for ``dagster_examples.define_demo_repo``.

    Uses in-memory run storage and a synchronous execution manager;
    ``raise_on_error`` is forwarded to the context.
    """
    examples_handle = ExecutionTargetHandle.for_repo_module(
        'dagster_examples', 'define_demo_repo'
    )
    context_kwargs = {
        'handle': examples_handle,
        'pipeline_runs': InMemoryRunStorage(),
        'execution_manager': SynchronousExecutionManager(),
        'raise_on_error': raise_on_error,
    }
    return DagsterGraphQLContext(**context_kwargs)
def test_get_all_schedules():
    """Start one schedule, query all running schedules, and check their fields.

    Expects 18 running schedules. The started hourly schedule must report
    RUNNING, and each schedule's ``runConfigYaml`` must match the value
    expected for its name.
    """
    with seven.TemporaryDirectory() as temp_dir:
        instance = DagsterInstance(
            instance_type=InstanceType.EPHEMERAL,
            local_artifact_storage=LocalArtifactStorage(temp_dir),
            run_storage=InMemoryRunStorage(),
            event_storage=InMemoryEventLogStorage(),
            compute_log_manager=NoOpComputeLogManager(temp_dir),
            schedule_storage=SqliteScheduleStorage.from_local(temp_dir),
            scheduler=FilesystemTestScheduler(temp_dir),
            run_launcher=SyncInMemoryRunLauncher(),
        )

        context = define_context_for_repository_yaml(
            path=file_relative_path(__file__, '../repository.yaml'), instance=instance
        )

        # Initialize scheduler
        repository = context.legacy_get_repository_definition()
        reconcile_scheduler_state(
            python_path=sys.executable,
            repository_path="",
            repository=repository,
            instance=instance,
        )

        # Start schedule. The return value is not needed; binding it to
        # `schedule` only shadowed the loop variable used below.
        instance.start_schedule(repository.name, "no_config_pipeline_hourly_schedule")

        # Query Scheduler + all Schedules
        scheduler_result = execute_dagster_graphql(context, GET_SCHEDULES_QUERY)

        # These schedules are defined in dagster_graphql_tests/graphql/setup_scheduler.py
        # If you add a schedule there, be sure to update the number of schedules below
        assert scheduler_result.data
        assert scheduler_result.data['scheduler']
        assert scheduler_result.data['scheduler']['runningSchedules']
        assert len(scheduler_result.data['scheduler']['runningSchedules']) == 18

        for running_schedule in scheduler_result.data['scheduler']['runningSchedules']:
            schedule_name = running_schedule['scheduleDefinition']['name']

            if schedule_name == 'no_config_pipeline_hourly_schedule':
                assert running_schedule['status'] == 'RUNNING'

            # Separate chain: the hourly schedule above also falls through to
            # the default runConfigYaml check in the else branch.
            if schedule_name == 'environment_dict_error_schedule':
                assert running_schedule['scheduleDefinition']['runConfigYaml'] is None
            elif schedule_name == 'invalid_config_schedule':
                assert (
                    running_schedule['scheduleDefinition']['runConfigYaml']
                    == 'solids:\n takes_an_enum:\n config: invalid\n'
                )
            else:
                assert (
                    running_schedule['scheduleDefinition']['runConfigYaml']
                    == 'storage:\n filesystem: {}\n'
                )
def get_instance(temp_dir, event_log_storage):
    """Return an ephemeral ``DagsterInstance`` using the supplied event log storage.

    Artifacts live under ``temp_dir``; run storage is in-memory, and a default
    run coordinator plus a synchronous in-memory run launcher are attached.
    """
    instance_kwargs = {
        'instance_type': InstanceType.EPHEMERAL,
        'local_artifact_storage': LocalArtifactStorage(temp_dir),
        'run_storage': InMemoryRunStorage(),
        'event_storage': event_log_storage,
        'compute_log_manager': NoOpComputeLogManager(),
        'run_coordinator': DefaultRunCoordinator(),
        'run_launcher': SyncInMemoryRunLauncher(),
    }
    return DagsterInstance(**instance_kwargs)
def get_instance(temp_dir):
    """Return an ephemeral ``DagsterInstance`` with a filesystem test scheduler.

    Scheduler state, schedule storage, artifacts and the compute log manager
    are all given ``temp_dir``; run and event storage are in-memory.
    """
    artifact_storage = LocalArtifactStorage(temp_dir)
    run_storage = InMemoryRunStorage()
    event_storage = InMemoryEventLogStorage()
    test_scheduler = FilesystemTestScheduler(temp_dir)
    schedule_storage = SqliteScheduleStorage.from_local(temp_dir)
    compute_logs = NoOpComputeLogManager(temp_dir)
    return DagsterInstance(
        instance_type=InstanceType.EPHEMERAL,
        local_artifact_storage=artifact_storage,
        run_storage=run_storage,
        event_storage=event_storage,
        scheduler=test_scheduler,
        schedule_storage=schedule_storage,
        compute_log_manager=compute_logs,
    )
def test_index_view():
    """GET ``/`` on the app returns 200 and the JavaScript-required banner."""
    handle = ExecutionTargetHandle.for_repo_yaml(script_relative_path('./repository.yaml'))
    app = create_app(handle, InMemoryRunStorage())

    with app.test_client() as client:
        res = client.get('/')

    # On failure the response body is shown as the assertion message.
    assert res.status_code == 200, res.data
    assert b'You need to enable JavaScript to run this app' in res.data
def define_scheduler_instance(tempdir):
    """Return an ephemeral ``DagsterInstance`` wired to a ``SystemCronScheduler``.

    Both the schedule storage and the scheduler use the ``schedules``
    subdirectory of ``tempdir``.
    """
    artifact_storage = LocalArtifactStorage(tempdir)
    run_storage = InMemoryRunStorage()
    event_storage = InMemoryEventLogStorage()
    compute_logs = NoOpComputeLogManager(tempdir)
    schedule_storage = SqliteScheduleStorage.from_local(os.path.join(tempdir, 'schedules'))
    cron_scheduler = SystemCronScheduler(os.path.join(tempdir, 'schedules'))
    return DagsterInstance(
        instance_type=InstanceType.EPHEMERAL,
        local_artifact_storage=artifact_storage,
        run_storage=run_storage,
        event_storage=event_storage,
        compute_log_manager=compute_logs,
        schedule_storage=schedule_storage,
        scheduler=cron_scheduler,
    )
def _readonly_in_memory_instance():
    """Yield an ephemeral ``DagsterInstance`` whose run launcher is ``ExplodingRunLauncher``.

    The instance is yielded while its backing temporary directory is still open.
    """
    with seven.TemporaryDirectory() as temp_dir:
        instance = DagsterInstance(
            instance_type=InstanceType.EPHEMERAL,
            local_artifact_storage=LocalArtifactStorage(temp_dir),
            run_storage=InMemoryRunStorage(),
            event_storage=InMemoryEventLogStorage(),
            compute_log_manager=LocalComputeLogManager(temp_dir),
            run_launcher=ExplodingRunLauncher(),
            schedule_storage=SqliteScheduleStorage.from_local(temp_dir),
        )
        yield instance
def _sqlite_asset_instance():
    """Yield an ephemeral ``DagsterInstance`` using consolidated SQLite event log storage.

    The instance is yielded while its backing temporary directory is still open.
    """
    with seven.TemporaryDirectory() as temp_dir:
        instance_kwargs = {
            'instance_type': InstanceType.EPHEMERAL,
            'local_artifact_storage': LocalArtifactStorage(temp_dir),
            'run_storage': InMemoryRunStorage(),
            'event_storage': ConsolidatedSqliteEventLogStorage(temp_dir),
            'compute_log_manager': LocalComputeLogManager(temp_dir),
            'run_launcher': SyncInMemoryRunLauncher(),
        }
        yield DagsterInstance(**instance_kwargs)
def define_scheduler_instance():
    """Yield an ephemeral ``DagsterInstance`` with a filesystem-backed test scheduler.

    The instance is yielded while its backing temporary directory is still open.
    """
    with seven.TemporaryDirectory() as temp_dir:
        # NOTE(review): `FilesytemTestScheduler` is spelled as the original
        # code references it; confirm it matches the imported class name.
        instance = DagsterInstance(
            instance_type=InstanceType.EPHEMERAL,
            local_artifact_storage=LocalArtifactStorage(temp_dir),
            run_storage=InMemoryRunStorage(),
            event_storage=InMemoryEventLogStorage(),
            schedule_storage=SqliteScheduleStorage.from_local(temp_dir),
            scheduler=FilesytemTestScheduler(temp_dir),
            compute_log_manager=NoOpComputeLogManager(temp_dir),
        )
        yield instance
def test_running():
    """Execute ``passing_pipeline`` via the multiprocessing manager and expect SUCCESS.

    Also checks that the run log has exactly one PIPELINE_PROCESS_START and
    one PIPELINE_PROCESS_STARTED event.
    """
    new_run_id = make_new_run_id()
    target_handle = ExecutionTargetHandle.for_pipeline_python_file(
        __file__, 'passing_pipeline'
    )
    num_csv_path = script_relative_path('data/num.csv')
    environment = {'solids': {'sum_solid': {'inputs': {'num': num_csv_path}}}}
    run_selector = ExecutionSelector('csv_hello_world')
    storage = InMemoryRunStorage()

    pipeline_run = storage.create_run(
        pipeline_name=passing_pipeline.name,
        run_id=new_run_id,
        selector=run_selector,
        env_config=environment,
        mode='default',
        reexecution_config=None,
        step_keys_to_execute=None,
    )

    manager = MultiprocessingExecutionManager()
    manager.execute_pipeline(
        target_handle, passing_pipeline, pipeline_run, raise_on_error=False
    )
    # Block until the manager has finished before inspecting the run.
    manager.join()

    assert pipeline_run.status == PipelineRunStatus.SUCCESS

    events = pipeline_run.all_logs()
    assert events

    start_events = get_events_of_type(events, DagsterEventType.PIPELINE_PROCESS_START)
    assert len(start_events) == 1

    started_events = get_events_of_type(events, DagsterEventType.PIPELINE_PROCESS_STARTED)
    assert len(started_events) == 1
def get_instance_with_launcher(temp_dir):
    """Return an ephemeral ``DagsterInstance`` that launches runs via ``InMemoryRunLauncher``.

    Artifacts, schedule storage and the compute log manager use ``temp_dir``;
    run and event storage are in-memory.
    """
    launcher = InMemoryRunLauncher()
    instance_kwargs = {
        'instance_type': InstanceType.EPHEMERAL,
        'local_artifact_storage': LocalArtifactStorage(temp_dir),
        'run_storage': InMemoryRunStorage(),
        'event_storage': InMemoryEventLogStorage(),
        'schedule_storage': SqliteScheduleStorage.from_local(temp_dir),
        'compute_log_manager': NoOpComputeLogManager(temp_dir),
        'run_launcher': launcher,
    }
    return DagsterInstance(**instance_kwargs)
def _in_memory_instance():
    """Yield an ephemeral ``DagsterInstance`` with in-memory run and event storage.

    The instance is yielded while its backing temporary directory is still open.
    """
    with tempfile.TemporaryDirectory() as temp_dir:
        instance = DagsterInstance(
            instance_type=InstanceType.EPHEMERAL,
            local_artifact_storage=LocalArtifactStorage(temp_dir),
            run_storage=InMemoryRunStorage(),
            event_storage=InMemoryEventLogStorage(),
            compute_log_manager=LocalComputeLogManager(temp_dir),
            run_launcher=SyncInMemoryRunLauncher(),
            run_coordinator=DefaultRunCoordinator(),
            schedule_storage=SqliteScheduleStorage.from_local(temp_dir),
        )
        yield instance
def mocked_instance():
    """Yield an ephemeral ``DagsterInstance`` while ``DagsterInstance.get`` is patched to return it.

    Both the patch and the temporary directory stay active for as long as the
    generator is suspended at the yield.
    """
    with seven.TemporaryDirectory() as temp_dir:
        instance_kwargs = {
            'instance_type': InstanceType.EPHEMERAL,
            'local_artifact_storage': LocalArtifactStorage(temp_dir),
            'run_storage': InMemoryRunStorage(),
            'event_storage': InMemoryEventLogStorage(),
            'compute_log_manager': NoOpComputeLogManager(),
            'run_launcher': InMemoryRunLauncher(),
        }
        instance = DagsterInstance(**instance_kwargs)

        with mock.patch('dagster.core.instance.DagsterInstance.get') as _instance:
            _instance.return_value = instance
            yield instance
def test_notebook_view():
    """GET the notebook endpoint for ``render_uuid_notebook.ipynb`` and check its content."""
    notebook_path = script_relative_path('render_uuid_notebook.ipynb')
    handle = ExecutionTargetHandle.for_repo_yaml(script_relative_path('./repository.yaml'))
    app = create_app(handle, InMemoryRunStorage())

    with app.test_client() as client:
        url = '/dagit/notebook?path={}'.format(notebook_path)
        res = client.get(url)

    assert res.status_code == 200
    # This magic guid is hardcoded in the notebook
    assert b'6cac0c38-2c97-49ca-887c-4ac43f141213' in res.data
def define_scheduler_instance(tempdir):
    """Return an ephemeral ``DagsterInstance`` using ``SystemCronScheduler``.

    The instance is built inside ``pytest.warns`` so that the
    ``SystemCronScheduler`` deprecation ``UserWarning`` is asserted while it is
    constructed. Schedule storage lives in the ``schedules`` subdirectory of
    ``tempdir``.
    """
    with pytest.warns(UserWarning, match="`SystemCronScheduler` is deprecated"):
        instance_kwargs = {
            'instance_type': InstanceType.EPHEMERAL,
            'local_artifact_storage': LocalArtifactStorage(tempdir),
            'run_storage': InMemoryRunStorage(),
            'event_storage': InMemoryEventLogStorage(),
            'compute_log_manager': NoOpComputeLogManager(),
            'schedule_storage': SqliteScheduleStorage.from_local(
                os.path.join(tempdir, "schedules")
            ),
            'scheduler': SystemCronScheduler(),
            'run_coordinator': DefaultRunCoordinator(),
            'run_launcher': SyncInMemoryRunLauncher(),
        }
        instance = DagsterInstance(**instance_kwargs)
    return instance
def _sqlite_asset_instance():
    """Yield an ephemeral ``DagsterInstance`` using consolidated SQLite event log storage.

    A filesystem test scheduler, default run coordinator and synchronous
    in-memory run launcher are attached. The instance is yielded while its
    backing temporary directory is still open.
    """
    with tempfile.TemporaryDirectory() as temp_dir:
        instance_kwargs = {
            'instance_type': InstanceType.EPHEMERAL,
            'local_artifact_storage': LocalArtifactStorage(temp_dir),
            'run_storage': InMemoryRunStorage(),
            'event_storage': ConsolidatedSqliteEventLogStorage(temp_dir),
            'compute_log_manager': LocalComputeLogManager(temp_dir),
            'run_coordinator': DefaultRunCoordinator(),
            'run_launcher': SyncInMemoryRunLauncher(),
            'scheduler': FilesystemTestScheduler(temp_dir),
        }
        yield DagsterInstance(**instance_kwargs)
def test_pipelines_or_error_invalid():
    """The pipelines query on the test repository reports the circular solid reference."""
    test_repo_handle = ExecutionTargetHandle.for_repo_fn(define_test_repository)
    context = DagsterGraphQLContext(
        handle=test_repo_handle,
        pipeline_runs=InMemoryRunStorage(),
        execution_manager=SynchronousExecutionManager(),
    )
    query = '{ pipelinesOrError { ... on InvalidDefinitionError { message } } }'
    result = execute_dagster_graphql(context, query)
    msg = result.data['pipelinesOrError']['message']
    assert 'circular reference detected in solid "csolid"' in msg