def run_test_backfill(execution_args, expected_count=None, error_message=None):
    """Drive the backfill command against an ephemeral instance and check the outcome.

    Args:
        execution_args: Arguments passed through ``backfill_execute_args`` and
            ``backfill_cli_runner_args`` before invoking the command.
        expected_count: When not ``None``, the number of runs that must sit in the
            in-memory launcher's queue after a successful invocation. ``0`` is a
            valid expectation and is checked as well.
        error_message: When truthy, text that must appear in the ``UsageError``
            raised by ``execute_backfill_command``; the CLI invocation is then
            expected to exit with code 2 instead of 0.
    """
    runner = CliRunner()
    run_launcher = InMemoryRunLauncher()
    with seven.TemporaryDirectory() as temp_dir:
        instance = DagsterInstance(
            instance_type=InstanceType.EPHEMERAL,
            local_artifact_storage=LocalArtifactStorage(temp_dir),
            run_storage=InMemoryRunStorage(),
            event_storage=InMemoryEventLogStorage(),
            compute_log_manager=NoOpComputeLogManager(),
            run_launcher=run_launcher,
        )
        with mock.patch('dagster.core.instance.DagsterInstance.get') as _instance:
            # While patched, DagsterInstance.get() hands back the instance built above.
            _instance.return_value = instance

            if error_message:
                # First exercise the command function directly to inspect the error text.
                with pytest.raises(UsageError) as error_info:
                    execute_backfill_command(backfill_execute_args(execution_args), no_print)
                assert error_info and error_message in error_info.value.message

            result = runner.invoke(
                pipeline_backfill_command, backfill_cli_runner_args(execution_args)
            )
            if error_message:
                assert result.exit_code == 2
            else:
                assert result.exit_code == 0
                # Compare against None so that an expectation of zero runs is still verified.
                if expected_count is not None:
                    assert len(run_launcher.queue()) == expected_count
def test_run_launcher():
    """Launch ``no_config_pipeline`` via GraphQL, run it from the queue, and expect success."""
    launcher = InMemoryRunLauncher()

    with seven.TemporaryDirectory() as temp_dir:
        instance = DagsterInstance(
            instance_type=InstanceType.EPHEMERAL,
            local_artifact_storage=LocalArtifactStorage(temp_dir),
            run_storage=InMemoryRunStorage(),
            event_storage=InMemoryEventLogStorage(),
            compute_log_manager=NoOpComputeLogManager(temp_dir),
            run_launcher=launcher,
        )
        context = define_context_for_repository_yaml(
            path=script_relative_path('../repository.yaml'), instance=instance
        )

        execution_params = {'selector': {'name': 'no_config_pipeline'}, 'mode': 'default'}
        launch_result = execute_dagster_graphql(
            context=context,
            query=LAUNCH_PIPELINE_EXECUTION_MUTATION,
            variables={'executionParams': execution_params},
        )

        # The mutation only enqueues the run; nothing has executed yet.
        launched = launch_result.data['launchPipelineExecution']
        assert launched['__typename'] == 'LaunchPipelineExecutionSuccess'
        assert launched['run']['status'] == 'NOT_STARTED'
        run_id = launched['run']['runId']

        launcher.run_one(instance)

        run_result = execute_dagster_graphql(
            context=context, query=RUN_QUERY, variables={'runId': run_id}
        )
        fetched = run_result.data['pipelineRunOrError']
        assert fetched['__typename'] == 'PipelineRun'
        assert fetched['status'] == 'SUCCESS'
def test_get_schedule():
    """Query one schedule by name and check it is a ``RunningSchedule`` with a partition set."""
    with seven.TemporaryDirectory() as temp_dir:
        instance = DagsterInstance(
            instance_type=InstanceType.EPHEMERAL,
            local_artifact_storage=LocalArtifactStorage(temp_dir),
            run_storage=InMemoryRunStorage(),
            event_storage=InMemoryEventLogStorage(),
            compute_log_manager=NoOpComputeLogManager(),
            schedule_storage=SqliteScheduleStorage.from_local(temp_dir),
            scheduler=FilesystemTestScheduler(temp_dir),
            run_launcher=SyncInMemoryRunLauncher(),
        )
        context = define_test_context(instance)

        # Initialize scheduler
        instance.reconcile_scheduler_state(
            repository=context.legacy_get_repository_definition(),
            python_path='/path/to/python',
            repository_path='/path/to/repository',
        )

        query_variables = {'scheduleName': 'partition_based_multi_mode_decorator'}
        result = execute_dagster_graphql(context, GET_SCHEDULE, variables=query_variables)

        assert result.data
        schedule_payload = result.data['scheduleOrError']
        assert schedule_payload['__typename'] == 'RunningSchedule'
        assert schedule_payload['scheduleDefinition']['partitionSet']
def get_instance(temp_dir):
    """Build an ephemeral ``DagsterInstance`` with in-memory run and event-log storage.

    Args:
        temp_dir: Directory handed to ``LocalArtifactStorage`` and ``NoOpComputeLogManager``.
    """
    artifact_storage = LocalArtifactStorage(temp_dir)
    runs = InMemoryRunStorage()
    events = InMemoryEventLogStorage()
    compute_logs = NoOpComputeLogManager(temp_dir)
    return DagsterInstance(
        instance_type=InstanceType.EPHEMERAL,
        local_artifact_storage=artifact_storage,
        run_storage=runs,
        event_storage=events,
        compute_log_manager=compute_logs,
    )
def test_get_all_schedules():
    """Start one schedule, list all schedules over GraphQL, and check status and run config.

    Expects 18 running-schedule entries. ``no_config_pipeline_hourly_schedule`` must be
    ``RUNNING``; ``runConfigYaml`` must be ``None`` for ``environment_dict_error_schedule``,
    the invalid enum config for ``invalid_config_schedule``, and the filesystem storage
    config for every other schedule.
    """
    with seven.TemporaryDirectory() as temp_dir:
        instance = DagsterInstance(
            instance_type=InstanceType.EPHEMERAL,
            local_artifact_storage=LocalArtifactStorage(temp_dir),
            run_storage=InMemoryRunStorage(),
            event_storage=InMemoryEventLogStorage(),
            compute_log_manager=NoOpComputeLogManager(temp_dir),
            schedule_storage=SqliteScheduleStorage.from_local(temp_dir),
            scheduler=FilesystemTestScheduler(temp_dir),
            run_launcher=SyncInMemoryRunLauncher(),
        )
        context = define_context_for_repository_yaml(
            path=file_relative_path(__file__, '../repository.yaml'), instance=instance
        )

        # Initialize scheduler
        repository = context.legacy_get_repository_definition()
        reconcile_scheduler_state(
            python_path=sys.executable,
            repository_path="",
            repository=repository,
            instance=instance,
        )

        # Start schedule. The return value is not needed; binding it to `schedule`
        # was dead code that the loop variable below immediately shadowed.
        instance.start_schedule(repository.name, "no_config_pipeline_hourly_schedule")

        # Query Scheduler + all Schedules
        scheduler_result = execute_dagster_graphql(context, GET_SCHEDULES_QUERY)

        # These schedules are defined in dagster_graphql_tests/graphql/setup_scheduler.py
        # If you add a schedule there, be sure to update the number of schedules below
        assert scheduler_result.data
        assert scheduler_result.data['scheduler']
        assert scheduler_result.data['scheduler']['runningSchedules']
        running_schedules = scheduler_result.data['scheduler']['runningSchedules']
        assert len(running_schedules) == 18

        for running_schedule in running_schedules:
            definition = running_schedule['scheduleDefinition']
            schedule_name = definition['name']

            if schedule_name == 'no_config_pipeline_hourly_schedule':
                assert running_schedule['status'] == 'RUNNING'

            # Independent of the status check above: every schedule has its run config checked.
            if schedule_name == 'environment_dict_error_schedule':
                assert definition['runConfigYaml'] is None
            elif schedule_name == 'invalid_config_schedule':
                assert (
                    definition['runConfigYaml']
                    == 'solids:\n takes_an_enum:\n config: invalid\n'
                )
            else:
                assert definition['runConfigYaml'] == 'storage:\n filesystem: {}\n'
def get_instance(temp_dir):
    """Build an ephemeral ``DagsterInstance`` that also has a test scheduler configured.

    Args:
        temp_dir: Directory passed to the artifact storage, the test scheduler, the
            SQLite schedule storage and the compute log manager.
    """
    components = {
        'instance_type': InstanceType.EPHEMERAL,
        'local_artifact_storage': LocalArtifactStorage(temp_dir),
        'run_storage': InMemoryRunStorage(),
        'event_storage': InMemoryEventLogStorage(),
        'scheduler': FilesystemTestScheduler(temp_dir),
        'schedule_storage': SqliteScheduleStorage.from_local(temp_dir),
        'compute_log_manager': NoOpComputeLogManager(temp_dir),
    }
    return DagsterInstance(**components)
def define_scheduler_instance(tempdir):
    """Build an ephemeral ``DagsterInstance`` backed by a ``SystemCronScheduler``.

    Args:
        tempdir: Base directory; schedule storage and the scheduler both use its
            ``schedules`` subdirectory.
    """
    # Both schedule components point at the same <tempdir>/schedules path.
    schedules_dir = os.path.join(tempdir, 'schedules')

    artifact_storage = LocalArtifactStorage(tempdir)
    runs = InMemoryRunStorage()
    events = InMemoryEventLogStorage()
    compute_logs = NoOpComputeLogManager(tempdir)
    schedule_storage = SqliteScheduleStorage.from_local(schedules_dir)
    scheduler = SystemCronScheduler(schedules_dir)

    return DagsterInstance(
        instance_type=InstanceType.EPHEMERAL,
        local_artifact_storage=artifact_storage,
        run_storage=runs,
        event_storage=events,
        compute_log_manager=compute_logs,
        schedule_storage=schedule_storage,
        scheduler=scheduler,
    )
def define_scheduler_instance():
    """Yield an ephemeral, scheduler-enabled ``DagsterInstance`` rooted in a temp directory.

    The temporary directory is only removed once the generator is resumed or closed.
    NOTE(review): presumably wrapped by a context-manager or fixture decorator that is
    not visible here - confirm.
    """
    with seven.TemporaryDirectory() as temp_dir:
        # NOTE(review): `FilesytemTestScheduler` spelling is kept exactly as the original
        # references it - confirm it matches the imported name.
        instance = DagsterInstance(
            instance_type=InstanceType.EPHEMERAL,
            local_artifact_storage=LocalArtifactStorage(temp_dir),
            run_storage=InMemoryRunStorage(),
            event_storage=InMemoryEventLogStorage(),
            schedule_storage=SqliteScheduleStorage.from_local(temp_dir),
            scheduler=FilesytemTestScheduler(temp_dir),
            compute_log_manager=NoOpComputeLogManager(temp_dir),
        )
        yield instance
def _readonly_in_memory_instance():
    """Yield an ephemeral ``DagsterInstance`` whose run launcher is ``ExplodingRunLauncher``.

    NOTE(review): presumably wrapped by a context-manager or fixture decorator that is
    not visible here - confirm.
    """
    with seven.TemporaryDirectory() as temp_dir:
        components = {
            'instance_type': InstanceType.EPHEMERAL,
            'local_artifact_storage': LocalArtifactStorage(temp_dir),
            'run_storage': InMemoryRunStorage(),
            'event_storage': InMemoryEventLogStorage(),
            'compute_log_manager': LocalComputeLogManager(temp_dir),
            'run_launcher': ExplodingRunLauncher(),
            'schedule_storage': SqliteScheduleStorage.from_local(temp_dir),
        }
        yield DagsterInstance(**components)
def get_instance():
    """Yield an ephemeral ``DagsterInstance`` using SQLite run storage in a temp directory.

    NOTE(review): presumably wrapped by a context-manager or fixture decorator that is
    not visible here - confirm.
    """
    with tempfile.TemporaryDirectory() as temp_dir:
        artifact_storage = LocalArtifactStorage(temp_dir)
        runs = SqliteRunStorage.from_local(temp_dir)
        events = InMemoryEventLogStorage()
        compute_logs = NoOpComputeLogManager()
        coordinator = DefaultRunCoordinator()
        launcher = SyncInMemoryRunLauncher()

        yield DagsterInstance(
            instance_type=InstanceType.EPHEMERAL,
            local_artifact_storage=artifact_storage,
            run_storage=runs,
            event_storage=events,
            compute_log_manager=compute_logs,
            run_coordinator=coordinator,
            run_launcher=launcher,
        )
def get_instance_with_launcher(temp_dir):
    """Build an ephemeral ``DagsterInstance`` that launches runs via ``InMemoryRunLauncher``.

    Args:
        temp_dir: Directory used for artifact storage, schedule storage and compute logs.
    """
    # The launcher is created first, before any of the storage components.
    components = {'run_launcher': InMemoryRunLauncher()}
    components.update(
        instance_type=InstanceType.EPHEMERAL,
        local_artifact_storage=LocalArtifactStorage(temp_dir),
        run_storage=InMemoryRunStorage(),
        event_storage=InMemoryEventLogStorage(),
        schedule_storage=SqliteScheduleStorage.from_local(temp_dir),
        compute_log_manager=NoOpComputeLogManager(temp_dir),
    )
    return DagsterInstance(**components)
def _non_launchable_in_memory_instance():
    """Yield an ephemeral ``DagsterInstance`` configured with ``ExplodingRunLauncher``.

    The instance also carries a default run coordinator, SQLite schedule storage and a
    filesystem test scheduler, all rooted in a temporary directory.
    NOTE(review): presumably wrapped by a context-manager or fixture decorator that is
    not visible here - confirm.
    """
    with tempfile.TemporaryDirectory() as temp_dir:
        artifact_storage = LocalArtifactStorage(temp_dir)
        runs = InMemoryRunStorage()
        events = InMemoryEventLogStorage()
        compute_logs = LocalComputeLogManager(temp_dir)
        launcher = ExplodingRunLauncher()
        coordinator = DefaultRunCoordinator()
        schedule_storage = SqliteScheduleStorage.from_local(temp_dir)
        scheduler = FilesystemTestScheduler(temp_dir)

        yield DagsterInstance(
            instance_type=InstanceType.EPHEMERAL,
            local_artifact_storage=artifact_storage,
            run_storage=runs,
            event_storage=events,
            compute_log_manager=compute_logs,
            run_launcher=launcher,
            run_coordinator=coordinator,
            schedule_storage=schedule_storage,
            scheduler=scheduler,
        )
def mocked_instance():
    """Yield an ephemeral ``DagsterInstance`` while ``DagsterInstance.get`` is patched to return it.

    NOTE(review): presumably wrapped by a context-manager or fixture decorator that is
    not visible here - confirm.
    """
    with seven.TemporaryDirectory() as temp_dir:
        ephemeral_instance = DagsterInstance(
            instance_type=InstanceType.EPHEMERAL,
            local_artifact_storage=LocalArtifactStorage(temp_dir),
            run_storage=InMemoryRunStorage(),
            event_storage=InMemoryEventLogStorage(),
            compute_log_manager=NoOpComputeLogManager(),
            run_launcher=InMemoryRunLauncher(),
        )
        # The patch stays active for as long as the consumer holds the yielded instance.
        with mock.patch(
            'dagster.core.instance.DagsterInstance.get', return_value=ephemeral_instance
        ):
            yield ephemeral_instance
def define_scheduler_instance(tempdir):
    """Build an ephemeral ``DagsterInstance`` with a ``SystemCronScheduler``.

    Construction happens under ``pytest.warns`` so the ``SystemCronScheduler``
    deprecation ``UserWarning`` is required to be emitted while building.

    Args:
        tempdir: Base directory; schedule storage lives in its ``schedules`` subdirectory.
    """
    with pytest.warns(UserWarning, match="`SystemCronScheduler` is deprecated"):
        # Everything is built inside the block so the warning is captured wherever it fires.
        scheduler_instance = DagsterInstance(
            instance_type=InstanceType.EPHEMERAL,
            local_artifact_storage=LocalArtifactStorage(tempdir),
            run_storage=InMemoryRunStorage(),
            event_storage=InMemoryEventLogStorage(),
            compute_log_manager=NoOpComputeLogManager(),
            schedule_storage=SqliteScheduleStorage.from_local(
                os.path.join(tempdir, "schedules")
            ),
            scheduler=SystemCronScheduler(),
            run_coordinator=DefaultRunCoordinator(),
            run_launcher=SyncInMemoryRunLauncher(),
        )
    return scheduler_instance
def test_in_memory_event_log_storage_store_events_and_wipe():
    """Store one event for run ``foo``, then check ``wipe()`` removes it again."""
    storage = InMemoryEventLogStorage()
    assert len(storage.get_logs_for_run('foo')) == 0

    engine_event = DagsterEvent(
        DagsterEventType.ENGINE_EVENT.value,
        'nonce',
        event_specific_data=EngineEventData.in_process(999),
    )
    record = DagsterEventRecord(
        None, 'Message2', 'debug', '', 'foo', time.time(), dagster_event=engine_event
    )
    storage.store_event(record)
    assert len(storage.get_logs_for_run('foo')) == 1

    storage.wipe()
    assert len(storage.get_logs_for_run('foo')) == 0
def ephemeral(tempdir=None):
    """Create an ephemeral ``DagsterInstance`` with in-memory run and event-log storage.

    Args:
        tempdir: Directory for local artifacts and compute logs. When ``None``, the
            value returned by ``DagsterInstance.temp_storage()`` is used.
    """
    # Imports are kept local to the function, as in the original.
    from dagster.core.storage.event_log import InMemoryEventLogStorage
    from dagster.core.storage.root import LocalArtifactStorage
    from dagster.core.storage.runs import InMemoryRunStorage
    from dagster.core.storage.local_compute_log_manager import NoOpComputeLogManager

    storage_root = DagsterInstance.temp_storage() if tempdir is None else tempdir

    artifact_storage = LocalArtifactStorage(storage_root)
    runs = InMemoryRunStorage()
    events = InMemoryEventLogStorage()
    compute_logs = NoOpComputeLogManager(compute_logs_directory(storage_root))

    return DagsterInstance(
        InstanceType.EPHEMERAL,
        local_artifact_storage=artifact_storage,
        run_storage=runs,
        event_storage=events,
        compute_log_manager=compute_logs,
    )
def ephemeral(tempdir=None):
    """Create an ephemeral ``DagsterInstance`` with in-memory run and event-log storage.

    Args:
        tempdir: Root storage directory. When ``None``, the value returned by
            ``DagsterInstance.temp_storage()`` is used. The feature set is derived
            from the same directory via ``_dagster_feature_set``.
    """
    # Imports are kept local to the function, as in the original.
    from dagster.core.storage.event_log import InMemoryEventLogStorage
    from dagster.core.storage.runs import InMemoryRunStorage

    storage_root = DagsterInstance.temp_storage() if tempdir is None else tempdir
    features = _dagster_feature_set(storage_root)

    return DagsterInstance(
        InstanceType.EPHEMERAL,
        root_storage_dir=storage_root,
        run_storage=InMemoryRunStorage(),
        event_storage=InMemoryEventLogStorage(),
        feature_set=features,
    )
def ephemeral(tempdir=None):
    """Create an ephemeral ``DagsterInstance`` with in-memory storage and no-op compute logs.

    Args:
        tempdir: Root storage directory. When ``None``, the value returned by
            ``DagsterInstance.temp_storage()`` is used. The feature set and the compute
            log directory are both derived from it.
    """
    # Imports are kept local to the function, as in the original.
    from dagster.core.storage.event_log import InMemoryEventLogStorage
    from dagster.core.storage.runs import InMemoryRunStorage
    from dagster.core.storage.local_compute_log_manager import NoOpComputeLogManager

    storage_root = DagsterInstance.temp_storage() if tempdir is None else tempdir
    features = _dagster_feature_set(storage_root)

    runs = InMemoryRunStorage()
    events = InMemoryEventLogStorage()
    compute_logs = NoOpComputeLogManager(_compute_logs_base_directory(storage_root))

    return DagsterInstance(
        InstanceType.EPHEMERAL,
        root_storage_dir=storage_root,
        run_storage=runs,
        event_storage=events,
        compute_log_manager=compute_logs,
        feature_set=features,
    )
def ephemeral(tempdir=None, preload=None):
    """Create an ephemeral ``DagsterInstance`` with a synchronous in-memory run launcher.

    Args:
        tempdir: Directory for local artifacts. When ``None``, the value returned by
            ``DagsterInstance.temp_storage()`` is used.
        preload: Forwarded unchanged as ``preload`` to both ``InMemoryRunStorage`` and
            ``InMemoryEventLogStorage``.
    """
    # Imports are kept local to the function, as in the original.
    from dagster.core.launcher.sync_in_memory_run_launcher import SyncInMemoryRunLauncher
    from dagster.core.storage.event_log import InMemoryEventLogStorage
    from dagster.core.storage.root import LocalArtifactStorage
    from dagster.core.storage.runs import InMemoryRunStorage
    from dagster.core.storage.noop_compute_log_manager import NoOpComputeLogManager

    storage_root = DagsterInstance.temp_storage() if tempdir is None else tempdir

    components = {
        'local_artifact_storage': LocalArtifactStorage(storage_root),
        'run_storage': InMemoryRunStorage(preload=preload),
        'event_storage': InMemoryEventLogStorage(preload=preload),
        'compute_log_manager': NoOpComputeLogManager(),
        'run_launcher': SyncInMemoryRunLauncher(),
    }
    return DagsterInstance(InstanceType.EPHEMERAL, **components)
def test_get_all_schedules():
    """Bring the scheduler up, start one schedule, and list all schedules over GraphQL.

    Expects 11 running-schedule entries, each reporting the filesystem storage config
    as its ``environmentConfigYaml``.
    """
    with seven.TemporaryDirectory() as temp_dir:
        # NOTE(review): `FilesytemTestScheduler` spelling is kept exactly as the original
        # references it - confirm it matches the imported name.
        instance = DagsterInstance(
            instance_type=InstanceType.EPHEMERAL,
            local_artifact_storage=LocalArtifactStorage(temp_dir),
            run_storage=InMemoryRunStorage(),
            event_storage=InMemoryEventLogStorage(),
            compute_log_manager=NoOpComputeLogManager(temp_dir),
            schedule_storage=SqliteScheduleStorage.from_local(temp_dir),
            scheduler=FilesytemTestScheduler(temp_dir),
        )
        context = define_context_for_repository_yaml(
            path=file_relative_path(__file__, '../repository.yaml'), instance=instance
        )

        # Initialize scheduler
        repository = context.get_repository()
        scheduler_handle = context.scheduler_handle
        scheduler_handle.up(
            python_path=sys.executable,
            repository_path="",
            repository=repository,
            instance=instance,
        )

        # Start schedule. The return value is not needed; binding it to `schedule`
        # was dead code that the loop variable below immediately shadowed.
        instance.start_schedule(repository, "no_config_pipeline_hourly_schedule")

        # Query Scheduler + all Schedules
        scheduler_result = execute_dagster_graphql(context, GET_SCHEDULES_QUERY)

        assert scheduler_result.data
        assert scheduler_result.data['scheduler']
        assert scheduler_result.data['scheduler']['runningSchedules']
        running_schedules = scheduler_result.data['scheduler']['runningSchedules']
        assert len(running_schedules) == 11

        for running_schedule in running_schedules:
            assert (
                running_schedule['scheduleDefinition']['environmentConfigYaml']
                == 'storage:\n filesystem: {}\n'
            )
def test_basic_start_scheduled_execution_with_run_launcher():
    """Start a scheduled execution over GraphQL on an instance that has a run launcher.

    Checks that the mutation succeeds, the run id parses as a UUID, the run belongs to
    ``no_config_pipeline`` and it carries the ``dagster/schedule_name`` tag.
    """
    launcher = InMemoryRunLauncher()

    with seven.TemporaryDirectory() as temp_dir:
        instance = DagsterInstance(
            instance_type=InstanceType.EPHEMERAL,
            local_artifact_storage=LocalArtifactStorage(temp_dir),
            run_storage=InMemoryRunStorage(),
            event_storage=InMemoryEventLogStorage(),
            compute_log_manager=NoOpComputeLogManager(temp_dir),
            run_launcher=launcher,
        )
        context = define_context_for_repository_yaml(
            path=file_relative_path(__file__, '../repository.yaml'), instance=instance
        )

        context.scheduler_handle.up(
            python_path=sys.executable,
            repository_path=file_relative_path(__file__, '../'),
        )

        result = execute_dagster_graphql(
            context,
            START_SCHEDULED_EXECUTION_QUERY,
            variables={'scheduleName': 'no_config_pipeline_hourly_schedule'},
        )

        assert not result.errors
        assert result.data

        # just test existence
        execution = result.data['startScheduledExecution']
        assert execution['__typename'] == 'LaunchPipelineExecutionSuccess'

        launched_run = execution['run']
        assert uuid.UUID(launched_run['runId'])
        assert launched_run['pipeline']['name'] == 'no_config_pipeline'

        schedule_tag_present = any(
            tag['key'] == 'dagster/schedule_name'
            and tag['value'] == 'no_config_pipeline_hourly_schedule'
            for tag in launched_run['tags']
        )
        assert schedule_tag_present
def test_start_stop_schedule():
    """Start and then stop a schedule over GraphQL, checking the reported status each time."""
    schedule_variables = {'scheduleName': 'no_config_pipeline_hourly_schedule'}

    with seven.TemporaryDirectory() as temp_dir:
        instance = DagsterInstance(
            instance_type=InstanceType.EPHEMERAL,
            local_artifact_storage=LocalArtifactStorage(temp_dir),
            run_storage=InMemoryRunStorage(),
            event_storage=InMemoryEventLogStorage(),
            compute_log_manager=NoOpComputeLogManager(temp_dir),
            schedule_storage=SqliteScheduleStorage.from_local(temp_dir),
            scheduler=FilesystemTestScheduler(temp_dir),
            run_launcher=SyncInMemoryRunLauncher(),
        )
        context = define_context_for_repository_yaml(
            path=file_relative_path(__file__, '../repository.yaml'), instance=instance
        )

        # Initialize scheduler
        reconcile_scheduler_state(
            python_path=sys.executable,
            repository_path="",
            repository=context.legacy_get_repository_definition(),
            instance=instance,
        )

        # Start schedule
        start_result = execute_dagster_graphql(
            context, START_SCHEDULES_QUERY, variables=schedule_variables
        )
        started = start_result.data['startSchedule']['schedule']
        assert started['status'] == 'RUNNING'

        # Stop schedule
        stop_result = execute_dagster_graphql(
            context, STOP_SCHEDULES_QUERY, variables=schedule_variables
        )
        stopped = stop_result.data['stopRunningSchedule']['schedule']
        assert stopped['status'] == 'STOPPED'
def run_launch(execution_args, expected_count=None):
    """Invoke the pipeline launch CLI command against an ephemeral instance.

    Args:
        execution_args: Command-line arguments passed to ``pipeline_launch_command``.
        expected_count: When not ``None``, the number of runs that must sit in the
            in-memory launcher's queue afterwards. ``0`` is a valid expectation and
            is checked as well.
    """
    runner = CliRunner()
    run_launcher = InMemoryRunLauncher()
    with seven.TemporaryDirectory() as temp_dir:
        instance = DagsterInstance(
            instance_type=InstanceType.EPHEMERAL,
            local_artifact_storage=LocalArtifactStorage(temp_dir),
            run_storage=InMemoryRunStorage(),
            event_storage=InMemoryEventLogStorage(),
            compute_log_manager=NoOpComputeLogManager(),
            run_launcher=run_launcher,
        )
        with mock.patch('dagster.core.instance.DagsterInstance.get') as _instance:
            # While patched, DagsterInstance.get() hands back the instance built above.
            _instance.return_value = instance

            result = runner.invoke(pipeline_launch_command, execution_args)
            # Surface the command's stdout in the failure message.
            assert result.exit_code == 0, result.stdout
            # Compare against None so that an expectation of zero runs is still verified.
            if expected_count is not None:
                assert len(run_launcher.queue()) == expected_count
def test_run_record_timestamps(self, storage):
    """Execute a one-op job under frozen time and check the run record's timestamps.

    Both ``start_time`` and ``end_time`` of the single matching run record must equal
    the timestamp of the frozen datetime.
    """
    assert storage

    self._skip_in_memory(storage)

    # The function names `a` and `my_job` are left untouched because the decorators
    # may derive definition names from them.
    @op
    def a():
        pass

    @job
    def my_job():
        a()

    with tempfile.TemporaryDirectory() as temp_dir:
        # Reuse the instance already attached to the storage when there is one.
        if not storage._instance:  # pylint: disable=protected-access
            instance = DagsterInstance(
                instance_type=InstanceType.EPHEMERAL,
                local_artifact_storage=LocalArtifactStorage(temp_dir),
                run_storage=storage,
                event_storage=InMemoryEventLogStorage(),
                compute_log_manager=NoOpComputeLogManager(),
                run_coordinator=DefaultRunCoordinator(),
                run_launcher=SyncInMemoryRunLauncher(),
            )
        else:
            instance = storage._instance  # pylint: disable=protected-access

        central_time = create_pendulum_time(2019, 11, 2, 0, 0, 0, tz="US/Central")
        freeze_datetime = to_timezone(central_time, "US/Pacific")

        with pendulum.test(freeze_datetime):
            result = my_job.execute_in_process(instance=instance)
            run_filter = PipelineRunsFilter(run_ids=[result.run_id])
            records = instance.get_run_records(filters=run_filter)
            assert len(records) == 1

            record = records[0]
            assert record.start_time == freeze_datetime.timestamp()
            assert record.end_time == freeze_datetime.timestamp()
def event_log_storage(self):  # pylint: disable=arguments-differ
    """Return a newly created ``InMemoryEventLogStorage``."""
    in_memory_storage = InMemoryEventLogStorage()
    return in_memory_storage
def event_log_storage(self):  # pylint: disable=arguments-differ
    """Yield a new ``InMemoryEventLogStorage`` and dispose of it once the consumer is done.

    ``dispose()`` runs in a ``finally`` clause, so it is also called when the consumer
    raises or the generator is closed early.
    """
    in_memory_storage = InMemoryEventLogStorage()
    try:
        yield in_memory_storage
    finally:
        in_memory_storage.dispose()
def create_in_memory_event_log_storage():
    """Yield a single, newly created ``InMemoryEventLogStorage``.

    NOTE(review): presumably wrapped by a context-manager or fixture decorator that is
    not visible here - confirm.
    """
    in_memory_storage = InMemoryEventLogStorage()
    yield in_memory_storage
def test_event_log_storage_watch(event_storage_factory_cm_fn):
    """Check that watchers on run ``foo`` see stored events until they are unregistered.

    NOTE(review): the storage yielded by ``event_storage_factory_cm_fn`` is replaced by a
    fresh ``InMemoryEventLogStorage`` before use, so the parametrized storage is never
    exercised. This looks unintended but is preserved here; confirm before removing, as
    the immediate assertions below may not hold for other storages.
    """

    def make_record(message):
        # Engine event record for run 'foo' carrying the given message.
        return DagsterEventRecord(
            None,
            message,
            'debug',
            '',
            'foo',
            time.time(),
            dagster_event=DagsterEvent(
                DagsterEventType.ENGINE_EVENT.value,
                'nonce',
                event_specific_data=EngineEventData.in_process(999),
            ),
        )

    def stored_count(store):
        return len(store.get_logs_for_run('foo'))

    with event_storage_factory_cm_fn() as storage:
        watched = []

        def watcher(event):
            watched.append(event)

        storage = InMemoryEventLogStorage()
        assert stored_count(storage) == 0

        # Nothing is watching yet, so the first event is stored but not observed.
        storage.store_event(make_record('Message1'))
        assert stored_count(storage) == 1
        assert len(watched) == 0

        storage.watch('foo', None, watcher)

        storage.store_event(make_record('Message2'))
        assert stored_count(storage) == 2
        assert len(watched) == 1

        # Ending a watch with a handler that was never registered leaves the watcher active.
        storage.end_watch('foo', lambda event: None)
        storage.store_event(make_record('Message3'))
        assert stored_count(storage) == 3
        assert len(watched) == 2

        # Same for a run id that has no watchers.
        storage.end_watch('bar', lambda event: None)
        storage.store_event(make_record('Message4'))
        assert stored_count(storage) == 4
        assert len(watched) == 3

        time.sleep(0.5)  # this value scientifically selected from a range of attractive values
        storage.end_watch('foo', watcher)
        time.sleep(0.5)

        # With the real watcher removed, new events are stored but no longer observed.
        storage.store_event(make_record('Message5'))
        assert stored_count(storage) == 5
        assert len(watched) == 3

        storage.delete_events('foo')
        assert stored_count(storage) == 0
        assert len(watched) == 3
def create_in_memory_event_log_instance():
    """Yield ``[instance, storage]`` built around a new ``InMemoryEventLogStorage``.

    The instance comes from ``get_instance(temp_dir, storage)`` and the temporary
    directory lives until the generator is resumed or closed.
    NOTE(review): presumably wrapped by a context-manager or fixture decorator that is
    not visible here - confirm.
    """
    with seven.TemporaryDirectory() as temp_dir:
        event_log_storage = InMemoryEventLogStorage()
        yield [get_instance(temp_dir, event_log_storage), event_log_storage]
def test_in_memory_event_log_storage_run_not_found():
    """A run id with no stored events yields an empty list of logs."""
    logs_for_unknown_run = InMemoryEventLogStorage().get_logs_for_run('bar')
    assert logs_for_unknown_run == []