def ephemeral(tempdir=None):
    from dagster.core.launcher.sync_in_memory_run_launcher import SyncInMemoryRunLauncher
    from dagster.core.storage.event_log import InMemoryEventLogStorage
    from dagster.core.storage.root import LocalArtifactStorage
    from dagster.core.storage.runs import InMemoryRunStorage
    from dagster.core.storage.noop_compute_log_manager import NoOpComputeLogManager

    if tempdir is None:
        tempdir = DagsterInstance.temp_storage()

    return DagsterInstance(
        InstanceType.EPHEMERAL,
        local_artifact_storage=LocalArtifactStorage(tempdir),
        run_storage=InMemoryRunStorage(),
        event_storage=InMemoryEventLogStorage(),
        compute_log_manager=NoOpComputeLogManager(),
        run_launcher=SyncInMemoryRunLauncher(),
    )

def dagster_instance(helm_postgres_url):  # pylint: disable=redefined-outer-name
    with tempfile.TemporaryDirectory() as tempdir:
        with environ({"DAGSTER_HOME": tempdir}):
            with DagsterInstance(
                instance_type=InstanceType.PERSISTENT,
                local_artifact_storage=LocalArtifactStorage(tempdir),
                run_storage=PostgresRunStorage(helm_postgres_url),
                event_storage=PostgresEventLogStorage(helm_postgres_url),
                compute_log_manager=NoOpComputeLogManager(),
                run_coordinator=DefaultRunCoordinator(),
                run_launcher=ExplodingRunLauncher(),  # use graphql to launch any runs
                ref=InstanceRef.from_dir(tempdir),
            ) as instance:
                yield instance

                check_export_runs(instance)

def dagster_instance_for_k8s_run_launcher(
    helm_postgres_url_for_k8s_run_launcher, run_launcher
):  # pylint: disable=redefined-outer-name
    tempdir = DagsterInstance.temp_storage()

    with DagsterInstance(
        instance_type=InstanceType.EPHEMERAL,
        local_artifact_storage=LocalArtifactStorage(tempdir),
        run_storage=PostgresRunStorage(helm_postgres_url_for_k8s_run_launcher),
        event_storage=PostgresEventLogStorage(helm_postgres_url_for_k8s_run_launcher),
        schedule_storage=PostgresScheduleStorage(helm_postgres_url_for_k8s_run_launcher),
        compute_log_manager=NoOpComputeLogManager(),
        run_coordinator=DefaultRunCoordinator(),
        run_launcher=run_launcher,
    ) as instance:
        yield instance

def dagster_instance(helm_namespace, run_launcher):  # pylint: disable=redefined-outer-name
    tempdir = DagsterInstance.temp_storage()

    with local_port_forward_postgres(namespace=helm_namespace) as local_forward_port:
        postgres_url = 'postgresql://*****:*****@localhost:{local_forward_port}/test'.format(
            local_forward_port=local_forward_port
        )
        print('Local Postgres forwarding URL: ', postgres_url)

        instance = DagsterInstance(
            instance_type=InstanceType.EPHEMERAL,
            local_artifact_storage=LocalArtifactStorage(tempdir),
            run_storage=PostgresRunStorage(postgres_url),
            event_storage=PostgresEventLogStorage(postgres_url),
            compute_log_manager=NoOpComputeLogManager(),
            run_launcher=run_launcher,
        )

        yield instance

def dagster_instance_with_k8s_scheduler(
    helm_postgres_url_for_k8s_run_launcher, run_launcher, k8s_scheduler, schedule_tempdir
):  # pylint: disable=redefined-outer-name
    with DagsterInstance(
        instance_type=InstanceType.EPHEMERAL,
        local_artifact_storage=LocalArtifactStorage(schedule_tempdir),
        run_storage=SqliteRunStorage.from_local(os.path.join(schedule_tempdir, "runs")),
        event_storage=PostgresEventLogStorage(helm_postgres_url_for_k8s_run_launcher),
        compute_log_manager=NoOpComputeLogManager(),
        run_coordinator=DefaultRunCoordinator(),
        run_launcher=run_launcher,
        schedule_storage=SqliteScheduleStorage.from_local(
            os.path.join(schedule_tempdir, "schedules")
        ),
        scheduler=k8s_scheduler,
    ) as instance:
        yield instance

def _postgres_instance_with_grpc_api_hijack():
    with seven.TemporaryDirectory() as temp_dir:
        with graphql_postgres_instance() as pg_conn_string:
            instance = DagsterInstance(
                instance_type=InstanceType.EPHEMERAL,
                local_artifact_storage=LocalArtifactStorage(temp_dir),
                run_storage=TestPostgresInstance.clean_run_storage(pg_conn_string),
                event_storage=TestPostgresInstance.clean_event_log_storage(pg_conn_string),
                compute_log_manager=LocalComputeLogManager(temp_dir),
                run_launcher=EphemeralGrpcRunLauncher(),
                schedule_storage=TestPostgresInstance.clean_schedule_storage(pg_conn_string),
            )
            try:
                yield instance
            finally:
                instance.run_launcher.join()

def test_basic_start_scheduled_execution_with_run_launcher():
    test_queue = InMemoryRunLauncher()

    with seven.TemporaryDirectory() as temp_dir:
        instance = DagsterInstance(
            instance_type=InstanceType.EPHEMERAL,
            local_artifact_storage=LocalArtifactStorage(temp_dir),
            run_storage=InMemoryRunStorage(),
            event_storage=InMemoryEventLogStorage(),
            compute_log_manager=NoOpComputeLogManager(temp_dir),
            run_launcher=test_queue,
        )

        context = define_context_for_repository_yaml(
            path=file_relative_path(__file__, '../repository.yaml'), instance=instance
        )

        scheduler_handle = context.scheduler_handle
        scheduler_handle.up(
            python_path=sys.executable, repository_path=file_relative_path(__file__, '../')
        )

        result = execute_dagster_graphql(
            context,
            START_SCHEDULED_EXECUTION_QUERY,
            variables={'scheduleName': 'no_config_pipeline_hourly_schedule'},
        )

        assert not result.errors
        assert result.data

        # just test existence
        assert (
            result.data['startScheduledExecution']['__typename']
            == 'LaunchPipelineExecutionSuccess'
        )

        assert uuid.UUID(result.data['startScheduledExecution']['run']['runId'])
        assert (
            result.data['startScheduledExecution']['run']['pipeline']['name']
            == 'no_config_pipeline'
        )

        assert any(
            tag['key'] == 'dagster/schedule_name'
            and tag['value'] == 'no_config_pipeline_hourly_schedule'
            for tag in result.data['startScheduledExecution']['run']['tags']
        )

def test_run_launcher():
    test_queue = InMemoryRunLauncher()

    with seven.TemporaryDirectory() as temp_dir:
        instance = DagsterInstance(
            instance_type=InstanceType.EPHEMERAL,
            local_artifact_storage=LocalArtifactStorage(temp_dir),
            run_storage=InMemoryRunStorage(),
            event_storage=InMemoryEventLogStorage(),
            compute_log_manager=NoOpComputeLogManager(temp_dir),
            run_launcher=test_queue,
        )

        context = define_context_for_repository_yaml(
            path=file_relative_path(__file__, '../repository.yaml'), instance=instance
        )

        result = execute_dagster_graphql(
            context=context,
            query=LAUNCH_PIPELINE_EXECUTION_MUTATION,
            variables={
                'executionParams': {
                    'selector': {'name': 'no_config_pipeline'},
                    'mode': 'default',
                }
            },
        )

        assert result.data['launchPipelineExecution']['__typename'] == 'LaunchPipelineRunSuccess'
        assert result.data['launchPipelineExecution']['run']['status'] == 'NOT_STARTED'

        run_id = result.data['launchPipelineExecution']['run']['runId']
        test_queue.run_one(instance)

        result = execute_dagster_graphql(
            context=context, query=RUN_QUERY, variables={'runId': run_id}
        )
        assert result.data['pipelineRunOrError']['__typename'] == 'PipelineRun'
        assert result.data['pipelineRunOrError']['status'] == 'SUCCESS'

def test_start_stop_schedule():
    with seven.TemporaryDirectory() as temp_dir:
        instance = DagsterInstance(
            instance_type=InstanceType.EPHEMERAL,
            local_artifact_storage=LocalArtifactStorage(temp_dir),
            run_storage=InMemoryRunStorage(),
            event_storage=InMemoryEventLogStorage(),
            compute_log_manager=NoOpComputeLogManager(temp_dir),
            schedule_storage=SqliteScheduleStorage.from_local(temp_dir),
            scheduler=FilesystemTestScheduler(temp_dir),
            run_launcher=SyncInMemoryRunLauncher(),
        )

        context = define_context_for_repository_yaml(
            path=file_relative_path(__file__, '../repository.yaml'), instance=instance
        )

        # Initialize scheduler
        repository = context.legacy_get_repository_definition()
        reconcile_scheduler_state(
            python_path=sys.executable,
            repository_path="",
            repository=repository,
            instance=instance,
        )

        # Start schedule
        start_result = execute_dagster_graphql(
            context,
            START_SCHEDULES_QUERY,
            variables={'scheduleName': 'no_config_pipeline_hourly_schedule'},
        )
        assert start_result.data['startSchedule']['schedule']['status'] == 'RUNNING'

        # Stop schedule
        stop_result = execute_dagster_graphql(
            context,
            STOP_SCHEDULES_QUERY,
            variables={'scheduleName': 'no_config_pipeline_hourly_schedule'},
        )
        assert stop_result.data['stopRunningSchedule']['schedule']['status'] == 'STOPPED'

def test_get_all_schedules():
    with seven.TemporaryDirectory() as temp_dir:
        instance = DagsterInstance(
            instance_type=InstanceType.EPHEMERAL,
            local_artifact_storage=LocalArtifactStorage(temp_dir),
            run_storage=InMemoryRunStorage(),
            event_storage=InMemoryEventLogStorage(),
            compute_log_manager=NoOpComputeLogManager(temp_dir),
            schedule_storage=SqliteScheduleStorage.from_local(temp_dir),
            scheduler=FilesystemTestScheduler(temp_dir),
        )

        context = define_context_for_repository_yaml(
            path=file_relative_path(__file__, '../repository.yaml'), instance=instance
        )

        # Initialize scheduler
        repository = context.get_repository()
        scheduler_handle = context.scheduler_handle
        scheduler_handle.up(
            python_path=sys.executable,
            repository_path="",
            repository=repository,
            instance=instance,
        )

        # Start schedule
        schedule = instance.start_schedule(repository, "no_config_pipeline_hourly_schedule")

        # Query Scheduler + all Schedules
        scheduler_result = execute_dagster_graphql(context, GET_SCHEDULES_QUERY)

        assert scheduler_result.data
        assert scheduler_result.data['scheduler']
        assert scheduler_result.data['scheduler']['runningSchedules']
        assert len(scheduler_result.data['scheduler']['runningSchedules']) == 11

        for schedule in scheduler_result.data['scheduler']['runningSchedules']:
            assert (
                schedule['scheduleDefinition']['environmentConfigYaml']
                == 'storage:\n filesystem: {}\n'
            )

def dagster_instance_for_user_deployments_subchart_disabled(
    helm_postgres_url_for_user_deployments_subchart_disabled,
):  # pylint: disable=redefined-outer-name
    tempdir = DagsterInstance.temp_storage()

    with DagsterInstance(
        instance_type=InstanceType.EPHEMERAL,
        local_artifact_storage=LocalArtifactStorage(tempdir),
        run_storage=PostgresRunStorage(helm_postgres_url_for_user_deployments_subchart_disabled),
        event_storage=PostgresEventLogStorage(
            helm_postgres_url_for_user_deployments_subchart_disabled
        ),
        compute_log_manager=NoOpComputeLogManager(),
        run_coordinator=DefaultRunCoordinator(),
        run_launcher=ExplodingRunLauncher(),
    ) as instance:
        yield instance

        check_export_runs(instance)

def dagster_instance_for_daemon(
    helm_postgres_url_for_daemon,
):  # pylint: disable=redefined-outer-name
    tempdir = DagsterInstance.temp_storage()

    with DagsterInstance(
        instance_type=InstanceType.EPHEMERAL,
        local_artifact_storage=LocalArtifactStorage(tempdir),
        run_storage=PostgresRunStorage(helm_postgres_url_for_daemon),
        event_storage=PostgresEventLogStorage(helm_postgres_url_for_daemon),
        schedule_storage=PostgresScheduleStorage(helm_postgres_url_for_daemon),
        compute_log_manager=NoOpComputeLogManager(),
        run_coordinator=QueuedRunCoordinator(),
        run_launcher=ExplodingRunLauncher(),
        scheduler=DagsterDaemonScheduler(),
    ) as instance:
        yield instance

        check_export_runs(instance)

def run_launch(execution_args, expected_count=None):
    runner = CliRunner()
    run_launcher = InMemoryRunLauncher()

    with seven.TemporaryDirectory() as temp_dir:
        instance = DagsterInstance(
            instance_type=InstanceType.EPHEMERAL,
            local_artifact_storage=LocalArtifactStorage(temp_dir),
            run_storage=InMemoryRunStorage(),
            event_storage=InMemoryEventLogStorage(),
            compute_log_manager=NoOpComputeLogManager(),
            run_launcher=run_launcher,
        )
        with mock.patch('dagster.core.instance.DagsterInstance.get') as _instance:
            _instance.return_value = instance

            result = runner.invoke(pipeline_launch_command, execution_args)
            assert result.exit_code == 0, result.stdout

            if expected_count:
                assert len(run_launcher.queue()) == expected_count

def test_fs_stores():
    @pipeline
    def simple():
        @solid
        def easy(context):
            context.log.info("easy")
            return "easy"

        easy()

    with tempfile.TemporaryDirectory() as temp_dir:
        with environ({"DAGSTER_HOME": temp_dir}):
            run_store = SqliteRunStorage.from_local(temp_dir)
            event_store = SqliteEventLogStorage(temp_dir)
            compute_log_manager = LocalComputeLogManager(temp_dir)
            instance = DagsterInstance(
                instance_type=InstanceType.PERSISTENT,
                local_artifact_storage=LocalArtifactStorage(temp_dir),
                run_storage=run_store,
                event_storage=event_store,
                compute_log_manager=compute_log_manager,
                run_coordinator=DefaultRunCoordinator(),
                run_launcher=DefaultRunLauncher(),
                ref=InstanceRef.from_dir(temp_dir),
                settings={"telemetry": {"enabled": False}},
            )

            result = execute_pipeline(simple, instance=instance)

            assert run_store.has_run(result.run_id)
            assert run_store.get_run_by_id(result.run_id).status == PipelineRunStatus.SUCCESS
            assert DagsterEventType.PIPELINE_SUCCESS in [
                event.dagster_event.event_type
                for event in event_store.get_logs_for_run(result.run_id)
                if event.is_dagster_event
            ]
            stats = event_store.get_stats_for_run(result.run_id)
            assert stats.steps_succeeded == 1
            assert stats.end_time is not None

def ephemeral(tempdir=None):
    from dagster.core.storage.event_log import InMemoryEventLogStorage
    from dagster.core.storage.root import LocalArtifactStorage
    from dagster.core.storage.runs import InMemoryRunStorage
    from dagster.core.storage.local_compute_log_manager import NoOpComputeLogManager

    if tempdir is None:
        tempdir = DagsterInstance.temp_storage()

    feature_set = dagster_feature_set(tempdir)

    return DagsterInstance(
        InstanceType.EPHEMERAL,
        local_artifact_storage=LocalArtifactStorage(tempdir),
        run_storage=InMemoryRunStorage(),
        event_storage=InMemoryEventLogStorage(),
        compute_log_manager=NoOpComputeLogManager(_compute_logs_directory(tempdir)),
        feature_set=feature_set,
    )

def dagster_instance_for_k8s_run_launcher(
    helm_postgres_url_for_k8s_run_launcher,
):  # pylint: disable=redefined-outer-name
    tempdir = DagsterInstance.temp_storage()

    instance_ref = InstanceRef.from_dir(tempdir)

    with DagsterInstance(
        instance_type=InstanceType.PERSISTENT,
        local_artifact_storage=LocalArtifactStorage(tempdir),
        run_storage=PostgresRunStorage(helm_postgres_url_for_k8s_run_launcher),
        event_storage=PostgresEventLogStorage(helm_postgres_url_for_k8s_run_launcher),
        schedule_storage=PostgresScheduleStorage(helm_postgres_url_for_k8s_run_launcher),
        compute_log_manager=NoOpComputeLogManager(),
        run_coordinator=DefaultRunCoordinator(),
        run_launcher=ExplodingRunLauncher(),
        ref=instance_ref,
    ) as instance:
        yield instance

        check_export_runs(instance)

def dagster_instance_with_k8s_scheduler(helm_namespace, run_launcher, k8s_scheduler, schedule_tempdir):
    with local_port_forward_postgres(namespace=helm_namespace) as local_forward_port:
        postgres_url = 'postgresql://*****:*****@localhost:{local_forward_port}/test'.format(
            local_forward_port=local_forward_port
        )
        print('Local Postgres forwarding URL: ', postgres_url)

        instance = DagsterInstance(
            instance_type=InstanceType.EPHEMERAL,
            local_artifact_storage=LocalArtifactStorage(schedule_tempdir),
            run_storage=SqliteRunStorage.from_local(os.path.join(schedule_tempdir, 'runs')),
            event_storage=PostgresEventLogStorage(postgres_url),
            compute_log_manager=NoOpComputeLogManager(),
            run_launcher=run_launcher,
            schedule_storage=SqliteScheduleStorage.from_local(
                os.path.join(schedule_tempdir, 'schedules')
            ),
            scheduler=k8s_scheduler,
        )

        yield instance

def dagster_instance_for_k8s_run_launcher(helm_namespace_for_k8s_run_launcher, run_launcher):
    tempdir = DagsterInstance.temp_storage()

    with local_port_forward_postgres(
        namespace=helm_namespace_for_k8s_run_launcher
    ) as local_forward_port:
        postgres_url = "postgresql://*****:*****@localhost:{local_forward_port}/test".format(
            local_forward_port=local_forward_port
        )
        print("Local Postgres forwarding URL: ", postgres_url)

        instance = DagsterInstance(
            instance_type=InstanceType.EPHEMERAL,
            local_artifact_storage=LocalArtifactStorage(tempdir),
            run_storage=PostgresRunStorage(postgres_url),
            event_storage=PostgresEventLogStorage(postgres_url),
            schedule_storage=PostgresScheduleStorage(postgres_url),
            compute_log_manager=NoOpComputeLogManager(),
            run_launcher=run_launcher,
        )

        yield instance

def dagster_instance_for_daemon(
    helm_namespace_for_daemon, run_launcher
):  # pylint: disable=redefined-outer-name
    tempdir = DagsterInstance.temp_storage()

    with local_port_forward_postgres(namespace=helm_namespace_for_daemon) as local_forward_port:
        postgres_url = "postgresql://*****:*****@localhost:{local_forward_port}/test".format(
            local_forward_port=local_forward_port
        )
        print("Local Postgres forwarding URL: ", postgres_url)

        instance = DagsterInstance(
            instance_type=InstanceType.EPHEMERAL,
            local_artifact_storage=LocalArtifactStorage(tempdir),
            run_storage=PostgresRunStorage(postgres_url),
            event_storage=PostgresEventLogStorage(postgres_url),
            schedule_storage=PostgresScheduleStorage(postgres_url),
            compute_log_manager=NoOpComputeLogManager(),
            run_coordinator=QueuedRunCoordinator(),
            run_launcher=run_launcher,
            scheduler=DagsterDaemonScheduler(),
        )

        yield instance

def test_run_record_timestamps(self, storage):
    assert storage

    self._skip_in_memory(storage)

    @op
    def a():
        pass

    @job
    def my_job():
        a()

    with tempfile.TemporaryDirectory() as temp_dir:
        if storage._instance:  # pylint: disable=protected-access
            instance = storage._instance  # pylint: disable=protected-access
        else:
            instance = DagsterInstance(
                instance_type=InstanceType.EPHEMERAL,
                local_artifact_storage=LocalArtifactStorage(temp_dir),
                run_storage=storage,
                event_storage=InMemoryEventLogStorage(),
                compute_log_manager=NoOpComputeLogManager(),
                run_coordinator=DefaultRunCoordinator(),
                run_launcher=SyncInMemoryRunLauncher(),
            )

        freeze_datetime = to_timezone(
            create_pendulum_time(2019, 11, 2, 0, 0, 0, tz="US/Central"), "US/Pacific"
        )

        with pendulum.test(freeze_datetime):
            result = my_job.execute_in_process(instance=instance)
            records = instance.get_run_records(
                filters=PipelineRunsFilter(run_ids=[result.run_id])
            )
            assert len(records) == 1
            record = records[0]
            assert record.start_time == freeze_datetime.timestamp()
            assert record.end_time == freeze_datetime.timestamp()

def test_compute_log_manager_skip_empty_upload(mock_s3_bucket):
    @op
    def easy(context):
        context.log.info("easy")

    @job
    def simple():
        easy()

    with tempfile.TemporaryDirectory() as temp_dir:
        with environ({"DAGSTER_HOME": temp_dir}):
            run_store = SqliteRunStorage.from_local(temp_dir)
            event_store = SqliteEventLogStorage(temp_dir)
            PREFIX = "my_prefix"
            manager = S3ComputeLogManager(
                bucket=mock_s3_bucket.name, prefix=PREFIX, skip_empty_files=True
            )
            instance = DagsterInstance(
                instance_type=InstanceType.PERSISTENT,
                local_artifact_storage=LocalArtifactStorage(temp_dir),
                run_storage=run_store,
                event_storage=event_store,
                compute_log_manager=manager,
                run_coordinator=DefaultRunCoordinator(),
                run_launcher=DefaultRunLauncher(),
                ref=InstanceRef.from_dir(temp_dir),
            )
            result = simple.execute_in_process(instance=instance)

            stderr_object = mock_s3_bucket.Object(
                key=f"{PREFIX}/storage/{result.run_id}/compute_logs/easy.err"
            ).get()
            assert stderr_object

            with pytest.raises(ClientError):
                # stdout is not uploaded because we do not print anything to stdout
                mock_s3_bucket.Object(
                    key=f"{PREFIX}/storage/{result.run_id}/compute_logs/easy.out"
                ).get()

def test_fs_stores():
    @pipeline
    def simple():
        @solid
        def easy(context):
            context.log.info('easy')
            return 'easy'

        easy()

    with seven.TemporaryDirectory() as temp_dir:
        run_store = SqliteRunStorage.from_local(temp_dir)
        event_store = SqliteEventLogStorage(temp_dir)
        compute_log_manager = LocalComputeLogManager(temp_dir)
        instance = DagsterInstance(
            instance_type=InstanceType.PERSISTENT,
            local_artifact_storage=LocalArtifactStorage(temp_dir),
            run_storage=run_store,
            event_storage=event_store,
            compute_log_manager=compute_log_manager,
        )
        run = RunConfig()
        execute_pipeline(simple, run_config=run, instance=instance)

        assert run_store.has_run(run.run_id)
        assert run_store.get_run_by_id(run.run_id).status == PipelineRunStatus.SUCCESS
        assert DagsterEventType.PIPELINE_SUCCESS in [
            event.dagster_event.event_type
            for event in event_store.get_logs_for_run(run.run_id)
            if event.is_dagster_event
        ]
        stats = event_store.get_stats_for_run(run.run_id)
        assert stats.steps_succeeded == 1
        assert stats.end_time is not None

def test_compute_log_manager(s3_bucket):
    @pipeline
    def simple():
        @solid
        def easy(context):
            context.log.info('easy')
            print(HELLO_WORLD)
            return 'easy'

        easy()

    # Uses mock S3
    s3 = boto3.client('s3')
    s3.create_bucket(Bucket=s3_bucket)

    with seven.TemporaryDirectory() as temp_dir:
        run_store = SqliteRunStorage.from_local(temp_dir)
        event_store = SqliteEventLogStorage(temp_dir)
        manager = S3ComputeLogManager(bucket=s3_bucket, prefix='my_prefix', local_dir=temp_dir)
        instance = DagsterInstance(
            instance_type=InstanceType.PERSISTENT,
            local_artifact_storage=LocalArtifactStorage(temp_dir),
            run_storage=run_store,
            event_storage=event_store,
            compute_log_manager=manager,
            run_launcher=CliApiRunLauncher(),
        )
        result = execute_pipeline(simple, instance=instance)
        compute_steps = [
            event.step_key
            for event in result.step_event_list
            if event.event_type == DagsterEventType.STEP_START
        ]
        assert len(compute_steps) == 1
        step_key = compute_steps[0]

        stdout = manager.read_logs_file(result.run_id, step_key, ComputeIOType.STDOUT)
        assert stdout.data == HELLO_WORLD + SEPARATOR

        stderr = manager.read_logs_file(result.run_id, step_key, ComputeIOType.STDERR)
        for expected in EXPECTED_LOGS:
            assert expected in stderr.data

        # Check S3 directly
        s3_object = s3.get_object(
            Bucket=s3_bucket,
            Key='{prefix}/storage/{run_id}/compute_logs/easy.compute.err'.format(
                prefix='my_prefix', run_id=result.run_id
            ),
        )
        stderr_s3 = six.ensure_str(s3_object['Body'].read())
        for expected in EXPECTED_LOGS:
            assert expected in stderr_s3

        # Check download behavior by deleting locally cached logs
        compute_logs_dir = os.path.join(temp_dir, result.run_id, 'compute_logs')
        for filename in os.listdir(compute_logs_dir):
            os.unlink(os.path.join(compute_logs_dir, filename))

        stdout = manager.read_logs_file(result.run_id, step_key, ComputeIOType.STDOUT)
        assert stdout.data == HELLO_WORLD + SEPARATOR

        stderr = manager.read_logs_file(result.run_id, step_key, ComputeIOType.STDERR)
        for expected in EXPECTED_LOGS:
            assert expected in stderr.data

def dagster_instance(helm_namespace, run_launcher):  # pylint: disable=redefined-outer-name
    @contextmanager
    def local_port_forward_postgres():
        print('Port-forwarding postgres')
        postgres_pod_name = (
            check_output([
                'kubectl',
                'get',
                'pods',
                '--namespace',
                helm_namespace,
                '-l',
                'app=postgresql,release=dagster',
                '-o',
                'jsonpath="{.items[0].metadata.name}"',
            ])
            .decode('utf-8')
            .strip('"')
        )
        forward_port = find_free_port()
        wait_for_pod(postgres_pod_name, namespace=helm_namespace)
        try:
            p = subprocess.Popen([
                'kubectl',
                'port-forward',
                '--namespace',
                helm_namespace,
                postgres_pod_name,
                '{forward_port}:5432'.format(forward_port=forward_port),
            ])

            # Validate port forwarding works
            start = time.time()
            while True:
                if time.time() - start > PG_PORT_FORWARDING_TIMEOUT:
                    raise Exception('Timed out while waiting for postgres port forwarding')

                print(
                    'Waiting for port forwarding from k8s pod %s:5432 to localhost:%d to be'
                    ' available...' % (postgres_pod_name, forward_port)
                )
                try:
                    conn = psycopg2.connect(
                        database='test',
                        user='******',
                        password='******',
                        host='localhost',
                        port=forward_port,
                    )
                    conn.close()
                    break
                except:  # pylint: disable=bare-except, broad-except
                    time.sleep(1)
                    continue

            yield forward_port

        finally:
            print('Terminating port-forwarding')
            p.terminate()

    tempdir = DagsterInstance.temp_storage()

    with local_port_forward_postgres() as local_forward_port:
        postgres_url = 'postgresql://*****:*****@localhost:{local_forward_port}/test'.format(
            local_forward_port=local_forward_port
        )
        print('Local Postgres forwarding URL: ', postgres_url)

        instance = DagsterInstance(
            instance_type=InstanceType.EPHEMERAL,
            local_artifact_storage=LocalArtifactStorage(tempdir),
            run_storage=PostgresRunStorage(postgres_url),
            event_storage=PostgresEventLogStorage(postgres_url),
            compute_log_manager=NoOpComputeLogManager(),
            run_launcher=run_launcher,
        )

        yield instance

def test_compute_log_manager_with_envvar(gcs_bucket):
    @job
    def simple():
        @op
        def easy(context):
            context.log.info("easy")
            print(HELLO_WORLD)  # pylint: disable=print-call
            return "easy"

        easy()

    with open(os.environ.get("GOOGLE_APPLICATION_CREDENTIALS")) as f:
        with tempfile.TemporaryDirectory() as temp_dir:
            with environ({"ENV_VAR": f.read(), "DAGSTER_HOME": temp_dir}):
                run_store = SqliteRunStorage.from_local(temp_dir)
                event_store = SqliteEventLogStorage(temp_dir)
                manager = GCSComputeLogManager(
                    bucket=gcs_bucket,
                    prefix="my_prefix",
                    local_dir=temp_dir,
                    json_credentials_envvar="ENV_VAR",
                )
                instance = DagsterInstance(
                    instance_type=InstanceType.PERSISTENT,
                    local_artifact_storage=LocalArtifactStorage(temp_dir),
                    run_storage=run_store,
                    event_storage=event_store,
                    compute_log_manager=manager,
                    run_coordinator=DefaultRunCoordinator(),
                    run_launcher=DefaultRunLauncher(),
                    ref=InstanceRef.from_dir(temp_dir),
                )
                result = simple.execute_in_process(instance=instance)

                compute_steps = [
                    event.step_key
                    for event in result.all_node_events
                    if event.event_type == DagsterEventType.STEP_START
                ]
                assert len(compute_steps) == 1
                step_key = compute_steps[0]

                stdout = manager.read_logs_file(result.run_id, step_key, ComputeIOType.STDOUT)
                assert stdout.data == HELLO_WORLD + SEPARATOR

                stderr = manager.read_logs_file(result.run_id, step_key, ComputeIOType.STDERR)
                for expected in EXPECTED_LOGS:
                    assert expected in stderr.data

                # Check GCS directly
                stderr_gcs = (
                    storage.Client()
                    .bucket(gcs_bucket)
                    .blob(f"my_prefix/storage/{result.run_id}/compute_logs/easy.err")
                    .download_as_bytes()
                    .decode("utf-8")
                )
                for expected in EXPECTED_LOGS:
                    assert expected in stderr_gcs

                # Check download behavior by deleting locally cached logs
                compute_logs_dir = os.path.join(temp_dir, result.run_id, "compute_logs")
                for filename in os.listdir(compute_logs_dir):
                    os.unlink(os.path.join(compute_logs_dir, filename))

                stdout = manager.read_logs_file(result.run_id, step_key, ComputeIOType.STDOUT)
                assert stdout.data == HELLO_WORLD + SEPARATOR

                stderr = manager.read_logs_file(result.run_id, step_key, ComputeIOType.STDERR)
                for expected in EXPECTED_LOGS:
                    assert expected in stderr.data

def test_dev_loop_changing_versions():
    with seven.TemporaryDirectory() as temp_dir:
        run_store = SqliteRunStorage.from_local(temp_dir)
        event_store = ConsolidatedSqliteEventLogStorage(temp_dir)
        compute_log_manager = LocalComputeLogManager(temp_dir)
        instance = DagsterInstance(
            instance_type=InstanceType.PERSISTENT,
            local_artifact_storage=LocalArtifactStorage(temp_dir),
            run_storage=run_store,
            event_storage=event_store,
            compute_log_manager=compute_log_manager,
            run_launcher=CliApiRunLauncher(),
        )

        run_config = {
            "solids": {
                "create_string_1": {"config": {"input_str": "apple", "base_dir": temp_dir}},
                "create_string_2": {"config": {"input_str": "apple", "base_dir": temp_dir}},
                "take_string_1": {"config": {"input_str": "apple", "base_dir": temp_dir}},
                "take_string_2": {"config": {"input_str": "apple", "base_dir": temp_dir}},
                "take_string_two_inputs": {"config": {"input_str": "apple", "base_dir": temp_dir}},
            },
            "intermediate_storage": {"filesystem": {"config": {"base_dir": temp_dir}}},
        }

        result = execute_pipeline(
            basic_pipeline,
            run_config=run_config,
            mode="only_mode",
            tags={"dagster/is_memoized_run": "true"},
            instance=instance,
        )
        assert result.success

        assert not get_step_keys_to_execute(instance, basic_pipeline, run_config, "only_mode")

        run_config["solids"]["take_string_1"]["config"]["input_str"] = "banana"

        assert set(
            get_step_keys_to_execute(instance, basic_pipeline, run_config, "only_mode")
        ) == set(["take_string_1.compute", "take_string_two_inputs.compute"])

        result2 = execute_pipeline(
            basic_pipeline,
            run_config=run_config,
            mode="only_mode",
            tags={"dagster/is_memoized_run": "true"},
            instance=instance,
        )
        assert result2.success

        assert not get_step_keys_to_execute(instance, basic_pipeline, run_config, "only_mode")

        run_config["solids"]["take_string_two_inputs"]["config"]["input_str"] = "banana"

        assert get_step_keys_to_execute(instance, basic_pipeline, run_config, "only_mode") == [
            "take_string_two_inputs.compute"
        ]

        result3 = execute_pipeline(
            basic_pipeline,
            run_config=run_config,
            mode="only_mode",
            tags={"dagster/is_memoized_run": "true"},
            instance=instance,
        )
        assert result3.success

        assert not get_step_keys_to_execute(instance, basic_pipeline, run_config, "only_mode")

def test_execute_display_command():
    with tempfile.TemporaryDirectory() as temp_dir:
        run_store = SqliteRunStorage.from_local(temp_dir)
        event_store = ConsolidatedSqliteEventLogStorage(temp_dir)
        compute_log_manager = LocalComputeLogManager(temp_dir)
        instance = DagsterInstance(
            instance_type=InstanceType.PERSISTENT,
            local_artifact_storage=LocalArtifactStorage(temp_dir),
            run_storage=run_store,
            event_storage=event_store,
            compute_log_manager=compute_log_manager,
            run_coordinator=DefaultRunCoordinator(),
            run_launcher=DefaultRunLauncher(),
        )
        run_config = {
            "solids": {
                "create_string_1_asset": {"config": {"input_str": "apple"}},
                "take_string_1_asset": {"config": {"input_str": "apple"}},
            },
            "resources": {"io_manager": {"config": {"base_dir": temp_dir}}},
        }

        # write run config to temp file
        # file is temp because intermediate storage directory is temporary
        with open(os.path.join(temp_dir, "pipeline_config.yaml"), "w") as f:
            f.write(yaml.dump(run_config))

        kwargs = {
            "config": (os.path.join(temp_dir, "pipeline_config.yaml"),),
            "pipeline": "asset_pipeline",
            "python_file": file_relative_path(
                __file__, "../../execution_tests/memoized_dev_loop_pipeline.py"
            ),
            "tags": '{"dagster/is_memoized_run": "true"}',
        }

        with Capturing() as output:
            execute_list_versions_command(kwargs=kwargs, instance=instance)

        assert output

        # execute the pipeline once so that addresses have been populated.
        result = execute_pipeline(
            asset_pipeline,
            run_config=run_config,
            mode="only_mode",
            tags={"dagster/is_memoized_run": "true"},
            instance=instance,
        )
        assert result.success

        with Capturing() as output:
            execute_list_versions_command(kwargs=kwargs, instance=instance)

        assert output

def test_compute_log_manager(mock_s3_bucket):
    @op
    def easy(context):
        context.log.info("easy")
        print(HELLO_WORLD)  # pylint: disable=print-call
        return "easy"

    @job
    def simple():
        easy()

    with tempfile.TemporaryDirectory() as temp_dir:
        with environ({"DAGSTER_HOME": temp_dir}):
            run_store = SqliteRunStorage.from_local(temp_dir)
            event_store = SqliteEventLogStorage(temp_dir)
            manager = S3ComputeLogManager(
                bucket=mock_s3_bucket.name, prefix="my_prefix", local_dir=temp_dir
            )
            instance = DagsterInstance(
                instance_type=InstanceType.PERSISTENT,
                local_artifact_storage=LocalArtifactStorage(temp_dir),
                run_storage=run_store,
                event_storage=event_store,
                compute_log_manager=manager,
                run_coordinator=DefaultRunCoordinator(),
                run_launcher=DefaultRunLauncher(),
                ref=InstanceRef.from_dir(temp_dir),
            )
            result = simple.execute_in_process(instance=instance)

            compute_steps = [
                event.step_key
                for event in result.all_node_events
                if event.event_type == DagsterEventType.STEP_START
            ]
            assert len(compute_steps) == 1
            step_key = compute_steps[0]

            stdout = manager.read_logs_file(result.run_id, step_key, ComputeIOType.STDOUT)
            assert stdout.data == HELLO_WORLD + SEPARATOR

            stderr = manager.read_logs_file(result.run_id, step_key, ComputeIOType.STDERR)
            for expected in EXPECTED_LOGS:
                assert expected in stderr.data

            # Check S3 directly
            s3_object = mock_s3_bucket.Object(
                key=f"my_prefix/storage/{result.run_id}/compute_logs/easy.err"
            )
            stderr_s3 = s3_object.get()["Body"].read().decode("utf-8")
            for expected in EXPECTED_LOGS:
                assert expected in stderr_s3

            # Check download behavior by deleting locally cached logs
            compute_logs_dir = os.path.join(temp_dir, result.run_id, "compute_logs")
            for filename in os.listdir(compute_logs_dir):
                os.unlink(os.path.join(compute_logs_dir, filename))

            stdout = manager.read_logs_file(result.run_id, step_key, ComputeIOType.STDOUT)
            assert stdout.data == HELLO_WORLD + SEPARATOR

            stderr = manager.read_logs_file(result.run_id, step_key, ComputeIOType.STDERR)
            for expected in EXPECTED_LOGS:
                assert expected in stderr.data

def test_compute_log_manager(mock_s3_bucket):
    @pipeline
    def simple():
        @solid
        def easy(context):
            context.log.info("easy")
            print(HELLO_WORLD)  # pylint: disable=print-call
            return "easy"

        easy()

    with seven.TemporaryDirectory() as temp_dir:
        run_store = SqliteRunStorage.from_local(temp_dir)
        event_store = SqliteEventLogStorage(temp_dir)
        manager = S3ComputeLogManager(
            bucket=mock_s3_bucket.name, prefix="my_prefix", local_dir=temp_dir
        )
        instance = DagsterInstance(
            instance_type=InstanceType.PERSISTENT,
            local_artifact_storage=LocalArtifactStorage(temp_dir),
            run_storage=run_store,
            event_storage=event_store,
            compute_log_manager=manager,
            run_coordinator=DefaultRunCoordinator(),
            run_launcher=DefaultRunLauncher(),
        )
        result = execute_pipeline(simple, instance=instance)
        compute_steps = [
            event.step_key
            for event in result.step_event_list
            if event.event_type == DagsterEventType.STEP_START
        ]
        assert len(compute_steps) == 1
        step_key = compute_steps[0]

        stdout = manager.read_logs_file(result.run_id, step_key, ComputeIOType.STDOUT)
        assert stdout.data == HELLO_WORLD + SEPARATOR

        stderr = manager.read_logs_file(result.run_id, step_key, ComputeIOType.STDERR)
        for expected in EXPECTED_LOGS:
            assert expected in stderr.data

        # Check S3 directly
        s3_object = mock_s3_bucket.Object(
            key="{prefix}/storage/{run_id}/compute_logs/easy.err".format(
                prefix="my_prefix", run_id=result.run_id
            ),
        )
        stderr_s3 = six.ensure_str(s3_object.get()["Body"].read())
        for expected in EXPECTED_LOGS:
            assert expected in stderr_s3

        # Check download behavior by deleting locally cached logs
        compute_logs_dir = os.path.join(temp_dir, result.run_id, "compute_logs")
        for filename in os.listdir(compute_logs_dir):
            os.unlink(os.path.join(compute_logs_dir, filename))

        stdout = manager.read_logs_file(result.run_id, step_key, ComputeIOType.STDOUT)
        assert stdout.data == HELLO_WORLD + SEPARATOR

        stderr = manager.read_logs_file(result.run_id, step_key, ComputeIOType.STDERR)
        for expected in EXPECTED_LOGS:
            assert expected in stderr.data

def test_compute_log_manager(
    mock_create_blob_client, mock_generate_blob_sas, storage_account, container, credential
):
    mock_generate_blob_sas.return_value = "fake-url"
    fake_client = FakeBlobServiceClient(storage_account)
    mock_create_blob_client.return_value = fake_client

    @pipeline
    def simple():
        @solid
        def easy(context):
            context.log.info("easy")
            print(HELLO_WORLD)  # pylint: disable=print-call
            return "easy"

        easy()

    with tempfile.TemporaryDirectory() as temp_dir:
        run_store = SqliteRunStorage.from_local(temp_dir)
        event_store = SqliteEventLogStorage(temp_dir)
        manager = AzureBlobComputeLogManager(
            storage_account=storage_account,
            container=container,
            prefix="my_prefix",
            local_dir=temp_dir,
            secret_key=credential,
        )
        instance = DagsterInstance(
            instance_type=InstanceType.PERSISTENT,
            local_artifact_storage=LocalArtifactStorage(temp_dir),
            run_storage=run_store,
            event_storage=event_store,
            compute_log_manager=manager,
            run_coordinator=DefaultRunCoordinator(),
            run_launcher=SyncInMemoryRunLauncher(),
        )
        result = execute_pipeline(simple, instance=instance)
        compute_steps = [
            event.step_key
            for event in result.step_event_list
            if event.event_type == DagsterEventType.STEP_START
        ]
        assert len(compute_steps) == 1
        step_key = compute_steps[0]

        stdout = manager.read_logs_file(result.run_id, step_key, ComputeIOType.STDOUT)
        assert stdout.data == HELLO_WORLD + SEPARATOR

        stderr = manager.read_logs_file(result.run_id, step_key, ComputeIOType.STDERR)
        for expected in EXPECTED_LOGS:
            assert expected in stderr.data

        # Check ADLS2 directly
        adls2_object = fake_client.get_blob_client(
            container=container,
            blob="{prefix}/storage/{run_id}/compute_logs/easy.err".format(
                prefix="my_prefix", run_id=result.run_id
            ),
        )
        adls2_stderr = adls2_object.download_blob().readall().decode("utf-8")
        for expected in EXPECTED_LOGS:
            assert expected in adls2_stderr

        # Check download behavior by deleting locally cached logs
        compute_logs_dir = os.path.join(temp_dir, result.run_id, "compute_logs")
        for filename in os.listdir(compute_logs_dir):
            os.unlink(os.path.join(compute_logs_dir, filename))

        stdout = manager.read_logs_file(result.run_id, step_key, ComputeIOType.STDOUT)
        assert stdout.data == HELLO_WORLD + SEPARATOR

        stderr = manager.read_logs_file(result.run_id, step_key, ComputeIOType.STDERR)
        for expected in EXPECTED_LOGS:
            assert expected in stderr.data