def test_blank_compute_logs(mock_s3_bucket):
    with tempfile.TemporaryDirectory() as temp_dir:
        manager = S3ComputeLogManager(
            bucket=mock_s3_bucket.name, prefix="my_prefix", local_dir=temp_dir
        )

        # simulate subscription to an in-progress run, where there is no key in the bucket
        stdout = manager.read_logs_file("my_run_id", "my_step_key", ComputeIOType.STDOUT)
        stderr = manager.read_logs_file("my_run_id", "my_step_key", ComputeIOType.STDERR)

        assert not stdout.data
        assert not stderr.data
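# NOTE: these tests rely on a `mock_s3_bucket` fixture that is not defined in this file
# (it would normally live in conftest.py). A minimal sketch of such a fixture, assuming
# moto < 5.x (which still exposes `mock_s3`); the bucket name and fake credentials below
# are illustrative, not taken from the project's actual conftest.
import boto3
import pytest
from moto import mock_s3


@pytest.fixture
def mock_s3_bucket(monkeypatch):
    # Fake credentials so botocore can sign requests without touching a real account.
    monkeypatch.setenv("AWS_ACCESS_KEY_ID", "testing")
    monkeypatch.setenv("AWS_SECRET_ACCESS_KEY", "testing")
    with mock_s3():
        yield boto3.resource("s3", region_name="us-east-1").create_bucket(Bucket="test-bucket")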
def test_compute_log_manager_skip_empty_upload(mock_s3_bucket):
    @op
    def easy(context):
        context.log.info("easy")

    @job
    def simple():
        easy()

    with tempfile.TemporaryDirectory() as temp_dir:
        with environ({"DAGSTER_HOME": temp_dir}):
            run_store = SqliteRunStorage.from_local(temp_dir)
            event_store = SqliteEventLogStorage(temp_dir)
            PREFIX = "my_prefix"
            manager = S3ComputeLogManager(
                bucket=mock_s3_bucket.name, prefix=PREFIX, skip_empty_files=True
            )
            instance = DagsterInstance(
                instance_type=InstanceType.PERSISTENT,
                local_artifact_storage=LocalArtifactStorage(temp_dir),
                run_storage=run_store,
                event_storage=event_store,
                compute_log_manager=manager,
                run_coordinator=DefaultRunCoordinator(),
                run_launcher=DefaultRunLauncher(),
                ref=InstanceRef.from_dir(temp_dir),
            )

            result = simple.execute_in_process(instance=instance)

            stderr_object = mock_s3_bucket.Object(
                key=f"{PREFIX}/storage/{result.run_id}/compute_logs/easy.err"
            ).get()
            assert stderr_object

            with pytest.raises(ClientError):
                # stdout is not uploaded because we do not print anything to stdout
                mock_s3_bucket.Object(
                    key=f"{PREFIX}/storage/{result.run_id}/compute_logs/easy.out"
                ).get()
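# HELLO_WORLD, SEPARATOR, and EXPECTED_LOGS are referenced by the tests below but defined
# elsewhere (normally at the top of this module). A minimal sketch with assumed values --
# the exact strings are illustrative, not taken from this file: HELLO_WORLD is what the
# "easy" step prints to stdout, SEPARATOR is the trailing newline print() adds to the
# captured stdout, and EXPECTED_LOGS lists event-log fragments expected in the captured
# stderr for that step.
HELLO_WORLD = "Hello World"
SEPARATOR = "\n"  # what print() appends on POSIX; may be os.linesep on Windows
EXPECTED_LOGS = [
    'STEP_START - Started execution of step "easy"',
    'STEP_SUCCESS - Finished execution of step "easy"',
]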
def test_compute_log_manager(mock_s3_bucket):
    @pipeline
    def simple():
        @solid
        def easy(context):
            context.log.info("easy")
            print(HELLO_WORLD)  # pylint: disable=print-call
            return "easy"

        easy()

    with seven.TemporaryDirectory() as temp_dir:
        run_store = SqliteRunStorage.from_local(temp_dir)
        event_store = SqliteEventLogStorage(temp_dir)
        manager = S3ComputeLogManager(
            bucket=mock_s3_bucket.name, prefix="my_prefix", local_dir=temp_dir
        )
        instance = DagsterInstance(
            instance_type=InstanceType.PERSISTENT,
            local_artifact_storage=LocalArtifactStorage(temp_dir),
            run_storage=run_store,
            event_storage=event_store,
            compute_log_manager=manager,
            run_coordinator=DefaultRunCoordinator(),
            run_launcher=DefaultRunLauncher(),
        )
        result = execute_pipeline(simple, instance=instance)
        compute_steps = [
            event.step_key
            for event in result.step_event_list
            if event.event_type == DagsterEventType.STEP_START
        ]
        assert len(compute_steps) == 1
        step_key = compute_steps[0]

        stdout = manager.read_logs_file(result.run_id, step_key, ComputeIOType.STDOUT)
        assert stdout.data == HELLO_WORLD + SEPARATOR

        stderr = manager.read_logs_file(result.run_id, step_key, ComputeIOType.STDERR)
        for expected in EXPECTED_LOGS:
            assert expected in stderr.data

        # Check S3 directly
        s3_object = mock_s3_bucket.Object(
            key="{prefix}/storage/{run_id}/compute_logs/easy.err".format(
                prefix="my_prefix", run_id=result.run_id
            ),
        )
        stderr_s3 = six.ensure_str(s3_object.get()["Body"].read())
        for expected in EXPECTED_LOGS:
            assert expected in stderr_s3

        # Check download behavior by deleting locally cached logs
        compute_logs_dir = os.path.join(temp_dir, result.run_id, "compute_logs")
        for filename in os.listdir(compute_logs_dir):
            os.unlink(os.path.join(compute_logs_dir, filename))

        stdout = manager.read_logs_file(result.run_id, step_key, ComputeIOType.STDOUT)
        assert stdout.data == HELLO_WORLD + SEPARATOR

        stderr = manager.read_logs_file(result.run_id, step_key, ComputeIOType.STDERR)
        for expected in EXPECTED_LOGS:
            assert expected in stderr.data
def test_compute_log_manager(s3_bucket):
    @pipeline
    def simple():
        @solid
        def easy(context):
            context.log.info('easy')
            print(HELLO_WORLD)
            return 'easy'

        easy()

    # Uses mock S3
    s3 = boto3.client('s3')
    s3.create_bucket(Bucket=s3_bucket)

    with seven.TemporaryDirectory() as temp_dir:
        run_store = SqliteRunStorage.from_local(temp_dir)
        event_store = SqliteEventLogStorage(temp_dir)
        manager = S3ComputeLogManager(bucket=s3_bucket, prefix='my_prefix', local_dir=temp_dir)
        instance = DagsterInstance(
            instance_type=InstanceType.PERSISTENT,
            local_artifact_storage=LocalArtifactStorage(temp_dir),
            run_storage=run_store,
            event_storage=event_store,
            compute_log_manager=manager,
            run_launcher=CliApiRunLauncher(),
        )
        result = execute_pipeline(simple, instance=instance)
        compute_steps = [
            event.step_key
            for event in result.step_event_list
            if event.event_type == DagsterEventType.STEP_START
        ]
        assert len(compute_steps) == 1
        step_key = compute_steps[0]

        stdout = manager.read_logs_file(result.run_id, step_key, ComputeIOType.STDOUT)
        assert stdout.data == HELLO_WORLD + SEPARATOR

        stderr = manager.read_logs_file(result.run_id, step_key, ComputeIOType.STDERR)
        for expected in EXPECTED_LOGS:
            assert expected in stderr.data

        # Check S3 directly
        s3_object = s3.get_object(
            Bucket=s3_bucket,
            Key='{prefix}/storage/{run_id}/compute_logs/easy.compute.err'.format(
                prefix='my_prefix', run_id=result.run_id
            ),
        )
        stderr_s3 = six.ensure_str(s3_object['Body'].read())
        for expected in EXPECTED_LOGS:
            assert expected in stderr_s3

        # Check download behavior by deleting locally cached logs
        compute_logs_dir = os.path.join(temp_dir, result.run_id, 'compute_logs')
        for filename in os.listdir(compute_logs_dir):
            os.unlink(os.path.join(compute_logs_dir, filename))

        stdout = manager.read_logs_file(result.run_id, step_key, ComputeIOType.STDOUT)
        assert stdout.data == HELLO_WORLD + SEPARATOR

        stderr = manager.read_logs_file(result.run_id, step_key, ComputeIOType.STDERR)
        for expected in EXPECTED_LOGS:
            assert expected in stderr.data
def test_compute_log_manager(mock_s3_bucket):
    @op
    def easy(context):
        context.log.info("easy")
        print(HELLO_WORLD)  # pylint: disable=print-call
        return "easy"

    @job
    def simple():
        easy()

    with tempfile.TemporaryDirectory() as temp_dir:
        with environ({"DAGSTER_HOME": temp_dir}):
            run_store = SqliteRunStorage.from_local(temp_dir)
            event_store = SqliteEventLogStorage(temp_dir)
            manager = S3ComputeLogManager(
                bucket=mock_s3_bucket.name, prefix="my_prefix", local_dir=temp_dir
            )
            instance = DagsterInstance(
                instance_type=InstanceType.PERSISTENT,
                local_artifact_storage=LocalArtifactStorage(temp_dir),
                run_storage=run_store,
                event_storage=event_store,
                compute_log_manager=manager,
                run_coordinator=DefaultRunCoordinator(),
                run_launcher=DefaultRunLauncher(),
                ref=InstanceRef.from_dir(temp_dir),
            )

            result = simple.execute_in_process(instance=instance)
            compute_steps = [
                event.step_key
                for event in result.all_node_events
                if event.event_type == DagsterEventType.STEP_START
            ]
            assert len(compute_steps) == 1
            step_key = compute_steps[0]

            stdout = manager.read_logs_file(result.run_id, step_key, ComputeIOType.STDOUT)
            assert stdout.data == HELLO_WORLD + SEPARATOR

            stderr = manager.read_logs_file(result.run_id, step_key, ComputeIOType.STDERR)
            for expected in EXPECTED_LOGS:
                assert expected in stderr.data

            # Check S3 directly
            s3_object = mock_s3_bucket.Object(
                key=f"my_prefix/storage/{result.run_id}/compute_logs/easy.err"
            )
            stderr_s3 = s3_object.get()["Body"].read().decode("utf-8")
            for expected in EXPECTED_LOGS:
                assert expected in stderr_s3

            # Check download behavior by deleting locally cached logs
            compute_logs_dir = os.path.join(temp_dir, result.run_id, "compute_logs")
            for filename in os.listdir(compute_logs_dir):
                os.unlink(os.path.join(compute_logs_dir, filename))

            stdout = manager.read_logs_file(result.run_id, step_key, ComputeIOType.STDOUT)
            assert stdout.data == HELLO_WORLD + SEPARATOR

            stderr = manager.read_logs_file(result.run_id, step_key, ComputeIOType.STDERR)
            for expected in EXPECTED_LOGS:
                assert expected in stderr.data
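# For reference: outside of tests, the manager exercised above is normally wired into the
# instance through dagster.yaml rather than constructed directly. A sketch of that config
# (bucket and prefix values are illustrative assumptions, not taken from this repo):
#
#   compute_logs:
#     module: dagster_aws.s3.compute_log_manager
#     class: S3ComputeLogManager
#     config:
#       bucket: "my-compute-log-bucket"
#       prefix: "my_prefix"
#       skip_empty_files: true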