def test_do_upload_graceful_failures(self, mock_fs_factory: Mock) -> None:
    """Check that do_upload separates uploadable paths from those it cannot upload.

    Two paths are requested, but only one of them is registered with the
    fake filesystem. The registered path must come back in the first
    returned list and the unregistered one in the second.
    """
    existing_file = "test-project-direct-ingest-state-us-xx/raw_data/test_file.txt"
    missing_file = (
        "test-project-direct-ingest-state-us-xx/raw_data/non_existent_file.txt"
    )

    # Only the first requested path is added to the fake filesystem.
    fake_fs = FakeGCSFileSystem()
    registered_path = GcsfsFilePath.from_bucket_and_blob_name(
        "test-project-direct-ingest-state-us-xx", "raw_data/test_file.txt"
    )
    fake_fs.test_add_path(path=registered_path, local_path=None)
    mock_fs_factory.return_value = fake_fs

    requested_paths = [(existing_file, TODAY), (missing_file, TODAY)]
    controller = UploadStateFilesToIngestBucketController(
        paths_with_timestamps=requested_paths,
        project_id="test-project",
        region="us_xx",
    )

    succeeded, failed = controller.do_upload()

    self.assertEqual(succeeded, [existing_file])
    self.assertEqual(failed, [missing_file])
def test_do_upload_succeeds(self, mock_fs_factory: Mock) -> None:
    """Check the result of do_upload for a single file present in the fake filesystem.

    The file must be listed under successes, with no failures, no skipped
    files recorded on the controller, and us_xx_manager not reporting a
    paused instance.
    """
    upload_path = "recidiviz-456-direct-ingest-state-us-xx/raw_data/test_file.txt"

    fake_fs = FakeGCSFileSystem()
    registered_path = GcsfsFilePath.from_bucket_and_blob_name(
        "recidiviz-456-direct-ingest-state-us-xx", "raw_data/test_file.txt"
    )
    fake_fs.test_add_path(path=registered_path, local_path=None)
    mock_fs_factory.return_value = fake_fs

    controller = UploadStateFilesToIngestBucketController(
        paths_with_timestamps=[(upload_path, TODAY)],
        project_id="recidiviz-456",
        region="us_xx",
    )
    expected_successes = [upload_path]

    outcome: MultiRequestResultWithSkipped[str, str, str] = controller.do_upload()

    self.assertEqual(outcome.successes, expected_successes)
    self.assertEqual(len(outcome.failures), 0)
    self.assertEqual(len(controller.skipped_files), 0)
    self.assertFalse(self.us_xx_manager.is_instance_paused())
def test_do_upload_sets_correct_content_type(
    self,
    mock_fs_factory: Mock,
) -> None:
    """Check the content types recorded in the fake filesystem after an upload.

    A .txt and a .csv file are uploaded; both must be reported as
    successes, and the fake filesystem entries must carry the content
    types "text/plain" and "text/csv" respectively.
    """
    txt_path = "recidiviz-456-direct-ingest-state-us-xx/raw_data/test_file.txt"
    csv_path = "recidiviz-456-direct-ingest-state-us-xx/raw_data/test_file.csv"

    fake_fs = FakeGCSFileSystem()
    # Registration order matters: the content-type assertion below walks
    # fake_fs.files in insertion order.
    for blob_name in ("raw_data/test_file.txt", "raw_data/test_file.csv"):
        fake_fs.test_add_path(
            path=GcsfsFilePath.from_bucket_and_blob_name(
                "recidiviz-456-direct-ingest-state-us-xx", blob_name
            ),
            local_path=None,
        )
    mock_fs_factory.return_value = fake_fs

    controller = UploadStateFilesToIngestBucketController(
        paths_with_timestamps=[(txt_path, TODAY), (csv_path, TODAY)],
        project_id="recidiviz-456",
        region="us_xx",
    )

    outcome: MultiRequestResultWithSkipped[str, str, str] = controller.do_upload()

    self.assertListEqual(outcome.successes, [txt_path, csv_path])

    observed_content_types = []
    for stored_entry in fake_fs.files.values():
        observed_content_types.append(stored_entry.content_type)
    self.assertListEqual(observed_content_types, ["text/plain", "text/csv"])
    self.assertFalse(self.us_xx_manager.is_instance_paused())
def test_get_paths_to_upload_is_correct(
    self,
    mock_fs_factory: Mock,
) -> None:
    """Check which paths get_paths_to_upload returns for a directory input.

    The fake filesystem holds a file directly under raw_data/, a file in
    raw_data/subdir1/, and a directory entry raw_data/subdir2/. Given the
    raw_data/ directory as the only input, the controller must return the
    two file paths (each paired with TODAY) and not the directory entry.
    """
    bucket = "recidiviz-456-direct-ingest-state-us-xx"
    top_level_file = "recidiviz-456-direct-ingest-state-us-xx/raw_data/test_file.txt"
    nested_file = (
        "recidiviz-456-direct-ingest-state-us-xx/raw_data/subdir1/test_file.txt"
    )

    fake_fs = FakeGCSFileSystem()
    for blob_name in ("raw_data/test_file.txt", "raw_data/subdir1/test_file.txt"):
        fake_fs.test_add_path(
            path=GcsfsFilePath.from_bucket_and_blob_name(bucket, blob_name),
            local_path=None,
        )
    # The third entry is a directory path rather than a file path.
    fake_fs.test_add_path(
        path=GcsfsDirectoryPath.from_bucket_and_blob_name(
            bucket, "raw_data/subdir2/"
        ),
        local_path=None,
    )
    mock_fs_factory.return_value = fake_fs

    controller = UploadStateFilesToIngestBucketController(
        paths_with_timestamps=[
            ("recidiviz-456-direct-ingest-state-us-xx/raw_data/", TODAY),
        ],
        project_id="recidiviz-456",
        region="us_xx",
    )

    expected_paths = [(top_level_file, TODAY), (nested_file, TODAY)]
    self.assertListEqual(expected_paths, controller.get_paths_to_upload())
    self.assertFalse(self.us_xx_manager.is_instance_paused())
def ls_with_blob_prefix(self, bucket_name: str, blob_prefix: str) -> List[Union[GcsfsDirectoryPath, GcsfsFilePath]]:
    """Return the stored paths whose absolute path starts with the given prefix.

    Args:
        bucket_name: Name of the bucket the prefix is rooted in.
        blob_prefix: Blob-name prefix to match inside that bucket.

    Returns:
        The ``gcs_path`` of every entry in ``self.files`` whose key starts
        with the absolute path built from ``bucket_name`` and
        ``blob_prefix``, in the iteration order of ``self.files``. The list
        is empty when nothing matches.
    """
    # The prefix string is the same for every entry, so build it once up
    # front instead of once per entry while the lock is held.
    prefix_abs_path = GcsfsFilePath.from_bucket_and_blob_name(
        bucket_name, blob_prefix
    ).abs_path()

    # self.files is only read while holding self.mutex.
    with self.mutex:
        return [
            entry.gcs_path
            for abs_path, entry in self.files.items()
            if abs_path.startswith(prefix_abs_path)
        ]
def test_do_upload_sets_correct_content_type(self, mock_fs_factory: Mock) -> None:
    """Check the content types recorded in the fake filesystem after an upload.

    A .txt and a .csv file are uploaded; both must appear in the first
    list returned by do_upload, and the fake filesystem entries must carry
    the content types "text/plain" and "text/csv" respectively.
    """
    txt_path = "test-project-direct-ingest-state-us-xx/raw_data/test_file.txt"
    csv_path = "test-project-direct-ingest-state-us-xx/raw_data/test_file.csv"

    fake_fs = FakeGCSFileSystem()
    # Registration order matters: the content-type assertion below walks
    # fake_fs.files in insertion order.
    for blob_name in ("raw_data/test_file.txt", "raw_data/test_file.csv"):
        fake_fs.test_add_path(
            path=GcsfsFilePath.from_bucket_and_blob_name(
                "test-project-direct-ingest-state-us-xx", blob_name
            ),
            local_path=None,
        )
    mock_fs_factory.return_value = fake_fs

    controller = UploadStateFilesToIngestBucketController(
        paths_with_timestamps=[(txt_path, TODAY), (csv_path, TODAY)],
        project_id="test-project",
        region="us_xx",
    )

    # The second returned value is not checked by this test.
    succeeded, _ = controller.do_upload()

    self.assertListEqual(succeeded, [txt_path, csv_path])

    observed_content_types = []
    for stored_entry in fake_fs.files.values():
        observed_content_types.append(stored_entry.content_type)
    self.assertListEqual(observed_content_types, ["text/plain", "text/csv"])
def test_skip_already_processed_or_discovered_files(
    self,
    mock_fs_factory: Mock,
) -> None:
    """Check that do_upload reports some requested files as skipped.

    Four files are registered and requested. test_file.txt and
    test_file.csv must be reported as successes, while skipped.csv and
    discovered.csv must be reported as skipped.
    """
    # NOTE(review): what marks skipped.csv / discovered.csv as already
    # handled is not visible in this test — presumably fixture setup; confirm.
    bucket = "recidiviz-456-direct-ingest-state-us-xx"
    txt_path = "recidiviz-456-direct-ingest-state-us-xx/raw_data/test_file.txt"
    csv_path = "recidiviz-456-direct-ingest-state-us-xx/raw_data/test_file.csv"
    skipped_path = "recidiviz-456-direct-ingest-state-us-xx/raw_data/skipped.csv"
    discovered_path = (
        "recidiviz-456-direct-ingest-state-us-xx/raw_data/discovered.csv"
    )

    fake_fs = FakeGCSFileSystem()
    blob_names = (
        "raw_data/test_file.txt",
        "raw_data/test_file.csv",
        "raw_data/skipped.csv",
        "raw_data/discovered.csv",
    )
    for blob_name in blob_names:
        fake_fs.test_add_path(
            path=GcsfsFilePath.from_bucket_and_blob_name(bucket, blob_name),
            local_path=None,
        )
    mock_fs_factory.return_value = fake_fs

    requested_paths = [
        (txt_path, TODAY),
        (csv_path, TODAY),
        (skipped_path, TODAY),
        (discovered_path, TODAY),
    ]
    controller = UploadStateFilesToIngestBucketController(
        paths_with_timestamps=requested_paths,
        project_id="recidiviz-456",
        region="us_xx",
    )

    outcome: MultiRequestResultWithSkipped[str, str, str] = controller.do_upload()

    self.assertListEqual(outcome.successes, [txt_path, csv_path])
    self.assertListEqual(outcome.skipped, [skipped_path, discovered_path])
    self.assertFalse(self.us_xx_manager.is_instance_paused())