def gcs_export_directory(
    bucket_name: str, today: datetime.date, state_code: str
) -> GcsfsDirectoryPath:
    """Builds the dated GCS directory that exported files are written into.

    The resulting location has the shape:
        gs://{bucket_name}/ingested_state_data/{state_code}/{YYYY}/{MM}/{DD}

    The year is zero-padded to four digits and the month and day to two.
    """
    # The trailing slash is part of the blob name handed to the path factory.
    blob_name = f'ingested_state_data/{state_code}/{today.year:04}/{today.month:02}/{today.day:02}/'
    export_dir = GcsfsDirectoryPath.from_bucket_and_blob_name(
        bucket_name=bucket_name,
        blob_name=blob_name,
    )
    # cast() only informs the type checker; it performs no runtime conversion.
    return cast(GcsfsDirectoryPath, export_dir)
def _get_files_to_move(self) -> List[str]:
    """Collects the paths of the files that should be moved to deprecated.

    Lists the storage subdirectories for this region and file type that fall
    within the configured start/end date bounds, then keeps every `.csv` file
    in them whose name matches INGESTED_FILE_REGEX and, when a file_filter is
    set, also matches that filter.
    """
    region_root = GcsfsDirectoryPath.from_bucket_and_blob_name(
        self.region_storage_dir_path_for_file_type.bucket_name,
        self.region_code,
    )
    dated_subdirs = gsutil_get_storage_subdirs_containing_file_types(
        storage_bucket_path=region_root.abs_path(),
        file_type=self.file_type,
        lower_bound_date=self.start_date_bound,
        upper_bound_date=self.end_date_bound,
    )

    files_to_move = []
    for subdir in dated_subdirs:
        for candidate in gsutil_ls(f'{subdir}*.csv'):
            base_name = os.path.basename(candidate)
            # Skip anything that does not look like an ingested file.
            if not re.match(INGESTED_FILE_REGEX, base_name):
                continue
            # The filter is optional; it only excludes files when it is set.
            if self.file_filter and not re.search(self.file_filter, base_name):
                continue
            files_to_move.append(candidate)
    return files_to_move
def test_get_paths_to_upload_is_correct(
    self,
    mock_fs_factory: Mock,
) -> None:
    """Checks the paths returned by get_paths_to_upload() for a directory input.

    Two files and one directory entry are registered on the fake filesystem
    under raw_data/. The expected result lists only the two files, each paired
    with the timestamp supplied for the parent directory.
    """
    ingest_bucket = "recidiviz-456-direct-ingest-state-us-xx"

    fake_fs = FakeGCSFileSystem()
    for file_blob in ("raw_data/test_file.txt", "raw_data/subdir1/test_file.txt"):
        fake_fs.test_add_path(
            path=GcsfsFilePath.from_bucket_and_blob_name(ingest_bucket, file_blob),
            local_path=None,
        )
    # The directory entry is registered too but is absent from the expected result.
    fake_fs.test_add_path(
        path=GcsfsDirectoryPath.from_bucket_and_blob_name(
            ingest_bucket, "raw_data/subdir2/"
        ),
        local_path=None,
    )
    mock_fs_factory.return_value = fake_fs

    controller = UploadStateFilesToIngestBucketController(
        paths_with_timestamps=[
            ("recidiviz-456-direct-ingest-state-us-xx/raw_data/", TODAY),
        ],
        project_id="recidiviz-456",
        region="us_xx",
    )

    expected_paths = [
        ("recidiviz-456-direct-ingest-state-us-xx/raw_data/test_file.txt", TODAY),
        (
            "recidiviz-456-direct-ingest-state-us-xx/raw_data/subdir1/test_file.txt",
            TODAY,
        ),
    ]
    self.assertListEqual(expected_paths, controller.get_paths_to_upload())
    self.assertFalse(self.us_xx_manager.is_instance_paused())