def gcs_export_directory(bucket_name: str, today: datetime.date,
                         state_code: str) -> GcsfsDirectoryPath:
    """Builds the dated GCS directory that export files are written into.

    The resulting location has the format:
    gs://{bucket_name}/ingested_state_data/{state_code}/{YYYY}/{MM}/{DD}

    Args:
        bucket_name: Name of the destination bucket.
        today: Date whose year, month and day form the trailing path
            components, zero-padded to 4, 2 and 2 digits respectively.
        state_code: Value used as the path component directly after
            'ingested_state_data'.

    Returns:
        The directory path built from the bucket name and the dated blob name.
    """
    dated_blob_name = (
        f'ingested_state_data/{state_code}/{today.year:04}/{today.month:02}/{today.day:02}/'
    )
    export_dir = GcsfsDirectoryPath.from_bucket_and_blob_name(
        bucket_name=bucket_name, blob_name=dated_blob_name)
    # cast() only informs the type checker; it performs no runtime conversion.
    return cast(GcsfsDirectoryPath, export_dir)
 def _get_files_to_move(self) -> List[str]:
     """Collects the paths of the files to move to deprecated.

     Storage subdirectories are looked up for this region and file type,
     with the start/end date bounds passed along to that lookup. Every
     '.csv' path listed under those subdirectories is kept when its file
     name matches INGESTED_FILE_REGEX and, if a file_filter is set, the
     filter pattern is also found somewhere in the file name.

     Returns:
         The matching paths, in the order they were listed.
     """
     region_root = GcsfsDirectoryPath.from_bucket_and_blob_name(
         self.region_storage_dir_path_for_file_type.bucket_name,
         self.region_code)
     candidate_subdirs = gsutil_get_storage_subdirs_containing_file_types(
         storage_bucket_path=region_root.abs_path(),
         file_type=self.file_type,
         lower_bound_date=self.start_date_bound,
         upper_bound_date=self.end_date_bound)

     files_to_move = []
     for subdir_path in candidate_subdirs:
         for csv_path in gsutil_ls(f'{subdir_path}*.csv'):
             base_name = os.path.basename(csv_path)
             # Only names matching the ingested-file pattern are candidates.
             if not re.match(INGESTED_FILE_REGEX, base_name):
                 continue
             # An unset/empty filter accepts every candidate.
             if self.file_filter and not re.search(self.file_filter,
                                                   base_name):
                 continue
             files_to_move.append(csv_path)
     return files_to_move
예제 #3
0
 def test_get_paths_to_upload_is_correct(
     self,
     mock_fs_factory: Mock,
 ) -> None:
     """Checks which paths the controller reports for upload.

     The fake file system is seeded with two files (one directly under
     raw_data/, one under raw_data/subdir1/) and one directory entry
     (raw_data/subdir2/). Given the raw_data/ directory paired with TODAY,
     get_paths_to_upload() is expected to return exactly the two files, each
     paired with TODAY. The test also asserts that the us_xx manager reports
     its instance as not paused.
     """
     bucket = "recidiviz-456-direct-ingest-state-us-xx"
     fake_fs = FakeGCSFileSystem()

     # (path class, blob name) for each entry seeded into the fake file system.
     seeded_entries = (
         (GcsfsFilePath, "raw_data/test_file.txt"),
         (GcsfsFilePath, "raw_data/subdir1/test_file.txt"),
         (GcsfsDirectoryPath, "raw_data/subdir2/"),
     )
     for path_cls, blob_name in seeded_entries:
         fake_fs.test_add_path(
             path=path_cls.from_bucket_and_blob_name(bucket, blob_name),
             local_path=None,
         )
     mock_fs_factory.return_value = fake_fs

     controller = UploadStateFilesToIngestBucketController(
         paths_with_timestamps=[
             ("recidiviz-456-direct-ingest-state-us-xx/raw_data/", TODAY),
         ],
         project_id="recidiviz-456",
         region="us_xx",
     )

     expected_paths = [
         ("recidiviz-456-direct-ingest-state-us-xx/raw_data/test_file.txt",
          TODAY),
         ("recidiviz-456-direct-ingest-state-us-xx/raw_data/subdir1/test_file.txt",
          TODAY),
     ]
     actual_paths = controller.get_paths_to_upload()
     self.assertListEqual(expected_paths, actual_paths)
     self.assertFalse(self.us_xx_manager.is_instance_paused())