Esempio n. 1
0
 def _get_subdirs_to_copy(self) -> List[str]:
     """Return the storage subdirectories that should be copied.

     Delegates to gsutil_get_storage_subdirs_containing_file_types, passing
     the absolute path of the source region storage directory, the file type
     configured on this instance (file_type_to_copy), and the instance's
     start/end date bounds.
     """
     source_path = self.source_region_storage_dir_path.abs_path()
     file_type = self.file_type_to_copy
     upper_bound = self.end_date_bound
     lower_bound = self.start_date_bound

     subdirs = gsutil_get_storage_subdirs_containing_file_types(
         storage_bucket_path=source_path,
         file_type=file_type,
         upper_bound_date=upper_bound,
         lower_bound_date=lower_bound,
     )
     return subdirs
Esempio n. 2
0
 def get_date_subdir_paths(self) -> List[str]:
     """Return the storage subdirectories holding raw data files.

     Delegates to gsutil_get_storage_subdirs_containing_file_types, passing
     the absolute path of this instance's storage bucket, the fixed file type
     GcsfsDirectIngestFileType.RAW_DATA, and the instance's start/end date
     bounds.
     """
     bucket_path = self.storage_bucket.abs_path()
     raw_data_type = GcsfsDirectIngestFileType.RAW_DATA
     upper_bound = self.end_date_bound
     lower_bound = self.start_date_bound

     subdir_paths = gsutil_get_storage_subdirs_containing_file_types(
         storage_bucket_path=bucket_path,
         file_type=raw_data_type,
         upper_bound_date=upper_bound,
         lower_bound_date=lower_bound,
     )
     return subdir_paths
Esempio n. 3
0
 def get_date_subdir_paths(self) -> List[str]:
     """Return the storage subdirectories holding files of the type to move.

     Delegates to gsutil_get_storage_subdirs_containing_file_types, passing
     the absolute path of this instance's storage bucket, the file type
     configured on this instance (file_type_to_move), and the instance's
     start/end date bounds.
     """
     bucket_path = self.storage_bucket.abs_path()
     file_type = self.file_type_to_move
     upper_bound = self.end_date_bound
     lower_bound = self.start_date_bound

     subdir_paths = gsutil_get_storage_subdirs_containing_file_types(
         storage_bucket_path=bucket_path,
         file_type=file_type,
         upper_bound_date=upper_bound,
         lower_bound_date=lower_bound,
     )
     return subdir_paths
Esempio n. 4
0
 def _get_files_to_move(self) -> List[str]:
     """Collect the paths of the files that should be moved to deprecated.

     Subdirectories are looked up via
     gsutil_get_storage_subdirs_containing_file_types using this instance's
     region storage directory, file type and start/end date bounds. Within
     each subdirectory every `*.csv` entry is listed, and a path is kept when:

     * its file name matches INGESTED_FILE_REGEX (anchored at the start of
       the name, as with re.match), and
     * file_filter is unset/empty, or the file name contains a match for it
       (re.search).

     Returns:
         The full paths of the selected files, in listing order.
     """
     date_subdirs = gsutil_get_storage_subdirs_containing_file_types(
         storage_bucket_path=self.region_storage_dir_path.abs_path(),
         file_type=self.file_type,
         lower_bound_date=self.start_date_bound,
         upper_bound_date=self.end_date_bound,
     )

     files_to_move = []
     for date_subdir in date_subdirs:
         for candidate_path in gsutil_ls(f'{date_subdir}*.csv'):
             # Only the final path component takes part in the matching.
             base_name = os.path.basename(candidate_path)
             if not re.match(INGESTED_FILE_REGEX, base_name):
                 continue
             # A filter is optional; when present the name must satisfy it.
             if self.file_filter and not re.search(self.file_filter, base_name):
                 continue
             files_to_move.append(candidate_path)
     return files_to_move