def _get_subdirs_to_copy(self) -> List[str]:
    """Look up the storage subdirectories that this copy should consider.

    Delegates to ``gsutil_get_storage_subdirs_containing_file_types``,
    searching under ``self.source_region_storage_dir_path`` for
    ``self.file_type_to_copy`` and passing ``self.end_date_bound`` /
    ``self.start_date_bound`` as the upper / lower date bounds.

    Returns:
        The list of subdirectory paths returned by the helper, unmodified.
    """
    # Arguments are evaluated in the same order as they are passed below.
    source_root = self.source_region_storage_dir_path.abs_path()
    wanted_file_type = self.file_type_to_copy
    newest_allowed = self.end_date_bound
    oldest_allowed = self.start_date_bound

    return gsutil_get_storage_subdirs_containing_file_types(
        storage_bucket_path=source_root,
        file_type=wanted_file_type,
        upper_bound_date=newest_allowed,
        lower_bound_date=oldest_allowed,
    )
def get_date_subdir_paths(self) -> List[str]:
    """Return the raw-data subdirectory paths found in the storage bucket.

    Delegates to ``gsutil_get_storage_subdirs_containing_file_types`` with
    the file type fixed to ``GcsfsDirectIngestFileType.RAW_DATA``, searching
    under ``self.storage_bucket`` and passing ``self.end_date_bound`` /
    ``self.start_date_bound`` as the upper / lower date bounds.

    Returns:
        The list of subdirectory paths returned by the helper, unmodified.
    """
    # Dict literals evaluate their values in order, so attribute access
    # happens in the same sequence as the keyword arguments would.
    lookup_kwargs = {
        "storage_bucket_path": self.storage_bucket.abs_path(),
        "file_type": GcsfsDirectIngestFileType.RAW_DATA,
        "upper_bound_date": self.end_date_bound,
        "lower_bound_date": self.start_date_bound,
    }
    return gsutil_get_storage_subdirs_containing_file_types(**lookup_kwargs)
def get_date_subdir_paths(self) -> List[str]:
    """Return the subdirectory paths in the storage bucket for the file type being moved.

    Delegates to ``gsutil_get_storage_subdirs_containing_file_types``,
    searching under ``self.storage_bucket`` for ``self.file_type_to_move``
    and passing ``self.end_date_bound`` / ``self.start_date_bound`` as the
    upper / lower date bounds.

    Returns:
        The list of subdirectory paths returned by the helper, unmodified.
    """
    date_subdirs = gsutil_get_storage_subdirs_containing_file_types(
        storage_bucket_path=self.storage_bucket.abs_path(),
        file_type=self.file_type_to_move,
        upper_bound_date=self.end_date_bound,
        lower_bound_date=self.start_date_bound,
    )
    return date_subdirs
def _get_files_to_move(self) -> List[str]:
    """Collect the paths of the files that should be moved to deprecated.

    Subdirectories are looked up via
    ``gsutil_get_storage_subdirs_containing_file_types`` under
    ``self.region_storage_dir_path`` for ``self.file_type``, using
    ``self.start_date_bound`` / ``self.end_date_bound`` as the lower / upper
    date bounds. Within each subdirectory, every ``*.csv`` path listed by
    ``gsutil_ls`` is kept when:

    * its file name matches ``INGESTED_FILE_REGEX`` (anchored at the start
      of the name, since ``re.match`` is used), and
    * ``self.file_filter`` is falsy, or ``self.file_filter`` is found
      anywhere in the file name (``re.search``).

    Returns:
        The selected paths, in the order they were listed.
    """
    candidate_subdirs = gsutil_get_storage_subdirs_containing_file_types(
        storage_bucket_path=self.region_storage_dir_path.abs_path(),
        file_type=self.file_type,
        lower_bound_date=self.start_date_bound,
        upper_bound_date=self.end_date_bound,
    )

    paths_to_move: List[str] = []
    for subdir in candidate_subdirs:
        # The glob is appended directly to the subdirectory path as returned
        # by the helper; no separator is inserted here.
        for csv_path in gsutil_ls(f'{subdir}*.csv'):
            csv_name = os.path.basename(csv_path)

            # Skip anything that does not look like an ingested file.
            if not re.match(INGESTED_FILE_REGEX, csv_name):
                continue

            # A filter, when provided, must match somewhere in the name.
            if self.file_filter and not re.search(self.file_filter, csv_name):
                continue

            paths_to_move.append(csv_path)

    return paths_to_move