def _copy_to_storage(self, path: str, normalized_file_name: str) -> None:
    """Copy one file into the storage bucket folder named after ``self.date``.

    Args:
        path: Source location, handed to ``gsutil_cp`` unchanged.
        normalized_file_name: Name the copy gets inside
            ``gs://<self.storage_bucket>/<self.date>``.
    """
    destination_dir = f'gs://{self.storage_bucket}/{self.date}'
    # The log line reports the destination folder rather than the full file path.
    logging.info("Copying [%s] to [%s]", path, destination_dir)
    gsutil_cp(path, f'{destination_dir}/{normalized_file_name}')
def _copy_files_for_date(self, date_str: str):
    """Copy every object under one date folder between the two storage buckets.

    The source is ``gs://<self.prod_storage_bucket>/<date_str>/*`` and the
    destination is ``gs://<self.staging_storage_bucket>/<date_str>/``. When
    ``self.dry_run`` is set nothing is copied, but the (source, destination)
    pair is still recorded and the progress indicator still advances.

    Args:
        date_str: Name of the date folder to copy.
    """
    source_glob = f'gs://{self.prod_storage_bucket}/{date_str}/*'
    destination_dir = f'gs://{self.staging_storage_bucket}/{date_str}/'
    copy_record = (source_glob, destination_dir)

    if not self.dry_run:
        gsutil_cp(from_path=source_glob, to_path=destination_dir)

    # Bookkeeping and the progress tick happen while holding the mutex.
    with self.mutex:
        self.copy_list.append(copy_record)
        if self.copy_progress:
            self.copy_progress.next()
def _copy_files_for_date(self, subdir_path_str: str):
    """Copy the contents of one storage subdirectory to the staging bucket.

    The subdirectory's relative path is kept as-is; only the bucket changes
    (``self.prod_region_storage_dir_path`` bucket to
    ``self.staging_region_storage_dir_path`` bucket). When ``self.dry_run``
    is set nothing is copied, but the (source, destination) pair is still
    recorded and the progress indicator still advances.

    Args:
        subdir_path_str: Absolute path of the subdirectory; trailing slashes
            are stripped before it is parsed.
    """
    subdir = GcsfsDirectoryPath.from_absolute_path(subdir_path_str.rstrip('/'))
    source_bucket = self.prod_region_storage_dir_path.bucket_name
    relative_path = subdir.relative_path
    destination_bucket = self.staging_region_storage_dir_path.bucket_name

    # The trailing "*" on the source makes it a wildcard over the directory.
    source_glob = f'gs://{source_bucket}/{relative_path}*'
    destination_dir = f'gs://{destination_bucket}/{relative_path}'

    if not self.dry_run:
        gsutil_cp(from_path=source_glob, to_path=destination_dir)

    # Bookkeeping and the progress tick happen while holding the mutex.
    with self.mutex:
        self.copy_list.append((source_glob, destination_dir))
        if self.copy_progress:
            self.copy_progress.next()
def _copy_to_ingest_bucket(self, path: str, normalized_file_name: str) -> None:
    """Copy one file into ``self.ingest_bucket`` under its normalized name.

    When ``self.dry_run`` is set nothing is copied, but the
    (source, destination URI) pair is still recorded and the progress
    indicator still advances.

    Args:
        path: Source location, handed to ``gsutil_cp`` unchanged.
        normalized_file_name: File name to use inside the ingest bucket.
    """
    upload_file_path = GcsfsFilePath.from_directory_and_file_name(
        self.ingest_bucket, normalized_file_name)
    destination_uri = upload_file_path.uri()

    if not self.dry_run:
        gsutil_cp(path, destination_uri)

    # Bookkeeping and the progress tick happen while holding the mutex.
    with self.mutex:
        self.copies_list.append((path, destination_uri))
        if self.move_progress:
            self.move_progress.next()
def _copy_files_for_date(self, subdir_path_str: str) -> None:
    """Copy the contents of one storage subdirectory to the destination bucket.

    The subdirectory's relative path is kept as-is; only the bucket changes
    (``self.source_region_storage_dir_path`` bucket to
    ``self.destination_region_storage_dir_path`` bucket). When
    ``self.dry_run`` is set nothing is copied, but the (source, destination)
    pair is still recorded and the progress indicator still advances.

    Args:
        subdir_path_str: Absolute path of the subdirectory; trailing slashes
            are stripped before it is parsed.
    """
    trimmed_path = subdir_path_str.rstrip("/")
    subdir = GcsfsDirectoryPath.from_absolute_path(trimmed_path)

    source_bucket = self.source_region_storage_dir_path.bucket_name
    relative_path = subdir.relative_path
    destination_bucket = self.destination_region_storage_dir_path.bucket_name

    # The trailing "*" on the source makes it a wildcard over the directory.
    source_glob = f"gs://{source_bucket}/{relative_path}*"
    destination_dir = f"gs://{destination_bucket}/{relative_path}"

    if not self.dry_run:
        gsutil_cp(from_path=source_glob, to_path=destination_dir)

    # Bookkeeping and the progress tick happen while holding the mutex.
    with self.mutex:
        self.copy_list.append((source_glob, destination_dir))
        if self.copy_progress:
            self.copy_progress.next()
def _copy_to_storage(self, path: str, normalized_file_name: str) -> None:
    """Copy one file into the dated raw-data folder of the storage bucket.

    The destination is
    ``gs://<self.storage_bucket>/<RAW_DATA value>/<YYYY>/<MM>/<DD>/<name>``,
    with the date parts taken from ``self.datetime``. When ``self.dry_run``
    is set, the intended copy is only logged.

    Args:
        path: Source location, handed to ``gsutil_cp`` unchanged.
        normalized_file_name: File name to use inside the dated folder.
    """
    # A GCS URI always uses "/" separators. os.path.join follows the host OS
    # and would emit backslashes on Windows, so join the parts explicitly.
    storage_dir_path = 'gs://' + '/'.join((
        self.storage_bucket,
        GcsfsDirectIngestFileType.RAW_DATA.value,
        f'{self.datetime.year:04}',
        f'{self.datetime.month:02}',
        f'{self.datetime.day:02}',
    ))
    full_file_storage_path = f'{storage_dir_path}/{normalized_file_name}'

    if self.dry_run:
        logging.info("[DRY RUN] Would copy [%s] to [%s]",
                     path, full_file_storage_path)
        return

    logging.info("Copying [%s] to [%s]", path, full_file_storage_path)
    gsutil_cp(path, full_file_storage_path)
def _copy_to_ingest_bucket(
    self,
    path: str,
    full_file_upload_path: GcsfsFilePath,
) -> None:
    """Copy one file to its ingest-bucket location and record the outcome.

    Outcomes:
      * dry run: nothing is copied; the (path, URI) pair is added to
        ``self.copies_list``.
      * copy succeeds: ``path`` is added to ``self.uploaded_files`` and the
        (path, URI) pair to ``self.copies_list``.
      * ``gsutil_cp`` raises ``ValueError``: ``path`` is added to
        ``self.unable_to_upload_files`` and the error is not re-raised.

    The progress indicator, if any, advances in every case.

    Args:
        path: Source location, handed to ``gsutil_cp`` unchanged.
        full_file_upload_path: Destination; its ``uri()`` is the copy target.
    """
    upload_uri = full_file_upload_path.uri()

    # Run the copy outside the lock so the mutex is not held while it runs.
    copy_failed = False
    if not self.dry_run:
        try:
            gsutil_cp(path, upload_uri)
        except ValueError:
            copy_failed = True

    # All shared bookkeeping is updated in one critical section, so the
    # result lists get the same protection as the progress indicator.
    with self.mutex:
        if copy_failed:
            self.unable_to_upload_files.append(path)
        else:
            if not self.dry_run:
                self.uploaded_files.append(path)
            self.copies_list.append((path, upload_uri))
        if self.move_progress:
            # pylint: disable=not-callable
            self.move_progress.next()