class S3Uploader(BaseUploader):
    def __init__(self, uri: str):
        self._uploader = None
        self._uri = uri

    def start(self):
        # Begin a multipart upload to the configured results bucket.
        self._uploader = MultiPartUploader(
            QuerybookSettings.STORE_BUCKET_NAME, self.uri
        )

    def write(self, data: str) -> bool:
        return self._uploader.write(data)

    def end(self):
        # Finalize the multipart upload and release the uploader.
        self._uploader.complete()
        self._uploader = None

    @property
    def is_uploading(self):
        return self._uploader is not None

    @property
    def uri(self):
        # Full object key: the configured path prefix plus the caller-supplied uri.
        return f"{QuerybookSettings.STORE_PATH_PREFIX}{self._uri}"
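Taken together, the uploader exposes a simple start/write/end lifecycle. Here is a minimal usage sketch, assuming the contract above; the object key and the row data are illustrative only, not part of Querybook:

# Hypothetical caller: stream CSV rows to S3 as they are produced.
uploader = S3Uploader("adhoc/query_123.csv")  # key is illustrative
uploader.start()                              # opens the multipart upload
uploader.write("id,name\n")                   # rows can be written incrementally
uploader.write("1,alice\n")
uploader.end()                                # completes the upload
assert not uploader.is_uploading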
def _df_to_s3(self, df: pd.DataFrame):
    MULTI_UPLOADER_CHUNK_SIZE = 500
    bucket, key = S3FileCopier.s3_path_to_bucket_key(self.destination_s3_path())
    s3_uploader = MultiPartUploader(bucket, key)
    # Split the DataFrame into 500-row chunks; only the first chunk writes
    # the CSV header, so the uploaded parts concatenate into one valid CSV.
    for chunk_no, sub_df in df.groupby(np.arange(len(df)) // MULTI_UPLOADER_CHUNK_SIZE):
        s3_uploader.write(sub_df.to_csv(index=False, header=(chunk_no == 0)))
    s3_uploader.complete()
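The groupby expression does the chunking: integer-dividing each positional row index by the chunk size yields one group label per 500-row block. A small self-contained illustration of that trick with toy data (nothing here is Querybook-specific):

import numpy as np
import pandas as pd

df = pd.DataFrame({"x": range(1200)})
CHUNK = 500
for chunk_no, sub_df in df.groupby(np.arange(len(df)) // CHUNK):
    # header=(chunk_no == 0) emits the CSV header exactly once.
    csv_part = sub_df.to_csv(index=False, header=(chunk_no == 0))
    print(chunk_no, len(sub_df))  # prints: 0 500, 1 500, 2 200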