Exemple #1
0
class S3Uploader(BaseUploader):
    """Uploader that forwards written data to a ``MultiPartUploader``.

    The target bucket is ``QuerybookSettings.STORE_BUCKET_NAME`` and the key
    is ``QuerybookSettings.STORE_PATH_PREFIX`` followed by the ``uri`` passed
    to the constructor. ``write()`` and ``end()`` operate on the uploader
    created by ``start()``.
    """

    def __init__(self, uri: str):
        self._uri = uri
        # No underlying uploader exists until start() is called.
        self._uploader = None

    def start(self):
        """Create the underlying multipart uploader for this object's key."""
        bucket_name = QuerybookSettings.STORE_BUCKET_NAME
        self._uploader = MultiPartUploader(bucket_name, self.uri)

    def write(self, data: str) -> bool:
        """Hand ``data`` to the active uploader and return its result."""
        active = self._uploader
        return active.write(data)

    def end(self):
        """Complete the active upload, then drop the uploader reference."""
        self._uploader.complete()
        # Cleared only after complete() returns, so a failing complete()
        # leaves is_uploading reporting True.
        self._uploader = None

    @property
    def is_uploading(self):
        """True while an uploader created by start() is still held."""
        return not (self._uploader is None)

    @property
    def uri(self):
        """The constructor ``uri`` prefixed with the configured store path."""
        prefix = QuerybookSettings.STORE_PATH_PREFIX
        return f"{prefix}{self._uri}"
Exemple #2
0
    def _df_to_s3(self, df: pd.DataFrame):
        """Upload ``df`` as CSV to ``self.destination_s3_path()``.

        Rows are serialized in consecutive batches of 500 and each batch is
        passed to a ``MultiPartUploader``. The column header is emitted with
        the first batch only, and the DataFrame index is not written. An
        empty frame results in no ``write`` calls before ``complete()``.
        """
        rows_per_part = 500

        s3_path = self.destination_s3_path()
        bucket, key = S3FileCopier.s3_path_to_bucket_key(s3_path)
        uploader = MultiPartUploader(bucket, key)

        for offset in range(0, len(df), rows_per_part):
            part = df.iloc[offset:offset + rows_per_part]
            # Only the very first part carries the CSV header row.
            csv_text = part.to_csv(index=False, header=(offset == 0))
            uploader.write(csv_text)

        uploader.complete()
Exemple #3
0
 def start(self):
     """Create the multipart uploader for the configured bucket and ``self.uri``."""
     bucket_name = QuerybookSettings.STORE_BUCKET_NAME
     self._uploader = MultiPartUploader(bucket_name, self.uri)