def update(self, chunksize=100000):
    """Updates a dataset with new data.

    Args:
        chunksize (int, optional): Number of rows to transmit to the
            server with each request.
    """
    # form request body and create a session for data uploads
    self.__form_upload_body()
    response = datasets.upload_session(connection=self._connection,
                                       dataset_id=self._dataset_id,
                                       body=self.__upload_body)
    response_json = response.json()
    self._session_id = response_json['uploadSessionId']

    # upload each table
    for ix, _table in enumerate(self._tables):

        _df, _name = _table["data_frame"], _table["table_name"]

        # break the data up into chunks using a generator
        chunks = (_df[i:i + chunksize] for i in range(0, _df.shape[0], chunksize))

        total = _df.shape[0]

        # Count the number of iterations
        it_total = int(total / chunksize) + (total % chunksize != 0)

        pbar = tqdm(chunks, total=it_total, disable=(not self.progress_bar))
        for index, chunk in enumerate(pbar):
            pbar.set_description("Uploading {}/{}".format(ix + 1, len(self._tables)))

            # base64 encode the data
            encoder = Encoder(data_frame=chunk, dataset_type='multi')
            b64_enc = encoder.encode

            # form body of the request
            body = {"tableName": _name,
                    "index": index + 1,
                    "data": b64_enc}

            # make request to upload the data
            response = datasets.upload(connection=self._connection,
                                       dataset_id=self._dataset_id,
                                       session_id=self._session_id,
                                       body=body)

            if not response.ok:
                # on error, cancel the previously uploaded data
                datasets.publish_cancel(connection=self._connection,
                                        dataset_id=self._dataset_id,
                                        session_id=self._session_id)

            pbar.set_postfix(rows=min((index + 1) * chunksize, total))
        pbar.close()
    self._tables = []
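# Usage sketch (assumption, not part of this module): calling the older
# Dataset-style update() above and then publishing in a separate step.
# The `Dataset` import path, constructor arguments, and `add_table()` call
# reflect my understanding of the older mstrio API and should be treated as
# assumptions to adjust for your installed version; `conn` and `sales_df`
# are placeholder names.
#
#     from mstrio.dataset import Dataset
#
#     ds = Dataset(connection=conn, dataset_id="<existing dataset id>")
#     ds.add_table(name="sales", data_frame=sales_df, update_policy="replace")
#     ds.update(chunksize=50000)   # upload in 50k-row chunks
#     ds.publish()                 # publishing is a separate call in this version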
def update(self, chunksize: int = 100000, auto_publish: bool = True):
    """Updates a super cube with new data.

    Args:
        chunksize (int, optional): Number of rows to transmit to the
            server with each request.
        auto_publish: If True, automatically publishes the data used to
            update the super cube definition to the super cube. If False,
            simply updates the super cube but does not publish it.
    """
    # form request body and create a session for data uploads
    self.__form_upload_body()
    response = datasets.upload_session(connection=self._connection, id=self._id,
                                       body=self.__upload_body)
    response_json = response.json()
    self._session_id = response_json['uploadSessionId']

    # upload each table
    for ix, _table in enumerate(self._tables):

        _df, _name = _table["data_frame"], _table["table_name"]

        # break the data up into chunks using a generator
        chunks = (_df[i:i + chunksize] for i in range(0, _df.shape[0], chunksize))

        total = _df.shape[0]

        # Count the number of iterations
        it_total = int(total / chunksize) + (total % chunksize != 0)

        pbar = tqdm(chunks, total=it_total, disable=(not self._progress_bar))
        for index, chunk in enumerate(pbar):
            pbar.set_description(f"Uploading {ix + 1}/{len(self._tables)}")

            # base64 encode the data
            encoder = Encoder(data_frame=chunk, dataset_type='multi')
            b64_enc = encoder.encode

            # form body of the request
            body = {
                "tableName": _name,
                "index": index + 1,
                "data": b64_enc,
            }

            # make request to upload the data
            response = datasets.upload(
                connection=self._connection,
                id=self._id,
                session_id=self._session_id,
                body=body,
            )

            if not response.ok:
                # on error, cancel the previously uploaded data
                datasets.publish_cancel(connection=self._connection, id=self._id,
                                        session_id=self._session_id)

            pbar.set_postfix(rows=min((index + 1) * chunksize, total))
        pbar.close()
    self._tables = []

    # if desired, automatically publish the data to the new super cube
    if auto_publish:
        self.publish()
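# Usage sketch (assumption, not part of this module): calling the newer
# SuperCube-style update() above and letting it publish automatically via
# auto_publish. The `SuperCube` import path and constructor arguments are
# assumptions based on recent mstrio releases; verify against your installed
# version. `conn` and `sales_df` are placeholder names.
#
#     from mstrio.project_objects import SuperCube
#
#     cube = SuperCube(connection=conn, id="<existing super cube id>")
#     cube.add_table(name="sales", data_frame=sales_df, update_policy="update")
#     cube.update(chunksize=50000, auto_publish=True)  # publish happens inside update()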