Example #1
    def update(self, chunksize=100000):
        """Updates a dataset with new data.
        Args:
            chunksize (int, optional): Number of rows to transmit to the server with each request.
        """

        # form request body and create a session for data uploads
        self.__form_upload_body()
        response = datasets.upload_session(connection=self._connection,
                                           dataset_id=self._dataset_id, body=self.__upload_body)

        response_json = response.json()
        self._session_id = response_json['uploadSessionId']

        # upload each table
        for ix, _table in enumerate(self._tables):

            _df, _name = _table["data_frame"], _table["table_name"]

            # break the data up into chunks using a generator
            chunks = (_df[i:i + chunksize] for i in range(0, _df.shape[0], chunksize))

            total = _df.shape[0]

            # number of chunks to upload (ceiling division)
            it_total = int(total/chunksize) + (total % chunksize != 0)

            pbar = tqdm(chunks, total=it_total, disable=(not self.progress_bar))
            for index, chunk in enumerate(pbar):
                pbar.set_description("Uploading {}/{}".format(ix+1, len(self._tables)))

                # base64 encode the data
                encoder = Encoder(data_frame=chunk, dataset_type='multi')
                b64_enc = encoder.encode

                # form body of the request
                body = {"tableName": _name,
                        "index": index + 1,
                        "data": b64_enc}

                # make request to upload the data
                response = datasets.upload(connection=self._connection,
                                           dataset_id=self._dataset_id,
                                           session_id=self._session_id,
                                           body=body)

                if not response.ok:
                    # on error, cancel the previously uploaded data
                    datasets.publish_cancel(connection=self._connection,
                                            dataset_id=self._dataset_id,
                                            session_id=self._session_id)

                pbar.set_postfix(rows=min((index+1)*chunksize, total))
            pbar.close()
        self._tables = []
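
A minimal usage sketch for the method above: it assumes the surrounding Dataset class exposes an add_table helper, as in typical mstrio-py workflows. The import paths, connection parameters, update_policy value and dataset id are illustrative assumptions, not part of the snippet.

import pandas as pd

from mstrio.connection import Connection   # assumed import path
from mstrio.dataset import Dataset         # assumed import path

# placeholder connection details; replace with a real environment
conn = Connection(base_url="https://demo.example.com/MicroStrategyLibrary",
                  username="user", password="secret", project_name="Project")

# 250000 rows with the default chunksize of 100000 gives
# it_total = int(250000 / 100000) + (250000 % 100000 != 0) = 3 upload requests
df = pd.DataFrame({"id": range(250000), "value": range(250000)})

ds = Dataset(connection=conn, dataset_id="<dataset_id>")          # existing dataset
ds.add_table(name="sales", data_frame=df, update_policy="add")    # assumed signature
ds.update(chunksize=100000)
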
Example #2
    def update(self, chunksize: int = 100000, auto_publish: bool = True):
        """Updates a super cube with new data.

        Args:
            chunksize (int, optional): Number of rows to transmit to the server
                with each request.
            auto_publish (bool, optional): If True, automatically publish the
                updated data to the super cube after uploading. If False, only
                update the super cube without publishing it.
        """

        # form request body and create a session for data uploads
        self.__form_upload_body()
        response = datasets.upload_session(connection=self._connection,
                                           id=self._id,
                                           body=self.__upload_body)

        response_json = response.json()
        self._session_id = response_json['uploadSessionId']

        # upload each table
        for ix, _table in enumerate(self._tables):

            _df, _name = _table["data_frame"], _table["table_name"]

            # break the data up into chunks using a generator
            chunks = (_df[i:i + chunksize]
                      for i in range(0, _df.shape[0], chunksize))

            total = _df.shape[0]

            # number of chunks to upload (ceiling division)
            it_total = int(total / chunksize) + (total % chunksize != 0)

            pbar = tqdm(chunks,
                        total=it_total,
                        disable=(not self._progress_bar))
            for index, chunk in enumerate(pbar):
                pbar.set_description(f"Uploading {ix + 1}/{len(self._tables)}")

                # base64 encode the data
                encoder = Encoder(data_frame=chunk, dataset_type='multi')
                b64_enc = encoder.encode

                # form body of the request
                body = {
                    "tableName": _name,
                    "index": index + 1,
                    "data": b64_enc,
                }

                # make request to upload the data
                response = datasets.upload(
                    connection=self._connection,
                    id=self._id,
                    session_id=self._session_id,
                    body=body,
                )

                if not response.ok:
                    # on error, cancel the previously uploaded data
                    datasets.publish_cancel(connection=self._connection,
                                            id=self._id,
                                            session_id=self._session_id)

                pbar.set_postfix(rows=min((index + 1) * chunksize, total))
            pbar.close()
        self._tables = []

        # if desired, automatically publish the data to the new super cube
        if auto_publish:
            self.publish()
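
A similar sketch for the SuperCube variant, assuming the newer mstrio-py layout where SuperCube lives under mstrio.project_objects; the import paths, connection parameters and add_table call are assumptions for illustration. Passing auto_publish=False uploads the data without publishing, so publish() can be called explicitly later, which is convenient when several tables are updated before a single publish.

import pandas as pd

from mstrio.connection import Connection        # assumed import path
from mstrio.project_objects import SuperCube    # assumed import path

conn = Connection(base_url="https://demo.example.com/MicroStrategyLibrary",
                  username="user", password="secret", project_name="Project")

df = pd.DataFrame({"region": ["North", "South"], "revenue": [100.0, 250.0]})

cube = SuperCube(connection=conn, id="<super_cube_id>")                  # existing super cube
cube.add_table(name="revenue", data_frame=df, update_policy="update")    # assumed signature
cube.update(chunksize=100000, auto_publish=False)                        # upload only
cube.publish()                                                           # publish explicitly
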