def to_iterator(self, raw_data_fd, download_time):
    # The downloaded chunk is gzip-compressed Arrow IPC data: wrap the file
    # descriptor so it is decompressed transparently as the reader consumes it.
    gzip_decoder = GzipFile(fileobj=raw_data_fd, mode='r')
    reader = open_stream(gzip_decoder)
    it = PyArrowChunkIterator()
    for rb in reader:
        it.add_record_batch(rb)
    return it
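For context, here is a minimal, self-contained sketch of the same decompress-and-iterate pattern, assuming only pyarrow and the standard library; the in-memory table and the gzip round-trip are fabricated stand-ins for the compressed chunk the connector downloads:

import gzip
import io

import pyarrow as pa
import pyarrow.ipc as ipc

# Fabricate a gzipped Arrow IPC stream, standing in for a downloaded chunk.
table = pa.table({"id": [1, 2, 3], "name": ["a", "b", "c"]})
sink = io.BytesIO()
with ipc.new_stream(sink, table.schema) as writer:
    writer.write_table(table)
compressed = io.BytesIO(gzip.compress(sink.getvalue()))

# Same pattern as to_iterator(): decompress lazily, then walk the batches.
gzip_decoder = gzip.GzipFile(fileobj=compressed, mode='r')
reader = ipc.open_stream(gzip_decoder)
for rb in reader:
    print(rb.num_rows, rb.schema.names)  # 3 ['id', 'name']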
Example 2
from pyarrow import ipc


def open_stream(source):
    """
    pyarrow.open_stream is deprecated since 0.12; use pyarrow.ipc.open_stream.
    """
    import warnings
    warnings.warn("pyarrow.open_stream is deprecated, please use "
                  "pyarrow.ipc.open_stream")
    return ipc.open_stream(source)
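A quick illustration of the shim in use; the round-trip data below is fabricated, and open_stream refers to the function defined just above:

import io
import warnings

import pyarrow as pa
import pyarrow.ipc as ipc

# Build a one-batch Arrow IPC stream in memory.
batch = pa.record_batch([pa.array([1, 2])], names=["x"])
buf = io.BytesIO()
with ipc.new_stream(buf, batch.schema) as writer:
    writer.write_batch(batch)

# The shim emits the deprecation warning but still returns a usable reader.
with warnings.catch_warnings(record=True) as caught:
    warnings.simplefilter("always")
    reader = open_stream(io.BytesIO(buf.getvalue()))
print(caught[0].message)           # pyarrow.open_stream is deprecated, ...
print(reader.read_all().num_rows)  # 2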
Example 3
def to_iterator(self, raw_data_fd):
    # Variant that wraps the stream reader together with the column metadata.
    gzip_decoder = GzipFile(fileobj=raw_data_fd, mode='r')
    reader = open_stream(gzip_decoder)
    return ArrowChunkIterator(reader, self._meta)


def to_iterator(self, raw_data_fd, download_time):
    # Variant that hands the reader and the Arrow conversion context to the
    # native PyArrowChunkIterator instead of draining the reader eagerly.
    gzip_decoder = GzipFile(fileobj=raw_data_fd, mode='r')
    reader = open_stream(gzip_decoder)
    it = PyArrowChunkIterator(reader, self._arrow_context)
    return it
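The iterator classes themselves are not part of this listing. Below is a minimal sketch of what a row-wise wrapper over a record-batch reader could look like; SimpleArrowChunkIterator is a hypothetical stand-in, and the connector's real ArrowChunkIterator additionally converts values using the column metadata passed as its second argument:

class SimpleArrowChunkIterator:
    """Hypothetical stand-in: pivots columnar batches into row tuples."""

    def __init__(self, reader):
        self._reader = reader  # e.g. a pyarrow RecordBatchStreamReader

    def __iter__(self):
        for batch in self._reader:
            # Materialize each column, then zip the columns into rows.
            columns = [col.to_pylist() for col in batch.columns]
            yield from zip(*columns)

Handing the reader to the iterator, as both methods above do, keeps the work lazy: no batch is decompressed or decoded until the cursor actually pulls rows.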
    def chunk_info(self, data, use_ijson=False):
        is_dml = self._is_dml(data)
        self._query_result_format = data.get(u'queryResultFormat', u'json')

        if self._total_rowcount == -1 and not is_dml and data.get(u'total') \
                is not None:
            self._total_rowcount = data['total']

        self._description = []
        self._column_idx_to_name = {}
        self._column_converter = []

        self._return_row_method = self._arrow_return_row if \
            self._query_result_format == 'arrow' else self._json_return_row

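        # Build the cursor description and the per-column converters from the
        # server-supplied 'rowtype' metadata.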
        for idx, column in enumerate(data[u'rowtype']):
            self._column_idx_to_name[idx] = column[u'name']
            type_value = FIELD_NAME_TO_ID[column[u'type'].upper()]
            self._description.append(
                (column[u'name'], type_value, None, column[u'length'],
                 column[u'precision'], column[u'scale'], column[u'nullable']))
            self._column_converter.append(
                self._connection.converter.to_python_method(
                    column[u'type'].upper(), column))

        self._total_row_index = -1  # index of the last fetched row

        self._chunk_index = 0
        self._chunk_count = 0
        if self._query_result_format == 'arrow':
            # result as arrow chunk
            arrow_bytes = b64decode(data.get(u'rowsetBase64'))
            arrow_reader = open_stream(arrow_bytes)
            self._current_chunk_row = ArrowChunkIterator(
                arrow_reader, self._description)
        else:
            self._current_chunk_row = iter(data.get(u'rowset'))
            self._current_chunk_row_count = len(data.get(u'rowset'))

        if u'chunks' in data:
            chunks = data[u'chunks']
            self._chunk_count = len(chunks)
            logger.debug(u'number of chunks=%s', self._chunk_count)
            # prepare the downloader for further fetch
            qrmk = data[u'qrmk'] if u'qrmk' in data else None
            chunk_headers = None
            if u'chunkHeaders' in data:
                chunk_headers = {}
                for header_key, header_value in data[u'chunkHeaders'].items():
                    chunk_headers[header_key] = header_value
                    logger.debug(u'added chunk header: key=%s, value=%s',
                                 header_key, header_value)

            logger.debug(u'qrmk=%s', qrmk)
            self._chunk_downloader = self._connection._chunk_downloader_class(
                chunks,
                self._connection,
                self,
                qrmk,
                chunk_headers,
                query_result_format=self._query_result_format,
                prefetch_threads=self._connection.client_prefetch_threads,
                use_ijson=use_ijson)

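        # For DML, affected-row counts arrive as specially named columns in
        # the first row of the rowset; sum them to get the total row count.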
        if is_dml:
            updated_rows = 0
            for idx, desc in enumerate(self._description):
                if desc[0] in (
                        u'number of rows updated',
                        u'number of multi-joined rows updated',
                        u'number of rows deleted') or \
                        desc[0].startswith(u'number of rows inserted'):
                    updated_rows += int(data[u'rowset'][0][idx])
            if self._total_rowcount == -1:
                self._total_rowcount = updated_rows
            else:
                self._total_rowcount += updated_rows
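The arrow branch of chunk_info() can be exercised in isolation. In this sketch the encode step merely fabricates the base64 payload that the server would normally supply in the 'rowsetBase64' field:

from base64 import b64decode, b64encode
import io

import pyarrow as pa
import pyarrow.ipc as ipc

# Fabricate a base64-encoded Arrow IPC stream, like a 'rowsetBase64' value.
table = pa.table({"n": [10, 20]})
sink = io.BytesIO()
with ipc.new_stream(sink, table.schema) as writer:
    writer.write_table(table)
rowset_base64 = b64encode(sink.getvalue())

# Mirror the arrow branch above: decode, then open the IPC stream.
arrow_bytes = b64decode(rowset_base64)
arrow_reader = ipc.open_stream(arrow_bytes)
print(arrow_reader.read_all().to_pydict())  # {'n': [10, 20]}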