def to_iterator(self, raw_data_fd, download_time):
    # Decompress the gzip-encoded chunk (GzipFile from the stdlib gzip module)
    # and feed every Arrow record batch into the iterator.
    gzip_decoder = GzipFile(fileobj=raw_data_fd, mode='r')
    reader = open_stream(gzip_decoder)
    it = PyArrowChunkIterator()
    for rb in reader:
        it.add_record_batch(rb)
    return it
from pyarrow import ipc


def open_stream(source):
    """
    pyarrow.open_stream is deprecated since 0.12; use pyarrow.ipc.open_stream.
    """
    import warnings
    warnings.warn("pyarrow.open_stream is deprecated, please use "
                  "pyarrow.ipc.open_stream")
    return ipc.open_stream(source)
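For context, a minimal self-contained sketch (not connector code) of the pyarrow IPC stream API that the shim above delegates to: it writes one record batch to an in-memory stream and reads it back with pyarrow.ipc.open_stream, iterating over the returned record batches the same way to_iterator does. The function name demo_roundtrip is illustrative only.

# Sketch of the pyarrow.ipc stream API used by open_stream()/to_iterator().
import io

import pyarrow as pa
from pyarrow import ipc


def demo_roundtrip():
    batch = pa.RecordBatch.from_arrays([pa.array([1, 2, 3])], names=['n'])

    # Write one record batch into an in-memory IPC stream.
    sink = io.BytesIO()
    writer = ipc.new_stream(sink, batch.schema)
    writer.write_batch(batch)
    writer.close()

    # Read it back; the reader is iterable over RecordBatch objects,
    # which is how to_iterator() consumes a downloaded chunk.
    reader = ipc.open_stream(sink.getvalue())
    for rb in reader:
        print(rb.num_rows, rb.schema.names)


if __name__ == '__main__':
    demo_roundtrip()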
def to_iterator(self, raw_data_fd):
    gzip_decoder = GzipFile(fileobj=raw_data_fd, mode='r')
    reader = open_stream(gzip_decoder)
    return ArrowChunkIterator(reader, self._meta)
def to_iterator(self, raw_data_fd, download_time):
    gzip_decoder = GzipFile(fileobj=raw_data_fd, mode='r')
    reader = open_stream(gzip_decoder)
    it = PyArrowChunkIterator(reader, self._arrow_context)
    return it
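For illustration only: the connector's PyArrowChunkIterator/ArrowChunkIterator classes are not shown here, so the stand-in below is an assumption about the behavior they provide, a pure-Python iterator that turns a RecordBatchStreamReader into a stream of row tuples. The class name SimpleArrowRowIterator is hypothetical and not part of the connector.

# Hypothetical pure-Python stand-in for the row iterator built from a
# record-batch stream; not the connector's actual implementation.
class SimpleArrowRowIterator:
    def __init__(self, reader):
        self._reader = reader   # pyarrow RecordBatchStreamReader
        self._rows = iter(())   # rows of the record batch currently in flight

    def __iter__(self):
        return self

    def __next__(self):
        while True:
            try:
                return next(self._rows)
            except StopIteration:
                # Current batch exhausted: pull the next record batch
                # (read_next_batch raises StopIteration at end of stream)
                # and expose it as Python row tuples.
                batch = self._reader.read_next_batch()
                columns = [col.to_pylist() for col in batch.columns]
                self._rows = iter(zip(*columns))

Usage would be "for row in SimpleArrowRowIterator(reader): ...", yielding one tuple per result row.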
def chunk_info(self, data, use_ijson=False):
    is_dml = self._is_dml(data)
    self._query_result_format = data.get(u'queryResultFormat', u'json')
    if self._total_rowcount == -1 and not is_dml and data.get(u'total') \
            is not None:
        self._total_rowcount = data['total']

    self._description = []
    self._column_idx_to_name = {}
    self._column_converter = []
    self._return_row_method = self._arrow_return_row if \
        self._query_result_format == 'arrow' else self._json_return_row

    for idx, column in enumerate(data[u'rowtype']):
        self._column_idx_to_name[idx] = column[u'name']
        type_value = FIELD_NAME_TO_ID[column[u'type'].upper()]
        self._description.append(
            (column[u'name'],
             type_value,
             None,
             column[u'length'],
             column[u'precision'],
             column[u'scale'],
             column[u'nullable']))
        self._column_converter.append(
            self._connection.converter.to_python_method(
                column[u'type'].upper(), column))

    self._total_row_index = -1  # last fetched number of rows

    self._chunk_index = 0
    self._chunk_count = 0
    if self._query_result_format == 'arrow':
        # result as arrow chunk
        arrow_bytes = b64decode(data.get(u'rowsetBase64'))
        arrow_reader = open_stream(arrow_bytes)
        self._current_chunk_row = ArrowChunkIterator(
            arrow_reader, self._description)
    else:
        self._current_chunk_row = iter(data.get(u'rowset'))
    self._current_chunk_row_count = len(data.get(u'rowset'))

    if u'chunks' in data:
        chunks = data[u'chunks']
        self._chunk_count = len(chunks)
        logger.debug(u'chunk size=%s', self._chunk_count)
        # prepare the downloader for further fetch
        qrmk = data[u'qrmk'] if u'qrmk' in data else None
        chunk_headers = None
        if u'chunkHeaders' in data:
            chunk_headers = {}
            for header_key, header_value in data[u'chunkHeaders'].items():
                chunk_headers[header_key] = header_value
                logger.debug(u'added chunk header: key=%s, value=%s',
                             header_key, header_value)

        logger.debug(u'qrmk=%s', qrmk)
        self._chunk_downloader = self._connection._chunk_downloader_class(
            chunks, self._connection, self, qrmk, chunk_headers,
            query_result_format=self._query_result_format,
            prefetch_threads=self._connection.client_prefetch_threads,
            use_ijson=use_ijson)

    if is_dml:
        updated_rows = 0
        for idx, desc in enumerate(self._description):
            if desc[0] in (
                    u'number of rows updated',
                    u'number of multi-joined rows updated',
                    u'number of rows deleted') or \
                    desc[0].startswith(u'number of rows inserted'):
                updated_rows += int(data[u'rowset'][0][idx])
        if self._total_rowcount == -1:
            self._total_rowcount = updated_rows
        else:
            self._total_rowcount += updated_rows