def read_row_group(self, i, columns=None, nthreads=None,
                   use_threads=True, use_pandas_metadata=False):
    """
    Load one row group of the Parquet file as a table.

    Parameters
    ----------
    i : int
        Position of the row group to read; forwarded unchanged to the
        underlying reader.
    columns : list, optional
        When given, restricts the read to these columns. A name may be
        the prefix of a nested field, so 'a' also selects 'a.b', 'a.c'
        and 'a.d.e'. With None, no column restriction is requested.
    nthreads : optional
        Legacy threading argument. It is combined with ``use_threads``
        by ``_deprecate_nthreads`` to obtain the effective flag.
    use_threads : boolean, default True
        Read columns using multiple threads.
    use_pandas_metadata : boolean, default False
        If True and the file carries custom pandas schema metadata,
        the index columns are loaded as well.

    Returns
    -------
    pyarrow.table.Table
        The row group's contents as a table (of columns).
    """
    # Fold the legacy ``nthreads`` argument into a single threading flag.
    use_threads = _deprecate_nthreads(use_threads, nthreads)

    # Translate the requested column names into the indices the
    # low-level reader expects.
    selected = self._get_column_indices(
        columns,
        use_pandas_metadata=use_pandas_metadata,
    )

    return self.reader.read_row_group(
        i,
        column_indices=selected,
        use_threads=use_threads,
    )
def read_table(source, columns=None, use_threads=True, metadata=None,
               use_pandas_metadata=False, nthreads=None):
    """
    Read Parquet data from ``source``.

    Path-like sources are read through the filesystem object resolved
    from the path; any other source is wrapped in a ``ParquetFile``.

    Parameters
    ----------
    source
        Either a path-like value (as decided by ``_is_path_like``) or
        any object accepted by ``ParquetFile``.
    columns : optional
        Column selection, forwarded to the underlying read call.
    use_threads : boolean, default True
        Threading flag, forwarded after being reconciled with
        ``nthreads``.
    metadata : optional
        Forwarded to ``fs.read_parquet`` or to the ``ParquetFile``
        constructor.
    use_pandas_metadata : boolean, default False
        Forwarded to the underlying read call.
    nthreads : optional
        Legacy threading argument. It is combined with ``use_threads``
        by ``_deprecate_nthreads`` to obtain the effective flag.

    Returns
    -------
    The result of the underlying read call (``fs.read_parquet`` for
    path-like sources, ``ParquetFile.read`` otherwise).
    """
    # Options common to both read paths.
    read_options = {
        'columns': columns,
        'use_threads': _deprecate_nthreads(use_threads, nthreads),
        'use_pandas_metadata': use_pandas_metadata,
    }

    if not _is_path_like(source):
        # Not a path: let ParquetFile handle the source directly.
        parquet_file = ParquetFile(source, metadata=metadata)
        return parquet_file.read(**read_options)

    # Path-like: delegate to the filesystem implied by the path.
    filesystem = _get_fs_from_path(source)
    return filesystem.read_parquet(source, metadata=metadata, **read_options)