Ejemplo n.º 1
0
    def read_row_group(self,
                       i,
                       columns=None,
                       nthreads=None,
                       use_threads=True,
                       use_pandas_metadata=False):
        """
        Load one row group of the Parquet file as a table.

        Parameters
        ----------
        i : object
            Selector of the row group; handed unchanged to the underlying
            reader as its first positional argument (presumably the row
            group's integer index -- confirm against the reader).
        columns : list
            When not None, restricts the read to these columns. A name may
            be the prefix of a nested field, e.g. 'a' selects 'a.b', 'a.c',
            and 'a.d.e'.
        nthreads : optional
            Legacy argument; it is combined with ``use_threads`` by
            ``_deprecate_nthreads`` and the result is what the reader sees.
        use_threads : boolean, default True
            Perform multi-threaded column reads.
        use_pandas_metadata : boolean, default False
            If True and the file carries custom pandas schema metadata,
            the index columns are loaded as well.

        Returns
        -------
        pyarrow.table.Table
            The row group's content as a table (of columns).
        """
        # Reconcile the legacy ``nthreads`` argument with ``use_threads``
        # before anything else touches the threading setting.
        threaded = _deprecate_nthreads(use_threads, nthreads)

        # Translate the requested column names into the index form the
        # low-level reader works with.
        selected_indices = self._get_column_indices(
            columns, use_pandas_metadata=use_pandas_metadata)

        return self.reader.read_row_group(
            i, column_indices=selected_indices, use_threads=threaded)
Ejemplo n.º 2
0
def read_table(source,
               columns=None,
               use_threads=True,
               metadata=None,
               use_pandas_metadata=False,
               nthreads=None):
    """
    Read a Parquet source and return its content.

    A path-like ``source`` (as judged by ``_is_path_like``) is read through
    the filesystem object that ``_get_fs_from_path`` resolves for it; any
    other ``source`` is wrapped in a ``ParquetFile`` and read directly.

    Parameters
    ----------
    source : object
        Either something path-like or any object ``ParquetFile`` accepts.
    columns : list, optional
        Forwarded as ``columns`` to the read call.
    use_threads : boolean, default True
        Forwarded as ``use_threads`` to the read call, after being
        reconciled with ``nthreads``.
    metadata : optional
        Forwarded to ``fs.read_parquet`` for path-like sources, or to the
        ``ParquetFile`` constructor otherwise.
    use_pandas_metadata : boolean, default False
        Forwarded as ``use_pandas_metadata`` to the read call.
    nthreads : optional
        Legacy argument; combined with ``use_threads`` by
        ``_deprecate_nthreads``.

    Returns
    -------
    object
        Whatever ``fs.read_parquet`` or ``ParquetFile.read`` returns
        (presumably a pyarrow Table -- confirm against those callees).
    """
    threaded = _deprecate_nthreads(use_threads, nthreads)

    # Anything that is not path-like is opened directly as a Parquet file.
    if not _is_path_like(source):
        parquet_file = ParquetFile(source, metadata=metadata)
        return parquet_file.read(
            columns=columns,
            use_threads=threaded,
            use_pandas_metadata=use_pandas_metadata)

    # Path-like input: let the matching filesystem perform the read.
    filesystem = _get_fs_from_path(source)
    return filesystem.read_parquet(
        source,
        columns=columns,
        use_threads=threaded,
        metadata=metadata,
        use_pandas_metadata=use_pandas_metadata)