def _read_parquet_row_group(fs, fn, index, columns, rg, series, categories,
                            schema, cs, dt, scheme, storage_name_mapping,
                            *args):
    """Read one parquet row group from *fn* via *fs* into a pandas object.

    Column/index names are translated from their logical names to on-disk
    (storage) names before reading, then mapped back on the result.  Returns
    a Series when a single non-sequence column was requested, else a
    DataFrame.  Extra positional ``*args`` are accepted and ignored.
    """
    from fastparquet.api import _pre_allocate
    from fastparquet.core import read_row_group_file

    # Invert the mapping: logical name -> storage (on-disk) name.
    to_storage = {logical: stored
                  for stored, logical in storage_name_mapping.items()}

    if not isinstance(columns, (tuple, list)):
        # A bare column was passed: wrap it and remember to return a Series.
        columns, series = [columns], True

    if index:
        (index,) = index  # index arrives as a one-element sequence
        if index not in columns:
            columns = columns + [index]

    # Translate everything to storage names for the actual read.
    columns = [to_storage.get(c, c) for c in columns]
    index = to_storage.get(index, index)

    df, views = _pre_allocate(rg.num_rows, columns, categories, index, cs, dt)
    read_row_group_file(fn, rg, columns, categories, schema, cs,
                        open=fs.open, assign=views, scheme=scheme)

    # Map index / column labels back to their logical names on the result.
    if df.index.nlevels == 1:
        if index:
            df.index.name = storage_name_mapping.get(index, index)
    elif index:
        df.index.names = [storage_name_mapping.get(n, n) for n in index]
    df.columns = [storage_name_mapping.get(c, c)
                  for c in columns if c != index]

    return df[df.columns[0]] if series else df
def _read_parquet_row_group(open, fn, index, columns, rg, series, categories,
                            schema, cs, dt, scheme, storage_name_mapping,
                            *args):
    """Read one parquet row group from *fn* into a pandas object.

    Like the filesystem-based variant, but takes an ``open`` callable
    directly (note: the parameter shadows the builtin ``open``; kept for
    caller compatibility).  Column/index names are translated to storage
    names for the read and back to logical names on the result.  Returns a
    Series when a single non-sequence column was requested, else a
    DataFrame.  Extra positional ``*args`` are accepted and ignored.
    """
    from fastparquet.api import _pre_allocate
    from fastparquet.core import read_row_group_file

    # Invert the mapping: logical name -> storage (on-disk) name.
    to_storage = {logical: stored
                  for stored, logical in storage_name_mapping.items()}

    if not isinstance(columns, (tuple, list)):
        # A bare column was passed: wrap it and remember to return a Series.
        columns, series = [columns], True

    if index:
        (index,) = index  # index arrives as a one-element sequence
        if index not in columns:
            columns = columns + [index]

    # Translate everything to storage names for the actual read.
    columns = [to_storage.get(c, c) for c in columns]
    index = to_storage.get(index, index)

    df, views = _pre_allocate(rg.num_rows, columns, categories, index, cs, dt)
    read_row_group_file(fn, rg, columns, categories, schema, cs,
                        open=open, assign=views, scheme=scheme)

    # Map index / column labels back to their logical names on the result.
    if df.index.nlevels == 1:
        if index:
            df.index.name = storage_name_mapping.get(index, index)
    elif index:
        df.index.names = [storage_name_mapping.get(n, n) for n in index]
    df.columns = [storage_name_mapping.get(c, c)
                  for c in columns if c != index]

    return df[df.columns[0]] if series else df
def _read_parquet_row_group(open, fn, index, columns, rg, series, categories,
                            schema, cs, dt, *args):
    """Read one parquet row group from *fn* into a pandas object.

    Relies on module-level ``_pre_allocate`` and ``read_row_group_file``.
    Returns a Series when a single non-sequence column was requested,
    otherwise a DataFrame.  Extra positional ``*args`` are ignored.
    """
    if not isinstance(columns, (tuple, list)):
        # A bare column was passed: wrap it and remember to return a Series.
        columns, series = (columns,), True

    if index and index not in columns:
        # Preserve the container type (tuple vs list) when appending.
        columns = columns + type(columns)([index])

    df, views = _pre_allocate(rg.num_rows, columns, categories, index, cs, dt)
    read_row_group_file(fn, rg, columns, categories, schema, cs,
                        open=open, assign=views)

    return df[df.columns[0]] if series else df
def _read_parquet_row_group(open, fn, index, columns, rg, series, categories,
                            helper, cs, dt, *args):
    """Read one parquet row group from *fn* into a pandas object.

    Variant taking a ``helper`` (schema helper) argument; relies on
    module-level ``_pre_allocate`` and ``read_row_group_file``.  Returns a
    Series when a single non-sequence column was requested, otherwise a
    DataFrame.  Extra positional ``*args`` are ignored.
    """
    if not isinstance(columns, (tuple, list)):
        # A bare column was passed: wrap it and remember to return a Series.
        columns, series = (columns,), True

    if index and index not in columns:
        # Preserve the container type (tuple vs list) when appending.
        columns = columns + type(columns)([index])

    df, views = _pre_allocate(rg.num_rows, columns, categories, index, cs, dt)
    read_row_group_file(fn, rg, columns, categories, helper, cs,
                        open=open, assign=views)

    return df[df.columns[0]] if series else df
def _read_parquet_row_group(open, fn, index, columns, rg, series, categories,
                            schema, cs, dt, scheme, *args):
    """Read one parquet row group from *fn* into a pandas object.

    Variant that imports its fastparquet helpers locally and forwards the
    file-layout ``scheme``.  Returns a Series when a single non-sequence
    column was requested, otherwise a DataFrame.  Extra positional
    ``*args`` are ignored.
    """
    from fastparquet.api import _pre_allocate
    from fastparquet.core import read_row_group_file

    if not isinstance(columns, (tuple, list)):
        # A bare column was passed: wrap it and remember to return a Series.
        columns, series = (columns,), True

    if index and index not in columns:
        # Preserve the container type (tuple vs list) when appending.
        columns = columns + type(columns)([index])

    df, views = _pre_allocate(rg.num_rows, columns, categories, index, cs, dt)
    read_row_group_file(fn, rg, columns, categories, schema, cs,
                        open=open, assign=views, scheme=scheme)

    return df[df.columns[0]] if series else df