def read_table(source, columns=None, memory_map=True):
    """
    Read a pyarrow.Table from Feather format

    Parameters
    ----------
    source : str file path, or file-like object
    columns : sequence, optional
        Only read a specific set of columns. If not provided, all columns
        are read. Must consist either exclusively of ``int`` column indices
        or exclusively of ``str`` column names. The returned table follows
        the requested order and repeats duplicated entries.
    memory_map : boolean, default True
        Use memory mapping when opening file on disk

    Returns
    -------
    table : pyarrow.Table

    Raises
    ------
    TypeError
        If ``columns`` mixes types or contains anything other than
        ``int`` / ``str`` entries.
    """
    reader = ext.FeatherReader()
    reader.open(source, use_memory_map=memory_map)

    if columns is None:
        return reader.read()

    # Exact type checks on purpose: bool (an int subclass) is not accepted.
    column_types = [type(column) for column in columns]
    by_index = all(t is int for t in column_types)
    if by_index:
        table = reader.read_indices(columns)
    elif all(t is str for t in column_types):
        table = reader.read_names(columns)
    else:
        column_type_names = [t.__name__ for t in column_types]
        raise TypeError("Columns must be indices or names. "
                        "Got columns {} of types {}"
                        .format(columns, column_type_names))

    # Feather v1 already respects the column selection
    if reader.version < 3:
        return table

    # Feather v2 reads with sorted / deduplicated selection.
    # Compare as lists so that tuples (or other sequences) that are already
    # sorted and unique take the fast path too.
    requested = list(columns)
    unique_sorted = sorted(set(requested))
    if unique_sorted == requested:
        return table

    # Follow the exact order / selection that was requested.
    if by_index:
        # `table` only holds the selected columns, so an index into the file
        # must be translated into its position within the sorted selection.
        position_of = {
            file_index: position
            for position, file_index in enumerate(unique_sorted)
        }
        keys = [position_of[file_index] for file_index in requested]
    else:
        keys = requested

    new_fields = [table.schema.field(key) for key in keys]
    new_schema = schema(new_fields, metadata=table.schema.metadata)
    new_columns = [table.column(key) for key in keys]
    return Table.from_arrays(new_columns, schema=new_schema)
def read(self, columns=None, nthreads=1):
    """
    Read the stored columns and convert them via ``Table.to_pandas``.

    Parameters
    ----------
    columns : iterable of column names, optional
        Names of the columns to keep. When omitted, every column is read.
        Names that do not match any stored column are ignored, and the
        result always follows the stored column order rather than the
        order given here.
    nthreads : int, default 1
        Forwarded unchanged to ``Table.to_pandas``.

    Returns
    -------
    The value returned by ``Table.to_pandas`` (presumably a
    pandas.DataFrame -- confirm against the Table implementation).
    """
    wanted = None if columns is None else set(columns)

    picked_names = []
    picked_columns = []
    for index in range(self.num_columns):
        column_name = self.get_column_name(index)
        # Skip columns that were not asked for; no filter means keep all.
        if wanted is not None and column_name not in wanted:
            continue
        picked_columns.append(self.get_column(index))
        picked_names.append(column_name)

    selection = Table.from_arrays(picked_columns, names=picked_names)
    return selection.to_pandas(nthreads=nthreads)
def read_table(self, columns=None):
    """
    Assemble a Table from the stored columns.

    Parameters
    ----------
    columns : iterable of column names, optional
        Names of the columns to include. When omitted, all columns are
        included. Unknown names are ignored, and columns appear in stored
        order regardless of the order given here.

    Returns
    -------
    table : Table
        Built with ``Table.from_arrays`` from the selected columns and
        their names.
    """
    requested = set(columns) if columns is not None else None

    arrays, labels = [], []
    for position in range(self.num_columns):
        label = self.get_column_name(position)
        keep = requested is None or label in requested
        if keep:
            arrays.append(self.get_column(position))
            labels.append(label)

    return Table.from_arrays(arrays, names=labels)