def test_from_rows(self):
    """`ColumnGroup.from_rows` uses the first row as the column names."""
    rows = [("colA", "colB"), (1, "a"), (2, "b"), (3, "c")]
    actual = ColumnGroup.from_rows((int, str), rows)

    expected = ColumnGroup([
        Column("colA", int, [1, 2, 3]),
        Column("colB", str, ["a", "b", "c"]),
    ])
    self.assertEqual(expected, actual)
def read_csv(csv_path: Union[str, Path],
             mcsv_path: Optional[Union[str, Path]] = None,
             nrows=100) -> Optional[Data]:
    """
    Read the first rows of a CSV file, using its MetaCSV file.

    * `csv_path` is the path of the CSV file (`str` or `Path`)
    * `mcsv_path` is the path of the MetaCSV file (`str` or `Path`); when
      `None`, `csv_path` with its suffix replaced by ".mcsv" is used
    * `nrows` is the maximum number of rows taken from the reader, the
      header row included; a negative value means "no limit"

    Return the `Data`, or `None` when the MetaCSV file does not exist (in
    that case, `missing_mcsv` is called with the CSV path).
    """
    if not isinstance(csv_path, Path):
        csv_path = Path(csv_path)
    if mcsv_path is None:
        mcsv_path = csv_path.with_suffix(".mcsv")
    elif not isinstance(mcsv_path, Path):
        # The signature accepts a `str`, but `is_file` below needs a `Path`.
        mcsv_path = Path(mcsv_path)

    if not mcsv_path.is_file():
        missing_mcsv(csv_path)  # util command to open a window
        return None

    with mcsv.open_csv(csv_path, "r", mcsv_path) as mcsv_reader:
        if nrows >= 0:
            reader = islice(mcsv_reader, nrows)
        else:
            reader = mcsv_reader

        # The first row taken from the reader is the header.
        header = [to_standard(n) for n in next(reader)]
        # `zip(..., *reader)` transposes the remaining rows: each tuple is
        # (name, description, value of row 1, value of row 2, ...).
        column_group = ColumnGroup([
            Column(name, description, values)
            for name, description, *values in zip(
                header, mcsv_reader.descriptions, *reader)
        ])
        return Data(
            column_group,
            DataSource.create(to_standard(csv_path.stem), csv_path,
                              mcsv_reader.meta_csv_data))
def data_from_rows(col_types, rows):
    """
    Build a `Data` from rows, the first row holding the column names.

    * `col_types` gives one type per column
    * `rows` are the rows: names first, then the values

    The second argument given to `Data` is `None`.
    """
    by_column = list(zip(*rows))
    built = []
    for col_type, cells in zip(col_types, by_column):
        name, values = cells[0], cells[1:]
        built.append(Column(name, col_type, values))
    return Data(ColumnGroup(built), None)
def update(self, func, col_name=None, col_type=None):
    """
    Replace the values of one column by their image under a function.

    Syntax: `data[x].update(func)`

    * `x` is an index
    * `func` is a function of `data[x]` (use numeric indices)
    * `col_name` is the new name of the column; the current name is kept
      when it is `None`
    * `col_type` is the new type of the column; when it is `None`, the
      type is obtained from `_get_new_col_type(func, current type)`

    >>> test_data = original_test_data.copy()
    >>> print(test_data)  # doctest: +NORMALIZE_WHITESPACE
    A B C D
    1 3 2 4
    5 2 2 7
    3 4 7 8
    >>> test_data[1].update(lambda x: x*3)
    >>> print(test_data)  # doctest: +NORMALIZE_WHITESPACE
    A B C D
    1 9 2 4
    5 6 2 7
    3 12 7 8
    """
    assert len(self._indices) == 1
    position = self._indices[0]
    current = self._data_column_group[position]

    new_type = (col_type if col_type is not None
                else self._get_new_col_type(func, current.col_type))
    new_name = current.name if col_name is None else col_name

    group_columns = self._data_column_group.columns
    new_values = list(map(func, current.col_values))
    group_columns[position] = Column(new_name, new_type, new_values)
def _append_other_columns_and_put_rows(self, other_handle: "DataHandle",
                                       new_rows):
    """
    Replace the columns of this handle by its own columns followed by the
    columns of `other_handle`, filled with `new_rows`.

    * `other_handle` is the handle whose columns are appended
    * `new_rows` are the rows put in the resulting columns
    """
    groups = (self._data_column_group, other_handle._data_column_group)

    # Same names and types as the existing columns, but without values.
    empty_columns = []
    for group in groups:
        for col in group:
            empty_columns.append(Column(col.name, col.col_type, []))

    scratch_group = ColumnGroup(empty_columns)
    scratch_group.replace_rows(new_rows)
    self._data_column_group.replace_columns(scratch_group.columns)
def merge(self, func, col_name, col_type=None):
    """
    Create a new col by merging some columns. Those columns are consumed
    during the process.

    Syntax: `data[x].merge(func, col_name, [col_type])`

    * `x` is an index, slice or tuple of slices/indices of column_index
    * `func` is the function to apply to `x` values
    * `col_name` is the name of the new column
    * `col_type` is the type of the new column; when it is `None`, the
      type is obtained from `_get_new_col_type(func, Any)`

    The new column takes the place of the first selected column, and the
    other selected columns are removed. The selected indices do not need
    to be sorted.

    >>> test_data = original_test_data.copy()
    >>> print(test_data)  # doctest: +NORMALIZE_WHITESPACE
    A B C D
    1 3 2 4
    5 2 2 7
    3 4 7 8
    >>> test_data[:3].merge(lambda x, y, z: x+y+z, "E", int)
    >>> print(test_data)  # doctest: +NORMALIZE_WHITESPACE
    E D
    6 4
    9 7
    14 8
    """
    if col_type is None:
        col_type = self._get_new_col_type(func, Any)

    first = self._indices[0]
    # Every selected column but the first one is dropped. The first index
    # is excluded explicitly in case the selection repeats it.
    dropped = set(self._indices[1:]) - {first}
    columns = [
        col for i, col in enumerate(self._data_column_group.columns)
        if i not in dropped
    ]
    column = Column(
        col_name, col_type,
        [func(*vs) for vs in self._data_column_group.rows(self._indices)])

    # Dropping columns located before the first selected one shifts it to
    # the left: compute its position in the reduced list.
    target = first - sum(1 for i in dropped if i < first)
    columns[target] = column
    self._data_column_group.replace_columns(columns)
def create(self, func, col_name, col_type=None, index=None):
    """
    Add a column computed from existing columns; those columns are kept.

    Syntax: `data[x].create(func, col_name, [col_type, [index]])`

    * `x` is an index, slice or tuple of slices/indices of column_index
    * `func` is the function to apply to `x` values
    * `col_name` is the name of the new column
    * `col_type` is the type of the new column; when it is `None`, the
      type is obtained from `_get_new_col_type(func, Any)`
    * `index` is the index of the new column; the column is appended
      when it is `None`

    >>> test_data = original_test_data.copy()
    >>> print(test_data)  # doctest: +NORMALIZE_WHITESPACE
    A B C D
    1 3 2 4
    5 2 2 7
    3 4 7 8
    >>> test_data[:3].create(lambda x, y, z: x+y+z, "E", int, 1)
    >>> print(test_data)  # doctest: +NORMALIZE_WHITESPACE
    A E B C D
    1 6 3 2 4
    5 9 2 2 7
    3 14 4 7 8
    """
    if col_type is None:
        col_type = self._get_new_col_type(func, Any)

    group = self._data_column_group
    all_columns = group.columns
    computed = [func(*row) for row in group.rows(self._indices)]
    new_column = Column(col_name, col_type, computed)

    if index is not None:
        all_columns.insert(index, new_column)
    else:
        all_columns.append(new_column)
    group.replace_columns(all_columns)
def test_col_eq(self):
    """Two columns with the same name, type and values are equal."""
    first = Column("colA", int, [1, 2, 3])
    second = Column("colA", int, [1, 2, 3])
    self.assertEqual(first, second)
description = TextFieldDescription.INSTANCE b.description_by_col_index(i, description) meta_csv_data = b.build() else: b = MetaCSVDataBuilder() cur_data = self._data_source.meta_csv_data b.encoding(cur_data.encoding) b.bom(cur_data.bom) b._dialect = cur_data.dialect for i, col in enumerate(self._column_group): description = self._data_source.get_description(col.col_info) b.description_by_col_index(i, description) meta_csv_data = b.build() return meta_csv_data if __name__ == "__main__": import doctest doctest.testmod( extraglobs={ 'original_test_data': Data( ColumnGroup([ Column("A", int, [1, 5, 3]), Column("B", int, [3, 2, 4]), Column("C", int, [2, 2, 7]), Column("D", int, [4, 7, 8]) ]), None) })