def group_by(self, key, key_name=None, key_type=None):
    """
    Split this table into one :class:`Table` per distinct group value and
    return the pieces as a :class:`.TableSet`.

    The :code:`key` can be either a column name or a function that computes
    the value to group by from a row. Every group value is passed through
    :code:`key_type.cast` before it is used as a group name.

    :param key:
        Either the name of a column from this table to group by, or a
        :class:`function` that takes a row and returns a value to group by.
    :param key_name:
        A name that describes the grouped properties. Defaults to the name
        of the column that was grouped on, or "group" if grouping with a
        key function. See :class:`.TableSet` for more.
    :param key_type:
        An instance of some subclass of :class:`.DataType`. If not provided
        it defaults to the data type of the grouped column, or to a
        :class:`.Text` instance when grouping with a key function.
    :returns:
        A :class:`.TableSet` built from the unique (cast) group values and
        the new :class:`Table` instances containing the grouped rows.
        Groups are handed over in order of first appearance.
    """
    # Imported here rather than at module level.
    # NOTE(review): presumably to avoid a circular import - confirm.
    from agate.tableset import TableSet

    if hasattr(key, '__call__'):
        key_name = key_name or 'group'
        key_type = key_type or Text()
        value_of = key
    else:
        column = self._columns[key]
        key_name = key_name or column.name
        key_type = key_type or column.data_type

        def value_of(row):
            return row[column.name]

    # Bucket rows by their cast group value, remembering first-seen order.
    buckets = OrderedDict()

    for row in self._rows:
        bucket_key = key_type.cast(value_of(row))
        buckets.setdefault(bucket_key, []).append(row)

    # Turn every bucket of rows into its own table.
    forked = OrderedDict(
        (bucket_key, self._fork(bucket_rows))
        for bucket_key, bucket_rows in buckets.items()
    )

    return TableSet(forked.values(), forked.keys(), key_name=key_name, key_type=key_type)
def group_by(self, key, key_name=None, key_type=None):
    """
    Split this table into one :class:`Table` per distinct group value and
    return the pieces as a :class:`.TableSet`.

    The :code:`key` can be either a column name or a function that computes
    the value to group by from a row. Group names are always coerced to
    text, regardless of the format of the input column.

    :param key:
        Either the name of a column from this table to group by, or a
        :class:`function` that takes a row and returns a value to group by.
    :param key_name:
        A name that describes the grouped properties. Defaults to the
        column name that was grouped on, or "group" if grouping with a key
        function. See :class:`.TableSet` for more.
    :param key_type:
        An instance of some subclass of :class:`.ColumnType`. It is
        forwarded unchanged to :class:`.TableSet`.
        NOTE(review): the previous docs said it defaults to
        :class:`.TextType`; that default is not applied here - confirm it
        is applied by :class:`.TableSet`.
    :returns:
        A :class:`.TableSet` mapping where the keys are the unique (text)
        values from the :code:`key` and the values are new :class:`Table`
        instances containing the grouped rows.
    :raises:
        :exc:`.ColumnDoesNotExistError` if :code:`key` is not callable and
        is not one of this table's column names.
    """
    if hasattr(key, '__call__'):
        key_name = key_name or 'group'
        value_of = key
    else:
        key_name = key_name or key

        try:
            column_index = self._column_names.index(key)
        except ValueError:
            raise ColumnDoesNotExistError(key)

        def value_of(row):
            return row[column_index]

    # Bucket rows by the text form of their group value, in first-seen order.
    buckets = OrderedDict()

    for row in self.rows:
        label = six.text_type(value_of(row))
        buckets.setdefault(label, []).append(row)

    # Turn every bucket of rows into its own table.
    tables = OrderedDict(
        (label, self._fork(bucket_rows))
        for label, bucket_rows in buckets.items()
    )

    return TableSet(tables, key_name=key_name, key_type=key_type)
def test_tableset_from_csv(self):
    """
    Load the ``examples/tableset`` directory through
    :meth:`TableSet.from_csv` and check the resulting column names, column
    types and the first three rows of ``table1``.
    """
    # The tester is passed by keyword: the second positional parameter of
    # ``from_csv`` is ``column_names``, so a positional tester would be
    # interpreted as the column names instead of the column types.
    tableset = TableSet.from_csv('examples/tableset', column_types=self.tester)

    self.assertSequenceEqual(tableset.column_names, ['letter', 'number'])
    self.assertSequenceEqual(tuple(map(type, tableset.column_types)), [Text, Number])

    self.assertEqual(len(tableset['table1'].columns), 2)

    self.assertSequenceEqual(tableset['table1'].rows[0], ['a', 1])
    self.assertSequenceEqual(tableset['table1'].rows[1], ['a', 3])
    self.assertSequenceEqual(tableset['table1'].rows[2], ['b', 2])
def test_tableset_from_csv(self):
    """
    Build a :class:`TableSet` from the ``examples/tableset`` directory and
    verify its column names, its column types and the leading rows of the
    table stored under ``'table1'``.
    """
    loaded = TableSet.from_csv('examples/tableset', column_types=self.tester)

    # Set-level metadata.
    self.assertSequenceEqual(loaded.column_names, ['letter', 'number'])

    type_classes = tuple(type(column_type) for column_type in loaded.column_types)
    self.assertSequenceEqual(type_classes, [Text, Number])

    # Contents of one member table.
    self.assertEqual(len(loaded['table1'].columns), 2)

    expected_rows = (['a', 1], ['a', 3], ['b', 2])

    for position, expected in enumerate(expected_rows):
        self.assertSequenceEqual(loaded['table1'].rows[position], expected)
def from_json(cls, path, column_names=None, column_types=None, keys=None, **kwargs):
    """
    Create a new :class:`TableSet` from a directory of JSON files or a
    single JSON object with key value (Table key and list of row objects)
    pairs for each :class:`Table`.

    When :code:`path` is a directory, every ``*.json`` file in it becomes
    one table named after the file with its extension removed.

    See :meth:`.Table.from_json` for additional details.

    :param path:
        Path to a directory containing JSON files, or a filepath/file-like
        object of a nested JSON file.
    :param column_names:
        Accepted for signature compatibility; not used by this method.
    :param column_types:
        See :meth:`Table.__init__`.
    :param keys:
        A list of keys of the top-level dictionaries for each file. If
        specified, length must be equal to number of JSON files in path.
        Keys are paired with files in the order :func:`glob.glob` returns
        them.
        NOTE(review): that order is not guaranteed to be sorted - confirm
        callers do not rely on a particular pairing.
    :param kwargs:
        Forwarded to :meth:`.Table.from_json` in the directory case, and to
        both :func:`json.load` and :meth:`.Table.from_object` in the
        single-file case.
    :returns:
        A :class:`TableSet` of the loaded tables.
    :raises IOError:
        If :code:`path` is a string that is neither an existing directory
        nor an existing file.
    :raises ValueError:
        If :code:`keys` is given and its length differs from the number of
        JSON files found in the directory.
    """
    # Imported here rather than at module level.
    # NOTE(review): presumably to avoid a circular import - confirm.
    from agate.tableset import TableSet

    path_is_string = isinstance(path, six.string_types)

    if path_is_string and not os.path.isdir(path) and not os.path.isfile(path):
        raise IOError('Specified path doesn\'t exist.')

    tables = OrderedDict()

    if path_is_string and os.path.isdir(path):
        filepaths = glob(os.path.join(path, '*.json'))

        if keys is not None and len(keys) != len(filepaths):
            raise ValueError('If specified, keys must have length equal to number of JSON files')

        for i, filepath in enumerate(filepaths):
            # Remove only the file extension. ``str.strip('.json')`` would
            # strip any of the characters ".", "j", "s", "o", "n" from BOTH
            # ends of the name (e.g. "notes.json" -> "te").
            name = os.path.splitext(os.path.basename(filepath))[0]

            if keys is not None:
                tables[name] = Table.from_json(filepath, keys[i], column_types=column_types, **kwargs)
            else:
                tables[name] = Table.from_json(filepath, column_types=column_types, **kwargs)
    else:
        # Single nested JSON document: either an open file-like object or
        # a path to a file on disk.
        if hasattr(path, 'read'):
            js = json.load(path, object_pairs_hook=OrderedDict, parse_float=Decimal, **kwargs)
        else:
            with open(path, 'r') as f:
                js = json.load(f, object_pairs_hook=OrderedDict, parse_float=Decimal, **kwargs)

        for key, value in js.items():
            tables[key] = Table.from_object(value, column_types=column_types, **kwargs)

    return TableSet(tables.values(), tables.keys())
def group_by(self, key):
    """
    Create a new :class:`Table` for each unique value and return them as a
    :class:`.TableSet`.

    The :code:`key` can be either a column name or a function that returns
    a value to group by. Note that group names will always be coerced to
    text, regardless of the format of the input column.

    :param key:
        Either the name of a column from this table to group by, or a
        :class:`function` that takes a row and returns a value to group by.
    :returns:
        A :class:`.TableSet` mapping where the keys are unique values from
        the :code:`key` and the values are new :class:`Table` instances
        containing the grouped rows. Groups are passed to the
        :class:`.TableSet` in the order in which they first appear.
    :raises:
        :exc:`.ColumnDoesNotExistError` if :code:`key` is not callable and
        is not one of this table's column names.
    """
    key_is_row_function = hasattr(key, '__call__')

    if not key_is_row_function:
        try:
            i = self._column_names.index(key)
        except ValueError:
            raise ColumnDoesNotExistError(key)

    groups = OrderedDict()

    for row in self.rows:
        if key_is_row_function:
            group_name = six.text_type(key(row))
        else:
            group_name = six.text_type(row[i])

        groups.setdefault(group_name, []).append(row)

    # Use an OrderedDict (not a plain dict) so the first-seen group order
    # collected above is not lost on interpreters whose dicts are unordered.
    output = OrderedDict()

    for group, rows in groups.items():
        output[group] = self._fork(rows)

    return TableSet(output)
def from_csv(cls, dir_path, column_names=None, column_types=None, row_names=None, header=True, **kwargs):
    """
    Create a new :class:`TableSet` from a directory of CSVs.

    Every ``*.csv`` file in the directory becomes one table named after the
    file with its extension removed.

    See :meth:`.Table.from_csv` for additional details.

    :param dir_path:
        Path to a directory full of CSV files. All CSV files in this
        directory will be loaded.
    :param column_names:
        See :meth:`Table.__init__`.
    :param column_types:
        See :meth:`Table.__init__`.
    :param row_names:
        See :meth:`Table.__init__`.
    :param header:
        See :meth:`Table.from_csv`.
    :param kwargs:
        Forwarded to :meth:`.Table.from_csv` for every file.
    :returns:
        A :class:`TableSet` of the loaded tables.
    :raises IOError:
        If :code:`dir_path` does not exist or is not a directory.
    """
    # Imported here rather than at module level.
    # NOTE(review): presumably to avoid a circular import - confirm.
    from agate.tableset import TableSet

    if not os.path.isdir(dir_path):
        raise IOError('Specified path doesn\'t exist or isn\'t a directory.')

    tables = OrderedDict()

    for path in glob(os.path.join(dir_path, '*.csv')):
        # Remove only the file extension. ``str.strip('.csv')`` would strip
        # any of the characters ".", "c", "s", "v" from BOTH ends of the
        # name (e.g. "classes.csv" -> "lasse").
        name = os.path.splitext(os.path.basename(path))[0]

        tables[name] = Table.from_csv(path, column_names, column_types, row_names=row_names, header=header, **kwargs)

    return TableSet(tables.values(), tables.keys())
def test_tableset_from_csv_invalid_dir(self):
    """
    :meth:`TableSet.from_csv` must raise :exc:`IOError` when called with
    the path ``'quack'``.
    """
    bad_path = 'quack'

    # NOTE(review): ``self.tester`` is passed positionally, so it binds to
    # whatever the second positional parameter of ``from_csv`` is - confirm
    # that is the intended slot.
    with self.assertRaises(IOError):
        TableSet.from_csv(bad_path, self.tester)
def test_tableset_from_csv_no_headers(self):
    """
    :meth:`TableSet.from_csv` must raise :exc:`ValueError` when the
    ``examples/tableset`` directory is loaded with ``header=False``.
    """
    source_dir = 'examples/tableset'

    # NOTE(review): ``self.tester`` is passed positionally, so it binds to
    # whatever the second positional parameter of ``from_csv`` is - confirm
    # that is the intended slot and that the ValueError comes from the
    # missing header rather than from a misplaced argument.
    with self.assertRaises(ValueError):
        TableSet.from_csv(source_dir, self.tester, header=False)