예제 #1
0
    def group_by(self, key, key_name=None, key_type=None):
        """
        Split this table into one :class:`Table` per distinct group value and
        return the pieces as a :class:`.TableSet`.

        Each row's group value is either looked up in the named column or
        computed by calling :code:`key` with the row. The value is then passed
        through :code:`key_type.cast` before being used as the group name.
        Groups appear in the order in which their first row is encountered.

        :param key: Either the name of a column of this table, or a callable
            that takes a row and returns the value to group by.
        :param key_name: A name describing the grouped property. Defaults to
            the name of the grouped column, or to "group" when :code:`key` is
            a callable.
        :param key_type: The data type instance used to cast group values.
            Defaults to the grouped column's data type, or to a new
            :class:`.Text` when :code:`key` is a callable.
        :returns: A :class:`.TableSet` built from the per-group tables and
            their group names.
        """
        from agate.tableset import TableSet

        uses_function = hasattr(key, '__call__')

        if not uses_function:
            column = self._columns[key]

            key_name = key_name or column.name
            key_type = key_type or column.data_type
        else:
            key_name = key_name or 'group'
            key_type = key_type or Text()

        # Bucket the rows by their cast group value, remembering the order in
        # which each value was first seen.
        buckets = OrderedDict()

        for row in self._rows:
            raw_value = key(row) if uses_function else row[column.name]
            label = key_type.cast(raw_value)

            buckets.setdefault(label, []).append(row)

        # Turn each bucket of rows into a table of its own.
        tables = OrderedDict(
            (label, self._fork(members)) for label, members in buckets.items()
        )

        return TableSet(
            tables.values(),
            tables.keys(),
            key_name=key_name,
            key_type=key_type
        )
예제 #2
0
파일: table.py 프로젝트: fagan2888/agate
    def group_by(self, key, key_name=None, key_type=None):
        """
        Split this table into one :class:`Table` per distinct group value and
        return the pieces as a :class:`.TableSet`.

        Each row's group value is either read from the named column or
        computed by calling :code:`key` with the row. Group names are always
        coerced to text, whatever the type of the underlying value. Groups
        appear in the order in which their first row is encountered.

        :param key: Either the name of a column of this table, or a callable
            that takes a row and returns the value to group by.
        :param key_name: A name describing the grouped property. Defaults to
            :code:`key` itself when grouping by column name, or to "group"
            when :code:`key` is a callable.
        :param key_type: Forwarded unchanged to the :class:`.TableSet`
            constructor.
        :returns: A :class:`.TableSet` mapping each group name to a new
            :class:`Table` containing that group's rows.
        :raises: :exc:`.ColumnDoesNotExistError` if :code:`key` is not
            callable and is not one of this table's column names.
        """
        uses_function = hasattr(key, '__call__')

        if not uses_function:
            key_name = key_name or key

            try:
                position = self._column_names.index(key)
            except ValueError:
                raise ColumnDoesNotExistError(key)
        else:
            key_name = key_name or 'group'

        # Bucket the rows by the text form of their group value.
        buckets = OrderedDict()

        for row in self.rows:
            raw_value = key(row) if uses_function else row[position]
            label = six.text_type(raw_value)

            buckets.setdefault(label, []).append(row)

        # Turn each bucket of rows into a table of its own.
        tables = OrderedDict(
            (label, self._fork(members)) for label, members in buckets.items()
        )

        return TableSet(tables, key_name=key_name, key_type=key_type)
예제 #3
0
    def test_tableset_from_csv(self):
        # Load the example directory, passing ``self.tester`` positionally.
        loaded = TableSet.from_csv('examples/tableset', self.tester)

        # The set as a whole exposes the shared column layout.
        self.assertSequenceEqual(loaded.column_names, ['letter', 'number'])

        actual_types = tuple(type(column_type) for column_type in loaded.column_types)
        self.assertSequenceEqual(actual_types, [Text, Number])

        # Spot-check the shape and leading rows of one member table.
        first_table = loaded['table1']
        self.assertEqual(len(first_table.columns), 2)

        expected_rows = (['a', 1], ['a', 3], ['b', 2])

        for index, expected in enumerate(expected_rows):
            self.assertSequenceEqual(first_table.rows[index], expected)
예제 #4
0
    def test_tableset_from_csv(self):
        # Load the example directory, passing ``self.tester`` by keyword.
        loaded = TableSet.from_csv('examples/tableset', column_types=self.tester)

        # The set as a whole exposes the shared column layout.
        self.assertSequenceEqual(loaded.column_names, ['letter', 'number'])

        actual_types = tuple(type(column_type) for column_type in loaded.column_types)
        self.assertSequenceEqual(actual_types, [Text, Number])

        # Spot-check the shape and leading rows of one member table.
        first_table = loaded['table1']
        self.assertEqual(len(first_table.columns), 2)

        expected_rows = (['a', 1], ['a', 3], ['b', 2])

        for index, expected in enumerate(expected_rows):
            self.assertSequenceEqual(first_table.rows[index], expected)
예제 #5
0
def from_json(cls, path, column_names=None, column_types=None, keys=None, **kwargs):
    """
    Create a new :class:`TableSet` from a directory of JSON files or a
    single JSON object with key value (Table key and list of row objects)
    pairs for each :class:`Table`.

    When :code:`path` is a directory, every :code:`*.json` file in it becomes
    one table named after the file (without its extension). Otherwise
    :code:`path` is read as a single JSON document whose top-level keys
    become the table names.

    See :meth:`.Table.from_json` for additional details.

    :param path:
        Path to a directory containing JSON files or filepath/file-like
        object of nested JSON file.
    :param column_names:
        Accepted but not used by this function.
    :param column_types:
        See :meth:`Table.__init__`.
    :param keys:
        A list of keys of the top-level dictionaries for each file. If
        specified, length must be equal to number of JSON files in path.
        Keys are paired with files in the order returned by :code:`glob`.
    :param kwargs:
        In directory mode, forwarded to :meth:`.Table.from_json`. In
        single-document mode, forwarded to both :func:`json.load` and
        :meth:`.Table.from_object`.
    :returns:
        A :class:`TableSet` built from the loaded tables and their names.
    :raises IOError:
        If :code:`path` is a string that is neither an existing directory
        nor an existing file.
    :raises ValueError:
        If :code:`keys` is given and its length differs from the number of
        JSON files found.
    """
    from agate.tableset import TableSet

    path_is_string = isinstance(path, six.string_types)

    if path_is_string and not os.path.isdir(path) and not os.path.isfile(path):
        raise IOError('Specified path doesn\'t exist.')

    tables = OrderedDict()

    if path_is_string and os.path.isdir(path):
        filepaths = glob(os.path.join(path, '*.json'))

        if keys is not None and len(keys) != len(filepaths):
            raise ValueError('If specified, keys must have length equal to number of JSON files')

        for i, filepath in enumerate(filepaths):
            # Drop only the extension. ``str.strip('.json')`` would instead
            # remove any of the characters ".", "j", "s", "o", "n" from both
            # ends of the name (e.g. "jason.json" -> "a").
            name = os.path.splitext(os.path.basename(filepath))[0]

            if keys is not None:
                tables[name] = Table.from_json(filepath, keys[i], column_types=column_types, **kwargs)
            else:
                tables[name] = Table.from_json(filepath, column_types=column_types, **kwargs)

    else:
        # Parse with OrderedDict so tables keep their order in the document,
        # and with Decimal so floats are not parsed as binary floats.
        if hasattr(path, 'read'):
            js = json.load(path, object_pairs_hook=OrderedDict, parse_float=Decimal, **kwargs)
        else:
            with open(path, 'r') as f:
                js = json.load(f, object_pairs_hook=OrderedDict, parse_float=Decimal, **kwargs)

        for key, value in js.items():
            tables[key] = Table.from_object(value, column_types=column_types, **kwargs)

    return TableSet(tables.values(), tables.keys())
예제 #6
0
    def group_by(self, key):
        """
        Create a new :class:`Table` for each unique group value and return
        them as a :class:`.TableSet`. The :code:`key` can be either a column
        name or a function that returns a value to group by.

        Note that group names will always be coerced to a string, regardless
        of the format of the input column. Groups are kept in the order in
        which their first row is encountered.

        :param key: Either the name of a column from this table to group by,
            or a :class:`function` that takes a row and returns a value to
            group by.
        :returns: A :class:`.TableSet` mapping where the keys are unique
            values from the :code:`key` and the values are new :class:`Table`
            instances containing the grouped rows.
        :raises: :exc:`.ColumnDoesNotExistError` if :code:`key` is not
            callable and is not one of this table's column names.
        """
        key_is_row_function = hasattr(key, '__call__')

        if not key_is_row_function:
            try:
                i = self._column_names.index(key)
            except ValueError:
                raise ColumnDoesNotExistError(key)

        groups = OrderedDict()

        for row in self.rows:
            if key_is_row_function:
                group_name = six.text_type(key(row))
            else:
                group_name = six.text_type(row[i])

            if group_name not in groups:
                groups[group_name] = []

            groups[group_name].append(row)

        # Use an ordered mapping so the first-seen group order collected
        # above is not lost on interpreters whose plain dict is unordered.
        output = OrderedDict()

        for group, rows in groups.items():
            output[group] = self._fork(rows)

        return TableSet(output)
예제 #7
0
def from_csv(cls,
             dir_path,
             column_names=None,
             column_types=None,
             row_names=None,
             header=True,
             **kwargs):
    """
    Create a new :class:`TableSet` from a directory of CSVs.

    Every :code:`*.csv` file in the directory becomes one table named after
    the file (without its extension).

    See :meth:`.Table.from_csv` for additional details.

    :param dir_path:
        Path to a directory full of CSV files. All CSV files in this
        directory will be loaded.
    :param column_names:
        See :meth:`Table.__init__`.
    :param column_types:
        See :meth:`Table.__init__`.
    :param row_names:
        See :meth:`Table.__init__`.
    :param header:
        See :meth:`Table.from_csv`.
    :param kwargs:
        Forwarded to :meth:`.Table.from_csv` for every file.
    :returns:
        A :class:`TableSet` built from the loaded tables and their names.
    :raises IOError:
        If :code:`dir_path` is not an existing directory.
    """
    from agate.tableset import TableSet

    if not os.path.isdir(dir_path):
        raise IOError('Specified path doesn\'t exist or isn\'t a directory.')

    tables = OrderedDict()

    for path in glob(os.path.join(dir_path, '*.csv')):
        # Drop only the extension. ``str.strip('.csv')`` would instead remove
        # any of the characters ".", "c", "s", "v" from both ends of the name
        # (e.g. "scores.csv" -> "ore").
        name = os.path.splitext(os.path.basename(path))[0]

        tables[name] = Table.from_csv(path,
                                      column_names,
                                      column_types,
                                      row_names=row_names,
                                      header=header,
                                      **kwargs)

    return TableSet(tables.values(), tables.keys())
예제 #8
0
 def test_tableset_from_csv_invalid_dir(self):
     # 'quack' is expected not to be a directory, so loading must fail.
     missing_dir = 'quack'

     with self.assertRaises(IOError):
         TableSet.from_csv(missing_dir, self.tester)
예제 #9
0
 def test_tableset_from_csv_no_headers(self):
     # Loading the example directory with header=False is expected to be
     # rejected with a ValueError.
     csv_dir = 'examples/tableset'

     with self.assertRaises(ValueError):
         TableSet.from_csv(csv_dir, self.tester, header=False)
예제 #10
0
 def test_tableset_from_csv_invalid_dir(self):
     # 'quack' is expected not to be a directory, so loading must fail.
     missing_dir = 'quack'

     with self.assertRaises(IOError):
         TableSet.from_csv(missing_dir, self.tester)
예제 #11
0
 def test_tableset_from_csv_no_headers(self):
     # Loading the example directory with header=False is expected to be
     # rejected with a ValueError.
     csv_dir = 'examples/tableset'

     with self.assertRaises(ValueError):
         TableSet.from_csv(csv_dir, self.tester, header=False)