Beispiel #1
0
    def from_json(cls, path, keys=None, **kwargs):
        """
        Create a new :class:`TableSet` from a directory of JSON files or a
        single JSON object with key value (Table key and list of row objects)
        pairs for each :class:`Table`.

        See :meth:`.Table.from_json` for additional details.

        When ``path`` is a directory, ``kwargs`` are forwarded to
        :meth:`.Table.from_json` for every file. Otherwise they are forwarded
        to :func:`json.load` when reading the nested JSON document.

        :param path:
            Path to a directory containing JSON files or filepath/file-like
            object of nested JSON file.
        :param keys:
            A list of keys of the top-level dictionaries for each file. If
            specified, length must be equal to number of JSON files in path.
        :raises IOError:
            If ``path`` is a string that names neither an existing directory
            nor an existing file.
        :raises ValueError:
            If ``keys`` is specified and its length differs from the number
            of ``*.json`` files found in the directory.
        :returns:
            A new :class:`TableSet` built from the loaded tables and their
            names.
        """
        path_is_string = isinstance(path, six.string_types)

        if path_is_string and not os.path.isdir(path) and not os.path.isfile(path):
            raise IOError('Specified path doesn\'t exist.')

        tables = OrderedDict()

        if path_is_string and os.path.isdir(path):
            filepaths = glob(os.path.join(path, '*.json'))

            if keys is not None and len(keys) != len(filepaths):
                raise ValueError(
                    'If specified, keys must have length equal to number of JSON files'
                )

            for i, filepath in enumerate(filepaths):
                # str.strip('.json') removes any of the characters ".jsno"
                # from both ends (e.g. "jason.json" -> "a"); splitext drops
                # only the extension.
                name = os.path.splitext(os.path.basename(filepath))[0]

                if keys is not None:
                    tables[name] = Table.from_json(filepath, keys[i], **kwargs)
                else:
                    tables[name] = Table.from_json(filepath, **kwargs)

        else:
            if hasattr(path, 'read'):
                js = json.load(path,
                               object_pairs_hook=OrderedDict,
                               parse_float=Decimal,
                               **kwargs)
            else:
                with open(path, 'r') as f:
                    js = json.load(f,
                                   object_pairs_hook=OrderedDict,
                                   parse_float=Decimal,
                                   **kwargs)

            for key, value in js.items():
                # Each nested value is re-serialized so it can be parsed by
                # Table.from_json as if it were its own document.
                output = StringIO(json.dumps(value))
                tables[key] = Table.from_json(output)

        return TableSet(tables.values(), tables.keys())
Beispiel #2
0
    def test_monkeypatch_shadow(self):
        """
        Check tables built before and after patching in ``TryPatchShadow``:
        ``columns`` is a ``MappedSequence`` on both, and reading ``foo`` on
        the table built after patching raises ``AttributeError``.
        """
        unpatched = Table([['blah']], ['foo'], [Text()])

        Table.monkeypatch(TryPatchShadow)

        patched = Table([['blah']], ['foo'], [Text()])

        for table in (unpatched, patched):
            self.assertIsInstance(table.columns, MappedSequence)

        # The comparison result is irrelevant; the attribute lookup itself is
        # what is expected to raise.
        with self.assertRaises(AttributeError):
            patched.foo == 'foo'
Beispiel #3
0
    def test_monkeypatch_shadow(self):
        """
        Check tables built before and after patching in ``TryPatchShadow``:
        ``columns`` is a ``MappedSequence`` on both, and reading ``foo`` on
        the table built after patching raises ``AttributeError``.
        """
        unpatched = Table([['blah']], [('foo', Text())])

        Table.monkeypatch(TryPatchShadow)

        patched = Table([['blah']], [('foo', Text())])

        for table in (unpatched, patched):
            self.assertIsInstance(table.columns, MappedSequence)

        # The comparison result is irrelevant; the attribute lookup itself is
        # what is expected to raise.
        with self.assertRaises(AttributeError):
            patched.foo == 'foo'
Beispiel #4
0
def from_json(cls, path, column_names=None, column_types=None, keys=None, **kwargs):
    """
    Create a new :class:`TableSet` from a directory of JSON files or a
    single JSON object with key value (Table key and list of row objects)
    pairs for each :class:`Table`.

    See :meth:`.Table.from_json` for additional details.

    When ``path`` is a directory, ``kwargs`` are forwarded to
    :meth:`.Table.from_json` for every file. Otherwise they are forwarded to
    both :func:`json.load` and :meth:`.Table.from_object`.

    :param path:
        Path to a directory containing JSON files or filepath/file-like
        object of nested JSON file.
    :param column_names:
        Accepted for signature compatibility; not used by this function.
    :param column_types:
        See :meth:`Table.__init__`.
    :param keys:
        A list of keys of the top-level dictionaries for each file. If
        specified, length must be equal to number of JSON files in path.
    :raises IOError:
        If ``path`` is a string that names neither an existing directory nor
        an existing file.
    :raises ValueError:
        If ``keys`` is specified and its length differs from the number of
        ``*.json`` files found in the directory.
    :returns:
        A new :class:`TableSet` built from the loaded tables and their names.
    """
    from agate.tableset import TableSet

    path_is_string = isinstance(path, six.string_types)

    if path_is_string and not os.path.isdir(path) and not os.path.isfile(path):
        raise IOError('Specified path doesn\'t exist.')

    tables = OrderedDict()

    if path_is_string and os.path.isdir(path):
        filepaths = glob(os.path.join(path, '*.json'))

        if keys is not None and len(keys) != len(filepaths):
            raise ValueError('If specified, keys must have length equal to number of JSON files')

        for i, filepath in enumerate(filepaths):
            # str.strip('.json') removes any of the characters ".jsno" from
            # both ends (e.g. "jason.json" -> "a"); splitext drops only the
            # extension.
            name = os.path.splitext(os.path.basename(filepath))[0]

            if keys is not None:
                tables[name] = Table.from_json(filepath, keys[i], column_types=column_types, **kwargs)
            else:
                tables[name] = Table.from_json(filepath, column_types=column_types, **kwargs)

    else:
        if hasattr(path, 'read'):
            js = json.load(path, object_pairs_hook=OrderedDict, parse_float=Decimal, **kwargs)
        else:
            with open(path, 'r') as f:
                js = json.load(f, object_pairs_hook=OrderedDict, parse_float=Decimal, **kwargs)

        for key, value in js.items():
            tables[key] = Table.from_object(value, column_types=column_types, **kwargs)

    return TableSet(tables.values(), tables.keys())
Beispiel #5
0
    def from_csv(cls, dir_path, column_names=None, column_types=None, row_names=None, header=True, **kwargs):
        """
        Create a new :class:`TableSet` from a directory of CSVs.

        Each table is named after its file with the extension removed.
        ``kwargs`` are forwarded to :meth:`.Table.from_csv`.

        See :meth:`.Table.from_csv` for additional details.

        :param dir_path:
            Path to a directory full of CSV files. All CSV files in this
            directory will be loaded.
        :param column_names:
            See :meth:`Table.__init__`.
        :param column_types:
            See :meth:`Table.__init__`.
        :param row_names:
            See :meth:`Table.__init__`.
        :param header:
            See :meth:`Table.from_csv`.
        :raises IOError:
            If ``dir_path`` is not an existing directory.
        :returns:
            A new :class:`TableSet` built from the loaded tables and their
            names.
        """
        if not os.path.isdir(dir_path):
            raise IOError('Specified path doesn\'t exist or isn\'t a directory.')

        tables = OrderedDict()

        for path in glob(os.path.join(dir_path, '*.csv')):
            # str.strip('.csv') removes any of the characters ".csv" from
            # both ends (e.g. "cars.csv" -> "ar"); splitext drops only the
            # extension.
            name = os.path.splitext(os.path.basename(path))[0]

            tables[name] = Table.from_csv(path, column_names, column_types, row_names=row_names, header=header, **kwargs)

        return TableSet(tables.values(), tables.keys())
Beispiel #6
0
    def from_csv(cls, dir_path, column_info, header=True, **kwargs):
        """
        Create a new :class:`TableSet` from a directory of CSVs. This method
        will use csvkit if it is available, otherwise it will use Python's
        builtin csv module.

        Each table is named after its file with the extension removed.

        ``kwargs`` will be passed through to :meth:`csv.reader`.

        If you are using Python 2 and not using csvkit, this method is not
        unicode-safe.

        :param dir_path: Path to a directory full of CSV files. All CSV files
            in this directory will be loaded.
        :param column_info: See :class:`.Table` constructor.
        :param header: If `True`, the first row of the CSV is assumed to contain
            headers and will be skipped.
        :raises IOError: If ``dir_path`` is not an existing directory.
        :returns: A new :class:`TableSet` built from the name-to-table mapping.
        """
        from agate.table import Table

        if not os.path.isdir(dir_path):
            raise IOError(
                'Specified path doesn\'t exist or isn\'t a directory.')

        tables = OrderedDict()

        for path in glob(os.path.join(dir_path, '*.csv')):
            # str.strip('.csv') removes any of the characters ".csv" from
            # both ends (e.g. "cars.csv" -> "ar"); splitext drops only the
            # extension.
            name = os.path.splitext(os.path.basename(path))[0]

            tables[name] = Table.from_csv(path, column_info, header=header, **kwargs)

        return TableSet(tables)
Beispiel #7
0
    def from_csv(cls, dir_path, column_names=None, column_types=None, row_names=None, header=True, **kwargs):
        """
        Create a new :class:`TableSet` from a directory of CSVs.

        Each table is named after its file with the extension removed.
        ``kwargs`` are forwarded to :meth:`.Table.from_csv`.

        See :meth:`.Table.from_csv` for additional details.

        :param dir_path:
            Path to a directory full of CSV files. All CSV files in this
            directory will be loaded.
        :param column_names:
            See :meth:`Table.__init__`.
        :param column_types:
            See :meth:`Table.__init__`.
        :param row_names:
            See :meth:`Table.__init__`.
        :param header:
            See :meth:`Table.from_csv`.
        :raises IOError:
            If ``dir_path`` is not an existing directory.
        :returns:
            A new :class:`TableSet` built from the loaded tables and their
            names.
        """
        if not os.path.isdir(dir_path):
            raise IOError('Specified path doesn\'t exist or isn\'t a directory.')

        tables = OrderedDict()

        for path in glob(os.path.join(dir_path, '*.csv')):
            # str.strip('.csv') removes any of the characters ".csv" from
            # both ends (e.g. "cars.csv" -> "ar"); splitext drops only the
            # extension.
            name = os.path.splitext(os.path.basename(path))[0]

            tables[name] = Table.from_csv(path, column_names, column_types, row_names=row_names, header=header, **kwargs)

        return TableSet(tables.values(), tables.keys())
Beispiel #8
0
def aggregate(self, aggregations):
    """
    Run a set of aggregations over the tables in this set and collect the
    results into a single new :class:`.Table`.

    :code:`aggregations` must be a sequence of two-part tuples: a
    :code:`new_column_name` and a :class:`.Aggregation` instance.

    The keys of this :class:`TableSet` (and of any nested TableSets) become
    the :code:`row_names` of the resulting table. See
    :meth:`.Table.__init__` for more details.

    :param aggregations:
        A list of tuples in the format
        :code:`(new_column_name, aggregation)`.
    :returns:
        A new :class:`.Table`.
    """
    names, types, rows, key_columns = _aggregate(self, aggregations)

    if len(key_columns) != 1:
        # Several key columns: each row is named by the tuple of its values
        # in those columns.
        def row_names(row):
            return tuple(row[column] for column in key_columns)
    else:
        # A single key column can be passed straight through.
        row_names = key_columns[0]

    return Table(rows, names, types, row_names=row_names)
Beispiel #9
0
    def from_csv(cls,
                 dir_path,
                 column_info,
                 row_names=None,
                 header=True,
                 **kwargs):
        """
        Create a new :class:`TableSet` from a directory of CSVs. This method
        will use csvkit if it is available, otherwise it will use Python's
        builtin csv module.

        Each table is named after its file with the extension removed.

        ``kwargs`` will be passed through to :meth:`csv.reader`.

        If you are using Python 2 and not using csvkit, this method is not
        unicode-safe.

        :param dir_path: Path to a directory full of CSV files. All CSV files
            in this directory will be loaded.
        :param column_info: A sequence of pairs of column names and types. The latter
            must be instances of :class:`.DataType`. Or, an instance of
            :class:`.TypeTester` to infer types.
        :param row_names: See :meth:`Table.__init__`.
        :param header: If `True`, the first row of the CSV is assumed to contain
            headers and will be skipped.
        :raises ValueError: If a :class:`.TypeTester` is given together with
            ``header=False``.
        :raises IOError: If ``dir_path`` is not an existing directory.
        :returns: A new :class:`TableSet` built from the loaded tables and
            their names.
        """
        use_inference = isinstance(column_info, TypeTester)

        if use_inference and not header:
            raise ValueError(
                'Can not apply TypeTester to a CSV without headers.')

        if not os.path.isdir(dir_path):
            raise IOError(
                'Specified path doesn\'t exist or isn\'t a directory.')

        tables = OrderedDict()
        has_inferred_columns = False

        for path in glob(os.path.join(dir_path, '*.csv')):
            # str.strip('.csv') removes any of the characters ".csv" from
            # both ends (e.g. "cars.csv" -> "ar"); splitext drops only the
            # extension.
            name = os.path.splitext(os.path.basename(path))[0]

            table = Table.from_csv(path,
                                   column_info,
                                   row_names=row_names,
                                   header=header,
                                   **kwargs)

            if use_inference and not has_inferred_columns:
                # After the first file, reuse its inferred names and types
                # for every remaining file instead of the TypeTester.
                column_info = tuple(zip(table.column_names,
                                        table.column_types))
                has_inferred_columns = True

            tables[name] = table

        return TableSet(tables.values(), tables.keys())
Beispiel #10
0
def merge(cls, tables, row_names=None, column_names=None):
    """
    Create a new table from a sequence of similar tables.

    This method will not carry over row names from the merged tables, but new
    row names can be specified with the :code:`row_names` argument.

    It is possible to limit the columns included in the new :class:`.Table`
    with :code:`column_names` argument. For example, to only include columns
    from a specific table, set :code:`column_names` equal to
    :code:`table.column_names`.

    :param tables:
        A sequence of :class:`.Table` instances. It is iterated twice, so it
        must not be a one-shot iterator.
    :param row_names:
        See :class:`.Table` for the usage of this parameter.
    :param column_names:
        A sequence of column names to include in the new :class:`.Table`. If
        not specified, all distinct column names from `tables` are included.
    :raises DataTypeError:
        If two tables share a column name but the column types are not of
        compatible classes.
    :returns:
        A new :class:`.Table`.
    """
    from agate.table import Table

    # Ordered so that columns appear in first-seen order across all tables.
    new_columns = OrderedDict()

    for table in tables:
        for column_name, column_type in zip(table.column_names, table.column_types):
            if column_names is not None and column_name not in column_names:
                continue

            if column_name in new_columns:
                if not isinstance(column_type, type(new_columns[column_name])):
                    raise DataTypeError('Tables contain columns with the same names, but different types.')
            else:
                new_columns[column_name] = column_type

    # Materialize as tuples: dict views (Python 3) never compare equal to a
    # table's column sequences, which silently disabled the fast path below,
    # and a concrete sequence is what gets handed to Row and Table.
    column_keys = tuple(new_columns.keys())
    column_types = tuple(new_columns.values())

    rows = []

    for table in tables:
        # Performance optimization for identical table structures
        if table.column_names == column_keys and table.column_types == column_types:
            rows.extend(table.rows)
        else:
            for row in table.rows:
                # Columns missing from this table are filled with None.
                data = [row.get(column_key, None) for column_key in column_keys]

                rows.append(Row(data, column_keys))

    return Table(rows, column_keys, column_types, row_names=row_names, _is_fork=True)
Beispiel #11
0
    def test_table_from_csv(self):
        """
        Load ``examples/test.csv`` with csvkit installed as the ``csv``
        module of ``agate.table`` and check names, types and row values.
        """
        import csvkit
        from agate import table as table_module

        table_module.csv = csvkit

        # Only the Python 2 code path passes an explicit encoding.
        extra = {'encoding': 'utf8'} if six.PY2 else {}
        loaded = Table.from_csv('examples/test.csv', self.tester, **extra)

        self.assertSequenceEqual(loaded.get_column_names(), ['one', 'two', 'three'])
        self.assertSequenceEqual(tuple(map(type, loaded.get_column_types())), [Number, Number, Text])

        self.assertEqual(len(loaded.columns), 3)

        expected_rows = ([1, 4, 'a'], [2, 3, 'b'], [None, 2, u'👍'])

        for index, expected in enumerate(expected_rows):
            self.assertSequenceEqual(loaded.rows[index], expected)
Beispiel #12
0
def print_structure(self, output=sys.stdout, max_rows=None):
    """
    Print this table's column names and types as a plain-text table.

    A two-column helper table (``column``, ``data_type``) is built and
    printed with :meth:`.Table.print_table`.

    :param output:
        The output to print to.
    :param max_rows:
        Passed through to :meth:`.Table.print_table` as ``max_rows``.
    :returns:
        Whatever :meth:`.Table.print_table` returns.
    """
    from agate.table import Table

    names = list(self._column_names)
    type_labels = [data_type.__class__.__name__ for data_type in self._column_types]
    structure_rows = zip(names, type_labels)

    text_type = Text()
    structure = Table(structure_rows, ['column', 'data_type'], [text_type, text_type])

    return structure.print_table(output=output, max_column_width=None, max_rows=max_rows)
Beispiel #13
0
def print_structure(self, output=sys.stdout):
    """
    Print this table's column names and types as a plain-text table.

    A two-column helper table (``column``, ``data_type``) is built and
    printed with :meth:`.Table.print_table`.

    :param output:
        The output to print to.
    :returns:
        Whatever :meth:`.Table.print_table` returns.
    """
    from agate.table import Table

    names = list(self.column_names)
    type_labels = [data_type.__class__.__name__ for data_type in self.column_types]
    structure_rows = zip(names, type_labels)

    text_type = Text()
    structure = Table(structure_rows, ['column', 'data_type'], [text_type, text_type])

    return structure.print_table(output=output, max_column_width=None)
Beispiel #14
0
    def test_table_from_csv(self):
        """
        Load ``examples/test.csv`` with ``self.tester`` and check column
        names, column types, column count and the first three rows.
        """
        loaded = Table.from_csv('examples/test.csv', self.tester)

        self.assertSequenceEqual(loaded.column_names, ['one', 'two', 'three'])
        self.assertSequenceEqual(tuple(map(type, loaded.column_types)), [Number, Number, Text])

        self.assertEqual(len(loaded.columns), 3)

        expected_rows = ([1, 4, 'a'], [2, 3, 'b'], [None, 2, u'👍'])

        for index, expected in enumerate(expected_rows):
            self.assertSequenceEqual(loaded.rows[index], expected)
Beispiel #15
0
    def test_table_from_csv(self):
        """
        Load ``examples/test.csv`` with ``self.tester`` passed as
        ``column_types`` and check column names, column types, column count
        and the first three rows.
        """
        loaded = Table.from_csv('examples/test.csv', column_types=self.tester)

        self.assertSequenceEqual(loaded.column_names, ['one', 'two', 'three'])
        self.assertSequenceEqual(tuple(map(type, loaded.column_types)), [Number, Number, Text])

        self.assertEqual(len(loaded.columns), 3)

        expected_rows = ([1, 4, 'a'], [2, 3, 'b'], [None, 2, u'👍'])

        for index, expected in enumerate(expected_rows):
            self.assertSequenceEqual(loaded.rows[index], expected)
Beispiel #16
0
def from_json(cls, path, row_names=None, key=None, newline=False, column_types=None, **kwargs):
    """
    Create a new table from a JSON file.

    The JSON is deserialized and the resulting Python object is handed to
    :meth:`.Table.from_object`.

    If the document's top level is a dictionary, use the :code:`key`
    parameter to name the property that holds the row list.

    :code:`kwargs` will be passed through to :meth:`json.load` (or
    :meth:`json.loads` for each line when :code:`newline` is set).

    :param path:
        Filepath or file-like object from which to read JSON data.
    :param row_names:
        See the :meth:`.Table.__init__`.
    :param key:
        The key of the top-level dictionary that contains a list of row
        arrays.
    :param newline:
        If `True` then the file will be parsed as "newline-delimited JSON".
    :param column_types:
        See :meth:`.Table.__init__`.
    :raises ValueError:
        If both :code:`key` and :code:`newline` are given.
    :raises TypeError:
        If the document is a top-level dictionary and no :code:`key` is given.
    """
    from agate.table import Table

    if newline and key is not None:
        raise ValueError('key and newline may not be specified together.')

    def _parse(handle):
        # Newline-delimited input yields one document per line; otherwise the
        # whole stream is a single document.
        if newline:
            return [
                json.loads(line, object_pairs_hook=OrderedDict, parse_float=Decimal, **kwargs)
                for line in handle
            ]

        return json.load(handle, object_pairs_hook=OrderedDict, parse_float=Decimal, **kwargs)

    if hasattr(path, 'read'):
        js = _parse(path)
    else:
        with open(path, 'r') as f:
            js = _parse(f)

    if isinstance(js, dict):
        if not key:
            raise TypeError('When converting a JSON document with a top-level dictionary element, a key must be specified.')

        js = js[key]

    return Table.from_object(js, row_names=row_names, column_types=column_types)
Beispiel #17
0
    def test_table_from_csv(self):
        """
        Load ``examples/test.csv`` with csvkit installed as the ``csv``
        module of ``agate.table`` and check names, types and row values.
        """
        import csvkit
        from agate import table as table_module

        table_module.csv = csvkit

        # Only the Python 2 code path passes an explicit encoding.
        extra = {'encoding': 'utf8'} if six.PY2 else {}
        loaded = Table.from_csv('examples/test.csv', self.tester, **extra)

        self.assertSequenceEqual(loaded.column_names, ['one', 'two', 'three'])
        self.assertSequenceEqual(tuple(map(type, loaded.column_types)),
                                 [Number, Number, Text])

        self.assertEqual(len(loaded.columns), 3)

        expected_rows = ([1, 4, 'a'], [2, 3, 'b'], [None, 2, u'👍'])

        for index, expected in enumerate(expected_rows):
            self.assertSequenceEqual(loaded.rows[index], expected)
Beispiel #18
0
def rename(self, column_names=None, row_names=None, slug_columns=False, slug_rows=False, **kwargs):
    """
    Create a copy of this table with different column names or row names.

    Enabling :code:`slug_columns` or :code:`slug_rows` without supplying new
    names slugifies the table's existing names.

    :code:`kwargs` will be passed through to
    `awesome-slugify's <https://github.com/dimka665/awesome-slugify>`_
    :code:`UniqueSlugify` class.

    :param column_names:
        New column names for the renamed table. May be either an array or
        a dictionary mapping existing column names to new names. If not
        specified, will use this table's existing column names.
    :param row_names:
        New row names for the renamed table. May be either an array or
        a dictionary mapping existing row names to new names. If not
        specified, will use this table's existing row names.
    :param slug_columns:
        If True, column names will be converted to slugs and duplicate names
        will have unique identifiers appended.
    :param slug_rows:
        If True, row names will be converted to slugs and duplicate names will
        have unique identifiers appended.
    """
    from agate.table import Table

    # Dictionary arguments are expanded into full name lists; names without
    # a mapping entry are kept as they are.
    if isinstance(column_names, dict):
        column_names = [column_names.get(existing, existing) for existing in self._column_names]

    if isinstance(row_names, dict):
        row_names = [row_names.get(existing, existing) for existing in self._row_names]

    if slug_columns:
        column_names = column_names or self._column_names

        if column_names is not None:
            column_names = utils.slugify(column_names, ensure_unique=True, **kwargs)

    if slug_rows:
        row_names = row_names or self.row_names

        if row_names is not None:
            row_names = utils.slugify(row_names, ensure_unique=True, **kwargs)

    columns_renamed = column_names is not None and column_names != self._column_names

    if not columns_renamed:
        return self._fork(self._rows, column_names, self._column_types, row_names=row_names)

    # A full Table is constructed when the columns change; existing row names
    # are carried over unless new ones were supplied.
    if row_names is None:
        row_names = self._row_names

    return Table(self._rows, column_names, self._column_types, row_names=row_names, _is_fork=False)
Beispiel #19
0
    def test_monkeypatch(self):
        """
        After ``Table.monkeypatch(TryPatch)``, ``test`` and ``testcls`` are
        reachable from tables created both before and after the patch, and
        ``Table.__bases__`` is ``[Patchable, TryPatch]``.
        """
        pre_patch = Table([], ['foo'], [Text()])

        Table.monkeypatch(TryPatch)

        post_patch = Table([], ['foo'], [Text()])

        self.assertSequenceEqual(Table.__bases__, [Patchable, TryPatch])

        for table in (pre_patch, post_patch):
            for attribute in ('test', 'testcls'):
                self.assertIsNotNone(getattr(table, attribute))

        for table in (pre_patch, post_patch):
            self.assertEqual(table.test(5), 5)

        self.assertEqual(Table.testcls(5), 5)
def print_structure(self, max_rows=20, output=sys.stdout):
    """
    Print the keys and row counts of each table in the tableset.

    A two-column helper table (``table``, ``rows``) is built and printed
    with :meth:`.Table.print_table`.

    :param max_rows:
        The maximum number of tables to list before truncating the output.
        Defaults to 20.
    :param output:
        The output to print the structure to.
    :returns:
        Whatever :meth:`.Table.print_table` returns.
    """
    limit = min(len(self.items()), max_rows)

    table_names = self.keys()[:limit]
    row_counts = [str(len(member.rows)) for _, member in self.items()[:limit]]
    structure_rows = zip(table_names, row_counts)

    text_type = Text()
    structure = Table(structure_rows, ['table', 'rows'], [text_type, text_type])

    return structure.print_table(output=output, max_column_width=None)
Beispiel #21
0
    def from_csv(cls, dir_path, column_info, header=True, **kwargs):
        """
        Create a new :class:`TableSet` from a directory of CSVs. This method
        will use csvkit if it is available, otherwise it will use Python's
        builtin csv module.

        Each table is named after its file with the extension removed.

        ``kwargs`` will be passed through to :meth:`csv.reader`.

        If you are using Python 2 and not using csvkit, this method is not
        unicode-safe.

        :param dir_path: Path to a directory full of CSV files. All CSV files
            in this directory will be loaded.
        :param column_info: A sequence of pairs of column names and types. The latter
            must be instances of :class:`.DataType`. Or, an instance of
            :class:`.TypeTester` to infer types.
        :param header: If `True`, the first row of the CSV is assumed to contain
            headers and will be skipped.
        :raises ValueError: If a :class:`.TypeTester` is given together with
            ``header=False``.
        :raises IOError: If ``dir_path`` is not an existing directory.
        :returns: A new :class:`TableSet` built from the name-to-table mapping.
        """
        from agate.table import Table

        use_inference = isinstance(column_info, TypeTester)

        if use_inference and not header:
            raise ValueError('Can not apply TypeTester to a CSV without headers.')

        if not os.path.isdir(dir_path):
            raise IOError('Specified path doesn\'t exist or isn\'t a directory.')

        tables = OrderedDict()
        has_inferred_columns = False

        for path in glob(os.path.join(dir_path, '*.csv')):
            # str.strip('.csv') removes any of the characters ".csv" from
            # both ends (e.g. "cars.csv" -> "ar"); splitext drops only the
            # extension.
            name = os.path.splitext(os.path.basename(path))[0]

            table = Table.from_csv(path, column_info, header=header, **kwargs)

            if use_inference and not has_inferred_columns:
                # After the first file, reuse its inferred names and types
                # for every remaining file instead of the TypeTester.
                column_info = tuple(zip(table.get_column_names(), table.get_column_types()))
                has_inferred_columns = True

            tables[name] = table

        return TableSet(tables)
Beispiel #22
0
    def merge(self, groups=None, group_name=None, group_type=None):
        """
        Flatten this TableSet into a single table. This is the inverse of
        :meth:`.Table.group_by`.

        Any `row_names` set on the merged tables will be lost in this
        process.

        :param groups:
            A list of grouping factors to add to merged rows in a new column.
            If specified, it should have exactly one element per :class:`Table`
            in the :class:`TableSet`. If not specified or None, the grouping
            factor will be the name of the :class:`Row`'s original Table.
        :param group_name:
            This will be the column name of the grouping factors. If None,
            defaults to the :attr:`TableSet.key_name`.
        :param group_type:
            This will be the column type of the grouping factors. If None,
            defaults to the :attr:`TableSet.key_type`.
        :raises ValueError:
            If ``groups`` is neither None nor a list, or if its length does
            not match the length of this TableSet.
        :returns:
            A new :class:`Table`.
        """
        if groups is not None:
            if type(groups) is not list:
                raise ValueError('Groups must be None or a list.')

            if len(groups) != len(self):
                raise ValueError('Groups length must be equal to TableSet length.')

        column_names = list(self.column_names)
        column_types = list(self.column_types)

        # The grouping column always comes first.
        column_names.insert(0, group_name or self.key_name)
        column_types.insert(0, group_type or self.key_type)

        rows = []

        for index, (key, table) in enumerate(self.items()):
            label = key if groups is None else groups[index]

            for row in table.rows:
                rows.append(Row((label, ) + tuple(row), column_names))

        return Table(rows, column_names, column_types)
Beispiel #23
0
    def from_csv(cls, dir_path, column_info, row_names=None, header=True, **kwargs):
        """
        Create a new :class:`TableSet` from a directory of CSVs.

        Each table is named after its file with the extension removed.
        ``kwargs`` are forwarded to :meth:`.Table.from_csv`.

        See :meth:`.Table.from_csv` for additional details.

        :param dir_path:
            Path to a directory full of CSV files. All CSV files in this
            directory will be loaded.
        :param column_info:
            A sequence of pairs of column names and types. The latter must be
            instances of :class:`.DataType`. Or, an instance of
            :class:`.TypeTester` to infer types.
        :param row_names:
            See :meth:`Table.__init__`.
        :param header:
            If `True`, the first row of the CSV is assumed to contain headers
            and will be skipped.
        :raises ValueError:
            If a :class:`.TypeTester` is given together with ``header=False``.
        :raises IOError:
            If ``dir_path`` is not an existing directory.
        :returns:
            A new :class:`TableSet` built from the loaded tables and their
            names.
        """
        use_inference = isinstance(column_info, TypeTester)

        if use_inference and not header:
            raise ValueError('Can not apply TypeTester to a CSV without headers.')

        if not os.path.isdir(dir_path):
            raise IOError('Specified path doesn\'t exist or isn\'t a directory.')

        tables = OrderedDict()
        has_inferred_columns = False

        for path in glob(os.path.join(dir_path, '*.csv')):
            # str.strip('.csv') removes any of the characters ".csv" from
            # both ends (e.g. "cars.csv" -> "ar"); splitext drops only the
            # extension.
            name = os.path.splitext(os.path.basename(path))[0]

            table = Table.from_csv(path, column_info, row_names=row_names, header=header, **kwargs)

            if use_inference and not has_inferred_columns:
                # After the first file, reuse its inferred names and types
                # for every remaining file instead of the TypeTester.
                column_info = tuple(zip(table.column_names, table.column_types))
                has_inferred_columns = True

            tables[name] = table

        return TableSet(tables.values(), tables.keys())
Beispiel #24
0
    def test_monkeypatch(self):
        """
        After ``Table.monkeypatch(TryPatch)``, ``test`` and ``testcls`` are
        reachable from tables created both before and after the patch, and
        ``Table.__bases__`` is ``[Patchable, TryPatch]``.
        """
        pre_patch = Table([], [('foo', Text())])

        Table.monkeypatch(TryPatch)

        post_patch = Table([], [('foo', Text())])

        self.assertSequenceEqual(Table.__bases__, [Patchable, TryPatch])

        for table in (pre_patch, post_patch):
            for attribute in ('test', 'testcls'):
                self.assertIsNotNone(getattr(table, attribute))

        for table in (pre_patch, post_patch):
            self.assertEqual(table.test(5), 5)

        self.assertEqual(Table.testcls(5), 5)
Beispiel #25
0
    def merge(self):
        """
        Flatten this TableSet into a single table. This is the inverse of
        :meth:`.Table.group_by`.

        Each output row is the source row prefixed with the key of the table
        it came from. Any :code:`row_names` set on the merged tables will be
        lost in this process.

        :returns: A new :class:`Table`.
        """
        # The key column always comes first.
        column_names = [self.key_name] + list(self.column_names)
        column_types = [self.key_type] + list(self.column_types)

        rows = [
            Row((key,) + tuple(row), column_names)
            for key, table in self.items()
            for row in table.rows
        ]

        return Table(rows, column_names, column_types)
Beispiel #26
0
def from_object(cls, obj, row_names=None, column_types=None):
    """
    Create a new table from a Python object.

    The object should be a list containing a dictionary for each "row".
    Each item is flattened with :func:`utils.parse_object`, so nested
    objects or lists will also be parsed. For example, this object:

    .. code-block:: python

        {
            'one': {
                'a': 1,
                'b': 2,
                'c': 3
            },
            'two': [4, 5, 6],
            'three': 'd'
        }

    Would generate these columns and values:

    .. code-block:: python

        {
            'one/a': 1,
            'one/b': 2,
            'one/c': 3,
            'two.0': 4,
            'two.1': 5,
            'two.2': 6,
            'three': 'd'
        }

    Column names are collected from the data in the order they are first
    seen.

    Not all rows are required to have the same keys. Missing elements will
    be filled in with null values.

    :param obj:
        An iterable of row objects, one per table row.
    :param row_names:
        See :meth:`.Table.__init__`.
    :param column_types:
        See :meth:`.Table.__init__`.
    """
    from agate.table import Table

    column_names = []
    flattened = []

    # First pass: flatten every row and record each new key as a column.
    for item in obj:
        parsed = utils.parse_object(item)

        for key in parsed.keys():
            if key not in column_names:
                column_names.append(key)

        flattened.append(parsed)

    # Second pass: lay every row out in column order, padding gaps with None.
    rows = [
        [parsed.get(name, None) for name in column_names]
        for parsed in flattened
    ]

    return Table(rows, column_names, row_names=row_names, column_types=column_types)
Beispiel #27
0
    def test_monkeypatch_deprecated(self):
        """
        ``Table.monkeypatch`` must emit a ``DeprecationWarning``.
        """
        with warnings.catch_warnings():
            # Escalate warnings to exceptions so assertRaises can see them.
            warnings.simplefilter('error')

            self.assertRaises(DeprecationWarning, Table.monkeypatch, TryPatch)
Beispiel #28
0
    def test_monkeypatch_double(self):
        """
        Applying the same patch repeatedly must leave ``Table.__bases__`` as
        ``[Patchable, TryPatch]``.
        """
        for _ in range(3):
            Table.monkeypatch(TryPatch)

        self.assertSequenceEqual(Table.__bases__, [Patchable, TryPatch])
Beispiel #29
0
def denormalize(self,
                key=None,
                property_column='property',
                value_column='value',
                default_value=utils.default,
                column_types=None):
    """
    Create a new table with row values converted into columns.

    For example:

    +---------+-----------+---------+
    |  name   | property  | value   |
    +=========+===========+=========+
    |  Jane   | gender    | female  |
    +---------+-----------+---------+
    |  Jane   | race      | black   |
    +---------+-----------+---------+
    |  Jane   | age       | 24      |
    +---------+-----------+---------+
    |  ...    |  ...      |  ...    |
    +---------+-----------+---------+

    Can be denormalized so that each unique value in `property_column`
    becomes a column, with the matching `value_column` entry used for its
    values.

    +---------+----------+--------+-------+
    |  name   | gender   | race   | age   |
    +=========+==========+========+=======+
    |  Jane   | female   | black  | 24    |
    +---------+----------+--------+-------+
    |  Jack   | male     | white  | 35    |
    +---------+----------+--------+-------+
    |  Joe    | male     | black  | 28    |
    +---------+----------+--------+-------+

    The resulting table is given :code:`row_names` built from the key
    columns: the key value itself when there is a single key column,
    otherwise a tuple of the key values.

    This is the opposite of :meth:`.Table.normalize`.

    :param key:
        A column name or a sequence of column names that should be
        maintained as they are in the normalized table. Typically these
        are the tables unique identifiers and any metadata about them. Or,
        :code:`None` if there are no key columns.
    :param property_column:
        The column whose values should become column names in the new
        table. Values are converted to text before being used as names.
    :param value_column:
        The column whose values should become the values of the property
        columns in the new table.
    :param default_value:
        Value to be used for key/property combinations that have no row in
        this table. If not specified, :code:`Decimal(0)` will be used when
        the value column's data type is :class:`.Number` and :code:`None`
        will be used otherwise.
    :param column_types:
        A sequence of column types with length equal to number of unique
        values in property_column or an instance of :class:`.TypeTester`.
        Defaults to a generic :class:`.TypeTester`.
    :returns:
        A new :class:`.Table`.
    """
    from agate.table import Table

    if key is None:
        key = []
    elif not utils.issequence(key):
        key = [key]
    else:
        # Copy into a list so that a tuple of column names can be
        # concatenated with the list of property names below.
        key = list(key)

    # Used as an insertion-ordered set of property names (first seen wins),
    # giving constant-time membership instead of scanning a list per row.
    seen_fields = OrderedDict()
    # Maps each distinct key tuple to its {property name: value} mapping.
    row_data = OrderedDict()

    for row in self.rows:
        row_key = tuple(row[k] for k in key)

        if row_key not in row_data:
            row_data[row_key] = OrderedDict()

        f = six.text_type(row[property_column])
        seen_fields[f] = None

        # A later row with the same key and property overwrites the earlier one.
        row_data[row_key][f] = row[value_column]

    field_names = list(seen_fields)

    # The sentinel is compared by identity so that arbitrary caller-supplied
    # defaults are never asked to evaluate ``==`` against it.
    if default_value is utils.default:
        if isinstance(self.columns[value_column].data_type, Number):
            default_value = Decimal(0)
        else:
            default_value = None

    new_column_names = key + field_names

    new_rows = []
    row_names = []

    for k, v in row_data.items():
        row = list(k)

        # A single key column names the row by its bare value; several key
        # columns (or none) name it by the whole tuple.
        if len(k) == 1:
            row_names.append(k[0])
        else:
            row_names.append(k)

        for f in field_names:
            row.append(v[f] if f in v else default_value)

        new_rows.append(Row(row, new_column_names))

    # Key columns keep the types they already have in this table.
    key_column_types = [
        self.column_types[self.column_names.index(name)] for name in key
    ]

    if column_types is None or isinstance(column_types, TypeTester):
        tester = TypeTester() if column_types is None else column_types
        # NOTE: a caller-supplied TypeTester is modified in place here; its
        # ``_force`` mapping gains entries for the key columns. Entries the
        # tester already had take precedence over the key column types.
        force_update = dict(zip(key, key_column_types))
        force_update.update(tester._force)
        tester._force = force_update

        new_column_types = tester.run(new_rows, new_column_names)
    else:
        new_column_types = key_column_types + list(column_types)

    return Table(new_rows,
                 new_column_names,
                 new_column_types,
                 row_names=row_names)
Beispiel #30
0
    def test_monkeypatch_double(self):
        """Applying the same patch class several times must add it to ``Table.__bases__`` just once."""
        patch_attempts = 3

        for _ in range(patch_attempts):
            Table.monkeypatch(TryPatch)

        self.assertSequenceEqual(Table.__bases__, [Patchable, TryPatch])
Beispiel #31
0
def normalize(self,
              key,
              properties,
              property_column='property',
              value_column='value',
              column_types=None):
    """
    Create a new table with columns converted into row values.

    For example:

    +---------+----------+--------+-------+
    |  name   | gender   | race   | age   |
    +=========+==========+========+=======+
    |  Jane   | female   | black  | 24    |
    +---------+----------+--------+-------+
    |  Jack   | male     | white  | 35    |
    +---------+----------+--------+-------+
    |  Joe    | male     | black  | 28    |
    +---------+----------+--------+-------+

    can be normalized on columns 'gender', 'race' and 'age':

    +---------+-----------+---------+
    |  name   | property  | value   |
    +=========+===========+=========+
    |  Jane   | gender    | female  |
    +---------+-----------+---------+
    |  Jane   | race      | black   |
    +---------+-----------+---------+
    |  Jane   | age       | 24      |
    +---------+-----------+---------+
    |  ...    |  ...      |  ...    |
    +---------+-----------+---------+

    This is the opposite of :meth:`.Table.denormalize`.

    Row names derived from the key columns are attached to the new table
    only when every source row produces exactly one output row (that is,
    when a single property is normalized). With several properties each key
    repeats, so no row names are set.

    :param key:
        A column name or a sequence of column names that should be
        maintained as they are in the normalized table. Typically these
        are the tables unique identifiers and any metadata about them.
    :param properties:
        A column name or a sequence of column names that should be
        converted to properties in the new table.
    :param property_column:
        The name to use for the column containing the property names.
    :param value_column:
        The name to use for the column containing the property values.
    :param column_types:
        A sequence of two column types for the property and value column in
        that order or an instance of :class:`.TypeTester`. Defaults to a
        generic :class:`.TypeTester`.
    :returns:
        A new :class:`.Table`.
    """
    from agate.table import Table

    # Copy a sequence key into a list so that a tuple of column names can be
    # concatenated with the two new column names below.
    key = list(key) if utils.issequence(key) else [key]

    if not utils.issequence(properties):
        properties = [properties]

    new_column_names = key + [property_column, value_column]

    new_rows = []
    row_names = []

    for row in self.rows:
        k = tuple(row[n] for n in key)
        left_row = list(k)

        # A single key column names the row by its bare value; several key
        # columns name it by the whole tuple.
        row_names.append(k[0] if len(k) == 1 else k)

        # Emit one (key..., property name, property value) row per property.
        for f in properties:
            new_rows.append(
                Row(tuple(left_row + [f, row[f]]), new_column_names))

    # One name was collected per source row, but each source row yields
    # len(properties) output rows. Only hand the names on when the two
    # sequences line up one-to-one; otherwise they would not correspond to
    # the rows they are attached to.
    if len(row_names) != len(new_rows):
        row_names = None

    # Key columns keep the types they already have in this table.
    key_column_types = [
        self.column_types[self.column_names.index(name)] for name in key
    ]

    if column_types is None or isinstance(column_types, TypeTester):
        tester = TypeTester() if column_types is None else column_types
        # NOTE: a caller-supplied TypeTester is modified in place here; its
        # ``_force`` mapping gains entries for the key columns. Entries the
        # tester already had take precedence over the key column types.
        force_update = dict(zip(key, key_column_types))
        force_update.update(tester._force)
        tester._force = force_update

        new_column_types = tester.run(new_rows, new_column_names)
    else:
        new_column_types = key_column_types + list(column_types)

    return Table(new_rows,
                 new_column_names,
                 new_column_types,
                 row_names=row_names)
Beispiel #32
0
def from_json(cls,
              path,
              row_names=None,
              key=None,
              newline=False,
              column_types=None,
              encoding='utf-8',
              **kwargs):
    """
    Create a new table from a JSON file.

    Once the JSON has been deserialized, the resulting Python object is
    passed to :meth:`.Table.from_object`. Objects are loaded as
    :class:`collections.OrderedDict` and floats as :class:`decimal.Decimal`.

    If the file contains a top-level dictionary you may specify what
    property contains the row list using the :code:`key` parameter.

    :code:`kwargs` will be passed through to :meth:`json.load` (or to
    :meth:`json.loads` for each line when :code:`newline` is set).

    :param path:
        Filepath or file-like object from which to read JSON data.
    :param row_names:
        See the :meth:`.Table.__init__`.
    :param key:
        The key of the top-level dictionary that contains a list of row
        objects.
    :param newline:
        If `True` then the file will be parsed as "newline-delimited JSON".
        Lines containing only whitespace are skipped.
    :param column_types:
        See :meth:`.Table.__init__`.
    :param encoding:
        According to RFC4627, JSON text shall be encoded in Unicode; the
        default encoding is UTF-8. You can override this by using any
        encoding supported by your Python's open() function if :code:`path`
        is a filepath. If passing in a file handle, it is assumed you have
        already opened it with the correct encoding specified.
    :raises ValueError:
        If both :code:`key` and :code:`newline` are specified.
    :raises TypeError:
        If the document's top-level element is a dictionary and no
        :code:`key` was specified.
    """
    if key is not None and newline:
        raise ValueError('key and newline may not be specified together.')

    def parse(loader, source):
        # Single place for the options shared by every parse call.
        return loader(source,
                      object_pairs_hook=OrderedDict,
                      parse_float=Decimal,
                      **kwargs)

    # Only a file opened here is closed here; a handle supplied by the
    # caller stays open and remains the caller's responsibility.
    close = not hasattr(path, 'read')
    f = io.open(path, encoding=encoding) if close else path

    try:
        if newline:
            # Blank lines (commonly a trailing one) carry no record and
            # would otherwise make json.loads fail.
            js = [parse(json.loads, line) for line in f if line.strip()]
        else:
            js = parse(json.load, f)
    finally:
        if close:
            f.close()

    if isinstance(js, dict):
        # Test against None rather than truthiness so that falsy keys such
        # as the empty string can still be looked up.
        if key is None:
            raise TypeError(
                'When converting a JSON document with a top-level dictionary element, a key must be specified.'
            )

        js = js[key]

    # Imported at the point of use, after all input validation.
    from agate.table import Table

    return Table.from_object(js,
                             row_names=row_names,
                             column_types=column_types)
Beispiel #33
0
def from_fixed(cls,
               path,
               schema_path,
               column_names=utils.default,
               column_types=None,
               row_names=None,
               encoding='utf-8',
               schema_encoding='utf-8'):
    """
    Create a new table from a fixed-width file and a CSV schema.

    Schemas must be in the "ffs" format. There is a repository of such schemas
    maintained at `wireservice/ffs <https://github.com/wireservice/ffs>`_.

    Files opened by this method are closed before it returns, including when
    reading fails. File-like objects passed in by the caller are left open.

    :param path:
        File path or file-like object from which to read fixed-width data.
    :param schema_path:
        File path or file-like object from which to read schema (CSV) data.
    :param column_names:
        By default, these will be parsed from the schema. For alternatives, see
        :meth:`.Table.__init__`.
    :param column_types:
        See :meth:`.Table.__init__`.
    :param row_names:
        See :meth:`.Table.__init__`.
    :param encoding:
        Character encoding of the fixed-width file. Note: if passing in a file
        handle it is assumed you have already opened it with the correct
        encoding specified.
    :param schema_encoding:
        Character encoding of the schema file. Note: if passing in a file
        handle it is assumed you have already opened it with the correct
        encoding specified.
    """
    from agate.table import Table

    close_f = False
    close_schema_f = False

    try:
        if hasattr(path, 'read'):
            f = path
        else:
            f = io.open(path, encoding=encoding)
            close_f = True

        if hasattr(schema_path, 'read'):
            # Use the caller's schema handle (not the data handle).
            schema_f = schema_path
        else:
            schema_f = io.open(schema_path, encoding=schema_encoding)
            close_schema_f = True

        reader = fixed.reader(f, schema_f)
        # Materialize every row while both files are still open.
        rows = list(reader)
    finally:
        # Each flag is only set after the matching open() succeeded, so the
        # corresponding name is guaranteed to be bound here.
        if close_f:
            f.close()

        if close_schema_f:
            schema_f.close()

    # The sentinel is compared by identity; a caller-supplied sequence of
    # names is never asked to evaluate ``==`` against it.
    if column_names is utils.default:
        column_names = reader.fieldnames

    return Table(rows, column_names, column_types, row_names=row_names)
Beispiel #34
0
def from_csv(cls, path, column_names=None, column_types=None, row_names=None, skip_lines=0, header=True, sniff_limit=0,
             encoding='utf-8', row_limit=None, **kwargs):
    """
    Build a table from CSV data.

    Parsing is delegated to agate's own CSV reader, and any extra
    :code:`kwargs` are forwarded to it. After the skipped lines, the
    remaining input is read into memory in full before parsing.

    :param path:
        Filepath or file-like object from which to read CSV data. If a file-like
        object is specified, it must be seekable. If using Python 2, the file
        should be opened in binary mode (`rb`).
    :param column_names:
        See :meth:`.Table.__init__`.
    :param column_types:
        See :meth:`.Table.__init__`.
    :param row_names:
        See :meth:`.Table.__init__`.
    :param skip_lines:
        How many lines to discard from the top of the file before parsing.
        Must be an int, otherwise :class:`ValueError` is raised.
    :param header:
        If :code:`True`, the first row of the CSV is treated as the header.
        It supplies the column names unless :code:`column_names` is given,
        in which case the row is consumed and discarded.
    :param sniff_limit:
        Number of leading characters of the content handed to the dialect
        sniffer. :code:`None` sniffs the whole content; the default of 0
        disables sniffing.
    :param encoding:
        Character encoding of the CSV file. Note: if passing in a file
        handle it is assumed you have already opened it with the correct
        encoding specified.
    :param row_limit:
        Maximum number of data rows to read; :code:`None` reads them all.
    """
    from agate import csv
    from agate.table import Table

    opened_here = False

    try:
        if not hasattr(path, 'read'):
            # Python 2 gets a binary handle and decodes inside the reader;
            # Python 3 decodes while reading.
            f = open(path, 'Urb') if six.PY2 else io.open(path, encoding=encoding)
            opened_here = True
        else:
            f = path

        if not isinstance(skip_lines, int):
            raise ValueError('skip_lines argument must be an int')

        # Zero or negative counts skip nothing.
        for _ in range(skip_lines):
            f.readline()

        contents = six.StringIO(f.read())

        if sniff_limit is None:
            sample = contents.getvalue()
            kwargs['dialect'] = csv.Sniffer().sniff(sample)
        elif sniff_limit > 0:
            sample = contents.getvalue()[:sniff_limit]
            kwargs['dialect'] = csv.Sniffer().sniff(sample)

        if six.PY2:
            kwargs['encoding'] = encoding

        reader = csv.reader(contents, header=header, **kwargs)

        if header:
            # The header row is always consumed; it only becomes the column
            # names when the caller did not provide any.
            first_row = next(reader)

            if column_names is None:
                column_names = first_row

        data_rows = reader if row_limit is None else itertools.islice(reader, row_limit)
        rows = tuple(data_rows)

    finally:
        if opened_here:
            f.close()

    return Table(rows, column_names, column_types, row_names=row_names)