Esempio n. 1
0
    def __init__(self,
                 tables,
                 keys,
                 key_name='group',
                 key_type=None,
                 _is_fork=False):
        """
        Group :code:`tables` under :code:`keys`, checking they share a schema.

        :param tables:
            An iterable of tables (or nested table sets). It is materialized
            into a tuple.
        :param keys:
            An iterable of keys for the tables. It is materialized into a
            tuple and handed to :class:`.MappedSequence`.
        :param key_name:
            Stored as the name of the grouping key.
        :param key_type:
            Stored as the type of the grouping key. A :class:`.Text` instance
            is created when no truthy value is supplied.
        :param _is_fork:
            When true, the per-table schema checks are skipped.
        :raises ValueError:
            If any table's column types or column names differ from those of
            the sample table.
        """
        tables = tuple(tables)
        keys = tuple(keys)

        self._key_name = key_name
        self._key_type = key_type if key_type else Text()

        # Descend through nested table sets until an actual table is reached;
        # that table provides the reference schema.
        sample = tables[0]

        while isinstance(sample, TableSet):
            sample = sample[0]

        self._sample_table = sample
        self._column_types = sample.column_types
        self._column_names = sample.column_names

        if not _is_fork:
            for table in tables:
                # zip_longest pads the shorter side with None, so a differing
                # number of columns also fails the isinstance test below.
                type_pairs = zip_longest(table.column_types,
                                         self._column_types)

                for actual, expected in type_pairs:
                    if not isinstance(actual, type(expected)):
                        raise ValueError(
                            'Not all tables have the same column types!')

                if table.column_names != self._column_names:
                    raise ValueError(
                        'Not all tables have the same column names!')

        MappedSequence.__init__(self, tables, keys)
Esempio n. 2
0
    def test_ne(self):
        # `!=` must be false against equal-valued sequences (tuple, list and
        # another MappedSequence) and true for anything else.
        twin = MappedSequence(self.data, self.column_names)

        for same in ((u'a', u'b', u'c'), [u'a', u'b', u'c'], twin):
            self.assertFalse(self.row != same)

        for different in ((u'a', u'b', u'c', u'd'), 1):
            self.assertTrue(self.row != different)
    def test_stringify_long(self):
        # Six values are supplied; the expected text shows the first five
        # followed by an ellipsis.
        names = ('one', 'two', 'three', 'four', 'five', 'six')
        values = (u'a', u'b', u'c', u'd', u'e', u'f')
        long_row = MappedSequence(values, names)

        # Python 2 reprs carry the u'' prefix on unicode strings.
        if six.PY2:
            expected = "<agate.MappedSequence: (u'a', u'b', u'c', u'd', u'e', ...)>"
        else:
            expected = "<agate.MappedSequence: ('a', 'b', 'c', 'd', 'e', ...)>"

        self.assertEqual(str(long_row), expected)
Esempio n. 4
0
    def __init__(self, tables, keys, key_name='group', key_type=None):
        """
        Group :code:`tables` under :code:`keys`, checking they share a schema.

        :param tables:
            An iterable of tables (or nested table sets). It is materialized
            into a tuple.
        :param keys:
            An iterable of keys for the tables. It is materialized into a
            tuple and handed to :class:`.MappedSequence`.
        :param key_name:
            Stored as the name of the grouping key.
        :param key_type:
            Stored as the type of the grouping key. A :class:`.Text` instance
            is created when no truthy value is supplied.
        :raises ValueError:
            If any table's column types or column names compare unequal to
            those of the sample table.
        """
        tables = tuple(tables)
        keys = tuple(keys)

        self._key_name = key_name
        self._key_type = key_type if key_type else Text()

        # Descend through nested table sets until an actual table is reached;
        # that table provides the reference schema.
        sample = tables[0]

        while isinstance(sample, TableSet):
            sample = sample[0]

        self._sample_table = sample
        self._column_types = sample.column_types
        self._column_names = sample.column_names

        for member in tables:
            # NOTE(review): this relies on how the column type objects compare
            # with `!=` -- confirm they define value equality.
            if member.column_types != self.column_types:
                raise ValueError('Not all tables have the same column types!')

            if member.column_names != self.column_names:
                raise ValueError('Not all tables have the same column names!')

        MappedSequence.__init__(self, tables, keys)
Esempio n. 5
0
    def __init__(self, tables, keys, key_name='group', key_type=None):
        """
        Group :code:`tables` under :code:`keys`, checking they share a schema.

        :param tables:
            An iterable of tables (or nested table sets). It is materialized
            into a tuple.
        :param keys:
            An iterable of keys for the tables. It is materialized into a
            tuple and handed to :class:`.MappedSequence`.
        :param key_name:
            Stored as the name of the grouping key.
        :param key_type:
            Stored as the type of the grouping key. A :class:`.Text` instance
            is created when no truthy value is supplied.
        :raises ValueError:
            If any table's column types or column names compare unequal to
            those of the sample table.
        """
        tables = tuple(tables)
        keys = tuple(keys)

        self._key_name = key_name
        self._key_type = key_type if key_type else Text()

        # Descend through nested table sets until an actual table is reached;
        # that table provides the reference schema.
        sample = tables[0]

        while isinstance(sample, TableSet):
            sample = sample[0]

        self._sample_table = sample
        self._column_types = sample.column_types
        self._column_names = sample.column_names

        for member in tables:
            # NOTE(review): this relies on how the column type objects compare
            # with `!=` -- confirm they define value equality.
            if member.column_types != self.column_types:
                raise ValueError('Not all tables have the same column types!')

            if member.column_names != self.column_names:
                raise ValueError('Not all tables have the same column names!')

        MappedSequence.__init__(self, tables, keys)
Esempio n. 6
0
    def __init__(self, tables, keys, key_name='group', key_type=None, _is_fork=False):
        """
        Group :code:`tables` under :code:`keys`, checking they share a schema.

        :param tables:
            An iterable of tables (or nested table sets). It is materialized
            into a tuple.
        :param keys:
            An iterable of keys for the tables. It is materialized into a
            tuple and handed to :class:`.MappedSequence`.
        :param key_name:
            Stored as the name of the grouping key.
        :param key_type:
            Stored as the type of the grouping key. A :class:`.Text` instance
            is created when no truthy value is supplied.
        :param _is_fork:
            When true, the per-table schema checks are skipped.
        :raises ValueError:
            If any table's column types or column names differ from those of
            the sample table.
        """
        tables = tuple(tables)
        keys = tuple(keys)

        self._key_name = key_name
        self._key_type = key_type if key_type else Text()

        # Descend through nested table sets until an actual table is reached;
        # that table provides the reference schema.
        sample = tables[0]

        while isinstance(sample, TableSet):
            sample = sample[0]

        self._sample_table = sample
        self._column_types = sample.column_types
        self._column_names = sample.column_names

        if not _is_fork:
            for member in tables:
                # zip_longest pads the shorter side with None, so a differing
                # number of columns also fails the isinstance test below.
                for actual, expected in zip_longest(member.column_types, self.column_types):
                    if not isinstance(actual, type(expected)):
                        raise ValueError('Not all tables have the same column types!')

                if member.column_names != self.column_names:
                    raise ValueError('Not all tables have the same column names!')

        MappedSequence.__init__(self, tables, keys)
Esempio n. 7
0
    def __init__(self, rows, column_info, row_names=None, _is_fork=False):
        """
        Build a table from row data and per-column metadata.

        :param rows:
            An iterable of rows. Unless :code:`_is_fork` is true, every row
            must support :func:`len`; rows shorter than the column count are
            padded with :code:`None` and each value is passed through the
            :code:`cast` method of its column's type.
        :param column_info:
            A non-empty iterable of either :class:`.Column` instances (their
            :code:`name` and :code:`data_type` are reused) or
            :code:`(name, data_type)` pairs. In the pair form a falsy name is
            replaced by :func:`letter_name` for its position.
        :param row_names:
            Optional. A column name, a callable taking a row, or a sequence
            of names.
        :param _is_fork:
            When true, :code:`rows` is used as given, with no length check
            and no casting.
        :raises ValueError:
            If a name is not a string, names are duplicated, a type is not a
            :class:`.DataType`, a row has too many values, or
            :code:`row_names` is of an unsupported kind.
        """
        column_info = list(column_info)

        if isinstance(column_info[0], Column):
            self._column_names = tuple(c.name for c in column_info)
            self._column_types = tuple(c.data_type for c in column_info)
        else:
            column_names, self._column_types = zip(*column_info)

            validated_names = []

            # Validation
            for i, column_name in enumerate(column_names):
                if not column_name:
                    validated_names.append(letter_name(i))
                else:
                    if not isinstance(column_name, six.string_types):
                        raise ValueError('Column names must be strings.')

                    validated_names.append(column_name)

            if len(set(validated_names)) != len(validated_names):
                raise ValueError('Duplicate column names are not allowed.')

            self._column_names = tuple(validated_names)

            for column_type in self._column_types:
                if not isinstance(column_type, DataType):
                    raise ValueError('Column types must be instances of DataType.')

        # Computed after both branches: previously it was only assigned in the
        # (name, type) branch, so passing Column instances raised NameError
        # as soon as the first row was checked.
        len_column_names = len(self._column_names)

        if not _is_fork:
            new_rows = []
            cast_funcs = [c.cast for c in self._column_types]

            for i, row in enumerate(rows):
                len_row = len(row)

                if len_row > len_column_names:
                    raise ValueError('Row %i has %i values, but Table only has %i columns.' % (i, len_row, len_column_names))
                elif len_row < len_column_names:
                    # Pad short rows with nulls up to the column count.
                    row = chain(row, [None] * (len_column_names - len_row))

                # `j` is the column position; a distinct name avoids shadowing
                # the row index `i`.
                new_rows.append(Row(tuple(cast_funcs[j](d) for j, d in enumerate(row)), self._column_names))
        else:
            new_rows = rows

        if row_names:
            computed_row_names = []

            if isinstance(row_names, six.string_types):
                # A string names the column whose values become the row names.
                for row in new_rows:
                    name = row[row_names]
                    computed_row_names.append(name)
            elif hasattr(row_names, '__call__'):
                # A callable derives each name from its row.
                for row in new_rows:
                    name = row_names(row)
                    computed_row_names.append(name)
            elif isinstance(row_names, Sequence):
                computed_row_names = row_names
            else:
                raise ValueError('row_names must be a column name, function or sequence')

            self._row_names = tuple(computed_row_names)
        else:
            self._row_names = None

        self._rows = MappedSequence(new_rows, self._row_names)

        # Build columns
        new_columns = []

        for i, (name, data_type) in enumerate(zip(self._column_names, self._column_types)):
            column = Column(i, name, data_type, self._rows, row_names=self._row_names)
            new_columns.append(column)

        self._columns = MappedSequence(new_columns, self._column_names)
Esempio n. 8
0
class Table(object):
    """
    A dataset consisting of rows and columns. Columns refer to "vertical" slices
    of data that must all be of the same type. Rows refer to "horizontal" slices
    of data that may (and usually do) contain mixed types.

    The sequence of :class:`.Column` instances is retrieved via the
    :attr:`.Table.columns` property. They may be accessed by either numeric
    index or by unique column name.

    The sequence of :class:`.Row` instances is retrieved via the
    :attr:`.Table.rows` property. They may be accessed by either numeric index
    or, if specified, unique row names.

    :param rows:
        The data as a sequence of any sequences: tuples, lists, etc. If
        any row has fewer values than the number of columns, it will be filled
        out with nulls. No row may have more values than the number of columns.
    :param column_names:
        A sequence of string names for each column or `None`, in which case
        column names will be automatically assigned using :func:`.letter_name`.
    :param column_types:
        A sequence of instances of :class:`.DataType` or an instance of
        :class:`.TypeTester` or `None` in which case a generic TypeTester will
        be used. Alternatively, a dictionary with column names as keys and
        instances of :class:`.DataType` as values to specify some types.
    :param row_names:
        Specifies unique names for each row. This parameter is
        optional. If specified it may be 1) the name of a single column that
        contains a unique identifier for each row, 2) a key function that takes
        a :class:`.Row` and returns a unique identifier or 3) a sequence of
        unique identifiers of the same length as the sequence of rows. The
        uniqueness of resulting identifiers is not validated, so be certain
        the values you provide are truly unique.
    :param _is_fork:
        Used internally to skip certain validation steps when data
        is propagated from an existing table. When :code:`True`, rows are
        assumed to be :class:`.Row` instances, rather than raw data.
    """
    def __init__(self, rows, column_names=None, column_types=None, row_names=None, _is_fork=False):
        # A string is rejected up front; the message points callers who
        # passed a path towards Table.from_csv.
        if isinstance(rows, six.string_types):
            raise ValueError('When created directly, the first argument to Table must be a sequence of rows. Did you want agate.Table.from_csv?')

        # Validate column names
        if column_names:
            # NOTE(review): presumably utils.deduplicate renames repeated
            # names -- confirm against the helper.
            self._column_names = utils.deduplicate(column_names, column_names=True)
        elif rows:
            # No names given: generate one letter name per value in the first
            # row and warn the caller (stacklevel=2 attributes the warning to
            # the code that called this constructor).
            self._column_names = tuple(utils.letter_name(i) for i in range(len(rows[0])))
            warnings.warn('Column names not specified. "%s" will be used as names.' % str(self._column_names), RuntimeWarning, stacklevel=2)
        else:
            # Neither names nor rows: the table has no columns.
            self._column_names = tuple()

        len_column_names = len(self._column_names)

        # Validate column_types
        if column_types is None:
            column_types = TypeTester()
        elif isinstance(column_types, dict):
            for v in column_types.values():
                if not isinstance(v, DataType):
                    raise ValueError('Column types must be instances of DataType.')

            # The dict is handed to the tester as its `force` argument.
            column_types = TypeTester(force=column_types)
        elif not isinstance(column_types, TypeTester):
            # An explicit sequence of types: every entry must be a DataType.
            for column_type in column_types:
                if not isinstance(column_type, DataType):
                    raise ValueError('Column types must be instances of DataType.')

        if isinstance(column_types, TypeTester):
            # The tester's run() result supplies the column types.
            self._column_types = column_types.run(rows, self._column_names)
        else:
            self._column_types = tuple(column_types)

        if len_column_names != len(self._column_types):
            raise ValueError('column_names and column_types must be the same length.')

        if not _is_fork:
            new_rows = []
            cast_funcs = [c.cast for c in self._column_types]

            for i, row in enumerate(rows):
                len_row = len(row)

                if len_row > len_column_names:
                    raise ValueError('Row %i has %i values, but Table only has %i columns.' % (i, len_row, len_column_names))
                elif len(row) < len_column_names:
                    # Pad short rows with nulls up to the column count.
                    row = chain(row, [None] * (len_column_names - len_row))

                row_values = []
                for j, d in enumerate(row):
                    try:
                        row_values.append(cast_funcs[j](d))
                    except CastError as e:
                        # Re-raise with the row index and column name appended
                        # so the failing cell can be located.
                        raise CastError(str(e) + ' Error at row %s column %s.' % (i, self._column_names[j]))

                new_rows.append(Row(row_values, self._column_names))
        else:
            # Forked tables reuse the supplied rows without casting.
            new_rows = rows

        if row_names:
            computed_row_names = []

            if isinstance(row_names, six.string_types):
                # A string names the column whose values become the row names.
                for row in new_rows:
                    name = row[row_names]
                    computed_row_names.append(name)
            elif hasattr(row_names, '__call__'):
                # A callable derives each name from its row.
                for row in new_rows:
                    name = row_names(row)
                    computed_row_names.append(name)
            elif utils.issequence(row_names):
                computed_row_names = row_names
            else:
                raise ValueError('row_names must be a column name, function or sequence')

            # NOTE(review): presumably ints are rejected because integer keys
            # would be ambiguous with positional indexes -- confirm.
            for row_name in computed_row_names:
                if type(row_name) is int:
                    raise ValueError('Row names cannot be of type int. Use Decimal for numbered row names.')

            self._row_names = tuple(computed_row_names)
        else:
            self._row_names = None

        self._rows = MappedSequence(new_rows, self._row_names)

        # Build columns
        new_columns = []

        for i in range(len_column_names):
            name = self._column_names[i]
            data_type = self._column_types[i]

            # Each column receives the shared rows sequence and row names.
            column = Column(i, name, data_type, self._rows, row_names=self._row_names)

            new_columns.append(column)

        self._columns = MappedSequence(new_columns, self._column_names)

    def __str__(self):
        """
        Print the table's structure using :meth:`.Table.print_structure`.
        """
        # Capture the printed structure in an in-memory buffer and return it.
        structure = six.StringIO()

        self.print_structure(output=structure)

        return structure.getvalue()

    def __len__(self):
        """
        Shorthand for :code:`len(table.rows)`.
        """
        return self._rows.__len__()

    def __iter__(self):
        """
        Shorthand for :code:`iter(table.rows)`.
        """
        return self._rows.__iter__()

    def __getitem__(self, key):
        """
        Shorthand for :code:`table.rows[foo]`.
        """
        return self._rows.__getitem__(key)

    @property
    def column_types(self):
        """
        A tuple of :class:`.DataType` instances.
        """
        return self._column_types

    @property
    def column_names(self):
        """
        A tuple of strings.
        """
        return self._column_names

    @property
    def row_names(self):
        """
        A tuple of row names, if this table has row names.

        If this table does not have row names, then :code:`None`.
        """
        return self._row_names

    @property
    def columns(self):
        """
        A :class:`.MappedSequence` with column names for keys and
        :class:`.Column` instances for values.
        """
        return self._columns

    @property
    def rows(self):
        """
        A :class:`.MappedSequence` with row names for keys (if specified) and
        :class:`.Row` instances for values.
        """
        return self._rows

    def _fork(self, rows, column_names=None, column_types=None, row_names=None):
        """
        Create a new table using the metadata from this one.

        This method is used internally by functions like
        :meth:`.Table.order_by`.

        :param rows:
            Row data for the forked table.
        :param column_names:
            Column names for the forked table. If not specified, fork will use
            this table's column names.
        :param column_types:
            Column types for the forked table. If not specified, fork will use
            this table's column types.
        :param row_names:
            Row names for the forked table. If not specified, fork will use
            this table's row names.
        """
        if column_names is None:
            column_names = self._column_names

        if column_types is None:
            column_types = self._column_types

        if row_names is None:
            row_names = self._row_names

        # _is_fork=True makes the constructor reuse `rows` without casting.
        return Table(rows, column_names, column_types, row_names=row_names, _is_fork=True)

    def print_csv(self, **kwargs):
        """
        Print this table as a CSV.

        This is the same as passing :code:`sys.stdout` to :meth:`.Table.to_csv`.

        :code:`kwargs` will be passed on to :meth:`.Table.to_csv`.
        """
        self.to_csv(sys.stdout, **kwargs)

    def print_json(self, **kwargs):
        """
        Print this table as JSON.

        This is the same as passing :code:`sys.stdout` to
        :meth:`.Table.to_json`.

        :code:`kwargs` will be passed on to :meth:`.Table.to_json`.
        """
        self.to_json(sys.stdout, **kwargs)
Esempio n. 9
0
class TestMappedSequence(unittest.TestCase):
    # Covers the sequence-style and mapping-style access of MappedSequence
    # using a three-value fixture keyed by 'one', 'two' and 'three'.

    def setUp(self):
        self.column_names = ('one', 'two', 'three')
        self.data = (u'a', u'b', u'c')
        self.row = MappedSequence(self.data, self.column_names)

    def test_is_immutable(self):
        # Assignment by index and by key must both be refused.
        for key, value in ((0, 'foo'), ('one', 100)):
            with self.assertRaises(TypeError):
                self.row[key] = value

    def test_stringify(self):
        # Python 2 reprs carry the u'' prefix on unicode strings.
        if six.PY2:
            expected = "<agate.MappedSequence: (u'a', u'b', u'c')>"
        else:
            expected = "<agate.MappedSequence: ('a', 'b', 'c')>"

        self.assertEqual(str(self.row), expected)

    def test_stringify_long(self):
        # Six values are supplied; the expected text shows the first five
        # followed by an ellipsis.
        names = ('one', 'two', 'three', 'four', 'five', 'six')
        values = (u'a', u'b', u'c', u'd', u'e', u'f')
        long_row = MappedSequence(values, names)

        if six.PY2:
            expected = "<agate.MappedSequence: (u'a', u'b', u'c', u'd', u'e', ...)>"
        else:
            expected = "<agate.MappedSequence: ('a', 'b', 'c', 'd', 'e', ...)>"

        self.assertEqual(str(long_row), expected)

    def test_length(self):
        self.assertEqual(len(self.row), 3)

    def test_eq(self):
        twin = MappedSequence(self.data, self.column_names)

        for same in ((u'a', u'b', u'c'), [u'a', u'b', u'c'], twin):
            self.assertTrue(self.row == same)

        for different in ((u'a', u'b', u'c', u'd'), 1):
            self.assertFalse(self.row == different)

    def test_ne(self):
        twin = MappedSequence(self.data, self.column_names)

        for same in ((u'a', u'b', u'c'), [u'a', u'b', u'c'], twin):
            self.assertFalse(self.row != same)

        for different in ((u'a', u'b', u'c', u'd'), 1):
            self.assertTrue(self.row != different)

    def test_contains(self):
        # Membership is tested against the values.
        self.assertIn('a', self.row)
        self.assertNotIn('d', self.row)

    def test_set_item(self):
        # Neither an existing key nor a new key may be assigned.
        for key, value in (('one', u't'), ('five', u'g')):
            with self.assertRaises(TypeError):
                self.row[key] = value

    def test_get_item(self):
        for key, expected in (('one', 'a'), ('two', 'b'), ('three', 'c')):
            self.assertEqual(self.row[key], expected)

    def test_get_by_key(self):
        # The same value is reachable by name and by position.
        for key in ('one', 0):
            self.assertEqual(self.row[key], 'a')

    def test_get_by_slice(self):
        tail = self.row[1:]

        self.assertSequenceEqual(tail, ('b', 'c'))

    def test_get_invalid(self):
        # Out-of-range positions and unknown names fail differently.
        for error, key in ((IndexError, 3), (KeyError, 'foo')):
            with self.assertRaises(error):
                self.row[key]

    def test_keys(self):
        # keys() hands back the very object passed in as keys.
        self.assertIs(self.row.keys(), self.column_names)

    def test_values(self):
        # values() hands back the very object passed in as values.
        self.assertIs(self.row.values(), self.data)

    def test_items(self):
        expected = [('one', 'a'), ('two', 'b'), ('three', 'c')]

        self.assertSequenceEqual(self.row.items(), expected)

    def test_get(self):
        self.assertEqual(self.row.get('one'), 'a')

    def test_get_default(self):
        # A missing key yields None, or the supplied default.
        missing = self.row.get('four')
        fallback = self.row.get('four', 'foo')

        self.assertEqual(missing, None)
        self.assertEqual(fallback, 'foo')

    def test_dict(self):
        expected = {'one': 'a', 'two': 'b', 'three': 'c'}

        self.assertDictEqual(self.row.dict(), expected)

    def test_dict_no_keys(self):
        # Without keys there is nothing to build a dict from.
        keyless = MappedSequence(self.data)

        with self.assertRaises(KeyError):
            keyless.dict()

    def test_iterate(self):
        it = iter(self.row)

        for expected in ('a', 'b', 'c'):
            self.assertSequenceEqual(next(it), expected)

        # The iterator is exhausted after the three values.
        with self.assertRaises(StopIteration):
            next(it)
Esempio n. 10
0
    def test_dict_no_keys(self):
        # Without keys there is nothing to build a dict from.
        keyless = MappedSequence(self.data)

        with self.assertRaises(KeyError):
            keyless.dict()
Esempio n. 11
0
    def __init__(self,
                 rows,
                 column_names=None,
                 column_types=None,
                 row_names=None,
                 _is_fork=False):
        """
        Build a table from row data, column names and column types.

        :param rows:
            A sequence of rows (not a string). Unless :code:`_is_fork` is
            true, every row must support :func:`len`; rows shorter than the
            column count are padded with :code:`None` and each value is
            passed through the :code:`cast` method of its column's type.
        :param column_names:
            A sequence whose entries are strings or :code:`None`. A
            :code:`None` entry is replaced by a letter name for its position
            and a repeated name gets a numeric suffix; both cases warn. If
            falsy, letter names are generated from the length of the first
            row, or the table has no columns when there are no rows.
        :param column_types:
            A sequence of :class:`.DataType` instances, a
            :class:`.TypeTester`, a dict of :class:`.DataType` values (passed
            to a :class:`.TypeTester` as :code:`force`), or :code:`None` for
            a default :class:`.TypeTester`.
        :param row_names:
            Optional. A column name, a callable taking a row, or a sequence
            of names.
        :param _is_fork:
            When true, :code:`rows` is used as given, with no length check
            and no casting.
        :raises ValueError:
            If :code:`rows` is a string, a column name is neither a string
            nor :code:`None`, a column type is not a :class:`.DataType`, the
            name and type counts differ, a row has too many values, or
            :code:`row_names` is of an unsupported kind.
        """
        if isinstance(rows, six.string_types):
            raise ValueError(
                'When created directly, the first argument to Table must be a sequence of rows. Did you want agate.Table.from_csv?'
            )

        # Validate column names
        if column_names:
            final_column_names = []

            for i, column_name in enumerate(column_names):
                if column_name is None:
                    new_column_name = utils.letter_name(i)
                    warnings.warn(
                        'Column name not specified. "%s" will be used as name.'
                        % new_column_name, RuntimeWarning)
                elif isinstance(column_name, six.string_types):
                    new_column_name = column_name
                else:
                    raise ValueError('Column names must be strings or None.')

                # Append _2, _3, ... until the name no longer collides with
                # one already accepted.
                final_column_name = new_column_name
                duplicates = 0
                while final_column_name in final_column_names:
                    final_column_name = new_column_name + '_' + str(
                        duplicates + 2)
                    duplicates += 1

                if duplicates > 0:
                    warn_duplicate_column(new_column_name, final_column_name)

                final_column_names.append(final_column_name)

            self._column_names = tuple(final_column_names)
        elif rows:
            self._column_names = tuple(
                utils.letter_name(i) for i in range(len(rows[0])))
            warnings.warn(
                'Column names not specified. "%s" will be used as names.' %
                str(self._column_names),
                RuntimeWarning,
                stacklevel=2)
        else:
            # Store a tuple here as in every other branch: a list would
            # never compare equal to another table's tuple of names.
            self._column_names = tuple()

        len_column_names = len(self._column_names)

        # Validate column_types
        if column_types is None:
            column_types = TypeTester()

        elif isinstance(column_types, dict):
            for v in six.itervalues(column_types):
                if not isinstance(v, DataType):
                    raise ValueError(
                        'Column types must be instances of DataType.')
            column_types = TypeTester(force=column_types)

        elif not isinstance(column_types, TypeTester):
            for column_type in column_types:
                if not isinstance(column_type, DataType):
                    raise ValueError(
                        'Column types must be instances of DataType.')

        if isinstance(column_types, TypeTester):
            # The tester's run() result supplies the column types.
            self._column_types = column_types.run(rows, self._column_names)
        else:
            self._column_types = tuple(column_types)

        if len_column_names != len(self._column_types):
            raise ValueError(
                'column_names and column_types must be the same length.')

        if not _is_fork:
            new_rows = []
            cast_funcs = [c.cast for c in self._column_types]

            for i, row in enumerate(rows):
                len_row = len(row)

                if len_row > len_column_names:
                    raise ValueError(
                        'Row %i has %i values, but Table only has %i columns.'
                        % (i, len_row, len_column_names))
                elif len_row < len_column_names:
                    # Pad short rows with nulls up to the column count.
                    row = chain(row, [None] * (len_column_names - len_row))

                # `j` is the column position; a distinct name avoids shadowing
                # the row index `i`.
                new_rows.append(
                    Row(tuple(cast_funcs[j](d) for j, d in enumerate(row)),
                        self._column_names))
        else:
            new_rows = rows

        if row_names:
            computed_row_names = []

            if isinstance(row_names, six.string_types):
                # A string names the column whose values become the row names.
                for row in new_rows:
                    name = row[row_names]
                    computed_row_names.append(name)
            elif hasattr(row_names, '__call__'):
                # A callable derives each name from its row.
                for row in new_rows:
                    name = row_names(row)
                    computed_row_names.append(name)
            elif utils.issequence(row_names):
                computed_row_names = row_names
            else:
                raise ValueError(
                    'row_names must be a column name, function or sequence')

            self._row_names = tuple(computed_row_names)
        else:
            self._row_names = None

        self._rows = MappedSequence(new_rows, self._row_names)

        # Build columns
        new_columns = []

        for i, (name, data_type) in enumerate(
                zip(self._column_names, self._column_types)):
            column = Column(i,
                            name,
                            data_type,
                            self._rows,
                            row_names=self._row_names)
            new_columns.append(column)

        self._columns = MappedSequence(new_columns, self._column_names)
Esempio n. 12
0
class Table(object):
    """
    A two-dimensional dataset organized into rows and columns.

    A column is a "vertical" slice whose values all share a single data type.
    A row is a "horizontal" slice whose values may (and usually do) have
    different types.

    :class:`.Column` instances are exposed through the
    :attr:`.Table.columns` property and can be looked up either by numeric
    index or by unique column name.

    :class:`.Row` instances are exposed through the :attr:`.Table.rows`
    property and can be looked up either by numeric index or, when row names
    were supplied, by row name.

    :param rows:
        The data, as a sequence of sequences (tuples, lists, etc.). A row
        with fewer values than there are columns is padded with nulls. A row
        with more values than there are columns is an error. A string is
        rejected outright.
    :param column_names:
        A sequence of string names, one per column. Empty or `None` entries
        are replaced by names generated with :func:`.letter_name`, and
        repeated names are made unique by appending a numeric suffix; both
        cases emit a warning. If the whole argument is omitted, every column
        is named with :func:`.letter_name` based on the width of the first
        row.
    :param column_types:
        A sequence of :class:`.DataType` instances, a :class:`.TypeTester`,
        or `None`, in which case a default TypeTester is used. A dictionary
        mapping column names to :class:`.DataType` instances may be given
        instead to force only some of the types.
    :param row_names:
        Optional unique names for the rows. May be 1) the name of a column
        holding a unique identifier for each row, 2) a function that accepts
        a :class:`.Row` and returns its identifier, or 3) a sequence of
        identifiers, one per row. Uniqueness is not validated, so make sure
        the values really are unique. Identifiers of type :code:`int` are
        rejected.
    :param _is_fork:
        Internal flag used when data is propagated from an existing table.
        When :code:`True`, :code:`rows` is used as-is (the values are assumed
        to already be :class:`.Row` instances) and no casting is performed.
    """
    def __init__(self,
                 rows,
                 column_names=None,
                 column_types=None,
                 row_names=None,
                 _is_fork=False):
        if isinstance(rows, six.string_types):
            raise ValueError(
                'When created directly, the first argument to Table must be '
                'a sequence of rows. Did you want agate.Table.from_csv?')

        # --- Column names -------------------------------------------------
        if column_names:
            resolved_names = []

            for position, raw_name in enumerate(column_names):
                if not raw_name:
                    # Missing name: fall back to a generated letter name.
                    base_name = utils.letter_name(position)
                    warn_unnamed_column(position, base_name)
                elif isinstance(raw_name, six.string_types):
                    base_name = raw_name
                else:
                    raise ValueError('Column names must be strings or None.')

                # Append "_2", "_3", ... until the name no longer collides
                # with one that has already been accepted.
                unique_name = base_name
                collisions = 0

                while unique_name in resolved_names:
                    collisions += 1
                    unique_name = base_name + '_' + str(collisions + 1)

                if collisions > 0:
                    warn_duplicate_column(base_name, unique_name)

                resolved_names.append(unique_name)

            self._column_names = tuple(resolved_names)
        elif rows:
            # No names supplied: derive them from the width of the first row.
            first_row_width = len(rows[0])
            self._column_names = tuple(
                map(utils.letter_name, range(first_row_width)))
            warnings.warn(
                'Column names not specified. "%s" will be used as names.' %
                str(self._column_names),
                RuntimeWarning,
                stacklevel=2)
        else:
            self._column_names = tuple()

        column_count = len(self._column_names)

        # --- Column types -------------------------------------------------
        if column_types is None:
            column_types = TypeTester()
        elif isinstance(column_types, dict):
            if not all(isinstance(forced, DataType)
                       for forced in column_types.values()):
                raise ValueError('Column types must be instances of DataType.')

            # A dict only pins some columns; the tester infers the rest.
            column_types = TypeTester(force=column_types)
        elif not isinstance(column_types, TypeTester):
            if not all(isinstance(candidate, DataType)
                       for candidate in column_types):
                raise ValueError('Column types must be instances of DataType.')

        if isinstance(column_types, TypeTester):
            self._column_types = column_types.run(rows, self._column_names)
        else:
            self._column_types = tuple(column_types)

        if len(self._column_types) != column_count:
            raise ValueError(
                'column_names and column_types must be the same length.')

        # --- Rows ---------------------------------------------------------
        if _is_fork:
            # Forked data is trusted and reused without casting.
            new_rows = rows
        else:
            casters = [data_type.cast for data_type in self._column_types]
            new_rows = []

            for row_index, raw_row in enumerate(rows):
                width = len(raw_row)

                if width > column_count:
                    raise ValueError(
                        'Row %i has %i values, but Table only has %i columns.'
                        % (row_index, width, column_count))

                if width < column_count:
                    # Pad short rows with nulls.
                    raw_row = chain(raw_row, [None] * (column_count - width))

                cast_values = []

                for column_index, datum in enumerate(raw_row):
                    try:
                        cast_value = casters[column_index](datum)
                    except CastError as e:
                        # Re-raise with the location of the offending cell.
                        location = ' Error at row %s column %s.' % (
                            row_index, self._column_names[column_index])
                        raise CastError(str(e) + location)

                    cast_values.append(cast_value)

                new_rows.append(Row(cast_values, self._column_names))

        # --- Row names ----------------------------------------------------
        if row_names:
            if isinstance(row_names, six.string_types):
                # A column name: use that column's value from each row.
                computed_row_names = [row[row_names] for row in new_rows]
            elif hasattr(row_names, '__call__'):
                # A key function applied to each row.
                computed_row_names = [row_names(row) for row in new_rows]
            elif utils.issequence(row_names):
                computed_row_names = row_names
            else:
                raise ValueError(
                    'row_names must be a column name, function or sequence')

            if any(type(candidate) is int for candidate in computed_row_names):
                raise ValueError(
                    'Row names cannot be of type int. Use Decimal for numbered row names.'
                )

            self._row_names = tuple(computed_row_names)
        else:
            self._row_names = None

        self._rows = MappedSequence(new_rows, self._row_names)

        # --- Columns ------------------------------------------------------
        typed_names = zip(self._column_names, self._column_types)
        new_columns = [
            Column(index,
                   name,
                   data_type,
                   self._rows,
                   row_names=self._row_names)
            for index, (name, data_type) in enumerate(typed_names)
        ]

        self._columns = MappedSequence(new_columns, self._column_names)

    def __str__(self):
        """
        Return the text that :meth:`.Table.print_structure` writes for this
        table.
        """
        buffer = six.StringIO()
        self.print_structure(output=buffer)

        return buffer.getvalue()

    def __len__(self):
        """
        Shorthand for :code:`len(table.rows)`.
        """
        return len(self._rows)

    def __iter__(self):
        """
        Shorthand for :code:`iter(table.rows)`.
        """
        return iter(self._rows)

    def __getitem__(self, key):
        """
        Shorthand for :code:`table.rows[key]`.
        """
        return self._rows[key]

    @property
    def column_types(self):
        """
        A tuple of :class:`.DataType` instances, one per column.
        """
        return self._column_types

    @property
    def column_names(self):
        """
        A tuple of column name strings.
        """
        return self._column_names

    @property
    def row_names(self):
        """
        A tuple of row names if this table has them, otherwise :code:`None`.
        """
        return self._row_names

    @property
    def columns(self):
        """
        A :class:`.MappedSequence` of :class:`.Column` instances keyed by
        column name.
        """
        return self._columns

    @property
    def rows(self):
        """
        A :class:`.MappedSequence` of :class:`.Row` instances, keyed by row
        name when row names were specified.
        """
        return self._rows

    def _fork(self,
              rows,
              column_names=None,
              column_types=None,
              row_names=None):
        """
        Create a new table that inherits metadata from this one.

        Used internally by functions like :meth:`.Table.order_by`.

        :param rows:
            Row data for the forked table.
        :param column_names:
            Column names for the forked table. Defaults to this table's
            column names.
        :param column_types:
            Column types for the forked table. Defaults to this table's
            column types.
        :param row_names:
            Row names for the forked table. Defaults to this table's row
            names.
        """
        names = self._column_names if column_names is None else column_names
        types = self._column_types if column_types is None else column_types
        labels = self._row_names if row_names is None else row_names

        return Table(rows, names, types, row_names=labels, _is_fork=True)

    def print_csv(self, **kwargs):
        """
        Write this table to :code:`sys.stdout` as CSV.

        Equivalent to calling :meth:`.Table.to_csv` with :code:`sys.stdout`;
        any :code:`kwargs` are forwarded to :meth:`.Table.to_csv`.
        """
        stream = sys.stdout
        self.to_csv(stream, **kwargs)

    def print_json(self, **kwargs):
        """
        Write this table to :code:`sys.stdout` as JSON.

        Equivalent to calling :meth:`.Table.to_json` with :code:`sys.stdout`;
        any :code:`kwargs` are forwarded to :meth:`.Table.to_json`.
        """
        stream = sys.stdout
        self.to_json(stream, **kwargs)
Esempio n. 13
0
    def __init__(self,
                 rows,
                 column_names=None,
                 column_types=None,
                 row_names=None,
                 _is_fork=False):
        """
        Build the table's column names, column types, rows and columns.

        :param rows:
            A sequence of row sequences. Strings are rejected. Short rows are
            padded with :code:`None`; rows wider than the table are an error.
        :param column_names:
            Column names, passed through :func:`utils.deduplicate`. If
            omitted, names are generated with :func:`utils.letter_name` from
            the width of the first row and a :class:`RuntimeWarning` is
            emitted.
        :param column_types:
            :code:`None`, a :class:`TypeTester`, a dict of forced
            :class:`DataType` instances, or a sequence of :class:`DataType`
            instances.
        :param row_names:
            A column name, a callable taking a row, or a sequence of names.
            Names of type :code:`int` are rejected.
        :param _is_fork:
            When true, :code:`rows` is used as-is without casting.
        :raises ValueError:
            On any of the validation failures described above.
        :raises CastError:
            When a value cannot be cast; the message names the row and column.
        """
        if isinstance(rows, six.string_types):
            raise ValueError(
                'When created directly, the first argument to Table must be '
                'a sequence of rows. Did you want agate.Table.from_csv?')

        # Column names: explicit, generated from the first row, or none.
        if column_names:
            self._column_names = utils.deduplicate(column_names,
                                                   column_names=True)
        elif rows:
            generated = map(utils.letter_name, range(len(rows[0])))
            self._column_names = tuple(generated)
            warnings.warn(
                'Column names not specified. "%s" will be used as names.' %
                str(self._column_names),
                RuntimeWarning,
                stacklevel=2)
        else:
            self._column_names = tuple()

        column_count = len(self._column_names)

        # Column types: pick a TypeTester to infer them, or validate the
        # explicit sequence (tester stays None in that case).
        if column_types is None:
            tester = TypeTester()
        elif isinstance(column_types, dict):
            for forced_type in column_types.values():
                if not isinstance(forced_type, DataType):
                    raise ValueError(
                        'Column types must be instances of DataType.')

            tester = TypeTester(force=column_types)
        elif isinstance(column_types, TypeTester):
            tester = column_types
        else:
            for explicit_type in column_types:
                if not isinstance(explicit_type, DataType):
                    raise ValueError(
                        'Column types must be instances of DataType.')

            tester = None

        if tester is None:
            self._column_types = tuple(column_types)
        else:
            self._column_types = tester.run(rows, self._column_names)

        if len(self._column_types) != column_count:
            raise ValueError(
                'column_names and column_types must be the same length.')

        # Rows: forks reuse their rows untouched; otherwise cast every value.
        if _is_fork:
            new_rows = rows
        else:
            new_rows = []
            casters = [data_type.cast for data_type in self._column_types]

            for row_number, raw_row in enumerate(rows):
                value_count = len(raw_row)

                if value_count > column_count:
                    raise ValueError(
                        'Row %i has %i values, but Table only has %i columns.'
                        % (row_number, value_count, column_count))

                if value_count < column_count:
                    padding = [None] * (column_count - value_count)
                    raw_row = chain(raw_row, padding)

                cast_row = []

                for column_number, raw_value in enumerate(raw_row):
                    try:
                        cast_row.append(casters[column_number](raw_value))
                    except CastError as e:
                        # Add the cell's location to the original message.
                        where = (row_number, self._column_names[column_number])
                        raise CastError(
                            str(e) + ' Error at row %s column %s.' % where)

                new_rows.append(Row(cast_row, self._column_names))

        # Row names: column lookup, key function, or explicit sequence.
        if not row_names:
            self._row_names = None
        else:
            if isinstance(row_names, six.string_types):
                computed_row_names = [row[row_names] for row in new_rows]
            elif hasattr(row_names, '__call__'):
                computed_row_names = [row_names(row) for row in new_rows]
            elif utils.issequence(row_names):
                computed_row_names = row_names
            else:
                raise ValueError(
                    'row_names must be a column name, function or sequence')

            for candidate in computed_row_names:
                if type(candidate) is int:
                    raise ValueError(
                        'Row names cannot be of type int. Use Decimal for numbered row names.'
                    )

            self._row_names = tuple(computed_row_names)

        self._rows = MappedSequence(new_rows, self._row_names)

        # Columns: one Column per (name, type) pair, sharing the row sequence.
        new_columns = []

        for position, (name, data_type) in enumerate(
                zip(self._column_names, self._column_types)):
            new_columns.append(
                Column(position,
                       name,
                       data_type,
                       self._rows,
                       row_names=self._row_names))

        self._columns = MappedSequence(new_columns, self._column_names)
Esempio n. 14
0
    def __init__(self, rows, column_names=None, column_types=None, row_names=None, _is_fork=False):
        """
        Set up column names, column types, rows and columns for the table.

        ``rows`` must be a sequence of row sequences (a string raises
        ``ValueError``). ``column_names`` are de-duplicated with
        ``utils.deduplicate``; when omitted they are generated from the width
        of the first row and a ``RuntimeWarning`` is issued. ``column_types``
        may be ``None``, a ``TypeTester``, a dict of forced ``DataType``
        instances, or a sequence of ``DataType`` instances. ``row_names`` may
        be a column name, a callable taking a row, or a sequence; names of
        type ``int`` are rejected. With ``_is_fork`` set, ``rows`` is stored
        without casting.
        """
        if isinstance(rows, six.string_types):
            raise ValueError(
                'When created directly, the first argument to Table must be a sequence of rows. '
                'Did you want agate.Table.from_csv?'
            )

        # ---- column names ----
        if column_names:
            self._column_names = utils.deduplicate(
                column_names, column_names=True)
        elif rows:
            width = len(rows[0])
            self._column_names = tuple(
                utils.letter_name(index) for index in range(width))

            message = 'Column names not specified. "%s" will be used as names.' % str(
                self._column_names)
            warnings.warn(message, RuntimeWarning, stacklevel=2)
        else:
            self._column_names = tuple()

        n_columns = len(self._column_names)

        # ---- column types ----
        invalid_type_message = 'Column types must be instances of DataType.'

        if column_types is None:
            column_types = TypeTester()
        elif isinstance(column_types, dict):
            if any(not isinstance(value, DataType)
                   for value in column_types.values()):
                raise ValueError(invalid_type_message)

            column_types = TypeTester(force=column_types)
        elif not isinstance(column_types, TypeTester):
            if any(not isinstance(item, DataType) for item in column_types):
                raise ValueError(invalid_type_message)

        # A TypeTester infers the types from the data; anything else is
        # already an explicit sequence of types.
        if isinstance(column_types, TypeTester):
            self._column_types = column_types.run(rows, self._column_names)
        else:
            self._column_types = tuple(column_types)

        if n_columns != len(self._column_types):
            raise ValueError(
                'column_names and column_types must be the same length.')

        # ---- rows ----
        if _is_fork:
            new_rows = rows
        else:
            new_rows = []
            cast_funcs = [column_type.cast for column_type in self._column_types]

            for row_idx, source_row in enumerate(rows):
                n_values = len(source_row)

                if n_values > n_columns:
                    raise ValueError(
                        'Row %i has %i values, but Table only has %i columns.'
                        % (row_idx, n_values, n_columns))

                if n_values < n_columns:
                    # Fill the missing trailing cells with nulls.
                    source_row = chain(
                        source_row, [None] * (n_columns - n_values))

                converted = []

                for col_idx, cell in enumerate(source_row):
                    try:
                        converted.append(cast_funcs[col_idx](cell))
                    except CastError as e:
                        raise CastError(
                            str(e) + ' Error at row %s column %s.' %
                            (row_idx, self._column_names[col_idx]))

                new_rows.append(Row(converted, self._column_names))

        # ---- row names ----
        if row_names:
            if isinstance(row_names, six.string_types):
                computed_row_names = [row[row_names] for row in new_rows]
            elif hasattr(row_names, '__call__'):
                computed_row_names = [row_names(row) for row in new_rows]
            elif utils.issequence(row_names):
                computed_row_names = row_names
            else:
                raise ValueError(
                    'row_names must be a column name, function or sequence')

            if any(type(label) is int for label in computed_row_names):
                raise ValueError(
                    'Row names cannot be of type int. Use Decimal for numbered row names.')

            self._row_names = tuple(computed_row_names)
        else:
            self._row_names = None

        self._rows = MappedSequence(new_rows, self._row_names)

        # ---- columns ----
        self._columns = MappedSequence(
            [
                Column(index, name, data_type, self._rows,
                       row_names=self._row_names)
                for index, (name, data_type) in enumerate(
                    zip(self._column_names, self._column_types))
            ],
            self._column_names)
Esempio n. 15
0
    def __init__(self,
                 rows,
                 column_names=None,
                 column_types=None,
                 row_names=None,
                 _is_fork=False):
        """
        Build the table's column names, column types, rows and columns.

        :param rows:
            A sequence of row sequences. Short rows are padded with
            :code:`None`; rows wider than the table are an error.
        :param column_names:
            A sequence of column names. :code:`None` entries are replaced by
            names generated with :func:`utils.letter_name`. If omitted, every
            name is generated from the width of the first row; an empty
            :code:`rows` then yields a table with no columns.
        :param column_types:
            :code:`None` (a default :class:`TypeTester` is used), a
            :class:`TypeTester`, or a sequence of :class:`DataType`
            instances.
        :param row_names:
            A column name, a callable that takes a row, or a sequence of
            names.
        :param _is_fork:
            When true, :code:`rows` is stored as-is without casting.
        :raises ValueError:
            If a column name is neither a string nor :code:`None`, if column
            names are duplicated, if a column type is not a
            :class:`DataType`, if names and types differ in length, if a row
            has too many values, or if :code:`row_names` is of an
            unsupported kind.
        """
        # Validate column names
        if column_names:
            final_column_names = []

            for i, column_name in enumerate(column_names):
                if column_name is None:
                    final_column_names.append(utils.letter_name(i))
                elif isinstance(column_name, six.string_types):
                    final_column_names.append(column_name)
                else:
                    raise ValueError('Column names must be strings or None.')

            if len(set(final_column_names)) != len(final_column_names):
                raise ValueError('Duplicate column names are not allowed.')

            self._column_names = tuple(final_column_names)
        else:
            # Derive names from the first row. With no rows there is nothing
            # to measure, so the table simply has no columns instead of
            # failing with an IndexError.
            try:
                first_row = rows[0]
            except IndexError:
                self._column_names = tuple()
            else:
                self._column_names = tuple(
                    utils.letter_name(i) for i in range(len(first_row)))

        len_column_names = len(self._column_names)

        # Validate column_types
        if column_types is None:
            column_types = TypeTester()
        elif isinstance(column_types, TypeTester):
            pass
        else:
            for column_type in column_types:
                if not isinstance(column_type, DataType):
                    raise ValueError(
                        'Column types must be instances of DataType.')

        # A TypeTester infers the types from the data; anything else is
        # already an explicit sequence of types.
        if isinstance(column_types, TypeTester):
            self._column_types = column_types.run(rows, self._column_names)
        else:
            self._column_types = tuple(column_types)

        if len_column_names != len(self._column_types):
            raise ValueError(
                'column_names and column_types must be the same length.')

        if not _is_fork:
            new_rows = []
            cast_funcs = [c.cast for c in self._column_types]

            for i, row in enumerate(rows):
                len_row = len(row)

                if len_row > len_column_names:
                    raise ValueError(
                        'Row %i has %i values, but Table only has %i columns.'
                        % (i, len_row, len_column_names))
                elif len_row < len_column_names:
                    # Pad short rows with nulls.
                    row = chain(row, [None] * (len_column_names - len_row))

                # A distinct index name keeps the column position from
                # shadowing the row index ``i`` used in the error above.
                new_rows.append(
                    Row(tuple(cast_funcs[j](d) for j, d in enumerate(row)),
                        self._column_names))
        else:
            # Forked data is reused without casting.
            new_rows = rows

        if row_names:
            computed_row_names = []

            if isinstance(row_names, six.string_types):
                # A column name: use that column's value from each row.
                for row in new_rows:
                    name = row[row_names]
                    computed_row_names.append(name)
            elif hasattr(row_names, '__call__'):
                # A key function applied to each row.
                for row in new_rows:
                    name = row_names(row)
                    computed_row_names.append(name)
            elif isinstance(row_names, Sequence):
                computed_row_names = row_names
            else:
                raise ValueError(
                    'row_names must be a column name, function or sequence')

            self._row_names = tuple(computed_row_names)
        else:
            self._row_names = None

        self._rows = MappedSequence(new_rows, self._row_names)

        # Build columns
        new_columns = []

        for i, (name, data_type) in enumerate(
                zip(self._column_names, self._column_types)):
            column = Column(i,
                            name,
                            data_type,
                            self._rows,
                            row_names=self._row_names)
            new_columns.append(column)

        self._columns = MappedSequence(new_columns, self._column_names)
Esempio n. 16
0
    def test_dict_no_keys(self):
        # A sequence built without keys cannot be converted to a dict.
        unkeyed = MappedSequence(self.data)

        with self.assertRaises(KeyError):
            unkeyed.dict()
Esempio n. 17
0
class TestMappedSequence(unittest.TestCase):
    """
    Tests for :class:`MappedSequence` covering immutability, string
    conversion, comparison, membership, item access and iteration.
    """

    def setUp(self):
        names = ('one', 'two', 'three')
        values = (u'a', u'b', u'c')

        self.column_names = names
        self.data = values
        self.row = MappedSequence(values, names)

    def test_is_immutable(self):
        # Assignment must fail for both positional and keyed access.
        for key, replacement in ((0, 'foo'), ('one', 100)):
            with self.assertRaises(TypeError):
                self.row[key] = replacement

    def test_stringify(self):
        # Python 2 renders unicode literals with a ``u`` prefix.
        if six.PY2:
            expected = "<agate.MappedSequence: (u'a', u'b', u'c')>"
        else:
            expected = "<agate.MappedSequence: ('a', 'b', 'c')>"

        self.assertEqual(str(self.row), expected)

    def test_stringify_long(self):
        # Only the first five values are shown, followed by an ellipsis.
        names = ('one', 'two', 'three', 'four', 'five', 'six')
        values = (u'a', u'b', u'c', u'd', u'e', u'f')
        long_row = MappedSequence(values, names)

        if six.PY2:
            expected = "<agate.MappedSequence: (u'a', u'b', u'c', u'd', u'e', ...)>"
        else:
            expected = "<agate.MappedSequence: ('a', 'b', 'c', 'd', 'e', ...)>"

        self.assertEqual(str(long_row), expected)

    def test_length(self):
        self.assertEqual(len(self.row), 3)

    def test_eq(self):
        twin = MappedSequence(self.data, self.column_names)

        for equal_value in ((u'a', u'b', u'c'), [u'a', u'b', u'c'], twin):
            self.assertTrue(self.row == equal_value)

        for unequal_value in ((u'a', u'b', u'c', u'd'), 1):
            self.assertFalse(self.row == unequal_value)

    def test_ne(self):
        twin = MappedSequence(self.data, self.column_names)

        for equal_value in ((u'a', u'b', u'c'), [u'a', u'b', u'c'], twin):
            self.assertFalse(self.row != equal_value)

        for unequal_value in ((u'a', u'b', u'c', u'd'), 1):
            self.assertTrue(self.row != unequal_value)

    def test_contains(self):
        self.assertIn('a', self.row)
        self.assertNotIn('d', self.row)

    def test_get_item(self):
        for key, expected in (('one', 'a'), ('two', 'b'), ('three', 'c')):
            self.assertEqual(self.row[key], expected)

    def test_get_by_key(self):
        # The same value is reachable by name and by position.
        for key in ('one', 0):
            self.assertEqual(self.row[key], 'a')

    def test_get_by_slice(self):
        tail = self.row[1:]

        self.assertSequenceEqual(tail, ('b', 'c'))

    def test_get_invalid(self):
        # Out-of-range positions and unknown names raise different errors.
        with self.assertRaises(IndexError):
            self.row[3]

        with self.assertRaises(KeyError):
            self.row['foo']

    def test_keys(self):
        self.assertIs(self.row.keys(), self.column_names)

    def test_values(self):
        self.assertIs(self.row.values(), self.data)

    def test_items(self):
        expected_pairs = [('one', 'a'), ('two', 'b'), ('three', 'c')]

        self.assertSequenceEqual(self.row.items(), expected_pairs)

    def test_get(self):
        self.assertEqual(self.row.get('one'), 'a')

        # Without a default, a missing key is an error.
        with self.assertRaises(KeyError):
            self.row.get('four')

    def test_get_default(self):
        fallback = self.row.get('four', 'foo')

        self.assertEqual(fallback, 'foo')

    def test_dict(self):
        expected = {'one': 'a', 'two': 'b', 'three': 'c'}

        self.assertDictEqual(self.row.dict(), expected)

    def test_dict_no_keys(self):
        # A sequence built without keys cannot be converted to a dict.
        unkeyed = MappedSequence(self.data)

        with self.assertRaises(KeyError):
            unkeyed.dict()

    def test_iterate(self):
        iterator = iter(self.row)

        for expected in ('a', 'b', 'c'):
            self.assertSequenceEqual(next(iterator), expected)

        # The iterator is exhausted after the last value.
        with self.assertRaises(StopIteration):
            next(iterator)
Esempio n. 18
0
 def setUp(self):
     """Create a three-item MappedSequence fixture with its keys and data."""
     names = ('one', 'two', 'three')
     values = (u'a', u'b', u'c')

     self.column_names = names
     self.data = values
     self.row = MappedSequence(values, names)
Esempio n. 19
0
 def setUp(self):
     """Prepare the keys, the values and the MappedSequence under test."""
     keys = ('one', 'two', 'three')
     items = (u'a', u'b', u'c')

     self.column_names, self.data = keys, items
     self.row = MappedSequence(items, keys)