Example #1
0
    def test_no_cast_nulls(self):
        """
        Null-like inputs are cast to ``None`` by default and are passed
        through unchanged when ``cast_nulls=False``.
        """
        raw_values = ('', 'N/A', None)

        # Default construction: every null-like input becomes None.
        default_type = Text()
        self.assertSequenceEqual(
            tuple(default_type.cast(raw) for raw in raw_values),
            (None, None, None))

        # Null casting disabled: the inputs come back as given.
        passthrough_type = Text(cast_nulls=False)
        self.assertSequenceEqual(
            tuple(passthrough_type.cast(raw) for raw in raw_values),
            ('', 'N/A', None))
Example #2
0
    def test_monkeypatch_shadow(self):
        """
        After patching ``TryPatchShadow`` into ``Table``, ``columns`` is still
        a ``MappedSequence`` and ``foo`` is not available as an attribute.
        """
        table_before = Table([['blah']], ['foo'], [Text()])
        Table.monkeypatch(TryPatchShadow)
        table_after = Table([['blah']], ['foo'], [Text()])

        # Tables built before and after the patch are both checked.
        for patched_table in (table_before, table_after):
            self.assertIsInstance(patched_table.columns, MappedSequence)

        # Only the attribute access matters; the comparison result is unused.
        with self.assertRaises(AttributeError):
            table_after.foo == 'foo'
Example #3
0
    def test_normalize_column_types(self):
        """
        ``normalize`` called with explicit ``column_types`` yields ``Text``
        typed ``property`` and ``value`` columns.
        """
        source = Table(self.rows, self.column_names, self.column_types)

        result = source.normalize(
            'one',
            'three',
            column_types=[Text(), Text()])

        expected_rows = ((1, 'three', '4'),
                         (2, 'three', '3'),
                         (None, 'three', '2'))
        expected_names = ['one', 'property', 'value']
        expected_types = [Number, Text, Text]

        self.assertRows(result, expected_rows)
        self.assertColumnNames(result, expected_names)
        self.assertColumnTypes(result, expected_types)
Example #4
0
    def test_types_force_text(self):
        """
        A tester whose only candidate type is ``Text`` infers ``Text`` for a
        column of numeric-looking strings.
        """
        sample_rows = [('1.7', ), ('200000000', ), ('', )]

        text_only_tester = TypeTester(types=[Text()])
        column_types = text_only_tester.run(sample_rows, ['one'])

        first_type = column_types[0]
        self.assertIsInstance(first_type, Text)
Example #5
0
    def __init__(self,
                 tables,
                 keys,
                 key_name='group',
                 key_type=None,
                 _is_fork=False):
        """
        Initialise the tableset from parallel iterables of tables and keys.

        :param tables:
            An iterable of tables (or nested tablesets). It is materialised
            into a tuple; the first element supplies the reference column
            names and types.
        :param keys:
            An iterable of keys, materialised into a tuple and handed to
            :class:`MappedSequence` together with the tables.
        :param key_name:
            Stored as the key name. Defaults to ``'group'``.
        :param key_type:
            Stored as the key type. A new ``Text()`` is used when this is
            falsy.
        :param _is_fork:
            When true, the per-table column validation is skipped.
        :raises ValueError:
            If validation runs and a table's column types or column names
            do not match those of the reference table.
        """
        tables = tuple(tables)
        keys = tuple(keys)

        self._key_name = key_name
        self._key_type = key_type or Text()

        # Descend through nested tablesets until something that is not a
        # TableSet is reached; that object acts as the reference table.
        sample = tables[0]
        while isinstance(sample, TableSet):
            sample = sample[0]
        self._sample_table = sample

        self._column_types = sample.column_types
        self._column_names = sample.column_names

        if not _is_fork:
            for candidate in tables:
                # zip_longest pads the shorter side, so a differing number
                # of columns also fails the isinstance check below.
                paired_types = zip_longest(
                    candidate.column_types, self._column_types)

                for actual, expected in paired_types:
                    if not isinstance(actual, type(expected)):
                        raise ValueError(
                            'Not all tables have the same column types!')

                if candidate.column_names != self._column_names:
                    raise ValueError(
                        'Not all tables have the same column names!')

        MappedSequence.__init__(self, tables, keys)
Example #6
0
    def setUp(self):
        """
        Build three raw row sets, one ``Table`` per row set (kept in an
        ordered mapping) and a ``TableSet`` over those tables.
        """
        self.table1 = (
            ('a', 1, 4),
            ('b', 3, 7),
            ('c', 2, 2),
        )
        self.table2 = (
            ('a', 0, 3),
            ('b', 2, 3),
            ('c', 5, 3),
        )
        self.table3 = (
            ('a', 1, 10),
            ('b', 2, 1),
            ('c', 3, None),
        )

        self.text_type = Text()
        self.number_type = Number()

        self.column_names = ['one', 'two', 'three']
        self.column_types = [self.text_type] + [self.number_type] * 2

        # All tables share the same column names and types.
        named_rows = (
            ('table1', self.table1),
            ('table2', self.table2),
            ('table3', self.table3),
        )
        self.tables = OrderedDict(
            (name, Table(rows, self.column_names, self.column_types))
            for name, rows in named_rows)

        self.tablesets = TableSet(self.tables.values(), self.tables.keys())
Example #7
0
    def test_types_no_boolean(self):
        """
        When only ``Number`` and ``Text`` are candidate types, a column of
        'True'/'False' strings is inferred as ``Text``.
        """
        sample_rows = [('True', ), ('False', ), ('False', )]

        candidate_types = [Number(), Text()]
        column_types = TypeTester(types=candidate_types).run(
            sample_rows, ['one'])

        self.assertIsInstance(column_types[0], Text)
Example #8
0
    def test_from_csv_type_tester(self):
        """
        ``Table.from_csv`` accepts a ``TypeTester`` as ``column_types``; the
        ``number`` column forced to ``Text`` comes back as ``Text``.
        """
        forcing_tester = TypeTester(force={'number': Text()})

        loaded = Table.from_csv(
            'examples/test.csv',
            column_types=forcing_tester)

        expected_types = [Text, Text, Boolean, Date, DateTime, TimeDelta]
        self.assertColumnTypes(loaded, expected_types)
Example #9
0
    def test_force_type(self):
        """
        A type forced by column name is returned for that column even when
        the values look numeric.
        """
        sample_rows = [('1.7', ), ('200000000', ), ('', )]
        forced_types = {'one': Text()}

        column_types = TypeTester(force=forced_types).run(
            sample_rows, ['one'])

        self.assertIsInstance(column_types[0], Text)
Example #10
0
    def test_types_number_locale(self):
        """
        German-formatted numbers are inferred as ``Number`` when a ``Number``
        with a German locale is offered, and the inferred type keeps that
        locale.
        """
        sample_rows = [('1,7', ), ('200.000.000', ), ('', )]

        german_number = Number(locale='de_DE.UTF-8')
        locale_tester = TypeTester(types=[german_number, Text()])
        column_types = locale_tester.run(sample_rows, ['one'])

        self.assertIsInstance(column_types[0], Number)
        self.assertEqual(str(column_types[0].locale), 'de_DE')
Example #11
0
    def group_by(self, key, key_name=None, key_type=None):
        """
        Split this table into one new :class:`Table` per unique group value
        and return the result as a :class:`.TableSet`. The :code:`key` can be
        either a column name or a function that computes the group value for
        a row.

        Every group value is cast with :code:`key_type` before it is used as
        a key of the resulting tableset.

        :param key:
            Either the name of a column from this table to group by, or a
            callable that takes a row and returns the value to group by.
        :param key_name:
            A name that describes the grouped properties. Defaults to the
            name of the column that was grouped on, or to "group" when
            grouping with a key function.
        :param key_type:
            An instance of some subclass of :class:`.DataType`. Defaults to
            the data type of the grouped column, or to :class:`.Text` when
            grouping with a key function.
        :returns:
            A :class:`.TableSet` whose keys are the unique (cast) group
            values, in order of first appearance, and whose values are new
            :class:`Table` instances holding the matching rows.
        """
        from agate.tableset import TableSet

        key_is_row_function = hasattr(key, '__call__')

        if not key_is_row_function:
            column = self._columns[key]

            key_name = key_name or column.name
            key_type = key_type or column.data_type
        else:
            key_name = key_name or 'group'
            key_type = key_type or Text()

        # Collect rows per cast group value, keeping first-seen order.
        rows_by_group = OrderedDict()

        for row in self._rows:
            raw_value = key(row) if key_is_row_function else row[column.name]
            cast_value = key_type.cast(raw_value)

            rows_by_group.setdefault(cast_value, []).append(row)

        # Turn each bucket of rows into a table derived from this one.
        tables_by_group = OrderedDict(
            (group, self._fork(group_rows))
            for group, group_rows in rows_by_group.items())

        return TableSet(tables_by_group.values(),
                        tables_by_group.keys(),
                        key_name=key_name,
                        key_type=key_type)
Example #12
0
    def test_monkeypatch(self):
        """
        Patching ``TryPatch`` into ``Table`` adds it to ``Table.__bases__``
        and makes ``test``/``testcls`` usable on tables created both before
        and after the patch.
        """
        table_before = Table([], ['foo'], [Text()])
        Table.monkeypatch(TryPatch)
        table_after = Table([], ['foo'], [Text()])

        self.assertSequenceEqual(Table.__bases__, [Patchable, TryPatch])

        # The patched-in attributes must resolve on both instances.
        for patched_table in (table_before, table_after):
            for attribute in ('test', 'testcls'):
                self.assertIsNotNone(getattr(patched_table, attribute))

        for patched_table in (table_before, table_after):
            self.assertEqual(patched_table.test(5), 5)

        self.assertEqual(Table.testcls(5), 5)
Example #13
0
    def counts(self, key, key_name=None, key_type=None):
        """
        Count the number of occurrences of each distinct value in a column.
        Creates a new table with only the value and the count. This is
        effectively equivalent to doing a :meth:`Table.group_by` followed by an
        :meth:`.TableSet.aggregate` with a :class:`.Length` aggregator.

        The resulting table will have two columns. The first will have
        the name and type of the specified :code:`key` column or
        :code:`key_name` and :code:`key_type`, if specified. The second will be
        named :code:`count` and will be of type :class:`.Number`.

        :param key:
            Either the name of a column from this table to count, or a
            callable that takes a row and returns a value to count.
        :param key_name:
            A name that describes the counted properties. Defaults to the
            column name that was counted or "group" if counting with a key
            function. This only labels the output column; it does not have
            to be the name of an existing column.
        :param key_type:
            An instance of some subclass of :class:`.DataType`. Defaults to
            the data type of the counted column, or to :class:`.Text` when
            counting with a key function.
        :returns:
            A new :class:`Table` with one row per distinct (cast) value, in
            order of first appearance, using those values as row names.
        """
        key_is_row_function = hasattr(key, '__call__')

        if key_is_row_function:
            key_name = key_name or 'group'
            key_type = key_type or Text()
        else:
            column = self._columns[key]

            key_name = key_name or column.name
            key_type = key_type or column.data_type

        output = OrderedDict()

        for row in self._rows:
            if key_is_row_function:
                group_name = key(row)
            else:
                # Index by the real column name. key_name is merely the label
                # of the output column and may differ from the source column
                # when the caller overrides it.
                group_name = row[column.name]

            # Cast before counting so that values which cast to the same
            # result share a single counter.
            group_name = key_type.cast(group_name)

            if group_name not in output:
                output[group_name] = 0

            output[group_name] += 1

        column_names = [key_name, 'count']
        column_types = [key_type, Number()]

        return Table(output.items(),
                     column_names,
                     column_types,
                     row_names=tuple(output.keys()))
Example #14
0
    def setUp(self):
        """
        Provide three sample rows together with column names and column
        types (number, number, text) for the tests.
        """
        self.rows = (
            (1, 4, 'a'),
            (2, 3, 'b'),
            (None, 2, 'c'),
        )

        self.number_type = Number()
        self.text_type = Text()

        self.column_names = ['one', 'two', 'three']
        self.column_types = [self.number_type] * 2 + [self.text_type]
Example #15
0
    def setUp(self):
        """
        Provide four (first_name, last_name, property, value) rows and an
        all-text set of column types.
        """
        self.rows = (
            ('Jane', 'Code', 'gender', 'female'),
            ('Jane', 'Code', 'age', '27'),
            ('Jim', 'Program', 'gender', 'male'),
            ('Jim', 'Bytes', 'age', '24'),
        )

        self.text_type = Text()

        self.column_names = ['first_name', 'last_name', 'property', 'value']
        # Every column shares the single Text instance.
        self.column_types = [self.text_type] * 4
Example #16
0
    def setUp(self):
        """
        Build a four-column table (one text column followed by three number
        columns) from four sample rows.
        """
        self.rows = (
            ('a', 2, 3, 4),
            (None, 3, 5, None),
            ('a', 2, 4, None),
            ('b', 3, 4, None),
        )

        self.number_type = Number()
        self.text_type = Text()

        self.column_names = ['one', 'two', 'three', 'four']
        self.column_types = [self.text_type] + [self.number_type] * 3

        self.table = Table(self.rows, self.column_names, self.column_types)
Example #17
0
    def setUp(self):
        """
        Provide three rows covering number, text, boolean, date, datetime and
        timedelta columns, plus the matching names and types.
        """
        first_row = (1, 'a', True, '11/4/2015', '11/4/2015 12:22 PM', '4:15')
        second_row = (
            2, u'👍', False, '11/5/2015', '11/4/2015 12:45 PM', '6:18')
        empty_row = (None, 'b', None, None, None, None)

        self.rows = (first_row, second_row, empty_row)

        self.column_names = [
            'number',
            'text',
            'boolean',
            'date',
            'datetime',
            'timedelta',
        ]

        self.column_types = [
            Number(),
            Text(),
            Boolean(),
            Date(),
            DateTime(),
            TimeDelta(),
        ]
Example #18
0
    def test_group_by_key_type(self):
        """
        Grouping a column with ``key_type=Text()`` yields a tableset whose
        keys are the text forms of that column's values.
        """
        source = Table(self.rows, self.column_names, self.column_types)

        grouped = source.group_by('two', key_type=Text())

        self.assertIsInstance(grouped, TableSet)
        self.assertEqual(grouped.key_name, 'two')
        self.assertIsInstance(grouped.key_type, Text)

        # Keys are strings because they were cast with Text.
        for expected_key in ('2', '3'):
            self.assertIn(expected_key, grouped.keys())

        expected_members = (
            ('2', ('a', 'a')),
            ('3', (None, 'b')),
        )
        for group_key, column_one in expected_members:
            self.assertSequenceEqual(
                grouped[group_key].columns['one'], column_one)
Example #19
0
    def test_denormalize_column_types(self):
        """
        ``denormalize`` with no key and explicit ``column_types`` produces a
        single row with ``gender`` (text) and ``age`` (number) columns.
        """
        source = Table(self.rows, self.column_names, self.column_types)

        requested_types = [Text(), Number()]
        result = source.denormalize(
            None, 'property', 'value', column_types=requested_types)

        # NB: value has been overwritten
        expected_rows = (('male', 24), )

        self.assertRows(result, expected_rows)
        self.assertColumnNames(result, ['gender', 'age'])
        self.assertColumnTypes(result, [Text, Number])
Example #20
0
    def setUp(self):
        """
        Provide six (name, race, gender, age, color) rows with the matching
        column names and types.
        """
        self.rows = (
            ('joe', 'white', 'male', 20, 'blue'),
            ('jane', 'white', 'female', 20, 'blue'),
            ('josh', 'black', 'male', 20, 'blue'),
            ('jim', 'latino', 'male', 25, 'blue'),
            ('julia', 'white', 'female', 25, 'green'),
            ('joan', 'asian', 'female', 25, 'green'),
        )

        self.number_type = Number()
        self.text_type = Text()

        self.column_names = ['name', 'race', 'gender', 'age', 'color']
        # Only the age column is numeric; every other column is text.
        self.column_types = [
            self.text_type,
            self.text_type,
            self.text_type,
            self.number_type,
            self.text_type,
        ]
Example #21
0
    def setUp(self):
        """
        Build a four-column table (number, number, text, number) from four
        rows of ``Decimal`` data; the last column holds only ``None``.
        """
        self.rows = (
            (Decimal('1.1'), Decimal('2.19'), 'a', None),
            (Decimal('2.7'), Decimal('3.42'), 'b', None),
            (None, Decimal('4.1'), 'c', None),
            (Decimal('2.7'), Decimal('3.42'), 'c', None),
        )

        self.number_type = Number()
        self.text_type = Text()

        self.column_names = ['one', 'two', 'three', 'four']
        self.column_types = [
            self.number_type,
            self.number_type,
            self.text_type,
            self.number_type,
        ]

        self.table = Table(self.rows, self.column_names, self.column_types)
Example #22
0
    def setUp(self):
        """
        Provide three rows plus column types made of a default ``Number``, a
        ``Number`` with a German locale and a ``Text``.
        """
        self.rows = (('1.7', 2000, 'a'),
                     ('11.18', None, None),
                     ('0', 1, 'c'))

        self.number_type = Number()
        self.international_number_type = Number(locale='de_DE.UTF-8')
        self.text_type = Text()

        self.column_names = ['one', 'two', 'three']
        self.column_types = [self.number_type,
                             self.international_number_type,
                             self.text_type]
Example #23
0
    def setUp(self):
        """
        Provide three rows plus column types made of a default ``Number``, an
        ``en_US`` ``Number``, a German-locale ``Number`` and a ``Text``.
        """
        self.rows = (('1.7', 2000, 2000, 'a'),
                     ('11.18', None, None, None),
                     ('0', 1, 1, 'c'))

        self.number_type = Number()
        self.american_number_type = Number(locale='en_US')
        self.german_number_type = Number(locale='de_DE.UTF-8')
        self.text_type = Text()

        self.column_names = ['one', 'two', 'three', 'four']
        self.column_types = [self.number_type,
                             self.american_number_type,
                             self.german_number_type,
                             self.text_type]
Example #24
0
def print_structure(self, output=sys.stdout, max_rows=None):
    """
    Print this table's column names and types as a plain-text table.

    :param output:
        The output to print to.
    :param max_rows:
        Forwarded unchanged to :code:`print_table` as its :code:`max_rows`
        argument.
    :returns:
        Whatever :code:`print_table` returns.
    """
    from agate.table import Table

    names = [column_name for column_name in self._column_names]
    # Each data type is represented by the name of its class.
    type_names = [
        data_type.__class__.__name__ for data_type in self._column_types
    ]
    structure_rows = zip(names, type_names)

    # Both descriptive columns share one Text instance.
    text_type = Text()

    structure = Table(
        structure_rows,
        ['column', 'data_type'],
        [text_type, text_type])

    return structure.print_table(
        output=output,
        max_column_width=None,
        max_rows=max_rows)
Example #25
0
    def setUp(self):
        """
        Build three raw (letter, number) row sets and one ``Table`` per row
        set, kept in an ordered mapping.
        """
        self.table1 = (
            ('a', 1),
            ('a', 3),
            ('b', 2),
        )
        self.table2 = (
            ('b', 0),
            ('a', 2),
            ('c', 5),
        )
        self.table3 = (
            ('a', 1),
            ('a', 2),
            ('c', 3),
        )

        self.text_type = Text()
        self.number_type = Number()

        self.column_names = ['letter', 'number']
        self.column_types = [self.text_type, self.number_type]

        # All tables share the same column names and types.
        named_rows = (
            ('table1', self.table1),
            ('table2', self.table2),
            ('table3', self.table3),
        )
        self.tables = OrderedDict(
            (name, Table(rows, self.column_names, self.column_types))
            for name, rows in named_rows)
Example #26
0
    def __init__(self, tables, keys, key_name='group', key_type=None):
        """
        Initialise the tableset from parallel iterables of tables and keys.

        :param tables:
            An iterable of tables (or nested tablesets). It is materialised
            into a tuple; the first element supplies the reference column
            names and types.
        :param keys:
            An iterable of keys, materialised into a tuple and handed to
            :class:`MappedSequence` together with the tables.
        :param key_name:
            Stored as the key name. Defaults to ``'group'``.
        :param key_type:
            Stored as the key type. A new ``Text()`` is used when this is
            falsy.
        :raises ValueError:
            If any table's column types or column names differ from those
            of the reference table.
        """
        tables = tuple(tables)
        keys = tuple(keys)

        self._key_name = key_name
        self._key_type = key_type or Text()

        # Descend through nested tablesets until something that is not a
        # TableSet is reached; that object acts as the reference table.
        sample = tables[0]
        while isinstance(sample, TableSet):
            sample = sample[0]
        self._sample_table = sample

        self._column_types = sample.column_types
        self._column_names = sample.column_names

        for candidate in tables:
            types_match = not (candidate.column_types != self.column_types)
            if not types_match:
                raise ValueError('Not all tables have the same column types!')

            names_match = not (candidate.column_names != self.column_names)
            if not names_match:
                raise ValueError('Not all tables have the same column names!')

        MappedSequence.__init__(self, tables, keys)
Example #27
0
    def setUp(self):
        """
        Build a left and a right table holding the same rows and column
        types but different column names.
        """
        self.left_rows = ((1, 4, 'a'), (2, 3, 'b'), (None, 2, 'c'))
        self.right_rows = ((1, 4, 'a'), (2, 3, 'b'), (None, 2, 'c'))

        self.number_type = Number()
        self.text_type = Text()

        self.left_column_names = ['one', 'two', 'three']
        self.right_column_names = ['four', 'five', 'six']
        # Both tables share this single list of column types.
        self.column_types = [
            self.number_type,
            self.number_type,
            self.text_type,
        ]

        self.left = Table(
            self.left_rows, self.left_column_names, self.column_types)
        self.right = Table(
            self.right_rows, self.right_column_names, self.column_types)
Example #28
0
    def test_max_length_unicode(self):
        """
        This test documents the different handling of wide-unicode characters
        in Python 2 and Python 3. The former's behavior is broken, but can
        not be easily fixed.

        Bug: https://github.com/wireservice/agate/issues/649
        Reference: http://stackoverflow.com/a/35462951
        """
        table = Table([['a'], [u'👍'], ['w']], ['test'], [Text()])

        MaxLength('test').validate(table)

        # Non 4-byte versions of Python 2 (but not PyPy) report the emoji as
        # two characters; modern versions of Python report one.
        expected_length = 2 if sys.maxunicode <= 65535 else 1
        self.assertEqual(MaxLength('test').run(table), expected_length)

        self.assertIsInstance(MaxLength('test').run(table), Decimal)
def print_structure(self, max_rows=20, output=sys.stdout):
    """
    Print the keys and row counts of each table in the tableset.

    :param max_rows:
        The maximum number of tables to list before truncating the output.
        Defaults to 20.
    :param output:
        The output to print the structure to.
    :returns:
        Whatever :code:`print_table` returns.
    """
    # Never list more entries than the tableset actually holds.
    limit = min(len(self.items()), max_rows)

    table_keys = self.keys()[0:limit]
    # Row counts are rendered as strings because both columns are text.
    row_counts = [
        str(len(member.rows)) for _key, member in self.items()[0:limit]
    ]
    structure_rows = zip(table_keys, row_counts)

    text_type = Text()

    structure = Table(
        structure_rows,
        ['table', 'rows'],
        [text_type, text_type])

    return structure.print_table(output=output, max_column_width=None)
Example #30
0
    def bins(self, column_name, count=10, start=None, end=None):
        """
        Generates (approximately) evenly sized bins for the values in a column.
        Bins may not be perfectly even if the spread of the data does not divide
        evenly, but all values will always be included in some bin.

        The resulting table will have two columns. The first will have
        the same name as the specified column, but will be type :class:`.Text`.
        The second will be named :code:`count` and will be of type
        :class:`.Number`.

        :param column_name:
            The name of the column to bin. Must be of type :class:`.Number`
        :param count:
            The number of bins to create. Defaults to 10.
        :param start:
            The minimum value to start the bins at. If not specified the
            (rounded) minimum value in the column will be used. It is honored
            even when :code:`end` is omitted.
        :param end:
            The maximum value to end the bins at. If not specified the
            (rounded) maximum value in the column will be used. It is honored
            even when :code:`start` is omitted.
        :returns:
            A new :class:`Table`.
        """
        if start is None or end is None:
            # Only scan the column when at least one limit has to be inferred.
            minimum, maximum = utils.round_limits(
                Min(column_name).run(self),
                Max(column_name).run(self))

        # Resolve each limit on its own, so that supplying just one of them
        # no longer causes it to be replaced by the inferred value.
        start = minimum if start is None else Decimal(start)
        end = maximum if end is None else Decimal(end)

        spread = abs(end - start)
        size = spread / count

        # breaks holds the count + 1 boundaries of the bins.
        breaks = [start]

        for i in range(1, count + 1):
            top = start + (size * i)

            breaks.append(top)

        decimal_places = utils.max_precision(breaks)
        break_formatter = utils.make_number_formatter(decimal_places)

        def name_bin(i, j, first_exclusive=True, last_exclusive=False):
            # Builds the label of the bin running from i to j. A true flag
            # selects the square bracket on that side, a false flag the
            # round one.
            inclusive = format_decimal(i, format=break_formatter)
            exclusive = format_decimal(j, format=break_formatter)

            output = u'[' if first_exclusive else u'('
            output += u'%s - %s' % (inclusive, exclusive)
            output += u']' if last_exclusive else u')'

            return output

        bins = OrderedDict()

        # Create every bin up front so that empty bins appear with a zero
        # count and the bins keep their natural order.
        for i in range(1, len(breaks)):
            last_exclusive = (i == len(breaks) - 1)
            name = name_bin(breaks[i - 1],
                            breaks[i],
                            last_exclusive=last_exclusive)

            bins[name] = Decimal('0')

        for row in self._rows:
            value = row[column_name]

            if value is None:
                # Missing values are tallied in a separate bin keyed by None,
                # created on first use.
                try:
                    bins[None] += 1
                except KeyError:
                    bins[None] = Decimal('1')

                continue  # pragma: no cover

            i = 1

            try:
                while value >= breaks[i]:
                    i += 1
            except IndexError:
                # The value is at or above the final break: it belongs to
                # the last bin.
                i -= 1

            last_exclusive = (i == len(breaks) - 1)
            name = name_bin(breaks[i - 1],
                            breaks[i],
                            last_exclusive=last_exclusive)

            bins[name] += 1

        column_names = [column_name, 'count']
        column_types = [Text(), Number()]

        return Table(bins.items(),
                     column_names,
                     column_types,
                     row_names=tuple(bins.keys()))