Example #1
0
    def get_computed_data_type(self, table):
        """
        Pick the data type of the computed column from the "before" column.

        The column named by ``self._before_column_name`` is looked up on
        ``table``. A Date, DateTime or TimeDelta column yields a new
        TimeDelta; a Number column yields a new Number. No other case is
        handled here, so any other column type falls through and the method
        returns ``None``.
        """
        source_type = table.columns[self._before_column_name].data_type

        if isinstance(source_type, (Date, DateTime, TimeDelta)):
            return TimeDelta()

        if isinstance(source_type, Number):
            return Number()
Example #2
0
    def test_max_length_invalid(self):
        # Validating MaxLength against a table whose only column is a Number
        # is expected to raise DataTypeError.
        numeric_table = Table([[1], [2], [3]], ['test'], [Number()])

        with self.assertRaises(DataTypeError):
            MaxLength('test').validate(numeric_table)
Example #3
0
    def test_types_no_boolean(self):
        # With only Number and Text offered as candidate types, a column of
        # 'True'/'False' strings is expected to be inferred as Text.
        candidate_types = [Number(), Text()]
        data = [('True', ), ('False', ), ('False', )]

        column_types = TypeTester(types=candidate_types).run(data, ['one'])

        self.assertIsInstance(column_types[0], Text)
Example #4
0
    def setUp(self):
        """
        Build three tables sharing one text and two number columns, store
        them in an OrderedDict keyed 'table1'..'table3', and wrap them in a
        TableSet.
        """
        self.table1 = (('a', 1, 4), ('b', 3, 7), ('c', 2, 2))
        self.table2 = (('a', 0, 3), ('b', 2, 3), ('c', 5, 3))
        self.table3 = (('a', 1, 10), ('b', 2, 1), ('c', 3, None))

        self.text_type = Text()
        self.number_type = Number()

        self.column_names = ['one', 'two', 'three']
        self.column_types = [self.text_type] + [self.number_type] * 2

        named_rows = (
            ('table1', self.table1),
            ('table2', self.table2),
            ('table3', self.table3),
        )

        # Tables are created in declaration order so the mapping keeps it.
        self.tables = OrderedDict(
            (name, Table(rows, self.column_names, self.column_types))
            for name, rows in named_rows
        )

        self.tablesets = TableSet(self.tables.values(), self.tables.keys())
Example #5
0
    def test_types_number_locale(self):
        # When a Number with the de_DE.UTF-8 locale is offered, a column of
        # '1,7', '200.000.000' and an empty string is expected to be inferred
        # as Number, and the inferred type's locale should read 'de_DE'.
        candidate_types = [Number(locale='de_DE.UTF-8'), Text()]
        data = [('1,7', ), ('200.000.000', ), ('', )]

        column_types = TypeTester(types=candidate_types).run(data, ['one'])
        first_type = column_types[0]

        self.assertIsInstance(first_type, Number)
        self.assertEqual(str(first_type.locale), 'de_DE')
    def setUp(self):
        """
        Prepare three sample rows plus column names and types: a default
        Number, a Number using the de_DE.UTF-8 locale, and a Text column.
        """
        self.rows = (('1.7', 2000, 'a'), ('11.18', None, None), ('0', 1, 'c'))

        self.number_type = Number()
        self.international_number_type = Number(locale='de_DE.UTF-8')
        self.text_type = Text()

        self.column_names = ['one', 'two', 'three']
        self.column_types = [
            self.number_type, self.international_number_type, self.text_type
        ]
Example #7
0
    def counts(self, key, key_name=None, key_type=None):
        """
        Count the number of occurrences of each distinct value in a column.
        Creates a new table with only the value and the count. This is
        effectively equivalent to doing a :meth:`Table.group_by` followed by an
        :meth:`.TableSet.aggregate` with a :class:`.Length` aggregator.

        The resulting table will have two columns. The first will have
        the name and type of the specified :code:`key` column or
        :code:`key_name` and :code:`key_type`, if specified. The second will be
        named :code:`count` and will be of type :class:`.Number`.

        :param key:
            Either the name of a column from this table to count, or a
            :class:`function` that takes a row and returns a value to count.
        :param key_name:
            A name that describes the counted properties. Defaults to the
            column name that was counted or "group" if counting with a key
            function.
        :param key_type:
            An instance of some subclass of :class:`.DataType`. If not
            provided it defaults to the data type of the counted column, or
            to :class:`.Text` when counting with a key function.
        :returns:
            A new :class:`Table` whose rows are named by the counted values.
        """
        key_is_row_function = callable(key)

        if key_is_row_function:
            key_name = key_name or 'group'
            key_type = key_type or Text()
        else:
            column = self._columns[key]

            key_name = key_name or column.name
            key_type = key_type or column.data_type

        output = OrderedDict()

        for row in self._rows:
            if key_is_row_function:
                group_name = key(row)
            else:
                # Read from the source column itself. ``key_name`` only labels
                # the output column and may differ from the source column's
                # name, so it must not be used to index the row.
                group_name = row[key]

            group_name = key_type.cast(group_name)

            # OrderedDict keeps first-seen order of the distinct values.
            output[group_name] = output.get(group_name, 0) + 1

        column_names = [key_name, 'count']
        column_types = [key_type, Number()]

        return Table(output.items(),
                     column_names,
                     column_types,
                     row_names=tuple(output.keys()))
Example #8
0
    def setUp(self):
        """
        Define three sample rows with two Number columns followed by a Text
        column, along with the column names and shared type instances.
        """
        self.rows = (
            (1, 4, 'a'),
            (2, 3, 'b'),
            (None, 2, 'c'),
        )

        self.number_type = Number()
        self.text_type = Text()

        self.column_names = ['one', 'two', 'three']
        self.column_types = [self.number_type] * 2 + [self.text_type]
Example #9
0
    def setUp(self):
        """
        Prepare three four-column sample rows and their column names/types:
        a default Number, an en_US Number, a de_DE.UTF-8 Number and a Text.
        """
        self.rows = (
            ('1.7', 2000, 2000, 'a'),
            ('11.18', None, None, None),
            ('0', 1, 1, 'c'),
        )

        self.number_type = Number()
        self.american_number_type = Number(locale='en_US')
        self.german_number_type = Number(locale='de_DE.UTF-8')
        self.text_type = Text()

        self.column_names = ['one', 'two', 'three', 'four']
        self.column_types = [
            self.number_type, self.american_number_type,
            self.german_number_type, self.text_type
        ]
Example #10
0
    def test_change_mixed_types(self):
        # Computing a Change between a Number column and a Date column is
        # expected to raise DataTypeError.
        table = Table(
            (('1', '10/24/1978'), ('2', '11/13/1974')),
            ['number', 'date'],
            [Number(), Date()]
        )

        with self.assertRaises(DataTypeError):
            table.compute([('test', Change('number', 'date'))])
Example #11
0
    def get_computed_data_type(self, table):
        """
        Pick the data type of the computed column.

        The decision is based on the data type of the column returned by
        ``self._validate(table)``: Date, DateTime and TimeDelta columns all
        yield a new TimeDelta, and a Number column yields a new Number. No
        other case is handled here, so anything else results in ``None``.
        """
        source_type = self._validate(table).data_type

        if isinstance(source_type, (Date, DateTime, TimeDelta)):
            return TimeDelta()

        if isinstance(source_type, Number):
            return Number()
Example #12
0
    def setUp(self):
        """
        Prepare three sample rows covering six column types (number, text,
        boolean, date, datetime, timedelta) plus matching names and types.
        """
        self.rows = (
            (1, 'a', True, '11/4/2015', '11/4/2015 12:22 PM', '4:15'),
            # The text value is the thumbs-up emoji (U+1F44D). It is written
            # as an escape so the literal cannot be corrupted by a
            # mis-decoded source file again.
            (2, u'\U0001F44D', False, '11/5/2015', '11/4/2015 12:45 PM',
             '6:18'),
            (None, 'b', None, None, None, None)
        )

        self.column_names = [
            'number', 'text', 'boolean', 'date', 'datetime', 'timedelta'
        ]

        self.column_types = [
            Number(), Text(), Boolean(), Date(), DateTime(), TimeDelta()
        ]
Example #13
0
    def setUp(self):
        """
        Build a four-row table with one Text column followed by three Number
        columns; several cells are None.
        """
        self.rows = (
            ('a', 2, 3, 4),
            (None, 3, 5, None),
            ('a', 2, 4, None),
            ('b', 3, 4, None),
        )

        self.number_type = Number()
        self.text_type = Text()

        self.column_names = ['one', 'two', 'three', 'four']
        self.column_types = [self.text_type] + [self.number_type] * 3

        self.table = Table(self.rows, self.column_names, self.column_types)
Example #14
0
    def test_proxy_maintains_key(self):
        # A TableSet returned by select() should report the same key_name and
        # key_type as the TableSet it was called on.
        key_type = Number()

        def assert_key_kept(candidate):
            self.assertEqual(candidate.key_name, 'foo')
            self.assertEqual(candidate.key_type, key_type)

        original = TableSet(
            self.tables.values(),
            self.tables.keys(),
            key_name='foo',
            key_type=key_type
        )
        assert_key_kept(original)

        assert_key_kept(original.select(['number']))
Example #15
0
    def test_denormalize_column_types(self):
        # Denormalize with explicit column types and check the resulting
        # rows, column names and column types.
        source = Table(self.rows, self.column_names, self.column_types)

        result = source.denormalize(
            None, 'property', 'value', column_types=[Text(), Number()]
        )

        # NB: value has been overwritten
        expected_rows = (('male', 24), )

        self.assertRows(result, expected_rows)
        self.assertColumnNames(result, ['gender', 'age'])
        self.assertColumnTypes(result, [Text, Number])
Example #16
0
    def setUp(self):
        """
        Build a four-row table of Decimal values: two Number columns, a Text
        column, and a final Number column that is None in every row.
        """
        self.rows = (
            (Decimal('1.1'), Decimal('2.19'), 'a', None),
            (Decimal('2.7'), Decimal('3.42'), 'b', None),
            (None, Decimal('4.1'), 'c', None),
            (Decimal('2.7'), Decimal('3.42'), 'c', None),
        )

        self.number_type = Number()
        self.text_type = Text()

        self.column_names = ['one', 'two', 'three', 'four']
        self.column_types = (
            [self.number_type] * 2 + [self.text_type, self.number_type]
        )

        self.table = Table(self.rows, self.column_names, self.column_types)
Example #17
0
    def setUp(self):
        """
        Prepare six sample rows of (name, race, gender, age, color) and the
        matching column names and types; only age is a Number column.
        """
        self.rows = (
            ('joe', 'white', 'male', 20, 'blue'),
            ('jane', 'white', 'female', 20, 'blue'),
            ('josh', 'black', 'male', 20, 'blue'),
            ('jim', 'latino', 'male', 25, 'blue'),
            ('julia', 'white', 'female', 25, 'green'),
            ('joan', 'asian', 'female', 25, 'green'),
        )

        self.number_type = Number()
        self.text_type = Text()

        self.column_names = ['name', 'race', 'gender', 'age', 'color']
        self.column_types = (
            [self.text_type] * 3 + [self.number_type, self.text_type]
        )
Example #18
0
    def setUp(self):
        """
        Build three two-column (letter, number) tables and store them in an
        OrderedDict keyed 'table1'..'table3'.
        """
        self.table1 = (('a', 1), ('a', 3), ('b', 2))
        self.table2 = (('b', 0), ('a', 2), ('c', 5))
        self.table3 = (('a', 1), ('a', 2), ('c', 3))

        self.text_type = Text()
        self.number_type = Number()

        self.column_names = ['letter', 'number']
        self.column_types = [self.text_type, self.number_type]

        named_rows = (
            ('table1', self.table1),
            ('table2', self.table2),
            ('table3', self.table3),
        )

        # Tables are created in declaration order so the mapping keeps it.
        self.tables = OrderedDict(
            (name, Table(rows, self.column_names, self.column_types))
            for name, rows in named_rows
        )
Example #19
0
    def setUp(self):
        """
        Build a "left" and a "right" table holding the same three rows and
        the same column types, but with different column names.
        """
        self.left_rows = ((1, 4, 'a'), (2, 3, 'b'), (None, 2, 'c'))
        self.right_rows = ((1, 4, 'a'), (2, 3, 'b'), (None, 2, 'c'))

        self.number_type = Number()
        self.text_type = Text()

        self.left_column_names = ['one', 'two', 'three']
        self.right_column_names = ['four', 'five', 'six']
        self.column_types = [self.number_type] * 2 + [self.text_type]

        self.left = Table(
            self.left_rows, self.left_column_names, self.column_types
        )
        self.right = Table(
            self.right_rows, self.right_column_names, self.column_types
        )
Example #20
0
 def get_computed_data_type(self, table):
     """
     Return the data type of the computed column.

     The result is always a new Number instance; ``table`` is not inspected.
     """
     result_type = Number()
     return result_type
Example #21
0
 def setUp(self):
     """
     Prepare a single Number type and the name/type lists for a table with
     one column called 'number'.
     """
     number = Number()

     self.number_type = number
     self.column_names = ['number']
     self.column_types = [number]
Example #22
0
class TestNumber(unittest.TestCase):
    """
    Tests for the Number data type's ``test`` and ``cast`` methods.

    Non-ASCII sample values are written as escape sequences so they cannot
    be corrupted if the source file is decoded with the wrong encoding.
    """

    def setUp(self):
        self.type = Number()

    def test_test(self):
        # test() is expected to accept nulls, booleans and numeric values
        # (including numeric strings) and reject everything else.
        self.assertEqual(self.type.test(None), True)
        self.assertEqual(self.type.test('N/A'), True)
        self.assertEqual(self.type.test(True), True)
        self.assertEqual(self.type.test('True'), False)
        self.assertEqual(self.type.test(1), True)
        self.assertEqual(self.type.test(Decimal('1')), True)
        self.assertEqual(self.type.test('2.7'), True)
        self.assertEqual(self.type.test(2.7), True)
        self.assertEqual(self.type.test('3/1/1994'), False)
        self.assertEqual(self.type.test(datetime.date(1994, 3, 1)), False)
        self.assertEqual(self.type.test('3/1/1994 12:30 PM'), False)
        self.assertEqual(self.type.test('2015-01-01 02:34'), False)
        self.assertEqual(self.type.test(datetime.datetime(1994, 3, 1, 12, 30)),
                         False)
        self.assertEqual(self.type.test('4:10'), False)
        self.assertEqual(
            self.type.test(datetime.timedelta(hours=4, minutes=10)), False)
        self.assertEqual(self.type.test('a'), False)
        self.assertEqual(self.type.test('A\nB'), False)
        # U+1F44D THUMBS UP SIGN
        self.assertEqual(self.type.test(u'\U0001F44D'), False)
        self.assertEqual(self.type.test('05_leslie3d_base'), False)
        self.assertEqual(self.type.test('2016-12-29'), False)
        self.assertEqual(self.type.test('2016-12-29T11:43:30Z'), False)
        self.assertEqual(self.type.test('2016-12-29T11:43:30+06:00'), False)
        self.assertEqual(self.type.test('2016-12-29T11:43:30-06:00'), False)

    def test_cast(self):
        values = (2, 1, None, Decimal('2.7'), 'n/a', '2.7', '200,000,000')
        casted = tuple(self.type.cast(v) for v in values)
        self.assertSequenceEqual(
            casted, (Decimal('2'), Decimal('1'), None, Decimal('2.7'), None,
                     Decimal('2.7'), Decimal('200000000')))

    @unittest.skipIf(six.PY3, 'Not supported in Python 3.')
    def test_cast_long(self):
        # ``long`` only exists on Python 2. Binding it once keeps the lint
        # suppression on the same physical line as the undefined name.
        value = long('141414')  # noqa: F821
        self.assertEqual(self.type.test(value), True)
        self.assertEqual(self.type.cast(value), Decimal('141414'))

    def test_boolean_cast(self):
        values = (True, False)
        casted = tuple(self.type.cast(v) for v in values)
        self.assertSequenceEqual(casted, (Decimal('1'), Decimal('0')))

    def test_currency_cast(self):
        # \u20ac is the euro sign and \xa2 is the cent sign.
        values = ('$2.70', '-$0.70', u'\u20ac14', u'50\xa2', u'-75\xa2',
                  u'-$1,287')
        casted = tuple(self.type.cast(v) for v in values)
        self.assertSequenceEqual(
            casted, (Decimal('2.7'), Decimal('-0.7'), Decimal('14'),
                     Decimal('50'), Decimal('-75'), Decimal('-1287')))

    def test_cast_locale(self):
        values = (2, 1, None, Decimal('2.7'), 'n/a', '2,7', '200.000.000')
        casted = tuple(Number(locale='de_DE.UTF-8').cast(v) for v in values)
        self.assertSequenceEqual(
            casted, (Decimal('2'), Decimal('1'), None, Decimal('2.7'), None,
                     Decimal('2.7'), Decimal('200000000')))

    def test_cast_text(self):
        with self.assertRaises(CastError):
            self.type.cast('a')

    def test_cast_floats(self):
        self.assertAlmostEqual(self.type.cast(0.1 + 0.2), Decimal('0.3'))
        self.assertEqual(self.type.cast(0.12345123456),
                         Decimal('0.12345123456'))

    def test_cast_error(self):
        with self.assertRaises(CastError):
            self.type.cast('quack')
Example #23
0
    def bins(self, column_name, count=10, start=None, end=None):
        """
        Generates (approximately) evenly sized bins for the values in a column.
        Bins may not be perfectly even if the spread of the data does not divide
        evenly, but all values will always be included in some bin.

        The resulting table will have two columns. The first will have
        the same name as the specified column, but will be type :class:`.Text`.
        The second will be named :code:`count` and will be of type
        :class:`.Number`.

        :param column_name:
            The name of the column to bin. Must be of type :class:`.Number`
        :param count:
            The number of bins to create. Defaults to 10.
        :param start:
            The minimum value to start the bins at. If not specified, the
            lower limit derived from the column's minimum value will be used.
        :param end:
            The maximum value to end the bins at. If not specified, the upper
            limit derived from the column's maximum value will be used.
        :returns:
            A new :class:`Table`.
        """
        if start is None or end is None:
            # At least one bound is missing: derive limits from the data, but
            # only apply them to the bound(s) the caller did not supply, so an
            # explicit ``start`` or ``end`` given on its own is still honored.
            data_start, data_end = utils.round_limits(
                Min(column_name).run(self),
                Max(column_name).run(self))

            start = data_start if start is None else Decimal(start)
            end = data_end if end is None else Decimal(end)
        else:
            start = Decimal(start)
            end = Decimal(end)

        spread = abs(end - start)
        size = spread / count

        # ``count`` bins are delimited by ``count + 1`` break points.
        breaks = [start]
        breaks.extend(start + (size * i) for i in range(1, count + 1))

        decimal_places = utils.max_precision(breaks)
        break_formatter = utils.make_number_formatter(decimal_places)

        def name_bin(lower, upper, first_inclusive=True, last_inclusive=False):
            # Build an interval label such as "[0 - 10)"; a square bracket
            # marks an inclusive end and a parenthesis an exclusive one.
            lower_text = format_decimal(lower, format=break_formatter)
            upper_text = format_decimal(upper, format=break_formatter)

            output = u'[' if first_inclusive else u'('
            output += u'%s - %s' % (lower_text, upper_text)
            output += u']' if last_inclusive else u')'

            return output

        bins = OrderedDict()

        # Pre-create every bin so empty bins still appear in the output, in
        # ascending order. Only the final bin includes its upper bound.
        for i in range(1, len(breaks)):
            is_last = (i == len(breaks) - 1)
            name = name_bin(breaks[i - 1], breaks[i], last_inclusive=is_last)

            bins[name] = Decimal('0')

        for row in self._rows:
            value = row[column_name]

            if value is None:
                # Nulls are tallied in their own bin, created on first use.
                bins[None] = bins.get(None, Decimal('0')) + 1

                continue  # pragma: no cover

            i = 1

            # Walk the breaks until one exceeds the value. Running off the end
            # means the value is at or above the final break, so it is placed
            # in the last bin.
            try:
                while value >= breaks[i]:
                    i += 1
            except IndexError:
                i -= 1

            is_last = (i == len(breaks) - 1)
            name = name_bin(breaks[i - 1], breaks[i], last_inclusive=is_last)

            bins[name] += 1

        column_names = [column_name, 'count']
        column_types = [Text(), Number()]

        return Table(bins.items(),
                     column_names,
                     column_types,
                     row_names=tuple(bins.keys()))
Example #24
0
 def setUp(self):
     """
     Create the Number instance used by the tests as ``self.type``.
     """
     number = Number()
     self.type = number
Example #25
0
 def test_cast_locale(self):
     # Cast each sample with a Number using the de_DE.UTF-8 locale and
     # compare against the expected Decimal (or None) results.
     samples = (2, 1, None, Decimal('2.7'), 'n/a', '2,7', '200.000.000')
     expected = (Decimal('2'), Decimal('1'), None, Decimal('2.7'), None,
                 Decimal('2.7'), Decimal('200000000'))

     results = []
     for sample in samples:
         # A fresh Number is built for every value.
         results.append(Number(locale='de_DE.UTF-8').cast(sample))

     self.assertSequenceEqual(tuple(results), expected)
Example #26
0
 def get_aggregate_data_type(self, table):
     """
     Return the data type of the aggregated value.

     The result is always a new Number instance; ``table`` is not inspected.
     """
     result_type = Number()
     return result_type
Example #27
0
 def get_aggregate_data_type(self, column):
     """
     Return the data type of the aggregated value.

     The result is always a new Number instance; ``column`` is not inspected.
     """
     result_type = Number()
     return result_type