Python Text Examples, agate.data_types.Text Python Examples

Example #1

0

Show file

File: test_data_types.py Project: wireservice/agate

    def test_no_cast_nulls(self):
        """
        Check how ``Text.cast`` treats ``''``, ``'N/A'`` and ``None`` with the
        default settings and with ``cast_nulls=False``.
        """
        raw_inputs = ('', 'N/A', None)

        # Default construction: every input is expected to come back as None.
        default_type = Text()
        default_result = tuple(map(default_type.cast, raw_inputs))
        self.assertSequenceEqual(default_result, (None, None, None))

        # cast_nulls=False: the inputs are expected to come back unchanged.
        passthrough_type = Text(cast_nulls=False)
        passthrough_result = tuple(map(passthrough_type.cast, raw_inputs))
        self.assertSequenceEqual(passthrough_result, ('', 'N/A', None))

Example #2

0

Show file

    def test_monkeypatch_shadow(self):
        """
        After patching ``Table`` with ``TryPatchShadow``, ``columns`` must
        still be a ``MappedSequence`` on tables built before and after the
        patch, and ``foo`` must not be reachable as an attribute.
        """
        def build_table():
            return Table([['blah'], ], ['foo'], [Text()])

        # One table is deliberately created on each side of the patch call.
        table_before_patch = build_table()
        Table.monkeypatch(TryPatchShadow)
        table_after_patch = build_table()

        for table in (table_before_patch, table_after_patch):
            self.assertIsInstance(table.columns, MappedSequence)

        with self.assertRaises(AttributeError):
            table_after_patch.foo == 'foo'

Example #3

0

Show file

File: test_normalize.py Project: wireservice/agate

    def test_normalize_column_types(self):
        """
        Normalizing with explicit ``column_types`` should yield the expected
        rows, column names and column types.
        """
        source = Table(self.rows, self.column_names, self.column_types)

        forced_types = [Text(), Text()]
        result = source.normalize('one', 'three', column_types=forced_types)

        expected_rows = (
            (1, 'three', '4'),
            (2, 'three', '3'),
            (None, 'three', '2'),
        )
        expected_names = ['one', 'property', 'value']
        expected_types = [Number, Text, Text]

        self.assertRows(result, expected_rows)
        self.assertColumnNames(result, expected_names)
        self.assertColumnTypes(result, expected_types)

Example #4

0

Show file

File: test_type_tester.py Project: wireservice/agate

    def test_types_force_text(self):
        """
        A tester limited to ``Text`` should infer ``Text`` for a column of
        numeric-looking strings.
        """
        sample_rows = [('1.7', ), ('200000000', ), ('', )]

        text_only_tester = TypeTester(types=[Text()])
        column_types = text_only_tester.run(sample_rows, ['one'])

        first_type = column_types[0]
        self.assertIsInstance(first_type, Text)

Example #5

0

Show file

    def __init__(self,
                 tables,
                 keys,
                 key_name='group',
                 key_type=None,
                 _is_fork=False):
        """
        Build the set from parallel iterables of tables and keys.

        :param tables: The member tables (or nested sets); materialised to a
            tuple.
        :param keys: One key per table; materialised to a tuple.
        :param key_name: Label stored for the keys. Defaults to "group".
        :param key_type: Data type stored for the keys; a new ``Text`` is
            used when this is falsy.
        :param _is_fork: When true, the per-table consistency checks are
            skipped.
        :raises ValueError: If a table's column types or column names differ
            from those of the sample table (only checked when not a fork).
        """
        tables = tuple(tables)
        keys = tuple(keys)

        self._key_name = key_name
        self._key_type = key_type or Text()

        # Descend through nested sets until an actual table is reached.
        sample = tables[0]
        while isinstance(sample, TableSet):
            sample = sample[0]
        self._sample_table = sample

        self._column_types = sample.column_types
        self._column_names = sample.column_names

        tables_to_check = () if _is_fork else tables

        for candidate in tables_to_check:
            # zip_longest pads the shorter side with None, so a differing
            # number of columns also fails the isinstance comparison.
            type_pairs = zip_longest(candidate.column_types,
                                     self._column_types)
            types_match = all(
                isinstance(actual, type(expected))
                for actual, expected in type_pairs)

            if not types_match:
                raise ValueError(
                    'Not all tables have the same column types!')

            if candidate.column_names != self._column_names:
                raise ValueError(
                    'Not all tables have the same column names!')

        MappedSequence.__init__(self, tables, keys)

Example #6

0

Show file

    def setUp(self):
        """
        Fixture: three three-column row sets, their tables keyed by name, and
        a ``TableSet`` built from those tables.
        """
        self.table1 = (('a', 1, 4), ('b', 3, 7), ('c', 2, 2))
        self.table2 = (('a', 0, 3), ('b', 2, 3), ('c', 5, 3))
        self.table3 = (('a', 1, 10), ('b', 2, 1), ('c', 3, None))

        self.text_type = Text()
        self.number_type = Number()

        self.column_names = ['one', 'two', 'three']
        self.column_types = [
            self.text_type,
            self.number_type,
            self.number_type,
        ]

        # Insertion order matters: it fixes the order of the tableset keys.
        self.tables = OrderedDict()
        labelled_rows = (
            ('table1', self.table1),
            ('table2', self.table2),
            ('table3', self.table3),
        )
        for label, rows in labelled_rows:
            self.tables[label] = Table(rows, self.column_names,
                                       self.column_types)

        self.tablesets = TableSet(self.tables.values(), self.tables.keys())

Example #7

0

Show file

File: test_type_tester.py Project: wireservice/agate

    def test_types_no_boolean(self):
        """
        With only ``Number`` and ``Text`` available, a column of
        'True'/'False' strings should be inferred as ``Text``.
        """
        sample_rows = [('True', ), ('False', ), ('False', )]

        allowed_types = [Number(), Text()]
        restricted_tester = TypeTester(types=allowed_types)
        column_types = restricted_tester.run(sample_rows, ['one'])

        first_type = column_types[0]
        self.assertIsInstance(first_type, Text)

Example #8

0

Show file

File: test_from_csv.py Project: wireservice/agate

    def test_from_csv_type_tester(self):
        """
        Loading the example CSV with a tester that forces the ``number``
        column to ``Text`` should give the expected column types.
        """
        forcing_tester = TypeTester(force={'number': Text()})

        loaded = Table.from_csv('examples/test.csv',
                                column_types=forcing_tester)

        expected_types = [Text, Text, Boolean, Date, DateTime, TimeDelta]
        self.assertColumnTypes(loaded, expected_types)

Example #9

0

Show file

File: test_type_tester.py Project: wireservice/agate

    def test_force_type(self):
        """
        Forcing column ``one`` to ``Text`` should give ``Text`` even though
        the sample values look numeric.
        """
        sample_rows = [('1.7', ), ('200000000', ), ('', )]

        forcing_tester = TypeTester(force={'one': Text()})
        column_types = forcing_tester.run(sample_rows, ['one'])

        first_type = column_types[0]
        self.assertIsInstance(first_type, Text)

Example #10

0

Show file

File: test_type_tester.py Project: wireservice/agate

    def test_types_number_locale(self):
        """
        A ``Number`` built with a German locale should be inferred for
        German-formatted values and report ``de_DE`` as its locale.
        """
        sample_rows = [('1,7', ), ('200.000.000', ), ('', )]

        candidate_types = [Number(locale='de_DE.UTF-8'), Text()]
        locale_tester = TypeTester(types=candidate_types)
        column_types = locale_tester.run(sample_rows, ['one'])

        first_type = column_types[0]
        self.assertIsInstance(first_type, Number)
        self.assertEqual(str(first_type.locale), 'de_DE')

Example #11

0

Show file

    def group_by(self, key, key_name=None, key_type=None):
        """
        Split this table into one new :class:`Table` per distinct group value
        and return them as a :class:`.TableSet`.

        Each row's group value is obtained either by reading a column or by
        calling a function on the row, and is then cast with
        :code:`key_type` before being used as the group's key.

        :param key: Either the name of a column of this table to group by,
            or a callable that takes a row and returns the value to group by.
        :param key_name: A name that describes the grouped properties.
            Defaults to the name of the grouped column, or "group" when
            grouping with a function. See :class:`.TableSet` for more.
        :param key_type: An instance of some subclass of :class:`.DataType`.
            Defaults to the grouped column's data type, or to
            :class:`.Text` when grouping with a function.
        :returns: A :class:`.TableSet` whose keys are the distinct cast
            group values, in order of first appearance, and whose values are
            new :class:`Table` instances holding the matching rows.
        """
        from agate.tableset import TableSet

        key_is_row_function = hasattr(key, '__call__')

        if not key_is_row_function:
            column = self._columns[key]

            key_name = key_name or column.name
            key_type = key_type or column.data_type
        else:
            key_name = key_name or 'group'
            key_type = key_type or Text()

        # Ordered so that groups come out in order of first appearance.
        rows_by_group = OrderedDict()

        for row in self._rows:
            raw_value = key(row) if key_is_row_function else row[column.name]
            group_key = key_type.cast(raw_value)

            rows_by_group.setdefault(group_key, []).append(row)

        forked_tables = OrderedDict(
            (group_key, self._fork(members))
            for group_key, members in rows_by_group.items())

        return TableSet(forked_tables.values(),
                        forked_tables.keys(),
                        key_name=key_name,
                        key_type=key_type)

Example #12

0

Show file

    def test_monkeypatch(self):
        """
        Patching ``Table`` with ``TryPatch`` should add it to the bases and
        expose ``test`` / ``testcls`` on tables created before and after.
        """
        def build_table():
            return Table([], ['foo'], [Text()])

        # One table is deliberately created on each side of the patch call.
        table_before_patch = build_table()
        Table.monkeypatch(TryPatch)
        table_after_patch = build_table()

        self.assertSequenceEqual(Table.__bases__, [Patchable, TryPatch])

        patched_tables = (table_before_patch, table_after_patch)

        for table in patched_tables:
            self.assertIsNotNone(table.test)
            self.assertIsNotNone(table.testcls)

        for table in patched_tables:
            self.assertEqual(table.test(5), 5)

        self.assertEqual(Table.testcls(5), 5)

Example #13

0

Show file

    def counts(self, key, key_name=None, key_type=None):
        """
        Count the number of occurrences of each distinct value in a column.
        Creates a new table with only the value and the count. This is
        effectively equivalent to doing a :meth:`Table.group_by` followed by an
        :meth:`.TableSet.aggregate` with a :class:`.Length` aggregator.

        The resulting table will have two columns. The first will have
        the name and type of the specified :code:`key` column or
        :code:`key_name` and :code:`key_type`, if specified. The second will be
        named :code:`count` and will be of type :class:`.Number`.

        :param key:
            Either the name of a column from this table to count, or a
            callable that takes a row and returns a value to count.
        :param key_name:
            A name that describes the counted properties. Defaults to the
            column name that was counted or "group" if counting with a key
            function. This only labels the output column; it does not
            affect which column is read.
        :param key_type:
            An instance of some subclass of :class:`.DataType`. Defaults to
            the counted column's data type, or to :class:`.Text` when
            counting with a key function.
        :returns:
            A new :class:`Table` with one row per distinct value, in order
            of first appearance, using the values as row names.
        """
        key_is_row_function = hasattr(key, '__call__')

        if key_is_row_function:
            key_name = key_name or 'group'
            key_type = key_type or Text()
        else:
            column = self._columns[key]

            key_name = key_name or column.name
            key_type = key_type or column.data_type

        # Ordered so that values come out in order of first appearance.
        output = OrderedDict()

        for row in self._rows:
            if key_is_row_function:
                group_name = key(row)
            else:
                # Read by the real column name. key_name is only the output
                # label and may have been overridden by the caller, in which
                # case row[key_name] would hit the wrong (or no) column.
                group_name = row[column.name]

            group_name = key_type.cast(group_name)

            output[group_name] = output.get(group_name, 0) + 1

        column_names = [key_name, 'count']
        column_types = [key_type, Number()]

        return Table(output.items(),
                     column_names,
                     column_types,
                     row_names=tuple(output.keys()))

Example #14

0

Show file

    def setUp(self):
        """Fixture: three rows of (number, number, text) and their schema."""
        self.rows = (
            (1, 4, 'a'),
            (2, 3, 'b'),
            (None, 2, 'c'),
        )

        self.number_type = Number()
        self.text_type = Text()

        self.column_names = ['one', 'two', 'three']
        self.column_types = [
            self.number_type,
            self.number_type,
            self.text_type,
        ]

Example #15

0

Show file

File: test_denormalize.py Project: wireservice/agate

    def setUp(self):
        """
        Fixture: four all-text rows of (first_name, last_name, property,
        value) and their schema.
        """
        self.rows = (
            ('Jane', 'Code', 'gender', 'female'),
            ('Jane', 'Code', 'age', '27'),
            ('Jim', 'Program', 'gender', 'male'),
            ('Jim', 'Bytes', 'age', '24'),
        )

        self.text_type = Text()

        self.column_names = ['first_name', 'last_name', 'property', 'value']
        # The same Text instance is shared by all four columns.
        self.column_types = [self.text_type] * 4

Example #16

0

Show file

File: test_computations.py Project: wireservice/agate

    def setUp(self):
        """
        Fixture: four rows of (text, number, number, number), their schema,
        and a table built from them.
        """
        self.rows = (
            ('a', 2, 3, 4),
            (None, 3, 5, None),
            ('a', 2, 4, None),
            ('b', 3, 4, None),
        )

        self.number_type = Number()
        self.text_type = Text()

        self.column_names = ['one', 'two', 'three', 'four']
        self.column_types = [
            self.text_type,
            self.number_type,
            self.number_type,
            self.number_type,
        ]

        self.table = Table(self.rows, self.column_names, self.column_types)

Example #17

0

Show file

File: test_to_csv.py Project: wireservice/agate

    def setUp(self):
        """
        Fixture: three rows covering one column of each data type used here,
        including a non-ASCII text value and an all-``None`` row.
        """
        first_row = (1, 'a', True, '11/4/2015', '11/4/2015 12:22 PM', '4:15')
        second_row = (2, u'👍', False, '11/5/2015', '11/4/2015 12:45 PM', '6:18')
        empty_row = (None, 'b', None, None, None, None)

        self.rows = (first_row, second_row, empty_row)

        self.column_names = [
            'number',
            'text',
            'boolean',
            'date',
            'datetime',
            'timedelta',
        ]

        self.column_types = [
            Number(),
            Text(),
            Boolean(),
            Date(),
            DateTime(),
            TimeDelta(),
        ]

Example #18

0

Show file

File: test_group_by.py Project: wireservice/agate

    def test_group_by_key_type(self):
        """
        Grouping on column ``two`` with ``key_type=Text()`` should give
        string keys and the expected members per group.
        """
        source = Table(self.rows, self.column_names, self.column_types)

        grouped = source.group_by('two', key_type=Text())

        self.assertIsInstance(grouped, TableSet)
        self.assertEqual(grouped.key_name, 'two')
        self.assertIsInstance(grouped.key_type, Text)

        # Keys are expected as strings because of the Text key type.
        for expected_key in ('2', '3'):
            self.assertIn(expected_key, grouped.keys())

        expected_members = (
            ('2', ('a', 'a')),
            ('3', (None, 'b')),
        )
        for group_key, column_one in expected_members:
            self.assertSequenceEqual(
                grouped[group_key].columns['one'], column_one)

Example #19

0

Show file

File: test_denormalize.py Project: wireservice/agate

    def test_denormalize_column_types(self):
        """
        Denormalizing with explicit ``column_types`` should yield the
        expected single row, column names and column types.
        """
        source = Table(self.rows, self.column_names, self.column_types)

        forced_types = [Text(), Number()]
        result = source.denormalize(None, 'property', 'value',
                                    column_types=forced_types)

        # NB: value has been overwritten
        expected_rows = (('male', 24), )
        expected_names = ['gender', 'age']
        expected_types = [Text, Number]

        self.assertRows(result, expected_rows)
        self.assertColumnNames(result, expected_names)
        self.assertColumnTypes(result, expected_types)

Example #20

0

Show file

    def setUp(self):
        """
        Fixture: six rows of (name, race, gender, age, color) and their
        schema; only ``age`` is numeric.
        """
        self.rows = (
            ('joe', 'white', 'male', 20, 'blue'),
            ('jane', 'white', 'female', 20, 'blue'),
            ('josh', 'black', 'male', 20, 'blue'),
            ('jim', 'latino', 'male', 25, 'blue'),
            ('julia', 'white', 'female', 25, 'green'),
            ('joan', 'asian', 'female', 25, 'green'),
        )

        self.number_type = Number()
        self.text_type = Text()

        self.column_names = ['name', 'race', 'gender', 'age', 'color']
        self.column_types = [
            self.text_type,
            self.text_type,
            self.text_type,
            self.number_type,
            self.text_type,
        ]

Example #21

0

Show file

    def setUp(self):
        """
        Fixture: four rows of (Decimal, Decimal, text, None), their schema,
        and a table built from them.
        """
        self.rows = (
            (Decimal('1.1'), Decimal('2.19'), 'a', None),
            (Decimal('2.7'), Decimal('3.42'), 'b', None),
            (None, Decimal('4.1'), 'c', None),
            (Decimal('2.7'), Decimal('3.42'), 'c', None),
        )

        self.number_type = Number()
        self.text_type = Text()

        self.column_names = ['one', 'two', 'three', 'four']
        self.column_types = [
            self.number_type,
            self.number_type,
            self.text_type,
            self.number_type,
        ]

        self.table = Table(self.rows, self.column_names, self.column_types)

Example #22

0

Show file

File: test_print_structure.py Project: wireservice/agate

    def setUp(self):
        """
        Fixture: three rows and a schema mixing a default ``Number``, a
        ``Number`` with a German locale, and ``Text``.
        """
        self.rows = (('1.7', 2000, 'a'), ('11.18', None, None), ('0', 1, 'c'))

        self.number_type = Number()
        self.international_number_type = Number(locale='de_DE.UTF-8')
        self.text_type = Text()

        self.column_names = ['one', 'two', 'three']
        self.column_types = [
            self.number_type, self.international_number_type, self.text_type
        ]

Example #23

0

Show file

    def setUp(self):
        """
        Fixture: three rows and a schema mixing a default ``Number``, US and
        German locale ``Number`` types, and ``Text``.
        """
        self.rows = (('1.7', 2000, 2000, 'a'), ('11.18', None, None, None),
                     ('0', 1, 1, 'c'))

        self.number_type = Number()
        self.american_number_type = Number(locale='en_US')
        self.german_number_type = Number(locale='de_DE.UTF-8')
        self.text_type = Text()

        self.column_names = ['one', 'two', 'three', 'four']
        self.column_types = [
            self.number_type, self.american_number_type,
            self.german_number_type, self.text_type
        ]

Example #24

0

Show file

def print_structure(self, output=sys.stdout, max_rows=None):
    """
    Print this table's column names and types as a plain-text table.

    :param output:
        The output to print to.
    :param max_rows:
        Passed through unchanged to :code:`print_table`.
    :returns:
        Whatever :code:`print_table` returns.
    """
    from agate.table import Table

    text_type = Text()

    # One (column name, data type class name) pair per column.
    names = [name for name in self._column_names]
    type_names = [data_type.__class__.__name__
                  for data_type in self._column_types]

    structure = Table(
        zip(names, type_names),
        ['column', 'data_type'],
        [text_type, text_type])

    return structure.print_table(output=output,
                                 max_column_width=None,
                                 max_rows=max_rows)

Example #25

0

Show file

    def setUp(self):
        """
        Fixture: three (letter, number) row sets and their tables keyed by
        name.
        """
        self.table1 = (('a', 1), ('a', 3), ('b', 2))
        self.table2 = (('b', 0), ('a', 2), ('c', 5))
        self.table3 = (('a', 1), ('a', 2), ('c', 3))

        self.text_type = Text()
        self.number_type = Number()

        self.column_names = ['letter', 'number']
        self.column_types = [self.text_type, self.number_type]

        # Insertion order is kept so the tables stay in table1..table3 order.
        self.tables = OrderedDict()
        labelled_rows = (
            ('table1', self.table1),
            ('table2', self.table2),
            ('table3', self.table3),
        )
        for label, rows in labelled_rows:
            self.tables[label] = Table(rows, self.column_names,
                                       self.column_types)

Example #26

0

Show file

File: tableset.py Project: ritviksahajpal/agate

    def __init__(self, tables, keys, key_name='group', key_type=None):
        """
        Build the set from parallel iterables of tables and keys.

        :param tables: The member tables (or nested sets); materialised to a
            tuple.
        :param keys: One key per table; materialised to a tuple.
        :param key_name: Label stored for the keys. Defaults to "group".
        :param key_type: Data type stored for the keys; a new ``Text`` is
            used when this is falsy.
        :raises ValueError: If any table's column types or column names
            differ from those of the sample table.
        """
        tables = tuple(tables)
        keys = tuple(keys)

        self._key_name = key_name
        self._key_type = key_type or Text()

        # Descend through nested sets until an actual table is reached.
        sample = tables[0]
        while isinstance(sample, TableSet):
            sample = sample[0]
        self._sample_table = sample

        self._column_types = sample.column_types
        self._column_names = sample.column_names

        for candidate in tables:
            # Types are checked before names, so a table differing in both
            # reports the column-types error.
            if candidate.column_types != self.column_types:
                raise ValueError('Not all tables have the same column types!')

            if candidate.column_names != self.column_names:
                raise ValueError('Not all tables have the same column names!')

        MappedSequence.__init__(self, tables, keys)

Example #27

0

Show file

File: test_join.py Project: wireservice/agate

    def setUp(self):
        """
        Fixture: a left and a right table with the same row values and
        column types but different column names.
        """
        self.left_rows = ((1, 4, 'a'), (2, 3, 'b'), (None, 2, 'c'))
        self.right_rows = ((1, 4, 'a'), (2, 3, 'b'), (None, 2, 'c'))

        self.number_type = Number()
        self.text_type = Text()

        self.left_column_names = ['one', 'two', 'three']
        self.right_column_names = ['four', 'five', 'six']
        self.column_types = [
            self.number_type,
            self.number_type,
            self.text_type,
        ]

        self.left = Table(
            self.left_rows,
            self.left_column_names,
            self.column_types)
        self.right = Table(
            self.right_rows,
            self.right_column_names,
            self.column_types)

Example #28

0

Show file

    def test_max_length_unicode(self):
        """
        This test documents different handling of wide-unicode characters in
        Python 2 and Python 3. The former's behavior is broken, but can not
        be easily fixed.

        Bug: https://github.com/wireservice/agate/issues/649
        Reference: http://stackoverflow.com/a/35462951
        """
        sample_rows = [['a'], [u'👍'], ['w']]

        table = Table(sample_rows, ['test'], [Text()])

        MaxLength('test').validate(table)

        # Non 4-byte versions of Python 2 (but not PyPy) report 2 for the
        # emoji; modern versions of Python report 1.
        narrow_build = sys.maxunicode <= 65535
        expected_length = 2 if narrow_build else 1

        self.assertEqual(MaxLength('test').run(table), expected_length)

        self.assertIsInstance(MaxLength('test').run(table), Decimal)

Example #29

0

Show file

File: print_structure.py Project: indu-s-bhagavatula/rs_file_loader

def print_structure(self, max_rows=20, output=sys.stdout):
    """
    Print the keys and row counts of each table in the tableset.

    :param max_rows:
        The maximum number of tables to list before truncating.
        Defaults to 20.
    :param output:
        The output used to print the structure.
    :returns:
        Whatever :code:`print_table` returns.
    """
    shown = min(len(self.items()), max_rows)

    text_type = Text()

    table_keys = self.keys()[0:shown]
    # Row counts are rendered as strings because both columns are Text.
    row_counts = [str(len(member.rows))
                  for _, member in self.items()[0:shown]]

    structure = Table(
        zip(table_keys, row_counts),
        ['table', 'rows'],
        [text_type, text_type])

    return structure.print_table(output=output, max_column_width=None)

Example #30

0

Show file

    def bins(self, column_name, count=10, start=None, end=None):
        """
        Generates (approximately) evenly sized bins for the values in a column.
        Bins may not be perfectly even if the spread of the data does not divide
        evenly, but all values will always be included in some bin.

        The resulting table will have two columns. The first will have
        the same name as the specified column, but will be type :class:`.Text`.
        The second will be named :code:`count` and will be of type
        :class:`.Number`.

        :param column_name:
            The name of the column to bin. Must be of type :class:`.Number`
        :param count:
            The number of bins to create. Defaults to 10.
        :param start:
            The minimum value to start the bins at. If not specified, a
            limit derived from the minimum value in the column is used.
        :param end:
            The maximum value to end the bins at. If not specified, a limit
            derived from the maximum value in the column is used.
        :returns:
            A new :class:`Table`.
        """
        if start is None or end is None:
            # Derive limits from the data, but substitute them only for the
            # bound(s) the caller omitted. Previously supplying just one of
            # start/end caused that value to be silently discarded.
            data_start, data_end = utils.round_limits(
                Min(column_name).run(self),
                Max(column_name).run(self))

            start = data_start if start is None else Decimal(start)
            end = data_end if end is None else Decimal(end)
        else:
            start = Decimal(start)
            end = Decimal(end)

        spread = abs(end - start)
        size = spread / count

        # count + 1 break points delimit count bins.
        breaks = [start]
        breaks.extend(start + (size * i) for i in range(1, count + 1))

        decimal_places = utils.max_precision(breaks)
        break_formatter = utils.make_number_formatter(decimal_places)

        def name_bin(i, j, first_exclusive=True, last_exclusive=False):
            # NOTE: despite the flag names, a true flag selects the square
            # bracket and a false flag the round one, so the defaults
            # produce labels of the form "[a - b)".
            inclusive = format_decimal(i, format=break_formatter)
            exclusive = format_decimal(j, format=break_formatter)

            output = u'[' if first_exclusive else u'('
            output += u'%s - %s' % (inclusive, exclusive)
            output += u']' if last_exclusive else u')'

            return output

        # Pre-create every bin so that empty bins still appear, in order.
        bins = OrderedDict()

        for i in range(1, len(breaks)):
            last_exclusive = (i == len(breaks) - 1)
            name = name_bin(breaks[i - 1],
                            breaks[i],
                            last_exclusive=last_exclusive)

            bins[name] = Decimal('0')

        for row in self._rows:
            value = row[column_name]

            if value is None:
                # Nulls get their own bin, created on first sight.
                try:
                    bins[None] += 1
                except KeyError:
                    bins[None] = Decimal('1')

                continue  # pragma: no cover

            i = 1

            # Walk up to the first break strictly above the value. Running
            # off the end means the value is at or beyond the last break,
            # so step back into the final bin.
            try:
                while value >= breaks[i]:
                    i += 1
            except IndexError:
                i -= 1

            last_exclusive = (i == len(breaks) - 1)
            name = name_bin(breaks[i - 1],
                            breaks[i],
                            last_exclusive=last_exclusive)

            bins[name] += 1

        column_names = [column_name, 'count']
        column_types = [Text(), Number()]

        return Table(bins.items(),
                     column_names,
                     column_types,
                     row_names=tuple(bins.keys()))