Python Count Examples, agate.aggregations.Count Python Examples

Example #1

0

Show file

    def test_count(self):
        rows = ((1, 2, 'a'), (2, 3, 'b'), (None, 4, 'c'), (1, 2, 'a'), (1, 2,
                                                                        'a'))

        table = Table(rows, self.column_names, self.column_types)

        self.assertIsInstance(Count().get_aggregate_data_type(table), Number)
        Count().validate(self.table)
        self.assertEqual(Count().run(table), 5)
        self.assertEqual(Count().run(table), 5)

Example #2

0

Show file

    def test_aggregate_row_names(self):
        tableset = TableSet(self.tables.values(),
                            self.tables.keys(),
                            key_name='test')

        new_table = tableset.aggregate([('count', Count())])

        self.assertRowNames(new_table, ['table1', 'table2', 'table3'])

Example #3

0

Show file

    def test_aggregate_key_name(self):
        tableset = TableSet(self.tables.values(),
                            self.tables.keys(),
                            key_name='test')

        new_table = tableset.aggregate([('count', Count())])

        self.assertIsInstance(new_table, Table)
        self.assertColumnNames(new_table, ('test', 'count'))
        self.assertColumnTypes(new_table, [Text, Number])

Example #4

0

Show file

 def test_multiple(self):
     self.assertEqual(
         self.table.aggregate([
             ('count', Count()),
             ('sum', Sum('two'))
         ]),
         {
             'count': 3,
             'sum': 9
         }
     )

Example #5

0

Show file

    def test_aggregate_min(self):
        tableset = TableSet(self.tables.values(), self.tables.keys())

        new_table = tableset.aggregate([('count', Count()),
                                        ('number_min', Min('number'))])

        self.assertIsInstance(new_table, Table)
        self.assertColumnNames(new_table, ('group', 'count', 'number_min'))
        self.assertColumnTypes(new_table, [Text, Number, Number])
        self.assertRows(new_table, [('table1', 3, 1), ('table2', 3, 0),
                                    ('table3', 3, 1)])

Example #6

0

Show file

File: test_having.py Project: makramjandar/DataProcessing

    def test_having_simple(self):
        tableset = TableSet(self.tables.values(),
                            self.tables.keys(),
                            key_name='test')

        new_tableset = tableset.having([('count', Count())],
                                       lambda t: t['count'] < 3)

        self.assertIsInstance(new_tableset, TableSet)
        self.assertSequenceEqual(new_tableset.keys(), ['table3'])
        self.assertIs(new_tableset.values()[0], tableset['table3'])
        self.assertEqual(new_tableset.key_name, 'test')

Example #7

0

Show file

File: test_having.py Project: makramjandar/DataProcessing

    def test_having_complex(self):
        tableset = TableSet(self.tables.values(),
                            self.tables.keys(),
                            key_name='test')

        new_tableset = tableset.having(
            [('count', Count()), ('number_sum', Sum('number'))],
            lambda t: t['count'] >= 3 and t['number_sum'] > 6)

        self.assertIsInstance(new_tableset, TableSet)
        self.assertSequenceEqual(new_tableset.keys(), ['table2'])
        self.assertIs(new_tableset.values()[0], tableset['table2'])
        self.assertEqual(new_tableset.key_name, 'test')

Example #8

0

Show file

    def test_aggregate_two_ops(self):
        tableset = TableSet(self.tables.values(), self.tables.keys())

        new_table = tableset.aggregate([('count', Count()),
                                        ('number_sum', Sum('number')),
                                        ('number_mean', Mean('number'))])

        self.assertIsInstance(new_table, Table)
        self.assertColumnNames(new_table,
                               ('group', 'count', 'number_sum', 'number_mean'))
        self.assertColumnTypes(new_table, [Text, Number, Number, Number])
        self.assertRows(new_table, [('table1', 3, 6, 2),
                                    ('table2', 3, 7, Decimal(7) / 3),
                                    ('table3', 3, 6, 2)])

Example #9

0

Show file

    def test_aggregate_key_type(self):
        tables = OrderedDict([
            (1, Table(self.table1, self.column_names, self.column_types)),
            (2, Table(self.table2, self.column_names, self.column_types)),
            (3, Table(self.table3, self.column_names, self.column_types))
        ])

        tableset = TableSet(tables.values(),
                            tables.keys(),
                            key_name='test',
                            key_type=self.number_type)

        new_table = tableset.aggregate([('count', Count())])

        self.assertIsInstance(new_table, Table)
        self.assertColumnNames(new_table, ('test', 'count'))
        self.assertColumnTypes(new_table, [Number, Number])

Example #10

0

Show file

    def test_nested_aggregation(self):
        tableset = TableSet(self.tables.values(),
                            self.tables.keys(),
                            key_name='test')

        nested = tableset.group_by('letter')

        results = nested.aggregate([('count', Count()),
                                    ('number_sum', Sum('number'))])

        self.assertIsInstance(results, Table)
        self.assertColumnNames(results,
                               ('test', 'letter', 'count', 'number_sum'))
        self.assertColumnTypes(results, (Text, Text, Number, Number))
        self.assertRows(results, [('table1', 'a', 2, 4), ('table1', 'b', 1, 2),
                                  ('table2', 'b', 1, 0), ('table2', 'a', 1, 2),
                                  ('table2', 'c', 1, 5), ('table3', 'a', 2, 3),
                                  ('table3', 'c', 1, 3)])

Example #11

0

Show file

    def test_nested_aggregate_row_names(self):
        tableset = TableSet(self.tables.values(),
                            self.tables.keys(),
                            key_name='test')

        nested = tableset.group_by('letter')

        results = nested.aggregate([('count', Count()),
                                    ('number_sum', Sum('number'))])

        self.assertRowNames(results, [
            ('table1', 'a'),
            ('table1', 'b'),
            ('table2', 'b'),
            ('table2', 'a'),
            ('table2', 'c'),
            ('table3', 'a'),
            ('table3', 'c'),
        ])
        self.assertSequenceEqual(results.rows[('table1', 'a')],
                                 ('table1', 'a', 2, 4))
        self.assertSequenceEqual(results.rows[('table2', 'c')],
                                 ('table2', 'c', 1, 5))

Example #12

0

Show file

def pivot(self,
          key=None,
          pivot=None,
          aggregation=None,
          computation=None,
          default_value=utils.default,
          key_name=None):
    """
    Create a new table by grouping the data, aggregating those groups,
    applying a computation, and then organizing the groups into new rows and
    columns.

    This is sometimes called a "crosstab".

    +---------+---------+--------+
    |  name   |  race   | gender |
    +=========+=========+========+
    |  Joe    |  white  | male   |
    +---------+---------+--------+
    |  Jane   |  black  | female |
    +---------+---------+--------+
    |  Josh   |  black  | male   |
    +---------+---------+--------+
    |  Jim    |  asian  | female |
    +---------+---------+--------+

    This table can be pivoted with :code:`key` equal to "race" and
    :code:`columns` equal to "gender". The default aggregation is
    :class:`.Count`. This would result in the following table.

    +---------+---------+--------+
    |  race   |  male   | female |
    +=========+=========+========+
    |  white  |  1      | 0      |
    +---------+---------+--------+
    |  black  |  1      | 1      |
    +---------+---------+--------+
    |  asian  |  0      | 1      |
    +---------+---------+--------+

    If one or more keys are specified then the resulting table will
    automatically have :code:`row_names` set to those keys.

    See also the related method :meth:`.Table.denormalize`.

    :param key:
        Either the name of a column from the this table to group by, a
        sequence of such column names, a :class:`function` that takes a
        row and returns a value to group by, or :code:`None`, in which case
        there will be only a single row in the output table.
    :param pivot:
        A column name whose unique values will become columns in the new
        table, or :code:`None` in which case there will be a single value
        column in the output table.
    :param aggregation:
        An instance of an :class:`.Aggregation` to perform on each group of
        data in the pivot table. (Each cell is the result of an aggregation
        of the grouped data.)

        If not specified this defaults to :class:`.Count` with no arguments.
    :param computation:
        An optional :class:`.Computation` instance to be applied to the
        aggregated sequence of values before they are transposed into the
        pivot table.

        Use the class name of the aggregation as your column name argument
        when constructing your computation. (This is "Count" if using the
        default value for :code:`aggregation`.)
    :param default_value:
        Value to be used for missing values in the pivot table. Defaults to
        :code:`Decimal(0)`. If performing non-mathematical aggregations you
        may wish to set this to :code:`None`.
    :param key_name:
        A name for the key column in the output table. This is most
        useful when the provided key is a function. This argument is not
        valid when :code:`key` is a sequence.
    :returns:
        A new :class:`.Table`.
    """
    if key is None:
        key = []
    elif not utils.issequence(key):
        key = [key]
    elif key_name:
        raise ValueError(
            'key_name is not a valid argument when key is a sequence.')

    if aggregation is None:
        aggregation = Count()

    groups = self

    for k in key:
        groups = groups.group_by(k, key_name=key_name)

    aggregation_name = six.text_type(aggregation)
    computation_name = six.text_type(computation) if computation else None

    def apply_computation(table):
        computed = table.compute([(computation_name, computation)])

        excluded = computed.exclude([aggregation_name])

        return excluded

    if pivot is not None:
        groups = groups.group_by(pivot)

        column_type = aggregation.get_aggregate_data_type(self)

        table = groups.aggregate([(aggregation_name, aggregation)])

        pivot_count = len(set(table.columns[pivot].values()))

        if computation is not None:
            column_types = computation.get_computed_data_type(table)
            table = apply_computation(table)

        column_types = [column_type] * pivot_count

        table = table.denormalize(key,
                                  pivot,
                                  computation_name or aggregation_name,
                                  default_value=default_value,
                                  column_types=column_types)
    else:
        table = groups.aggregate([(aggregation_name, aggregation)])

        if computation:
            table = apply_computation(table)

    return table

Example #13

0

Show file

 def test_count(self):
     self.assertEqual(self.table.aggregate(Count()), 3)

Example #14

0

Show file

File: pivot.py Project: livlab/agate

def pivot(self, key=None, pivot=None, aggregation=None, computation=None, default_value=utils.default, key_name=None):
    """
    Pivot reorganizes the data in a table by grouping the data, aggregating
    those groups, optionally applying a computation, and then organizing
    the groups into new rows and columns.

    For example:

    +---------+---------+-- ------+
    |  name   |  race   | gender |
    +=========+=========+========+
    |  Joe    |  white  | male   |
    +---------+---------+--------+
    |  Jane   |  black  | female |
    +---------+---------+--------+
    |  Josh   |  black  | male   |
    +---------+---------+--------+
    |  Jim    |  asian  | female |
    +---------+---------+--------+

    This table can be pivoted with :code:`key` equal to "race" and
    :code:`columns` equal to "gender". The default aggregation is
    :class:`.Count`. This would result in the following table.

    +---------+---------+--------+
    |  race   |  male   | female |
    +=========+=========+========+
    |  white  |  1      | 0      |
    +---------+---------+--------+
    |  black  |  1      | 1      |
    +---------+---------+--------+
    |  asian  |  0      | 1      |
    +---------+---------+--------+

    If one or more keys are specified then the resulting table will
    automatically have `row_names` set to those keys.

    See also the related method :meth:`Table.denormalize`.

    :param key:
        Either the name of a column from the this table to group by, a
        sequence of such column names, a :class:`function` that takes a
        row and returns a value to group by, or :code:`None`, in which case
        there will be only a single row in the output table.
    :param columns:
        A column name whose unique values will become columns in the new
        table, or :code:`None` in which case there will be a single value
        column in the output table.
    :param aggregation:
        An instance of an :class:`.Aggregation` to perform on each group of
        data in the pivot table. (Each cell is the result of an aggregation
        of the grouped data.)

        If not specified this defaults to :class:`.Count` with no arguments.
    :param computation:
        An optional :class:`.Computation` instance to be applied to the
        aggregated sequence of values before they are transposed into the
        pivot table.

        Use the class name of the aggregation as your column name argument
        when constructing your computation. (This is "Count" if using the
        default value for :code:`aggregation`.)
    :param default_value:
        Value to be used for missing values in the pivot table. Defaults to
        :code:`Decimal(0)`. If performing non-mathematical aggregations you
        may wish to set this to :code:`None`.
    :param key_name:
        A name for the key column in the output table. This is most
        useful when the provided key is a function. This argument is not
        valid when :code:`key` is a sequence.
    :returns:
        A new :class:`Table`.
    """
    if key is None:
        key = []
    elif not utils.issequence(key):
        key = [key]
    elif key_name:
        raise ValueError('key_name is not a valid argument when key is a sequence.')

    if aggregation is None:
        aggregation = Count()

    groups = self

    for k in key:
        groups = groups.group_by(k, key_name=key_name)

    aggregation_name = six.text_type(aggregation)
    computation_name = six.text_type(computation) if computation else None

    def apply_computation(table):
        computed = table.compute([
            (computation_name, computation)
        ])

        excluded = computed.exclude([aggregation_name])

        return excluded

    if pivot is not None:
        groups = groups.group_by(pivot)

        column_type = aggregation.get_aggregate_data_type(groups)

        table = groups.aggregate([
            (aggregation_name, aggregation)
        ])

        pivot_count = len(set(table.columns[pivot].values()))

        if computation is not None:
            column_types = computation.get_computed_data_type(table)
            table = apply_computation(table)

        column_types = [column_type] * pivot_count

        table = table.denormalize(key, pivot, computation_name or aggregation_name, default_value=default_value, column_types=column_types)
    else:
        table = groups.aggregate([
            (aggregation_name, aggregation)
        ])

        if computation:
            table = apply_computation(table)

    return table

Example #15

0

Show file

 def test_multiple(self):
     self.assertSequenceEqual(self.table.aggregate([Count(),
                                                    Sum('two')]), [3, 9])