Example #1
0
    def test_count(self):
        rows = ((1, 2, 'a'), (2, 3, 'b'), (None, 4, 'c'), (1, 2, 'a'), (1, 2,
                                                                        'a'))

        table = Table(rows, self.column_names, self.column_types)

        self.assertIsInstance(Count().get_aggregate_data_type(table), Number)
        Count().validate(self.table)
        self.assertEqual(Count().run(table), 5)
        self.assertEqual(Count().run(table), 5)
Example #2
0
    def test_aggregate_row_names(self):
        tableset = TableSet(self.tables.values(),
                            self.tables.keys(),
                            key_name='test')

        new_table = tableset.aggregate([('count', Count())])

        self.assertRowNames(new_table, ['table1', 'table2', 'table3'])
Example #3
0
    def test_aggregate_key_name(self):
        tableset = TableSet(self.tables.values(),
                            self.tables.keys(),
                            key_name='test')

        new_table = tableset.aggregate([('count', Count())])

        self.assertIsInstance(new_table, Table)
        self.assertColumnNames(new_table, ('test', 'count'))
        self.assertColumnTypes(new_table, [Text, Number])
Example #4
0
 def test_multiple(self):
     self.assertEqual(
         self.table.aggregate([
             ('count', Count()),
             ('sum', Sum('two'))
         ]),
         {
             'count': 3,
             'sum': 9
         }
     )
Example #5
0
    def test_aggregate_min(self):
        tableset = TableSet(self.tables.values(), self.tables.keys())

        new_table = tableset.aggregate([('count', Count()),
                                        ('number_min', Min('number'))])

        self.assertIsInstance(new_table, Table)
        self.assertColumnNames(new_table, ('group', 'count', 'number_min'))
        self.assertColumnTypes(new_table, [Text, Number, Number])
        self.assertRows(new_table, [('table1', 3, 1), ('table2', 3, 0),
                                    ('table3', 3, 1)])
    def test_having_simple(self):
        tableset = TableSet(self.tables.values(),
                            self.tables.keys(),
                            key_name='test')

        new_tableset = tableset.having([('count', Count())],
                                       lambda t: t['count'] < 3)

        self.assertIsInstance(new_tableset, TableSet)
        self.assertSequenceEqual(new_tableset.keys(), ['table3'])
        self.assertIs(new_tableset.values()[0], tableset['table3'])
        self.assertEqual(new_tableset.key_name, 'test')
    def test_having_complex(self):
        tableset = TableSet(self.tables.values(),
                            self.tables.keys(),
                            key_name='test')

        new_tableset = tableset.having(
            [('count', Count()), ('number_sum', Sum('number'))],
            lambda t: t['count'] >= 3 and t['number_sum'] > 6)

        self.assertIsInstance(new_tableset, TableSet)
        self.assertSequenceEqual(new_tableset.keys(), ['table2'])
        self.assertIs(new_tableset.values()[0], tableset['table2'])
        self.assertEqual(new_tableset.key_name, 'test')
Example #8
0
    def test_aggregate_two_ops(self):
        tableset = TableSet(self.tables.values(), self.tables.keys())

        new_table = tableset.aggregate([('count', Count()),
                                        ('number_sum', Sum('number')),
                                        ('number_mean', Mean('number'))])

        self.assertIsInstance(new_table, Table)
        self.assertColumnNames(new_table,
                               ('group', 'count', 'number_sum', 'number_mean'))
        self.assertColumnTypes(new_table, [Text, Number, Number, Number])
        self.assertRows(new_table, [('table1', 3, 6, 2),
                                    ('table2', 3, 7, Decimal(7) / 3),
                                    ('table3', 3, 6, 2)])
Example #9
0
    def test_aggregate_key_type(self):
        tables = OrderedDict([
            (1, Table(self.table1, self.column_names, self.column_types)),
            (2, Table(self.table2, self.column_names, self.column_types)),
            (3, Table(self.table3, self.column_names, self.column_types))
        ])

        tableset = TableSet(tables.values(),
                            tables.keys(),
                            key_name='test',
                            key_type=self.number_type)

        new_table = tableset.aggregate([('count', Count())])

        self.assertIsInstance(new_table, Table)
        self.assertColumnNames(new_table, ('test', 'count'))
        self.assertColumnTypes(new_table, [Number, Number])
Example #10
0
    def test_nested_aggregation(self):
        tableset = TableSet(self.tables.values(),
                            self.tables.keys(),
                            key_name='test')

        nested = tableset.group_by('letter')

        results = nested.aggregate([('count', Count()),
                                    ('number_sum', Sum('number'))])

        self.assertIsInstance(results, Table)
        self.assertColumnNames(results,
                               ('test', 'letter', 'count', 'number_sum'))
        self.assertColumnTypes(results, (Text, Text, Number, Number))
        self.assertRows(results, [('table1', 'a', 2, 4), ('table1', 'b', 1, 2),
                                  ('table2', 'b', 1, 0), ('table2', 'a', 1, 2),
                                  ('table2', 'c', 1, 5), ('table3', 'a', 2, 3),
                                  ('table3', 'c', 1, 3)])
Example #11
0
    def test_nested_aggregate_row_names(self):
        tableset = TableSet(self.tables.values(),
                            self.tables.keys(),
                            key_name='test')

        nested = tableset.group_by('letter')

        results = nested.aggregate([('count', Count()),
                                    ('number_sum', Sum('number'))])

        self.assertRowNames(results, [
            ('table1', 'a'),
            ('table1', 'b'),
            ('table2', 'b'),
            ('table2', 'a'),
            ('table2', 'c'),
            ('table3', 'a'),
            ('table3', 'c'),
        ])
        self.assertSequenceEqual(results.rows[('table1', 'a')],
                                 ('table1', 'a', 2, 4))
        self.assertSequenceEqual(results.rows[('table2', 'c')],
                                 ('table2', 'c', 1, 5))
Example #12
0
def pivot(self,
          key=None,
          pivot=None,
          aggregation=None,
          computation=None,
          default_value=utils.default,
          key_name=None):
    """
    Create a new table by grouping the data, aggregating those groups,
    applying a computation, and then organizing the groups into new rows and
    columns.

    This is sometimes called a "crosstab".

    +---------+---------+--------+
    |  name   |  race   | gender |
    +=========+=========+========+
    |  Joe    |  white  | male   |
    +---------+---------+--------+
    |  Jane   |  black  | female |
    +---------+---------+--------+
    |  Josh   |  black  | male   |
    +---------+---------+--------+
    |  Jim    |  asian  | female |
    +---------+---------+--------+

    This table can be pivoted with :code:`key` equal to "race" and
    :code:`columns` equal to "gender". The default aggregation is
    :class:`.Count`. This would result in the following table.

    +---------+---------+--------+
    |  race   |  male   | female |
    +=========+=========+========+
    |  white  |  1      | 0      |
    +---------+---------+--------+
    |  black  |  1      | 1      |
    +---------+---------+--------+
    |  asian  |  0      | 1      |
    +---------+---------+--------+

    If one or more keys are specified then the resulting table will
    automatically have :code:`row_names` set to those keys.

    See also the related method :meth:`.Table.denormalize`.

    :param key:
        Either the name of a column from the this table to group by, a
        sequence of such column names, a :class:`function` that takes a
        row and returns a value to group by, or :code:`None`, in which case
        there will be only a single row in the output table.
    :param pivot:
        A column name whose unique values will become columns in the new
        table, or :code:`None` in which case there will be a single value
        column in the output table.
    :param aggregation:
        An instance of an :class:`.Aggregation` to perform on each group of
        data in the pivot table. (Each cell is the result of an aggregation
        of the grouped data.)

        If not specified this defaults to :class:`.Count` with no arguments.
    :param computation:
        An optional :class:`.Computation` instance to be applied to the
        aggregated sequence of values before they are transposed into the
        pivot table.

        Use the class name of the aggregation as your column name argument
        when constructing your computation. (This is "Count" if using the
        default value for :code:`aggregation`.)
    :param default_value:
        Value to be used for missing values in the pivot table. Defaults to
        :code:`Decimal(0)`. If performing non-mathematical aggregations you
        may wish to set this to :code:`None`.
    :param key_name:
        A name for the key column in the output table. This is most
        useful when the provided key is a function. This argument is not
        valid when :code:`key` is a sequence.
    :returns:
        A new :class:`.Table`.
    """
    if key is None:
        key = []
    elif not utils.issequence(key):
        key = [key]
    elif key_name:
        raise ValueError(
            'key_name is not a valid argument when key is a sequence.')

    if aggregation is None:
        aggregation = Count()

    groups = self

    for k in key:
        groups = groups.group_by(k, key_name=key_name)

    aggregation_name = six.text_type(aggregation)
    computation_name = six.text_type(computation) if computation else None

    def apply_computation(table):
        computed = table.compute([(computation_name, computation)])

        excluded = computed.exclude([aggregation_name])

        return excluded

    if pivot is not None:
        groups = groups.group_by(pivot)

        column_type = aggregation.get_aggregate_data_type(self)

        table = groups.aggregate([(aggregation_name, aggregation)])

        pivot_count = len(set(table.columns[pivot].values()))

        if computation is not None:
            column_types = computation.get_computed_data_type(table)
            table = apply_computation(table)

        column_types = [column_type] * pivot_count

        table = table.denormalize(key,
                                  pivot,
                                  computation_name or aggregation_name,
                                  default_value=default_value,
                                  column_types=column_types)
    else:
        table = groups.aggregate([(aggregation_name, aggregation)])

        if computation:
            table = apply_computation(table)

    return table
Example #13
0
 def test_count(self):
     self.assertEqual(self.table.aggregate(Count()), 3)
Example #14
0
def pivot(self, key=None, pivot=None, aggregation=None, computation=None, default_value=utils.default, key_name=None):
    """
    Pivot reorganizes the data in a table by grouping the data, aggregating
    those groups, optionally applying a computation, and then organizing
    the groups into new rows and columns.

    For example:

    +---------+---------+-- ------+
    |  name   |  race   | gender |
    +=========+=========+========+
    |  Joe    |  white  | male   |
    +---------+---------+--------+
    |  Jane   |  black  | female |
    +---------+---------+--------+
    |  Josh   |  black  | male   |
    +---------+---------+--------+
    |  Jim    |  asian  | female |
    +---------+---------+--------+

    This table can be pivoted with :code:`key` equal to "race" and
    :code:`columns` equal to "gender". The default aggregation is
    :class:`.Count`. This would result in the following table.

    +---------+---------+--------+
    |  race   |  male   | female |
    +=========+=========+========+
    |  white  |  1      | 0      |
    +---------+---------+--------+
    |  black  |  1      | 1      |
    +---------+---------+--------+
    |  asian  |  0      | 1      |
    +---------+---------+--------+

    If one or more keys are specified then the resulting table will
    automatically have `row_names` set to those keys.

    See also the related method :meth:`Table.denormalize`.

    :param key:
        Either the name of a column from the this table to group by, a
        sequence of such column names, a :class:`function` that takes a
        row and returns a value to group by, or :code:`None`, in which case
        there will be only a single row in the output table.
    :param columns:
        A column name whose unique values will become columns in the new
        table, or :code:`None` in which case there will be a single value
        column in the output table.
    :param aggregation:
        An instance of an :class:`.Aggregation` to perform on each group of
        data in the pivot table. (Each cell is the result of an aggregation
        of the grouped data.)

        If not specified this defaults to :class:`.Count` with no arguments.
    :param computation:
        An optional :class:`.Computation` instance to be applied to the
        aggregated sequence of values before they are transposed into the
        pivot table.

        Use the class name of the aggregation as your column name argument
        when constructing your computation. (This is "Count" if using the
        default value for :code:`aggregation`.)
    :param default_value:
        Value to be used for missing values in the pivot table. Defaults to
        :code:`Decimal(0)`. If performing non-mathematical aggregations you
        may wish to set this to :code:`None`.
    :param key_name:
        A name for the key column in the output table. This is most
        useful when the provided key is a function. This argument is not
        valid when :code:`key` is a sequence.
    :returns:
        A new :class:`Table`.
    """
    if key is None:
        key = []
    elif not utils.issequence(key):
        key = [key]
    elif key_name:
        raise ValueError('key_name is not a valid argument when key is a sequence.')

    if aggregation is None:
        aggregation = Count()

    groups = self

    for k in key:
        groups = groups.group_by(k, key_name=key_name)

    aggregation_name = six.text_type(aggregation)
    computation_name = six.text_type(computation) if computation else None

    def apply_computation(table):
        computed = table.compute([
            (computation_name, computation)
        ])

        excluded = computed.exclude([aggregation_name])

        return excluded

    if pivot is not None:
        groups = groups.group_by(pivot)

        column_type = aggregation.get_aggregate_data_type(groups)

        table = groups.aggregate([
            (aggregation_name, aggregation)
        ])

        pivot_count = len(set(table.columns[pivot].values()))

        if computation is not None:
            column_types = computation.get_computed_data_type(table)
            table = apply_computation(table)

        column_types = [column_type] * pivot_count

        table = table.denormalize(key, pivot, computation_name or aggregation_name, default_value=default_value, column_types=column_types)
    else:
        table = groups.aggregate([
            (aggregation_name, aggregation)
        ])

        if computation:
            table = apply_computation(table)

    return table
Example #15
0
 def test_multiple(self):
     self.assertSequenceEqual(self.table.aggregate([Count(),
                                                    Sum('two')]), [3, 9])