コード例 #1
0
ファイル: table.py プロジェクト: esagara/journalism
    def rank(self, key, new_column_name):
        """
        Creates a new column that is the rank order of the values
        selected by ``key``.

        A value's rank is the one-based position of its first occurrence
        in the sorted list of all values, so equal values share the same
        (lowest) rank. :code:`None` values are sorted using a
        :class:`NullOrder` placeholder instead of being compared directly.

        :param key: Either a callable that takes a row and returns the value
            to rank, or a key used to index each row (``row[key]``), e.g. a
            column name.
        :param new_column_name: The name of the resulting column.
        :returns: A new :class:`Table`.
        """
        # Normalize both accepted forms of ``key`` into one row -> value
        # function so the ranking logic below is written only once.
        if hasattr(key, '__call__'):
            get_value = key
        else:
            def get_value(row):
                return row[key]

        def null_handler(k):
            # None is swapped for a NullOrder instance for sorting purposes.
            if k is None:
                return NullOrder()

            return k

        ordered_values = sorted(
            (get_value(row) for row in self.rows),
            key=null_handler
        )

        def compute_func(row):
            # list.index returns the first match, hence ties share a rank.
            return ordered_values.index(get_value(row)) + 1

        return self.compute(new_column_name, NumberType(), compute_func)
コード例 #2
0
ファイル: test_table.py プロジェクト: esagara/journalism
    def setUp(self):
        """Prepare three rows of (number, number, text) data plus the
        matching column names and column types."""
        self.rows = (
            (1, 4, 'a'),
            (2, 3, 'b'),
            (None, 2, 'c'),
        )
        self.column_names = ('one', 'two', 'three')

        number = NumberType()
        text = TextType()
        self.number_type = number
        self.text_type = text
        # The first two columns share the same NumberType instance.
        self.column_types = (number, number, text)
コード例 #3
0
ファイル: test_table.py プロジェクト: esagara/journalism
    def setUp(self):
        """Prepare four rows of (text, number, number, number) data,
        including None values, plus matching column types and names."""
        self.rows = (
            ('a', 2, 3, 4),
            (None, 3, 5, None),
            ('a', 2, 4, None),
            ('b', 3, 4, None),
        )

        number = NumberType()
        text = TextType()
        self.number_type = number
        self.text_type = text
        # All three numeric columns share the same NumberType instance.
        self.column_types = (text, number, number, number)
        self.column_names = ('one', 'two', 'three', 'four')
コード例 #4
0
ファイル: test_columns.py プロジェクト: esagara/journalism
    def setUp(self):
        """Build a four-row Decimal/text fixture and wrap it in a Table."""
        self.rows = (
            (Decimal('1.1'), Decimal('2.19'), 'a'),
            (Decimal('2.7'), Decimal('3.42'), 'b'),
            (None, Decimal('4.1'), 'c'),
            (Decimal('2.7'), Decimal('3.42'), 'c'),
        )
        self.column_names = ('one', 'two', 'three')

        number = NumberType()
        text = TextType()
        self.number_type = number
        self.text_type = text
        # Both numeric columns share the same NumberType instance.
        self.column_types = (number, number, text)

        self.table = Table(self.rows, self.column_types, self.column_names)
コード例 #5
0
ファイル: test_table.py プロジェクト: esagara/journalism
    def setUp(self):
        """Build two tables with identical rows and column types but
        different column names: ``self.left`` and ``self.right``."""
        self.left_rows = (
            (1, 4, 'a'),
            (2, 3, 'b'),
            (None, 2, 'c'),
        )
        self.right_rows = (
            (1, 4, 'a'),
            (2, 3, 'b'),
            (None, 2, 'c'),
        )

        number = NumberType()
        text = TextType()
        self.number_type = number
        self.text_type = text
        self.column_types = (number, number, text)

        left_names = ('one', 'two', 'three')
        right_names = ('four', 'five', 'six')

        self.left = Table(self.left_rows, self.column_types, left_names)
        self.right = Table(self.right_rows, self.column_types, right_names)
コード例 #6
0
ファイル: table.py プロジェクト: esagara/journalism
    def z_scores(self, column_name, new_column_name):
        """
        Adds a column holding the z-score (standard score) of each value
        in an existing column, i.e. ``(value - mean) / stdev`` using that
        column's own mean and standard deviation.

        :param column_name: The name of the column the z-scores are
            based on.
        :param new_column_name: The name of the resulting column.
        :returns: A new :class:`Table`.
        """
        column_mean = self.columns[column_name].mean()
        column_stdev = self.columns[column_name].stdev()

        def standard_score(row):
            # Distance from the mean, measured in standard deviations.
            return (row[column_name] - column_mean) / column_stdev

        return self.compute(new_column_name, NumberType(), standard_score)
コード例 #7
0
ファイル: table.py プロジェクト: esagara/journalism
    def percent_change(self, before_column_name, after_column_name,
                       new_column_name):
        """
        Convenience wrapper around :meth:`compute` that adds a column with
        the percent change from one column to another, computed per row
        as ``(after - before) / before * 100``.

        :param before_column_name: The name of the column containing the
            *before* values.
        :param after_column_name: The name of the column containing the
            *after* values.
        :param new_column_name: The name of the resulting column.
        :returns: A new :class:`Table`.
        """
        def change_for_row(row):
            after_value = row[after_column_name]
            before_value = row[before_column_name]
            difference = after_value - before_value

            return difference / before_value * 100

        return self.compute(new_column_name, NumberType(), change_for_row)
コード例 #8
0
ファイル: test_columns.py プロジェクト: esagara/journalism
 def test_number_cast_float(self):
     # Casting a float through NumberType is expected to raise CastError.
     with self.assertRaises(CastError):
         caster = NumberType()
         caster.cast(1.1)
コード例 #9
0
ファイル: test_columns.py プロジェクト: esagara/journalism
 def test_number_cast_text(self):
     # Casting the string 'a' through NumberType is expected to raise
     # CastError.
     with self.assertRaises(CastError):
         caster = NumberType()
         caster.cast('a')
コード例 #10
0
ファイル: test_columns.py プロジェクト: esagara/journalism
 def test_number_cast(self):
     # Ints and Decimals should come back as Decimals; None and the
     # string 'n/a' should both come back as None.
     values = (2, 1, None, Decimal('2.7'), 'n/a')
     expected = (Decimal('2'), Decimal('1'), None, Decimal('2.7'), None)

     results = []
     for value in values:
         # A fresh NumberType is used for every value.
         results.append(NumberType().cast(value))
     casted = tuple(results)

     self.assertSequenceEqual(casted, expected)
コード例 #11
0
ファイル: test_columns.py プロジェクト: esagara/journalism
 def test_number(self):
     # The column built by NumberType should be a NumberColumn.
     created = NumberType()._create_column(None, 1)
     self.assertIsInstance(created, NumberColumn)
コード例 #12
0
ファイル: table.py プロジェクト: esagara/journalism
    def aggregate(self, group_by, operations):
        """
        Aggregate data by grouping values together and performing some
        set of column operations on the groups.

        The columns of the output table (except for the :code:`group_by`
        column) will be named :code:`originalname_operation`. For instance
        :code:`salaries_median`.

        A :code:`group_by_count` column will always be added to the output.
        The order of the output columns will be :code:`('group_by',
        'group_by_count', 'column_one_operation', ...)`.

        :param group_by: The name of a column to group by.
        :param operations: Either a :class:`dict` where the keys are column
            names and the values are the names of :class:`.Column` methods,
            such as "sum" or "max_length", or an iterable of
            :code:`(column_name, operation)` pairs.
        :returns: A new :class:`Table`.
        :raises: :exc:`.ColumnDoesNotExistError`, :exc:`.UnsupportedOperationError`
        """
        try:
            i = self._column_names.index(group_by)
        except ValueError:
            raise ColumnDoesNotExistError(group_by)

        # Bucket the rows by their group_by value, keeping first-seen order.
        groups = OrderedDict()

        for row in self._data:
            groups.setdefault(row[i], []).append(row)

        # Iterating a dict directly yields only its keys, so a mapping has
        # to be expanded into (column, operation) pairs explicitly. The
        # pairs are materialized because they are walked twice below, which
        # would silently produce nothing on the second pass for a one-shot
        # iterable such as a generator.
        if hasattr(operations, 'items'):
            operations = operations.items()

        operations = list(operations)

        column_types = [self._column_types[i], NumberType()]
        column_names = [group_by, '%s_count' % group_by]

        # Validate every operation column up front and build the output
        # schema; each aggregate column reuses its source column's type.
        for op_column, operation in operations:
            try:
                j = self._column_names.index(op_column)
            except ValueError:
                raise ColumnDoesNotExistError(op_column)

            column_types.append(self._column_types[j])
            column_names.append('%s_%s' % (op_column, operation))

        output = []

        for name, group_rows in groups.items():
            # Each group becomes a temporary table so the column methods
            # can be applied to just that group's rows.
            group_table = Table(group_rows, self._column_types,
                                self._column_names)
            new_row = [name, len(group_table.rows)]

            for op_column, operation in operations:
                c = group_table.columns[op_column]

                try:
                    op = getattr(c, operation)
                except AttributeError:
                    raise UnsupportedOperationError(operation, c)

                new_row.append(op())

            output.append(tuple(new_row))

        return self._fork(output, column_types, column_names)