def rank(self, key, new_column_name):
    """
    Creates a new column holding the 1-based rank of each row.

    Every row is reduced to a single value (see :code:`key`), the values
    are sorted in ascending order and a row's rank is the position of the
    first occurrence of its value in that sorted list, plus one. Rows with
    equal values therefore share the same rank.

    :param key: Either a column name, in which case rows are ranked by
        the value stored in that column, or a function that takes a row
        and returns the value to rank by.
    :param new_column_name: The name of the resulting column.
    :returns: A new :class:`Table`.
    """
    # callable() inspects the type, which is what a call actually uses.
    if callable(key):
        value_of = key
    else:
        def value_of(row):
            return row[key]

    def null_handler(k):
        # None cannot be ordered against other values directly, so it is
        # replaced by a NullOrder instance for sorting purposes only.
        if k is None:
            return NullOrder()

        return k

    values = [value_of(row) for row in self.rows]
    rank_column = sorted(values, key=null_handler)

    def compute_func(row):
        # list.index returns the first match, so ties share a rank.
        return rank_column.index(value_of(row)) + 1

    return self.compute(new_column_name, NumberType(), compute_func)
def setUp(self):
    """Prepare three sample rows plus the matching column names and types."""
    number_type = NumberType()
    text_type = TextType()

    self.rows = (
        (1, 4, 'a'),
        (2, 3, 'b'),
        (None, 2, 'c'),
    )
    self.column_names = ('one', 'two', 'three')

    self.number_type = number_type
    self.text_type = text_type
    # Two numeric columns followed by one text column.
    self.column_types = (number_type, number_type, text_type)
def setUp(self):
    """Prepare four sample rows (one text column, three numeric columns)."""
    number_type = NumberType()
    text_type = TextType()

    self.rows = (
        ('a', 2, 3, 4),
        (None, 3, 5, None),
        ('a', 2, 4, None),
        ('b', 3, 4, None),
    )

    self.number_type = number_type
    self.text_type = text_type
    # One text column followed by three numeric columns.
    self.column_types = (text_type, number_type, number_type, number_type)
    self.column_names = ('one', 'two', 'three', 'four')
def setUp(self):
    """Prepare Decimal-valued sample rows and build a Table from them."""
    number_type = NumberType()
    text_type = TextType()

    rows = (
        (Decimal('1.1'), Decimal('2.19'), 'a'),
        (Decimal('2.7'), Decimal('3.42'), 'b'),
        (None, Decimal('4.1'), 'c'),
        (Decimal('2.7'), Decimal('3.42'), 'c'),
    )
    column_names = ('one', 'two', 'three')
    column_types = (number_type, number_type, text_type)

    self.rows = rows
    self.column_names = column_names
    self.number_type = number_type
    self.text_type = text_type
    self.column_types = column_types

    self.table = Table(rows, column_types, column_names)
def setUp(self):
    """Prepare two tables with identical rows but different column names."""
    sample_rows = (
        (1, 4, 'a'),
        (2, 3, 'b'),
        (None, 2, 'c'),
    )

    self.left_rows = sample_rows
    self.right_rows = sample_rows

    number_type = NumberType()
    text_type = TextType()
    column_types = (number_type, number_type, text_type)

    self.number_type = number_type
    self.text_type = text_type
    self.column_types = column_types

    self.left = Table(self.left_rows, column_types, ('one', 'two', 'three'))
    self.right = Table(self.right_rows, column_types, ('four', 'five', 'six'))
def z_scores(self, column_name, new_column_name):
    """
    Computes the z-score (standard score) of every value in a column and
    stores the results in a new column.

    Each score is :code:`(value - mean) / stdev`, where the mean and the
    standard deviation are obtained from the named column.

    :param column_name: The name of the column the z-scores are based on.
    :param new_column_name: The name of the resulting column.
    :returns: A new :class:`Table`.
    """
    source_column = self.columns[column_name]
    center = source_column.mean()
    spread = source_column.stdev()

    def standardize(row):
        return (row[column_name] - center) / spread

    return self.compute(new_column_name, NumberType(), standardize)
def percent_change(self, before_column_name, after_column_name, new_column_name):
    """
    Convenience wrapper around :meth:`compute` that adds a column with the
    percent change from one column to another.

    The value for each row is :code:`(after - before) / before * 100`.

    :param before_column_name: The name of the column containing the
        *before* values.
    :param after_column_name: The name of the column containing the
        *after* values.
    :param new_column_name: The name of the resulting column.
    :returns: A new :class:`Table`.
    """
    def change_for(row):
        before = row[before_column_name]
        difference = row[after_column_name] - before

        return difference / before * 100

    return self.compute(new_column_name, NumberType(), change_for)
def test_number_cast_float(self):
    """Casting a float with NumberType must raise CastError."""
    with self.assertRaises(CastError):
        caster = NumberType()
        caster.cast(1.1)
def test_number_cast_text(self):
    """Casting a non-numeric string with NumberType must raise CastError."""
    with self.assertRaises(CastError):
        caster = NumberType()
        caster.cast('a')
def test_number_cast(self):
    """Numbers are cast to Decimal; None and 'n/a' are both cast to None."""
    raw_values = (2, 1, None, Decimal('2.7'), 'n/a')

    casted = []
    for raw in raw_values:
        # A fresh NumberType is used for every value.
        casted.append(NumberType().cast(raw))

    expected = (Decimal('2'), Decimal('1'), None, Decimal('2.7'), None)

    self.assertSequenceEqual(tuple(casted), expected)
def test_number(self):
    """NumberType._create_column must produce a NumberColumn."""
    column = NumberType()._create_column(None, 1)

    self.assertIsInstance(column, NumberColumn)
def aggregate(self, group_by, operations):
    """
    Aggregate data by grouping values together and performing some
    set of column operations on the groups.

    The columns of the output table (except for the :code:`group_by`
    column) will be named :code:`originalname_operation`. For instance
    :code:`salaries_median`.

    A :code:`group_by_count` column will always be added to the output.
    The order of the output columns will be :code:`('group_by',
    'group_by_count', 'column_one_operation', ...)`, with the operation
    columns following the iteration order of :code:`operations`.

    :param group_by: The name of a column to group by.
    :param operations: A :class:`dict` where the keys are column names
        and the values are the names of :class:`.Column` methods, such
        as "sum" or "max_length". An iterable of
        :code:`(column_name, operation)` pairs is accepted as well.
    :returns: A new :class:`Table`.
    :raises: :exc:`.ColumnDoesNotExistError`, :exc:`.UnsupportedOperationError`
    """
    try:
        i = self._column_names.index(group_by)
    except ValueError:
        raise ColumnDoesNotExistError(group_by)

    # Bucket the rows by group value, keeping first-seen group order.
    groups = OrderedDict()

    for row in self._data:
        groups.setdefault(row[i], []).append(row)

    # Iterating a mapping directly yields only its keys, so mappings are
    # expanded through items(). The pairs are materialized because they
    # are traversed once here and once more for every group below.
    if hasattr(operations, 'items'):
        operation_pairs = list(operations.items())
    else:
        operation_pairs = list(operations)

    column_types = [self._column_types[i], NumberType()]
    column_names = [group_by, '%s_count' % group_by]

    for op_column, operation in operation_pairs:
        try:
            j = self._column_names.index(op_column)
        except ValueError:
            raise ColumnDoesNotExistError(op_column)

        # Each output column reuses the type of the column it is based on.
        column_types.append(self._column_types[j])
        column_names.append('%s_%s' % (op_column, operation))

    output = []

    for name, group_rows in groups.items():
        group_table = Table(group_rows, self._column_types, self._column_names)
        new_row = [name, len(group_table.rows)]

        for op_column, operation in operation_pairs:
            c = group_table.columns[op_column]

            try:
                op = getattr(c, operation)
            except AttributeError:
                raise UnsupportedOperationError(operation, c)

            new_row.append(op())

        output.append(tuple(new_row))

    return self._fork(output, column_types, column_names)