def test_sum(self):
    """Check Sum validation and totals on the fixture table.

    Validating ``Sum('three')`` must raise ``DataTypeError``, validating
    ``Sum('one')`` must pass, and the sums of columns ``'one'`` and
    ``'two'`` must equal 6.5 and 13.13 respectively.
    """
    table = self.table

    # Validation is expected to reject column 'three'.
    with self.assertRaises(DataTypeError):
        Sum('three').validate(table)

    # Column 'one' must validate without raising.
    Sum('one').validate(table)

    total_one = Sum('one').run(table)
    self.assertEqual(total_one, Decimal('6.5'))

    total_two = Sum('two').run(table)
    self.assertEqual(total_two, Decimal('13.13'))
def test_sum(self):
    """Check Sum over column ``'test'`` of the time-delta fixture table.

    The aggregate data type must be a ``TimeDelta`` instance, validation
    must pass, and the computed sum must equal 30 seconds.
    """
    table = self.time_delta_table

    aggregate_type = Sum('test').get_aggregate_data_type(table)
    self.assertIsInstance(aggregate_type, TimeDelta)

    # Must not raise.
    Sum('test').validate(table)

    total = Sum('test').run(table)
    self.assertEqual(total, datetime.timedelta(seconds=30))
def test_aggregeate_bad_column(self):
    """Aggregating a TableSet with Sum over 'one' or 'bad' raises KeyError."""
    tableset = TableSet(self.tables.values(), self.tables.keys())

    # (output name, summed column) pairs that must each fail with KeyError.
    failing_cases = (('one_sum', 'one'), ('bad_sum', 'bad'))

    for output_name, column_name in failing_cases:
        with self.assertRaises(KeyError):
            tableset.aggregate([(output_name, Sum(column_name))])
def run(self, table):
    """
    Express every value of the configured column as a percentage of a total.

    The denominator is ``self._total`` when it is not ``None``; otherwise it
    is the result of ``table.aggregate(Sum(self._column_name))``.

    :param table:
        The table to compute over. Its ``rows`` are iterated and each row is
        indexed by ``self._column_name``.
    :returns:
        A :class:`list` with one entry per row, in row order: ``None`` where
        the row's value is ``None``, otherwise ``value / total * 100``
        (presumably a :class:`decimal.Decimal` -- depends on the column's
        value type).
    :raises DataTypeError:
        If the total had to be computed from the column and that sum is
        less than or equal to zero.
    """
    if self._total is not None:
        # A caller-supplied total always wins over the column sum.
        total = self._total
    else:
        total = table.aggregate(Sum(self._column_name))

        # A non-positive column sum cannot serve as a percentage base.
        if total <= 0:
            raise DataTypeError(
                'The sum of column values must be a positive number')

    column_name = self._column_name
    values = (row[column_name] for row in table.rows)

    # Nulls pass through untouched; everything else is scaled to a percent.
    return [
        None if value is None else value / total * 100
        for value in values
    ]
def test_multiple(self):
    """Named aggregations on the fixture table come back keyed by name.

    A ``Count`` named ``'count'`` must yield 3 and a ``Sum`` of column
    ``'two'`` named ``'sum'`` must yield 9.
    """
    table = self.table
    aggregations = [
        ('count', Count()),
        ('sum', Sum('two')),
    ]

    results = table.aggregate(aggregations)

    expected = {'count': 3, 'sum': 9}
    self.assertEqual(results, expected)
def test_pivot_sum(self):
    """Pivot 'race' by 'gender' with Sum('age') and check the result.

    The pivot table must have columns ``race``, ``male`` and ``female``
    typed ``Text``, ``Number`` and ``Number``, with the expected summed
    values per row.
    """
    source = Table(self.rows, self.column_names, self.column_types)

    pivoted = source.pivot('race', 'gender', Sum('age'))

    expected_rows = (
        ('white', 20, 45),
        ('black', 20, 0),
        ('latino', 25, 0),
        ('asian', 0, 25),
    )

    self.assertColumnNames(pivoted, ['race', 'male', 'female'])
    self.assertColumnTypes(pivoted, [Text, Number, Number])
    self.assertRows(pivoted, expected_rows)
def test_aggregate_sum(self):
    """Aggregating a TableSet with Count and Sum('number') yields a Table.

    The result must have columns ``group``, ``count`` and ``number_sum``
    typed ``Text``, ``Number`` and ``Number``, with one row per member
    table.
    """
    tableset = TableSet(self.tables.values(), self.tables.keys())

    aggregations = [
        ('count', Count()),
        ('number_sum', Sum('number')),
    ]
    summary = tableset.aggregate(aggregations)

    self.assertIsInstance(summary, Table)
    self.assertColumnNames(summary, ('group', 'count', 'number_sum'))
    self.assertColumnTypes(summary, [Text, Number, Number])

    expected_rows = [
        ('table1', 3, 6),
        ('table2', 3, 7),
        ('table3', 3, 6),
    ]
    self.assertRows(summary, expected_rows)
def test_having_complex(self):
    """``having`` with a compound predicate keeps only ``'table2'``.

    The predicate requires ``count >= 3`` and ``number_sum > 6``. The
    result must be a TableSet holding the very same ``'table2'`` object
    and carrying over the ``key_name``.
    """
    tableset = TableSet(
        self.tables.values(), self.tables.keys(), key_name='test')

    aggregations = [
        ('count', Count()),
        ('number_sum', Sum('number')),
    ]

    def enough_rows_and_large_sum(t):
        # Both conditions must hold for a table to be kept.
        return t['count'] >= 3 and t['number_sum'] > 6

    filtered = tableset.having(aggregations, enough_rows_and_large_sum)

    self.assertIsInstance(filtered, TableSet)
    self.assertSequenceEqual(filtered.keys(), ['table2'])
    # Identity check: the surviving table is not a copy.
    self.assertIs(filtered.values()[0], tableset['table2'])
    self.assertEqual(filtered.key_name, 'test')
def pearson_correlation(self, column_one, column_two):
    """
    Compute the `Pearson correlation coefficient
    <http://en.wikipedia.org/wiki/Pearson_product-moment_correlation_coefficient>`_
    of :code:`column_one` against :code:`column_two`.

    The coefficient lies between -1 and 1, where 0 implies no correlation.
    Values near 1 imply a high positive correlation (as x increases, so
    does y); values near -1 imply a high negative correlation (as x
    increases, y decreases).

    Note: this implementation is borrowed from the MIT licensed
    `latimes-calculate
    <https://github.com/datadesk/latimes-calculate/blob/master/calculate/pearson.py>`_.
    Thanks, LAT!

    :param column_one:
        The name of a column.
    :param column_two:
        The name of a column.
    :returns:
        :class:`decimal.Decimal`, or the integer ``0`` when the
        denominator of the formula is zero.
    :raises NullComputationError:
        If either column reports nulls via ``has_nulls()``.
    """
    first = self.columns[column_one]
    second = self.columns[column_two]

    if first.has_nulls() or second.has_nulls():
        raise NullComputationError

    count = len(first)

    total_first = first.aggregate(Sum())
    total_second = second.aggregate(Sum())

    squares_first = sum(value ** 2 for value in first)
    squares_second = sum(value ** 2 for value in second)

    cross_products = sum(a * b for a, b in zip(first, second))

    numerator = cross_products - (total_first * total_second / count)

    # Sums of squared deviations from the mean for each column.
    deviation_first = squares_first - total_first ** 2 / count
    deviation_second = squares_second - total_second ** 2 / count
    denominator = (deviation_first * deviation_second).sqrt()

    # A zero denominator is reported as "no correlation" rather than
    # dividing by zero.
    if denominator == 0:
        return 0

    return numerator / denominator
def test_nested_aggregation(self):
    """Aggregate a TableSet that was further grouped by ``'letter'``.

    The result must be a Table with columns ``test``, ``letter``,
    ``count`` and ``number_sum`` and one row per (table, letter) pair.
    """
    tableset = TableSet(
        self.tables.values(), self.tables.keys(), key_name='test')
    grouped = tableset.group_by('letter')

    aggregations = [
        ('count', Count()),
        ('number_sum', Sum('number')),
    ]
    summary = grouped.aggregate(aggregations)

    self.assertIsInstance(summary, Table)
    self.assertColumnNames(
        summary, ('test', 'letter', 'count', 'number_sum'))
    self.assertColumnTypes(summary, (Text, Text, Number, Number))

    expected_rows = [
        ('table1', 'a', 2, 4),
        ('table1', 'b', 1, 2),
        ('table2', 'b', 1, 0),
        ('table2', 'a', 1, 2),
        ('table2', 'c', 1, 5),
        ('table3', 'a', 2, 3),
        ('table3', 'c', 1, 3),
    ]
    self.assertRows(summary, expected_rows)
def test_nested_aggregate_row_names(self):
    """Rows of a nested aggregation are named by (table key, letter).

    Checks the full list of row names and then looks two rows up by
    name to confirm their contents.
    """
    tableset = TableSet(
        self.tables.values(), self.tables.keys(), key_name='test')
    grouped = tableset.group_by('letter')

    aggregations = [
        ('count', Count()),
        ('number_sum', Sum('number')),
    ]
    summary = grouped.aggregate(aggregations)

    expected_row_names = [
        ('table1', 'a'),
        ('table1', 'b'),
        ('table2', 'b'),
        ('table2', 'a'),
        ('table2', 'c'),
        ('table3', 'a'),
        ('table3', 'c'),
    ]
    self.assertRowNames(summary, expected_row_names)

    # Spot-check lookup by row name: (row name, expected row contents).
    spot_checks = (
        (('table1', 'a'), ('table1', 'a', 2, 4)),
        (('table2', 'c'), ('table2', 'c', 1, 5)),
    )
    for row_name, expected_row in spot_checks:
        self.assertSequenceEqual(summary.rows[row_name], expected_row)
def test_sum(self):
    """Aggregating the fixture table with Sum('two') yields 9."""
    total = self.table.aggregate(Sum('two'))

    self.assertEqual(total, 9)
def test_sum_all_nulls(self):
    """Sum of column 'four' on the fixture table is Decimal zero."""
    total = Sum('four').run(self.table)

    self.assertEqual(total, Decimal('0'))
def test_sum_all_nulls(self):
    """Sum of column 'null' on the time-delta table is a zero timedelta."""
    total = Sum('null').run(self.time_delta_table)

    self.assertEqual(total, datetime.timedelta(0))
def test_multiple(self):
    """A list of unnamed aggregations returns results in the same order.

    ``Count()`` followed by ``Sum('two')`` on the fixture table must
    yield the sequence ``[3, 9]``.
    """
    table = self.table
    aggregations = [Count(), Sum('two')]

    results = table.aggregate(aggregations)

    self.assertSequenceEqual(results, [3, 9])
def test_aggregate_sum_invalid(self):
    """Aggregating a TableSet with Sum('letter') raises DataTypeError."""
    tableset = TableSet(self.tables.values(), self.tables.keys())

    with self.assertRaises(DataTypeError):
        aggregations = [('letter_sum', Sum('letter'))]
        tableset.aggregate(aggregations)