def test_create_table_args(self):
    # Every combination of column types and column names below is expected
    # to make the Table constructor raise ValueError.
    four_types = [
        self.number_type,
        self.number_type,
        self.text_type,
        self.text_type,
    ]
    with self.assertRaises(ValueError):
        Table(self.rows, four_types, self.column_names)

    four_names = ['one', 'two', 'three', 'four']
    with self.assertRaises(ValueError):
        Table(self.rows, self.column_types, four_names)

    two_types = [self.number_type, self.number_type]
    two_names = ['one', 'two']
    with self.assertRaises(ValueError):
        Table(self.rows, two_types, two_names)
def setUp(self):
    # Fixture: a "left" and a "right" table that hold equal row data and
    # share the same column types, but use different column names.
    self.number_type = NumberType()
    self.text_type = TextType()
    self.column_types = (
        self.number_type,
        self.number_type,
        self.text_type,
    )

    self.left_rows = (
        (1, 4, 'a'),
        (2, 3, 'b'),
        (None, 2, 'c'),
    )
    self.right_rows = (
        (1, 4, 'a'),
        (2, 3, 'b'),
        (None, 2, 'c'),
    )

    left_names = ('one', 'two', 'three')
    right_names = ('four', 'five', 'six')
    self.left = Table(self.left_rows, self.column_types, left_names)
    self.right = Table(self.right_rows, self.column_types, right_names)
def test_order_by_preserves_rows(self):
    # The first order_by is expected to yield row data distinct from the
    # source table's; ordering the result again must reuse that same object.
    source = Table(self.rows, self.column_types, self.column_names)
    ordered_once = source.order_by(lambda row: row['one'])
    ordered_twice = ordered_once.order_by(lambda row: row['one'])

    self.assertIsNot(source._data[0], ordered_once._data[0])
    self.assertIs(ordered_once._data[0], ordered_twice._data[0])
def analyse_insights():
    """
    Build the insights summary CSV and the per-metric reports.

    Reads ``www/live-data/insights.csv`` (the first row is used as the column
    names), aggregates by ``provider_type`` using every combination of
    ``FACEBOOK_METRICS`` and ``SUMMARY_TYPES``, adds a
    ``provider_type_count_pct`` column holding each group's share of the
    summed ``provider_type_count`` as a percentage, orders the summary by
    ``provider_type`` and writes it to
    ``www/live-data/insights_summary.csv``. Each metric is then handed to
    ``_generate_insights_histograms`` together with the full table and the
    summary.
    """
    # One type per CSV column, in file order (14 columns in total).
    column_types = (
        (date_type,)
        + (number_type,) * 5
        + (boolean_type,)
        + (text_type,) * 4
        + (boolean_type, text_type, text_type)
    )

    with open('www/live-data/insights.csv') as csv_file:
        records = list(csv.reader(csv_file))

    # The header row supplies the column names; the rest is data.
    header = records.pop(0)
    table = Table(records, column_types, header)

    operations = list(itertools.product(FACEBOOK_METRICS, SUMMARY_TYPES))
    grouped = table.aggregate('provider_type', operations)

    count_grand_total = grouped.columns['provider_type_count'].sum()

    def percent_of_total(row):
        return (row['provider_type_count'] / count_grand_total) * 100

    with_percentages = grouped.compute(
        'provider_type_count_pct', number_type, percent_of_total)
    summary = with_percentages.order_by('provider_type')

    _write_summary_csv(summary, 'www/live-data/insights_summary.csv')

    for metric in FACEBOOK_METRICS:
        _generate_insights_histograms(metric, table, summary)
def test_where_preserves_rows(self):
    # The first where() is expected to yield row data distinct from the
    # source table's; filtering the result again must reuse that same object.
    source = Table(self.rows, self.column_types, self.column_names)
    filtered_once = source.where(lambda row: row['one'] == 1)
    filtered_twice = filtered_once.where(lambda row: row['one'] == 1)

    self.assertIsNot(source._data[0], filtered_once._data[0])
    self.assertIs(filtered_once._data[0], filtered_twice._data[0])
def test_limit_preserves_rows(self):
    # The first limit() is expected to yield row data distinct from the
    # source table's; limiting the result again must reuse that same object.
    source = Table(self.rows, self.column_types, self.column_names)
    limited_once = source.limit(2)
    limited_twice = limited_once.limit(2)

    self.assertIsNot(source._data[0], limited_once._data[0])
    self.assertIs(limited_once._data[0], limited_twice._data[0])
def test_pearson_correlation(self):
    data = (
        (-1, 0, 'a'),
        (0, 0, 'b'),
        (1, 3, 'c'),
    )
    table = Table(data, self.column_types, self.column_names)

    # A column correlated with itself must give exactly 1.
    self_correlation = table.pearson_correlation('one', 'one')
    self.assertEqual(self_correlation, Decimal('1'))

    # 'one' against 'two' is expected to be approximately sqrt(3) / 2.
    cross_correlation = table.pearson_correlation('one', 'two')
    expected = Decimal('3').sqrt() * Decimal('0.5')
    self.assertAlmostEqual(cross_correlation, expected)
def test_create_table(self):
    # A freshly built table is expected to expose three rows with these
    # values, in this order.
    table = Table(self.rows, self.column_types, self.column_names)
    expected_rows = (
        (1, 4, 'a'),
        (2, 3, 'b'),
        (None, 2, 'c'),
    )

    self.assertEqual(len(table.rows), 3)

    for index, expected in enumerate(expected_rows):
        self.assertSequenceEqual(table.rows[index], expected)
def test_column_names_immutable(self):
    names = ['one', 'two', 'three']
    table = Table(self.rows, self.column_types, names)

    # Mutating the caller's list after construction must not leak into the
    # names reported by the table.
    names[0] = 'five'

    first_name = table.get_column_names()[0]
    self.assertEqual(first_name, 'one')
def test_aggregeate_bad_column(self):
    # aggregate() must raise ColumnDoesNotExistError both when the group-by
    # column is 'bad' and when an operation refers to column 'bad'.
    table = Table(self.rows, self.column_types, self.column_names)

    failing_calls = (
        ('bad', (('one', 'sum'), )),
        ('two', (('bad', 'sum'), )),
    )

    for group_by, operations in failing_calls:
        with self.assertRaises(ColumnDoesNotExistError):
            table.aggregate(group_by, operations)
def test_percentile(self):
    data = (
        (1, 2, 'a'),
        (2, 3, 'b'),
        (None, 4, 'c'),
        (1, 2, 'a'),
        (1, 2, 'a'),
    )
    table = Table(data, self.column_types, self.column_names)

    # (percentile requested, value expected as the single list element)
    expectations = (
        (25, 2),
        (50, 2),
        (75, 3),
    )

    for percent, value in expectations:
        self.assertEqual(
            table.columns['two'].percentile(percent), [Decimal(value)])
def test_stringify_long(self):
    # Six values in the column; the expected string shows the first five
    # followed by an ellipsis.
    rows = ((1, 2, 'a'), (2, 3, 'b'), (None, 4, 'c')) * 2
    self.table = Table(rows, self.column_types, self.column_names)

    rendered = str(self.table.columns['one'])
    self.assertEqual(
        rendered,
        "<journalism.columns.NumberColumn: (1, 2, None, 1, 2, ...)>")
def setUp(self):
    # Fixture: a three-row table with two number columns and a text column.
    self.number_type = NumberType()
    self.text_type = TextType()
    self.column_types = (
        self.number_type,
        self.number_type,
        self.text_type,
    )
    self.column_names = ('one', 'two', 'three')

    self.rows = (
        (1, 2, 'a'),
        (2, 3, 'b'),
        (None, 4, 'c'),
    )

    self.table = Table(self.rows, self.column_types, self.column_names)
def test_count(self):
    data = (
        (1, 2, 'a'),
        (2, 3, 'b'),
        (None, 4, 'c'),
        (1, 2, 'a'),
        (1, 2, 'a'),
    )
    table = Table(data, self.column_types, self.column_names)

    # (value searched for in column 'one', expected number of occurrences)
    expectations = (
        (1, 3),
        (4, 0),
        (None, 1),
    )

    for value, occurrences in expectations:
        self.assertEqual(table.columns['one'].count(value), occurrences)
def test_order_by_reverse(self):
    # Ordering by 'two' with reverse=True is expected to give rows whose
    # 'two' values are 4, 3, 2 in that order.
    table = Table(self.rows, self.column_types, self.column_names)
    reordered = table.order_by(lambda row: row['two'], reverse=True)

    expected_rows = (
        (1, 4, 'a'),
        (2, 3, 'b'),
        (None, 2, 'c'),
    )

    self.assertEqual(len(reordered.rows), 3)

    for index, expected in enumerate(expected_rows):
        self.assertSequenceEqual(reordered.rows[index], expected)
def test_where(self):
    # Keep only rows whose 'one' value is 2 or None; the result must be a
    # new table with the two matching rows.
    table = Table(self.rows, self.column_types, self.column_names)
    filtered = table.where(lambda row: row['one'] in (2, None))

    self.assertIsNot(filtered, table)
    self.assertEqual(len(filtered.rows), 2)

    first_row = filtered.rows[0]
    self.assertSequenceEqual(first_row, (2, 3, 'b'))

    column_one = filtered.columns['one']
    self.assertSequenceEqual(column_one, (2, None))
def test_limit(self):
    # limit(2) is expected to return a new two-row table beginning with
    # (1, 4, 'a'), whose 'one' column holds (1, 2).
    table = Table(self.rows, self.column_types, self.column_names)
    limited = table.limit(2)

    self.assertIsNot(limited, table)
    self.assertEqual(len(limited.rows), 2)

    first_row = limited.rows[0]
    self.assertSequenceEqual(first_row, (1, 4, 'a'))

    column_one = limited.columns['one']
    self.assertSequenceEqual(column_one, (1, 2))
def test_compute_creates_rows(self):
    base = Table(self.rows, self.column_types, self.column_names)
    with_new2 = base.compute(
        'new2', self.number_type, lambda row: row['one'])
    with_new3 = with_new2.compute(
        'new3', self.number_type, lambda row: row['one'])

    # After each compute(), the first row's data must be a different object
    # from, and compare unequal to, the parent table's first row.
    generations = (
        (base, with_new2),
        (with_new2, with_new3),
    )
    for parent, child in generations:
        self.assertIsNot(parent._data[0], child._data[0])
        self.assertNotEqual(parent._data[0], child._data[0])

    # The source table's own data must be left as it was.
    self.assertSequenceEqual(base._data[0], (1, 4, 'a'))
def test_limit_step_only(self):
    # limit(step=2) is expected to return a new table holding the rows
    # (1, 4, 'a') and (None, 2, 'c').
    table = Table(self.rows, self.column_types, self.column_names)
    stepped = table.limit(step=2)

    self.assertIsNot(stepped, table)
    self.assertEqual(len(stepped.rows), 2)

    expected_rows = (
        (1, 4, 'a'),
        (None, 2, 'c'),
    )
    for index, expected in enumerate(expected_rows):
        self.assertSequenceEqual(stepped.rows[index], expected)

    self.assertSequenceEqual(stepped.columns['one'], (1, None))
def test_limit_slice_negative(self):
    # limit(-2, step=-1) is expected to return a new table holding the rows
    # (2, 3, 'b') and (1, 4, 'a'), in that order.
    table = Table(self.rows, self.column_types, self.column_names)
    sliced = table.limit(-2, step=-1)

    self.assertIsNot(sliced, table)
    self.assertEqual(len(sliced.rows), 2)

    expected_rows = (
        (2, 3, 'b'),
        (1, 4, 'a'),
    )
    for index, expected in enumerate(expected_rows):
        self.assertSequenceEqual(sliced.rows[index], expected)

    self.assertSequenceEqual(sliced.columns['one'], (2, 1))
def setUp(self):
    # Fixture: a four-row table with one text column followed by three
    # number columns.
    self.rows = (
        ('a', 2, 3, 4),
        (None, 3, 5, None),
        ('a', 2, 4, None),
        ('b', 3, 4, None),
    )

    self.number_type = NumberType()
    self.text_type = TextType()

    self.column_names = ('one', 'two', 'three', 'four')
    self.column_types = (
        self.text_type,
        self.number_type,
        self.number_type,
        self.number_type,
    )

    self.table = Table(self.rows, self.column_types, self.column_names)
def test_fork_preserves_data(self):
    # A table forked from another table's rows must share the very same row
    # objects, both through _data and through the public rows accessor.
    table = Table(self.rows, self.column_types, self.column_names)
    forked = table._fork(table.rows)

    row_indexes = (0, 1, 2)

    for index in row_indexes:
        self.assertIs(table.rows[index], forked._data[index])

    for index in row_indexes:
        self.assertIs(table.rows[index], forked.rows[index])
def test_stringify_long(self):
    # A six-value row; the expected string shows the first five values
    # followed by an ellipsis.
    rows = ((1, 2, 'a', 'b', 'c', 'd'), )
    column_names = ('one', 'two', 'three', 'four', 'five', 'six')
    column_types = (self.number_type, ) * 2 + (self.text_type, ) * 4

    self.table = Table(rows, column_types, column_names)

    rendered = str(self.table.rows[0])
    self.assertEqual(rendered, "<journalism.rows.Row: (1, 2, a, b, c, ...)>")
def setUp(self):
    # Fixture: a four-row table of Decimal values (with one None) in two
    # number columns, plus a text column.
    self.number_type = NumberType()
    self.text_type = TextType()
    self.column_types = (
        self.number_type,
        self.number_type,
        self.text_type,
    )
    self.column_names = ('one', 'two', 'three')

    self.rows = (
        (Decimal('1.1'), Decimal('2.19'), 'a'),
        (Decimal('2.7'), Decimal('3.42'), 'b'),
        (None, Decimal('4.1'), 'c'),
        (Decimal('2.7'), Decimal('3.42'), 'c'),
    )

    self.table = Table(self.rows, self.column_types, self.column_names)
def test_order_by_func(self):
    # Order by the pair ('one', 'two') using a key function; the result must
    # be a new table with the rows in the order listed below.
    data = (
        (1, 2, 'a'),
        (2, 1, 'b'),
        (1, 1, 'c'),
    )
    table = Table(data, self.column_types, self.column_names)
    reordered = table.order_by(lambda row: (row['one'], row['two']))

    self.assertIsNot(reordered, table)
    self.assertEqual(len(reordered.rows), 3)

    expected_rows = (
        (1, 1, 'c'),
        (1, 2, 'a'),
        (2, 1, 'b'),
    )
    for index, expected in enumerate(expected_rows):
        self.assertSequenceEqual(reordered.rows[index], expected)
def test_distinct_column(self):
    # distinct('one') is expected to return a new table with one row per
    # value of 'one': here the rows (1, 2, 'a') and (2, None, None).
    data = (
        (1, 2, 'a'),
        (2, None, None),
        (1, 1, 'c'),
        (1, None, None),
    )
    table = Table(data, self.column_types, self.column_names)
    deduplicated = table.distinct('one')

    self.assertIsNot(deduplicated, table)
    self.assertEqual(len(deduplicated.rows), 2)

    expected_rows = (
        (1, 2, 'a'),
        (2, None, None),
    )
    for index, expected in enumerate(expected_rows):
        self.assertSequenceEqual(deduplicated.rows[index], expected)

    self.assertSequenceEqual(deduplicated.columns['one'], (1, 2))
def test_chain_select_where(self):
    # select() followed by where() is expected to keep only the chosen
    # columns and only the row whose 'two' value equals 3.
    table = Table(self.rows, self.column_types, self.column_names)
    selected = table.select(('one', 'two'))
    result = selected.where(lambda row: row['two'] == 3)

    self.assertEqual(len(result.rows), 1)
    self.assertSequenceEqual(result.rows[0], (2, 3))

    self.assertEqual(len(result.columns), 2)

    expected_types = (self.number_type, self.number_type)
    self.assertSequenceEqual(result._column_types, expected_types)
    self.assertEqual(result._column_names, ('one', 'two'))

    self.assertSequenceEqual(result.columns['one'], (2, ))
def test_order_by_nulls(self):
    # When ordering by a column that contains None, the expected result
    # places the None values after all other values.
    data = (
        (1, 2, None),
        (2, None, None),
        (1, 1, 'c'),
        (1, None, 'a'),
    )
    table = Table(data, self.column_types, self.column_names)

    # (column to order by, expected values of that column afterwards)
    expectations = (
        ('two', (1, 2, None, None)),
        ('three', ('a', 'c', None, None)),
    )

    for column_name, expected_values in expectations:
        new_table = table.order_by(column_name)
        self.assertSequenceEqual(
            new_table.columns[column_name], expected_values)
def test_distinct_func(self):
    # distinct() with a key function over ('two', 'three'): the fourth row
    # has the same key as the second, so three rows are expected.
    data = (
        (1, 2, 'a'),
        (2, None, None),
        (1, 1, 'c'),
        (1, None, None),
    )
    table = Table(data, self.column_types, self.column_names)
    deduplicated = table.distinct(lambda row: (row['two'], row['three']))

    self.assertIsNot(deduplicated, table)
    self.assertEqual(len(deduplicated.rows), 3)

    expected_rows = (
        (1, 2, 'a'),
        (2, None, None),
        (1, 1, 'c'),
    )
    for index, expected in enumerate(expected_rows):
        self.assertSequenceEqual(deduplicated.rows[index], expected)

    self.assertSequenceEqual(deduplicated.columns['one'], (1, 2, 1))
def test_aggregate_two_ops(self):
    # Group by 'one' and apply both 'sum' and 'mean' to 'two'. The result is
    # expected to be a new table with the group column, a count column and
    # one column per requested operation.
    table = Table(self.rows, self.column_types, self.column_names)
    operations = (('two', 'sum'), ('two', 'mean'))
    aggregated = table.aggregate('one', operations)

    self.assertIsNot(aggregated, table)
    self.assertEqual(len(aggregated.rows), 3)
    self.assertEqual(len(aggregated.columns), 4)

    expected_names = ('one', 'one_count', 'two_sum', 'two_mean')
    self.assertSequenceEqual(aggregated._column_names, expected_names)

    expected_rows = (
        ('a', 2, 4, 2),
        (None, 1, 3, 3),
        ('b', 1, 3, 3),
    )
    for index, expected in enumerate(expected_rows):
        self.assertSequenceEqual(aggregated.rows[index], expected)