def test_full_outer(self):
    # Key 3 appears only on the left and key 4 only on the right; the expected
    # rows show each unmatched side padded with None.
    left_data = ((1, 4, 'a'), (2, 3, 'b'), (3, 2, 'c'))
    right_data = ((1, 4, 'a'), (2, 3, 'b'), (4, 2, 'c'))

    left = Table(left_data, self.left_column_names, self.column_types)
    right = Table(right_data, self.right_column_names, self.column_types)

    joined = left.join(right, 'one', 'four', full_outer=True)

    # The join must produce a new table rather than return either input.
    for source in (left, right):
        self.assertIsNot(joined, source)

    expected_names = ['one', 'two', 'three', 'four', 'five', 'six']
    expected_types = [Number, Number, Text, Number, Number, Text]
    expected_rows = [
        (1, 4, 'a', 1, 4, 'a'),
        (2, 3, 'b', 2, 3, 'b'),
        (3, 2, 'c', None, None, None),
        (None, None, None, 4, 2, 'c'),
    ]

    self.assertColumnNames(joined, expected_names)
    self.assertColumnTypes(joined, expected_types)
    self.assertRows(joined, expected_rows)
def test_limit_preserves_rows(self):
    # Limiting, and limiting again, must reuse the same first-row object.
    original = Table(self.rows, self.column_names, self.column_types)
    limited = original.limit(2)
    relimited = limited.limit(2)

    first_original = original.rows[0]
    first_limited = limited.rows[0]
    self.assertIs(first_original, first_limited)

    self.assertIs(limited.rows[0], relimited.rows[0])
def test_to_json_error_newline_indent(self):
    # Asking for newline-delimited output together with an indent must
    # raise ValueError.
    table = Table(self.rows, self.column_names, self.column_types)
    buffer = six.StringIO()

    self.assertRaises(
        ValueError,
        table.to_json,
        buffer,
        newline=True,
        indent=4,
    )
def test_fork_preserves_data(self):
    # A table forked from its own rows must share the identical row objects.
    table = Table(self.rows, self.column_names, self.column_types)
    forked = table._fork(table.rows)

    for index in range(3):
        self.assertIs(table.rows[index], forked.rows[index])
def test_change_datetimes(self):
    # Change('one', 'two') over two DateTime columns must yield timedeltas.
    datetime_type = DateTime()
    rows = (
        ('10/4/2015 4:43', '10/7/2015 4:50'),
        ('10/2/2015 12 PM', '9/28/2015 12 PM'),
        ('9/28/2015 12:00:00', '9/1/2015 6 PM'),
    )
    table = Table(rows, ['one', 'two'], [datetime_type, datetime_type])

    computed = table.compute([
        ('test', Change('one', 'two'))
    ])

    self.assertIsNot(computed, table)
    self.assertEqual(len(computed.rows), 3)
    self.assertEqual(len(computed.columns), 3)

    first_row = (
        datetime.datetime(2015, 10, 4, 4, 43),
        datetime.datetime(2015, 10, 7, 4, 50),
        datetime.timedelta(days=3, minutes=7),
    )
    self.assertSequenceEqual(computed.rows[0], first_row)

    # Expected value of the computed column, row by row.
    expected_changes = (
        datetime.timedelta(days=3, minutes=7),
        datetime.timedelta(days=-4),
        datetime.timedelta(days=-26, hours=-18),
    )
    for index, expected in enumerate(expected_changes):
        self.assertEqual(computed.columns['test'][index], expected)
def test_group_by_function_group_name(self):
    # Grouping by a callable with key_name given must record that name on
    # the resulting TableSet.
    def three_below_five(row):
        return row['three'] < 5

    table = Table(self.rows, self.columns)
    grouped = table.group_by(three_below_five, key_name='test')

    self.assertIsInstance(grouped, TableSet)
    self.assertEqual(grouped._key_name, 'test')
def test_group_by_group_name(self):
    # Grouping by a column name with key_name given must record that name
    # on the resulting TableSet.
    grouped = Table(self.rows, self.columns).group_by('one', key_name='test')

    self.assertIsInstance(grouped, TableSet)
    self.assertEqual(grouped._key_name, 'test')
def test_change_dates(self):
    # Change('one', 'two') over two Date columns must yield timedeltas.
    date_type = Date()
    rows = (
        ('10/4/2015', '10/7/2015'),
        ('10/2/2015', '9/28/2015'),
        ('9/28/2015', '9/1/2015'),
    )
    table = Table(rows, ['one', 'two'], [date_type, date_type])

    computed = table.compute([
        ('test', Change('one', 'two'))
    ])

    self.assertIsNot(computed, table)
    self.assertEqual(len(computed.rows), 3)
    self.assertEqual(len(computed.columns), 3)

    first_row = (
        datetime.date(2015, 10, 4),
        datetime.date(2015, 10, 7),
        datetime.timedelta(days=3),
    )
    self.assertSequenceEqual(computed.rows[0], first_row)

    # Expected day differences for rows 0, 1 and 2 of the computed column.
    for index, days in enumerate((3, -4, -27)):
        self.assertEqual(
            computed.columns['test'][index],
            datetime.timedelta(days=days),
        )
class TestAggregate(AgateTestCase):
    # Exercises Table.aggregate with a single aggregation and with a list of
    # named aggregations.

    def setUp(self):
        # Three-row fixture: two Number columns and one Text column.
        self.rows = (
            (1, 4, 'a'),
            (2, 3, 'b'),
            (None, 2, u'👍'),
        )

        self.number_type = Number()
        self.text_type = Text()

        self.column_names = ['one', 'two', 'three']
        self.column_types = [
            self.number_type,
            self.number_type,
            self.text_type,
        ]

        self.table = Table(self.rows, self.column_names, self.column_types)

    def test_count(self):
        row_count = self.table.aggregate(Count())
        self.assertEqual(row_count, 3)

    def test_sum(self):
        total = self.table.aggregate(Sum('two'))
        self.assertEqual(total, 9)

    def test_multiple(self):
        # A list of (name, aggregation) pairs is expected back as a mapping
        # keyed by those names.
        aggregations = [
            ('count', Count()),
            ('sum', Sum('two')),
        ]
        expected = {'count': 3, 'sum': 9}

        self.assertEqual(self.table.aggregate(aggregations), expected)
def test_order_by_nulls(self):
    # In both expected orderings the rows whose sort key is None come last.
    rows = (
        (1, 2, None),
        (2, None, None),
        (1, 1, 'c'),
        (1, None, 'a'),
    )
    table = Table(rows, self.column_names, self.column_types)

    # (sort column, indices into `rows` in the expected output order)
    cases = (
        ('two', (2, 0, 1, 3)),
        ('three', (3, 2, 0, 1)),
    )

    for key, order in cases:
        new_table = table.order_by(key)

        self.assertIsNot(new_table, table)
        self.assertColumnNames(new_table, self.column_names)
        self.assertColumnTypes(new_table, [Number, Number, Text])
        self.assertRows(new_table, [rows[i] for i in order])
def test_join_match_multiple(self):
    # Left key 2 matches two right-hand rows (five == 2), so the expected
    # output holds that left row twice.
    left_rows = ((1, 4, 'a'), (2, 3, 'b'))
    right_rows = ((1, 1, 'a'), (1, 2, 'a'), (2, 2, 'b'))

    left = Table(left_rows, self.left_column_names, self.column_types)
    right = Table(right_rows, self.right_column_names, self.column_types)

    new_table = left.join(right, 'one', 'five')

    self.assertEqual(len(new_table.rows), 3)
    self.assertEqual(len(new_table.columns), 5)

    # The right-hand key column 'five' is absent from the expected names.
    expected_names = ('one', 'two', 'three', 'four', 'six')
    for position, name in enumerate(expected_names):
        self.assertEqual(new_table.columns[position].name, name)

    expected_types = (Number, Number, Text, Number, Text)
    for position, data_type in enumerate(expected_types):
        self.assertIsInstance(new_table.columns[position].data_type, data_type)

    expected_rows = (
        (1, 4, 'a', 1, 'a'),
        (2, 3, 'b', 1, 'a'),
        (2, 3, 'b', 2, 'b'),
    )
    for position, row in enumerate(expected_rows):
        self.assertSequenceEqual(new_table.rows[position], row)
def test_change_timedeltas(self):
    # Change("one", "two") over two TimeDelta columns must yield timedeltas.
    timedelta_type = TimeDelta()
    rows = (
        ("4:15", "8:18"),
        ("4h 2m", "2h"),
        ("4 weeks", "27 days"),
    )
    table = Table(rows, ["one", "two"], [timedelta_type, timedelta_type])

    computed = table.compute([("test", Change("one", "two"))])

    self.assertIsNot(computed, table)
    self.assertEqual(len(computed.rows), 3)
    self.assertEqual(len(computed.columns), 3)

    first_row = (
        datetime.timedelta(minutes=4, seconds=15),
        datetime.timedelta(minutes=8, seconds=18),
        datetime.timedelta(minutes=4, seconds=3),
    )
    self.assertSequenceEqual(computed.rows[0], first_row)

    # Expected value of the computed column, row by row.
    expected_changes = (
        datetime.timedelta(minutes=4, seconds=3),
        datetime.timedelta(hours=-2, minutes=-2),
        datetime.timedelta(days=-1),
    )
    for index, expected in enumerate(expected_changes):
        self.assertEqual(computed.columns["test"][index], expected)
class TestAggregate(AgateTestCase):
    # Exercises Table.aggregate with a single aggregation and with a plain
    # list of aggregations.

    def setUp(self):
        # Three-row fixture: two Number columns and one Text column.
        self.rows = (
            (1, 4, 'a'),
            (2, 3, 'b'),
            (None, 2, u'👍'),
        )

        self.number_type = Number()
        self.text_type = Text()

        self.column_names = ['one', 'two', 'three']
        self.column_types = [
            self.number_type,
            self.number_type,
            self.text_type,
        ]

        self.table = Table(self.rows, self.column_names, self.column_types)

    def test_length(self):
        row_count = self.table.aggregate(Length())
        self.assertEqual(row_count, 3)

    def test_sum(self):
        total = self.table.aggregate(Sum('two'))
        self.assertEqual(total, 9)

    def test_multiple(self):
        # A list of aggregations is expected back as results in the same order.
        aggregations = [Length(), Sum('two')]
        results = self.table.aggregate(aggregations)

        self.assertSequenceEqual(results, [3, 9])
def test_to_json_error_newline_key(self):
    # Asking for newline-delimited output together with a key must raise
    # ValueError.
    table = Table(self.rows, self.column_names, self.column_types)
    buffer = StringIO()

    self.assertRaises(
        ValueError,
        table.to_json,
        buffer,
        key='three',
        newline=True,
    )
def test_change_datetimes(self):
    # Change("one", "two") over two DateTime columns must yield timedeltas.
    datetime_type = DateTime()
    rows = (
        ("10/4/2015 4:43", "10/7/2015 4:50"),
        ("10/2/2015 12 PM", "9/28/2015 12 PM"),
        ("9/28/2015 12:00:00", "9/1/2015 6 PM"),
    )
    table = Table(rows, ["one", "two"], [datetime_type, datetime_type])

    computed = table.compute([("test", Change("one", "two"))])

    self.assertIsNot(computed, table)
    self.assertEqual(len(computed.rows), 3)
    self.assertEqual(len(computed.columns), 3)

    first_row = (
        datetime.datetime(2015, 10, 4, 4, 43),
        datetime.datetime(2015, 10, 7, 4, 50),
        datetime.timedelta(days=3, minutes=7),
    )
    self.assertSequenceEqual(computed.rows[0], first_row)

    # Expected value of the computed column, row by row.
    expected_changes = (
        datetime.timedelta(days=3, minutes=7),
        datetime.timedelta(days=-4),
        datetime.timedelta(days=-26, hours=-18),
    )
    for index, expected in enumerate(expected_changes):
        self.assertEqual(computed.columns["test"][index], expected)
def test_join_match_multiple(self):
    # Left key 2 matches two right-hand rows (five == 2), so the expected
    # output holds that left row twice.
    left_data = ((1, 4, 'a'), (2, 3, 'b'))
    right_data = ((1, 1, 'a'), (1, 2, 'a'), (2, 2, 'b'))

    left = Table(left_data, self.left_column_names, self.column_types)
    right = Table(right_data, self.right_column_names, self.column_types)

    joined = left.join(right, 'one', 'five')

    # The join must produce a new table rather than return either input.
    for source in (left, right):
        self.assertIsNot(joined, source)

    expected_names = ['one', 'two', 'three', 'four', 'six']
    expected_types = [Number, Number, Text, Number, Text]
    expected_rows = [
        (1, 4, 'a', 1, 'a'),
        (2, 3, 'b', 1, 'a'),
        (2, 3, 'b', 2, 'b'),
    ]

    self.assertColumnNames(joined, expected_names)
    self.assertColumnTypes(joined, expected_types)
    self.assertRows(joined, expected_rows)
def test_where_preserves_rows(self):
    # Filtering, and filtering again, must reuse the same first-row object.
    def one_equals_1(row):
        return row['one'] == 1

    original = Table(self.rows, self.column_names, self.column_types)
    filtered = original.where(one_equals_1)
    refiltered = filtered.where(one_equals_1)

    self.assertIs(original.rows[0], filtered.rows[0])
    self.assertIs(filtered.rows[0], refiltered.rows[0])
def test_change_dates(self):
    # Change('one', 'two') over two DateType columns must yield timedeltas.
    date_type = DateType()
    rows = (
        ('October 4th', 'October 7th'),
        ('October 2nd', 'September 28'),
        ('September 28th', '9/1/15'),
    )
    columns = (
        ('one', date_type),
        ('two', date_type),
    )
    table = Table(rows, columns)

    computed = table.compute([
        ('test', Change('one', 'two'))
    ])

    self.assertIsNot(computed, table)
    self.assertEqual(len(computed.rows), 3)
    self.assertEqual(len(computed.columns), 3)

    first_row = (
        datetime.date(2015, 10, 4),
        datetime.date(2015, 10, 7),
        datetime.timedelta(days=3),
    )
    self.assertSequenceEqual(computed.rows[0], first_row)

    # Expected day differences for rows 0, 1 and 2 of the computed column.
    for index, days in enumerate((3, -4, -27)):
        self.assertEqual(
            computed.columns['test'][index],
            datetime.timedelta(days=days),
        )
def test_change_datetimes(self):
    # Change('one', 'two') over two DateTimeType columns must yield
    # timedeltas.
    datetime_type = DateTimeType()
    rows = (
        ('October 4th 4:43', 'October 7th, 4:50'),
        ('October 2nd, 12 PM', 'September 28, 12 PM'),
        ('September 28th, 12:00:00', '9/1/15, 6 PM'),
    )
    columns = (
        ('one', datetime_type),
        ('two', datetime_type),
    )
    table = Table(rows, columns)

    computed = table.compute([
        ('test', Change('one', 'two'))
    ])

    self.assertIsNot(computed, table)
    self.assertEqual(len(computed.rows), 3)
    self.assertEqual(len(computed.columns), 3)

    first_row = (
        datetime.datetime(2015, 10, 4, 4, 43),
        datetime.datetime(2015, 10, 7, 4, 50),
        datetime.timedelta(days=3, minutes=7),
    )
    self.assertSequenceEqual(computed.rows[0], first_row)

    # Expected value of the computed column, row by row.
    expected_changes = (
        datetime.timedelta(days=3, minutes=7),
        datetime.timedelta(days=-4),
        datetime.timedelta(days=-26, hours=-18),
    )
    for index, expected in enumerate(expected_changes):
        self.assertEqual(computed.columns['test'][index], expected)
def test_print_html(self):
    # Render the table as HTML, parse it back, and check the structure:
    # one header row, one body row per table row, matching cell counts.
    table = Table(self.rows, self.column_names, self.column_types)

    buffer = six.StringIO()
    table.print_html(output=buffer)
    markup = buffer.getvalue()

    parser = TableHTMLParser()
    parser.feed(markup)

    for flag in (parser.has_table, parser.has_tbody, parser.has_thead):
        self.assertIs(flag, True)

    self.assertEqual(len(parser.header_rows), 1)
    self.assertEqual(len(parser.body_rows), len(table.rows))

    header_cells = parser.header_rows[0]
    self.assertEqual(len(header_cells), len(table.column_names))

    # Header cells must match the column names position by position.
    for position, column_name in enumerate(table.column_names):
        self.assertEqual(header_cells[position], column_name)

    # Each body row must carry as many cells as the source row has values.
    for position, row in enumerate(table.rows):
        self.assertEqual(len(parser.body_rows[position]), len(row))
def test_limit_preserves_rows(self):
    # The first limit must not share the first _data entry with the source
    # table; a limit of that limited table must share it.
    original = Table(self.rows, self.columns)
    limited = original.limit(2)
    relimited = limited.limit(2)

    first_original = original._data[0]
    first_limited = limited._data[0]
    self.assertIsNot(first_original, first_limited)

    self.assertIs(limited._data[0], relimited._data[0])
def test_print_table_max_precision(self):
    # With max_precision=2, longer decimals are expected to be cut and
    # marked with an ellipsis; text and short decimals print unchanged.
    rows = (
        ('1.745', 1.745, 1.72),
        ('11.123456', 11.123456, 5.10),
        ('0', 0, 0.10),
    )
    column_names = ['text_number', 'real_long_number', 'real_short_number']
    column_types = [self.text_type, self.number_type, self.number_type]
    table = Table(rows, column_names, column_types)

    buffer = six.StringIO()
    table.print_table(output=buffer, max_precision=2)
    lines = buffer.getvalue().split('\n')

    # Data rows are checked on output lines 2, 3 and 4.
    first, second, third = lines[2], lines[3], lines[4]

    # Text shouldn't be affected
    self.assertIn(u' 1.745 ', first)
    self.assertIn(u' 11.123456 ', second)
    self.assertIn(u' 0 ', third)

    # Test real precision above max
    self.assertIn(u' 1.74… ', first)
    self.assertIn(u' 11.12… ', second)
    self.assertIn(u' 0.00… ', third)

    # Test real precision below max
    self.assertIn(u' 1.72 ', first)
    self.assertIn(u' 5.10 ', second)
    self.assertIn(u' 0.10 ', third)
def test_where_preserves_rows(self):
    # The first filter must not share the first _data entry with the source
    # table; a filter of that filtered table must share it.
    def one_equals_1(row):
        return row['one'] == 1

    original = Table(self.rows, self.columns)
    filtered = original.where(one_equals_1)
    refiltered = filtered.where(one_equals_1)

    self.assertIsNot(original._data[0], filtered._data[0])
    self.assertIs(filtered._data[0], refiltered._data[0])
def test_order_by_preserves_rows(self):
    # The first sort must not share the first _data entry with the source
    # table; a sort of that sorted table must share it.
    def by_one(row):
        return row['one']

    original = Table(self.rows, self.columns)
    ordered = original.order_by(by_one)
    reordered = ordered.order_by(by_one)

    self.assertIsNot(original._data[0], ordered._data[0])
    self.assertIs(ordered._data[0], reordered._data[0])
def test_rename_column_names_renames_row_values(self):
    # rename(column_names=...) must return a table that reports the new
    # column names.
    replacement_names = ['d', 'e', 'f']

    table = Table(self.rows, self.column_names, self.column_types)
    renamed = table.rename(column_names=replacement_names)

    self.assertColumnNames(renamed, replacement_names)
def test_order_by_preserves_rows(self):
    # Sorting, and sorting again, must reuse the same first-row object.
    def by_one(row):
        return row['one']

    original = Table(self.rows, self.column_names, self.column_types)
    ordered = original.order_by(by_one)
    reordered = ordered.order_by(by_one)

    self.assertIs(original.rows[0], ordered.rows[0])
    self.assertIs(ordered.rows[0], reordered.rows[0])
def test_change_timedeltas(self):
    # Change('one', 'two') over two TimeDelta columns must yield timedeltas.
    timedelta_type = TimeDelta()
    rows = (
        ('4:15', '8:18'),
        ('4h 2m', '2h'),
        ('4 weeks', '27 days'),
    )
    table = Table(rows, ['one', 'two'], [timedelta_type, timedelta_type])

    computed = table.compute([
        ('test', Change('one', 'two'))
    ])

    self.assertIsNot(computed, table)
    self.assertEqual(len(computed.rows), 3)
    self.assertEqual(len(computed.columns), 3)

    first_row = (
        datetime.timedelta(minutes=4, seconds=15),
        datetime.timedelta(minutes=8, seconds=18),
        datetime.timedelta(minutes=4, seconds=3),
    )
    self.assertSequenceEqual(computed.rows[0], first_row)

    # Expected value of the computed column, row by row.
    expected_changes = (
        datetime.timedelta(minutes=4, seconds=3),
        datetime.timedelta(hours=-2, minutes=-2),
        datetime.timedelta(days=-1),
    )
    for index, expected in enumerate(expected_changes):
        self.assertEqual(computed.columns['test'][index], expected)
def test_from_fixed(self):
    # A fixed-width file read with its schema must match the CSV fixture in
    # column names, column type classes and rows.
    reference = Table.from_csv("examples/testfixed_converted.csv")
    parsed = Table.from_fixed(
        "examples/testfixed",
        "examples/testfixed_schema.csv",
    )

    reference_type_classes = list(map(type, reference.column_types))

    self.assertColumnNames(parsed, reference.column_names)
    self.assertColumnTypes(parsed, reference_type_classes)
    self.assertRows(parsed, reference.rows)
def test_print_bars_width_overlap(self):
    # With width=20 the longest printed line must be exactly 20 characters.
    table = Table(self.rows, self.column_names, self.column_types)

    buffer = six.StringIO()
    table.print_bars('three', 'one', width=20, output=buffer)

    printed_lines = buffer.getvalue().split('\n')
    longest = max(len(line) for line in printed_lines)

    self.assertEqual(longest, 20)
def test_format_max_columns(self):
    # format(max_columns=2) must give 7 lines, the first 22 characters wide.
    formatted = Table(self.rows, self.columns).format(max_columns=2)
    lines = formatted.split('\n')

    self.assertEqual(len(lines), 7)
    self.assertEqual(len(lines[0]), 22)
def test_items(self):
    # With row_names='three', a column's items() must pair each row name
    # with that row's value.
    table = Table(
        self.rows,
        self.column_names,
        self.column_types,
        row_names='three',
    )

    expected_items = [
        ('a', Decimal('1')),
        ('b', Decimal('2')),
        ('c', None),
    ]

    self.assertSequenceEqual(table.columns['one'].items(), expected_items)
def test_to_csv_file_like_object(self):
    # Writing to an already-open file object must leave it open, and the
    # output must match the reference CSV fixture.
    table = Table(self.rows, self.column_names, self.column_types)

    try:
        with open('.test.csv', 'w') as f:
            table.to_csv(f)

            # Should leave the file open
            self.assertFalse(f.closed)

        with open('.test.csv') as f:
            contents1 = f.read()

        with open('examples/test.csv') as f:
            contents2 = f.read()

        self.assertEqual(contents1, contents2)
    finally:
        # Remove the scratch file even when an assertion above fails, so a
        # failing run does not leave '.test.csv' in the working directory.
        if os.path.exists('.test.csv'):
            os.remove('.test.csv')
def test_from_json_nested(self):
    # The expected names join nested keys and list indices with '/'.
    table = Table.from_json('examples/test_nested.json')

    expected_names = [
        'one',
        'two/two_a',
        'two/two_b',
        'three/0',
        'three/1',
        'three/2',
    ]
    expected_types = [Number, Text, Text, Text, Number, Text]
    expected_rows = [
        [1, 'a', 'b', 'a', 2, 'c'],
        [2, 'c', 'd', 'd', 2, 'f'],
    ]

    self.assertColumnNames(table, expected_names)
    self.assertColumnTypes(table, expected_types)
    self.assertRows(table, expected_rows)
def setUp(self):
    # Build matching left/right fixtures: same data, different column names.
    self.left_rows = (
        (1, 4, 'a'),
        (2, 3, 'b'),
        (None, 2, 'c'),
    )
    self.right_rows = (
        (1, 4, 'a'),
        (2, 3, 'b'),
        (None, 2, 'c'),
    )

    self.number_type = NumberType()
    self.text_type = TextType()

    self.left_columns = (
        ('one', self.number_type),
        ('two', self.number_type),
        ('three', self.text_type),
    )
    self.right_columns = (
        ('four', self.number_type),
        ('five', self.number_type),
        ('six', self.text_type),
    )

    self.left = Table(self.left_rows, self.left_columns)
    self.right = Table(self.right_rows, self.right_columns)
def test_aggregate_key_type(self):
    # Integer keys plus key_type=Number: the aggregated table's key column
    # must be typed Number, as must the count column.
    tables = OrderedDict([
        (1, Table(self.table1, self.columns)),
        (2, Table(self.table2, self.columns)),
        (3, Table(self.table3, self.columns)),
    ])

    tableset = TableSet(
        tables.values(),
        tables.keys(),
        key_name='test',
        key_type=self.number_type,
    )

    aggregated = tableset.aggregate([
        ('number', Length(), 'count')
    ])

    self.assertIsInstance(aggregated, Table)
    self.assertEqual(len(aggregated.rows), 3)
    self.assertEqual(len(aggregated.columns), 2)
    self.assertSequenceEqual(aggregated._column_names, ('test', 'count'))

    key_type, count_type = aggregated._column_types[0], aggregated._column_types[1]
    self.assertIsInstance(key_type, Number)
    self.assertIsInstance(count_type, Number)
def test_merge_mixed_names(self):
    # The second table uses the names 'two', 'one', 'four'; the merged
    # table must expose all four names, with None where a table has no value.
    table_a = Table(self.rows, self.column_names, self.column_types)
    table_b = Table(self.rows, ['two', 'one', 'four'], self.column_types)

    merged = Table.merge([table_a, table_b])

    for source in (table_a, table_b):
        self.assertIsNot(merged, source)

    merged_names = ['one', 'two', 'three', 'four']

    self.assertColumnNames(merged, merged_names)
    self.assertColumnTypes(merged, [Number, Number, Text, Text])

    self.assertSequenceEqual(merged.rows[0], [1, 4, 'a', None])
    self.assertSequenceEqual(merged.rows[3], [4, 1, None, 'a'])

    # Every row must be keyed by the full merged column list.
    for row in merged.rows:
        self.assertSequenceEqual(row.keys(), ['one', 'two', 'three', 'four'])
def test_row_names(self):
    # With row_names='three', a column must carry the row names and allow
    # lookup by them.
    table = Table(
        self.rows,
        self.column_names,
        self.column_types,
        row_names='three',
    )
    column_one = table.columns['one']

    self.assertSequenceEqual(column_one._row_names, ['a', 'b', 'c'])
    self.assertEqual(column_one['b'], 2)
def setUp(self):
    # Build matching left/right fixtures: same data and types, different
    # column names.
    self.left_rows = (
        (1, 4, 'a'),
        (2, 3, 'b'),
        (None, 2, 'c'),
    )
    self.right_rows = (
        (1, 4, 'a'),
        (2, 3, 'b'),
        (None, 2, 'c'),
    )

    self.number_type = Number()
    self.text_type = Text()

    self.left_column_names = ['one', 'two', 'three']
    self.right_column_names = ['four', 'five', 'six']
    self.column_types = [self.number_type, self.number_type, self.text_type]

    self.left = Table(
        self.left_rows, self.left_column_names, self.column_types
    )
    self.right = Table(
        self.right_rows, self.right_column_names, self.column_types
    )
def test_stringify_long(self):
    # For a six-value column, str() must show the first five values
    # followed by '...'.
    rows = (
        (1, 2, 'a'),
        (2, 3, 'b'),
        (None, 4, 'c'),
        (1, 2, 'a'),
        (2, 3, 'b'),
        (None, 4, 'c'),
    )
    self.table = Table(rows, self.columns)

    rendered = str(self.table.columns['one'])

    self.assertEqual(
        rendered,
        "<agate.columns.NumberColumn: (1, 2, None, 1, 2, ...)>",
    )
def test_merge_with_column_names(self):
    # table_b names its first column 'a' instead of 'one'. Merging with
    # table_a's column names must leave 'one' as None for table_b's rows.
    table_a = Table(
        self.rows, self.column_names, self.column_types, row_names='three'
    )

    b_rows = (
        (1, 4, 'd'),
        (2, 3, 'e'),
        (None, 2, 'f'),
    )
    table_b = Table(
        b_rows, ['a', 'two', 'three'], self.column_types, row_names='three'
    )

    expected_rows = (
        (1, 4, 'a'),
        (2, 3, 'b'),
        (None, 2, 'c'),
        (None, 4, 'd'),
        (None, 3, 'e'),
        (None, 2, 'f'),
    )

    merged = Table.merge(
        [table_a, table_b], column_names=table_a.column_names
    )

    self.assertRows(merged, expected_rows)
def test_count(self):
    # Count(value) on column 'one' must report how often the value occurs,
    # including None.
    rows = (
        (1, 2, 'a'),
        (2, 3, 'b'),
        (None, 4, 'c'),
        (1, 2, 'a'),
        (1, 2, 'a'),
    )
    table = Table(rows, self.columns)

    # (value to count, expected number of occurrences)
    cases = ((1, 3), (4, 0), (None, 1))

    for value, occurrences in cases:
        self.assertEqual(
            table.columns['one'].aggregate(Count(value)),
            occurrences,
        )
def test_from_csv_file_like_object(self):
    # Reading the same CSV by path and through an open file handle must
    # give equivalent tables.
    import csvkit
    from agate import table

    # This test replaces agate.table's module-level `csv` name with csvkit
    # (presumably so from_csv reads through csvkit -- confirm). Remember the
    # previous value so the patch does not leak into later tests.
    missing = object()
    previous_csv = getattr(table, 'csv', missing)
    table.csv = csvkit

    try:
        table1 = Table.from_csv('examples/test.csv', self.columns)

        with open('examples/test.csv') as fh:
            table2 = Table.from_csv(fh, self.columns)
    finally:
        # Put the module attribute back exactly as it was found.
        if previous_csv is missing:
            del table.csv
        else:
            table.csv = previous_csv

    self.assertSequenceEqual(table1.column_names, table2.column_names)
    self.assertSequenceEqual(table1.column_types, table2.column_types)

    self.assertEqual(len(table1.columns), len(table2.columns))
    self.assertEqual(len(table1.rows), len(table2.rows))

    self.assertSequenceEqual(table1.rows[0], table2.rows[0])
    self.assertSequenceEqual(table1.rows[1], table2.rows[1])
    self.assertSequenceEqual(table1.rows[2], table2.rows[2])
def setUp(self):
    # Three small (letter, number) data sets and a name -> Table mapping
    # built from them, in insertion order.
    self.table1 = (
        ('a', 1),
        ('a', 3),
        ('b', 2),
    )
    self.table2 = (
        ('b', 0),
        ('a', 2),
        ('c', 5),
    )
    self.table3 = (
        ('a', 1),
        ('a', 2),
        ('c', 3),
    )

    self.text_type = Text()
    self.number_type = Number()

    self.columns = (
        ('letter', self.text_type),
        ('number', self.number_type),
    )

    named_data = (
        ('table1', self.table1),
        ('table2', self.table2),
        ('table3', self.table3),
    )
    self.tables = OrderedDict(
        [(name, Table(data, self.columns)) for name, data in named_data]
    )
def test_limit_step_only(self):
    # limit(step=2) must keep every second row, starting with the first.
    table = Table(self.rows, self.columns)
    stepped = table.limit(step=2)

    self.assertIsNot(stepped, table)
    self.assertEqual(len(stepped.rows), 2)

    expected_rows = ((1, 4, 'a'), (None, 2, u'👍'))
    for index, expected in enumerate(expected_rows):
        self.assertSequenceEqual(stepped.rows[index], expected)

    self.assertSequenceEqual(stepped.columns['one'], (1, None))
def test_limit_slice_negative(self):
    # limit(-2, step=-1) is expected to return rows (2, 3, 'b') and
    # (1, 4, 'a'), in that order.
    table = Table(self.rows, self.columns)
    sliced = table.limit(-2, step=-1)

    self.assertIsNot(sliced, table)
    self.assertEqual(len(sliced.rows), 2)

    expected_rows = ((2, 3, 'b'), (1, 4, 'a'))
    for index, expected in enumerate(expected_rows):
        self.assertSequenceEqual(sliced.rows[index], expected)

    self.assertSequenceEqual(sliced.columns['one'], (2, 1))
def test_percentile_rank(self):
    # Rank the integers 1..1000 and spot-check the percentile assigned at
    # several positions.
    rows = [(value,) for value in range(1, 1001)]
    table = Table(rows, ['ints'], [self.number_type])

    ranked = table.compute([
        ('percentiles', PercentileRank('ints'))
    ])

    self.assertEqual(len(ranked.rows), 1000)
    self.assertEqual(len(ranked.columns), 2)

    # row index -> expected (value, percentile) pair
    spot_checks = (
        (0, (1, 0)),
        (50, (51, 5)),
        (499, (500, 49)),
        (500, (501, 50)),
        (998, (999, 99)),
        (999, (1000, 100)),
    )
    for index, expected in spot_checks:
        self.assertSequenceEqual(ranked.rows[index], expected)

    # The first and last percentile values must both be Decimal instances.
    for index in (0, -1):
        self.assertIsInstance(ranked.columns['percentiles'][index], Decimal)
def setUp(self):
    # Four-row fixture: one Text column followed by three Number columns.
    self.rows = (
        ('a', 2, 3, 4),
        (None, 3, 5, None),
        ('a', 2, 4, None),
        ('b', 3, 4, None),
    )

    self.number_type = Number()
    self.text_type = Text()

    self.column_names = ['one', 'two', 'three', 'four']
    self.column_types = [
        self.text_type,
        self.number_type,
        self.number_type,
        self.number_type,
    ]

    self.table = Table(self.rows, self.column_names, self.column_types)
def test_order_by_func(self):
    # Sort with a key function that returns the ('one', 'two') pair.
    def one_then_two(row):
        return (row['one'], row['two'])

    rows = (
        (1, 2, 'a'),
        (2, 1, 'b'),
        (1, 1, 'c'),
    )
    table = Table(rows, self.column_names, self.column_types)

    ordered = table.order_by(one_then_two)

    self.assertIsNot(ordered, table)
    self.assertColumnNames(ordered, self.column_names)
    self.assertColumnTypes(ordered, [Number, Number, Text])
    self.assertRows(ordered, [rows[2], rows[0], rows[1]])
def setUp(self):
    # Four-row fixture: one Text column followed by three Number columns,
    # described as (name, type) pairs.
    self.rows = (
        ('a', 2, 3, 4),
        (None, 3, 5, None),
        ('a', 2, 4, None),
        ('b', 3, 6, None),
    )

    self.number_type = Number()
    self.text_type = Text()

    self.columns = (
        ('one', self.text_type),
        ('two', self.number_type),
        ('three', self.number_type),
        ('four', self.number_type),
    )

    self.table = Table(self.rows, self.columns)
def test_denormalize_multiple_keys(self):
    # Denormalize on two key columns. The expected table has one row per
    # (first_name, last_name) pair, named by that pair.
    table = Table(self.rows, self.column_names, self.column_types)

    denormalized = table.denormalize(
        ['first_name', 'last_name'], 'property', 'value'
    )

    expected_rows = (
        ('Jane', 'Code', 'female', 27),
        ('Jim', 'Program', 'male', None),
        ('Jim', 'Bytes', None, 24),
    )
    expected_names = ['first_name', 'last_name', 'gender', 'age']
    expected_row_names = [
        ('Jane', 'Code'),
        ('Jim', 'Program'),
        ('Jim', 'Bytes'),
    ]

    self.assertRows(denormalized, expected_rows)
    self.assertColumnNames(denormalized, expected_names)
    self.assertColumnTypes(denormalized, [Text, Text, Text, Number])
    self.assertRowNames(denormalized, expected_row_names)
def test_distinct_column(self):
    # distinct('one') must keep only the first row seen for each value of
    # 'one'.
    rows = (
        (1, 2, 'a'),
        (2, None, None),
        (1, 1, 'c'),
        (1, None, None),
    )
    table = Table(rows, self.column_names, self.column_types)

    deduplicated = table.distinct('one')

    self.assertIsNot(deduplicated, table)
    self.assertColumnNames(deduplicated, self.column_names)
    self.assertColumnTypes(deduplicated, [Number, Number, Text])
    self.assertRows(deduplicated, [rows[0], rows[1]])
def test_from_json_mixed_keys(self):
    # The expected table has five columns, with None wherever a row has no
    # value for a column.
    table = Table.from_json('examples/test_mixed.json')

    expected_names = ['one', 'two', 'three', 'four', 'five']
    expected_types = [Number, Number, Text, Text, Number]
    expected_rows = [
        [1, 4, 'a', None, None],
        [2, 3, 'b', 'd', None],
        [None, 2, u'👍', None, 5],
    ]

    self.assertColumnNames(table, expected_names)
    self.assertColumnTypes(table, expected_types)
    self.assertRows(table, expected_rows)
def test_create_variable_length_rows(self):
    # Rows shorter than the column count must be padded with None, both
    # with explicit columns and when Table is given only the rows.
    rows = (
        (1, 4, 'a'),
        (2, ),
        (None, 2)
    )

    table = Table(rows, self.column_names, self.column_types)

    # Silence warnings only while building the second table.
    # catch_warnings() restores the previous filter state on exit, whereas
    # resetwarnings() would discard every filter configured elsewhere.
    with warnings.catch_warnings():
        warnings.simplefilter('ignore')
        table2 = Table(rows)

    self.assertColumnNames(table, self.column_names)
    self.assertColumnTypes(table, [Number, Number, Text])
    self.assertRows(table, [
        (1, 4, 'a'),
        (2, None, None),
        (None, 2, None)
    ])

    self.assertColumnTypes(table2, [Number, Number, Text])
    self.assertRows(table2, [
        (1, 4, 'a'),
        (2, None, None),
        (None, 2, None)
    ])
def test_iter(self):
    # Iterating the table and iterating table.rows must both yield rows
    # equal to the source tuples.

    # Silence warnings only while building the table. catch_warnings()
    # restores the previous filter state on exit, whereas resetwarnings()
    # would discard every filter configured elsewhere.
    with warnings.catch_warnings():
        warnings.simplefilter('ignore')
        table = Table(self.rows)

    for row, table_row, row_row in zip(self.rows, table, table.rows):
        # assertEqual's third positional argument is the failure message,
        # so each comparison has to be made separately.
        self.assertEqual(row, table_row)
        self.assertEqual(row, row_row)
def test_homogenize_multiple_columns(self):
    # Homogenize on the ('one', 'three') pair. The expected rows add
    # (2, 5, 'c'), filling 'two' with the default 5.
    table = Table(self.rows, self.column_names, self.column_types)

    def column_two(count):
        # First `count` lowercase letters starting at 'a'.
        letters = []
        for offset in range(count):
            letters.append(chr(ord('a') + offset))
        return letters

    required_pairs = zip(range(3), column_two(3))
    homogenized = table.homogenize(['one', 'three'], required_pairs, [5])

    expected_rows = (
        (0, 4, 'a'),
        (1, 3, 'b'),
        (None, 2, 'c'),
        (2, 5, 'c'),
    )

    # Called without an explicit output target, as in the original test.
    homogenized.print_table()

    self.assertColumnNames(homogenized, self.column_names)
    self.assertColumnTypes(homogenized, [Number, Number, Text])
    self.assertRows(homogenized, expected_rows)
def test_slug(self):
    # Slug('one') is expected to give lowercase, underscore-separated
    # values.
    rows = (
        ('hello world', 2),
        ('Ab*c #e', 2),
        ('He11O W0rld', 3),
    )
    expected = ['hello_world', 'ab_c_e', 'he11o_w0rld']

    source = Table(rows, ['one', 'two'], [self.text_type, self.number_type])
    table = source.compute([
        ('slugs', Slug('one'))
    ])

    self.assertSequenceEqual(table.columns['slugs'], expected)
def setUp(self):
    # Three-row fixture: two number columns and one text column, described
    # as (name, type) pairs.
    self.rows = (
        (1, 2, 'a'),
        (2, 3, 'b'),
        (None, 4, 'c'),
    )

    self.number_type = NumberType()
    self.text_type = TextType()

    self.columns = (
        ('one', self.number_type),
        ('two', self.number_type),
        ('three', self.text_type),
    )

    self.table = Table(self.rows, self.columns)
def test_data_immutable(self):
    # Replacing an element of the caller's list after construction must
    # not change the table's first row.
    source_rows = [
        [1, 4, 'a'],
        [2, 3, 'b'],
        [None, 2, 'c'],
    ]
    table = Table(source_rows, self.columns)

    source_rows[0] = [2, 2, 2]

    self.assertSequenceEqual(table.rows[0], [1, 4, 'a'])
def test_count(self):
    # Count() with no arguments must report the number of rows (5 here)
    # and declare a Number result type.
    rows = (
        (1, 2, 'a'),
        (2, 3, 'b'),
        (None, 4, 'c'),
        (1, 2, 'a'),
        (1, 2, 'a')
    )

    table = Table(rows, self.column_names, self.column_types)

    self.assertIsInstance(Count().get_aggregate_data_type(table), Number)
    # Validate against the table under test, not the setUp fixture, so
    # every step of this test exercises the same data.
    Count().validate(table)
    self.assertEqual(Count().run(table), 5)
    # A second, fresh Count() must give the same answer.
    self.assertEqual(Count().run(table), 5)
def run_sql(self, line, cell):
    """Run ``cell`` as a query, print the resulting table and return it.

    ``line`` is accepted but not used. ``cell`` is passed unchanged to
    ``self.querier.run_sql``. The response is then handed to
    ``self.querier.async_wait_for_result``. Only the first entry of the
    result's ``'results'`` list is read.

    Returns the ``Table`` built from that entry's rows.
    """
    response = self.querier.run_sql(cell)
    payload = self.querier.async_wait_for_result(response)

    first_table = payload['results'][0]['table']
    names = first_table['column_names']

    # Turn each positional row into a {column name: value} record.
    records = []
    for values in first_table['rows']:
        records.append(dict(zip(names, values)))

    table_result = Table.from_object(records)
    table_result.print_table()
    return table_result