def test_merge_key_name(self):
    """Merge a TableSet built with ``key_name='foo'`` and check the merged schema."""
    merged = TableSet(
        self.tables.values(),
        self.tables.keys(),
        key_name='foo',
    ).merge()

    expected_names = ['foo', 'letter', 'number']
    expected_types = [Text, Text, Number]

    self.assertColumnNames(merged, expected_names)
    self.assertColumnTypes(merged, expected_types)
def test_from_json_file(self):
    """Load a TableSet from a JSON path and from a file-like object.

    Both loaded tablesets are compared against one built directly from
    ``self.tables``.  Each loaded tableset gets its own two-value assertion:
    the third positional argument of ``assertEqual`` and
    ``assertSequenceEqual`` is the failure message, so passing three values
    in one call would leave the last one unverified.
    """
    expected = TableSet(self.tables.values(), self.tables.keys())
    from_path = TableSet.from_json("examples/test_tableset.json")

    with open("examples/test_tableset.json") as f:
        filelike = StringIO(f.read())

    from_filelike = TableSet.from_json(filelike)

    expected_type_classes = [type(t) for t in expected.column_types]

    for loaded in (from_path, from_filelike):
        self.assertSequenceEqual(expected.column_names, loaded.column_names)
        self.assertSequenceEqual(
            expected_type_classes,
            [type(t) for t in loaded.column_types],
        )
        self.assertEqual(len(expected), len(loaded))

        for name in ["table1", "table2", "table3"]:
            self.assertEqual(len(expected[name].columns), len(loaded[name].columns))
            self.assertEqual(len(expected[name].rows), len(loaded[name].rows))

            # The first three rows of every table are compared one by one.
            for index in range(3):
                self.assertSequenceEqual(
                    expected[name].rows[index],
                    loaded[name].rows[index],
                )
def test_merge_key_name(self):
    """Check column names and types of a merge performed with ``key_name="foo"``."""
    keyed_tableset = TableSet(self.tables.values(), self.tables.keys(), key_name="foo")
    merged = keyed_tableset.merge()

    for check, expected in (
        (self.assertColumnNames, ["foo", "letter", "number"]),
        (self.assertColumnTypes, [Text, Text, Number]),
    ):
        check(merged, expected)
def test_from_json_file(self):
    """Load a TableSet from a JSON path and from a file-like object.

    A reference tableset is built from ``self.tables``; the path-loaded and
    file-like-loaded tablesets are each compared to it separately.  The
    unittest assert methods take ``(first, second, msg)``, so a third value
    passed positionally is only used as the failure message and is never
    compared -- hence one assertion per loaded tableset.
    """
    reference = TableSet(self.tables.values(), self.tables.keys())
    loaded_from_path = TableSet.from_json('examples/test_tableset.json')

    with open('examples/test_tableset.json') as f:
        filelike = StringIO(f.read())

    loaded_from_filelike = TableSet.from_json(filelike)

    reference_type_classes = [type(t) for t in reference.column_types]

    for candidate in (loaded_from_path, loaded_from_filelike):
        self.assertSequenceEqual(reference.column_names, candidate.column_names)
        self.assertSequenceEqual(
            reference_type_classes,
            [type(t) for t in candidate.column_types],
        )
        self.assertEqual(len(reference), len(candidate))

        for name in ['table1', 'table2', 'table3']:
            self.assertEqual(len(reference[name].columns), len(candidate[name].columns))
            self.assertEqual(len(reference[name].rows), len(candidate[name].rows))

            # Rows 0..2 of each table are compared individually.
            for index in range(3):
                self.assertSequenceEqual(
                    reference[name].rows[index],
                    candidate[name].rows[index],
                )
def test_nested_aggregation(self):
    """Group each table by 'letter', aggregate, and check the resulting Table row by row."""
    tableset = TableSet(self.tables.values(), self.tables.keys(), key_name='test')
    grouped = tableset.group_by('letter')

    aggregations = [
        ('letter', Length(), 'count'),
        ('number', Sum(), 'number_sum'),
    ]
    results = grouped.aggregate(aggregations)

    self.assertIsInstance(results, Table)
    self.assertEqual(len(results.rows), 7)
    self.assertEqual(len(results.columns), 4)
    self.assertSequenceEqual(results._column_names, ('test', 'letter', 'count', 'number_sum'))

    expected_rows = (
        ('table1', 'a', 2, 4),
        ('table1', 'b', 1, 2),
        ('table2', 'b', 1, 0),
        ('table2', 'a', 1, 2),
        ('table2', 'c', 1, 5),
        ('table3', 'a', 2, 3),
        ('table3', 'c', 1, 3),
    )
    for index, expected in enumerate(expected_rows):
        self.assertSequenceEqual(results.rows[index], expected)
def test_proxy_table_invalid(self):
    """Calling ``print_table`` on a TableSet must raise AttributeError with a fixed message."""
    tableset = TableSet(self.tables.values(), self.tables.keys())

    with self.assertRaises(AttributeError) as cm:
        tableset.print_table()

    # The message is checked after the ``with`` block: statements following
    # the raising call inside the block would never execute.
    # ``assertEqual`` replaces the deprecated ``assertEquals`` alias, which
    # was removed in Python 3.12.
    self.assertEqual(
        str(cm.exception),
        'Table method "print_table" cannot be used as a TableSet method.',
    )
def setUp(self):
    """Create the row fixtures, column metadata, three Tables and a TableSet used by the tests."""
    self.table1 = (
        ('a', 1, 4),
        ('b', 3, 7),
        ('c', 2, 2),
    )
    self.table2 = (
        ('a', 0, 3),
        ('b', 2, 3),
        ('c', 5, 3),
    )
    self.table3 = (
        ('a', 1, 10),
        ('b', 2, 1),
        ('c', 3, None),
    )

    self.text_type = Text()
    self.number_type = Number()

    self.column_names = ['one', 'two', 'three']
    self.column_types = [self.text_type, self.number_type, self.number_type]

    # Build the mapping locally so ``self.tables`` is only assigned once all
    # three tables have been constructed.
    named_rows = (
        ('table1', self.table1),
        ('table2', self.table2),
        ('table3', self.table3),
    )
    tables = OrderedDict()
    for table_name, rows in named_rows:
        tables[table_name] = Table(rows, self.column_names, self.column_types)
    self.tables = tables

    self.tablesets = TableSet(self.tables.values(), self.tables.keys())
def test_compute(self):
    """Add a formula column to every table and check schema and rows of the results."""
    tableset = TableSet(self.tables.values(), self.tables.keys())

    formula = Formula(self.text_type, lambda r: '%(letter)s-%(number)i' % r)
    computed = tableset.compute([('new_column', formula)])

    first = computed['table1']
    self.assertColumnNames(first, ('letter', 'number', 'new_column',))
    self.assertColumnTypes(first, (Text, Number, Text))
    self.assertRows(first, [
        ('a', 1, 'a-1'),
        ('a', 3, 'a-3'),
        ('b', 2, 'b-2'),
    ])

    second = computed['table2']
    self.assertRows(second, [
        ('b', 0, 'b-0'),
        ('a', 2, 'a-2'),
        ('c', 5, 'c-5'),
    ])

    third = computed['table3']
    expected_third = (
        ('a', 1, 'a-1'),
        ('a', 2, 'a-2'),
        ('c', 3, 'c-3'),
    )
    for index, expected in enumerate(expected_third):
        self.assertSequenceEqual(third.rows[index], expected)
def test_compute(self):
    """Compute a formula column on every table; check table1's shape and all tables' rows."""
    def check_rows(table, expected_rows):
        # Compare the leading rows of ``table`` with ``expected_rows`` in order.
        for index, expected in enumerate(expected_rows):
            self.assertSequenceEqual(table.rows[index], expected)

    tableset = TableSet(self.tables.values(), self.tables.keys())

    formula = Formula(self.text_type, lambda r: '%(letter)s-%(number)i' % r)
    computed = tableset.compute([(formula, 'new_column')])

    first = computed['table1']
    self.assertEqual(len(first.rows), 3)
    self.assertEqual(len(first.columns), 3)
    self.assertSequenceEqual(
        first._column_types,
        (self.text_type, self.number_type, self.text_type,),
    )
    self.assertSequenceEqual(first._column_names, ('letter', 'number', 'new_column',))
    check_rows(first, [('a', 1, 'a-1'), ('a', 3, 'a-3'), ('b', 2, 'b-2')])

    second = computed['table2']
    check_rows(second, [('b', 0, 'b-0'), ('a', 2, 'a-2'), ('c', 5, 'c-5')])

    third = computed['table3']
    check_rows(third, [('a', 1, 'a-1'), ('a', 2, 'a-2'), ('c', 3, 'c-3')])
def test_compute(self):
    """Compute "new_column" from a formula and verify the rows of all three tables."""
    source = TableSet(self.tables.values(), self.tables.keys())
    label_formula = Formula(self.text_type, lambda r: "%(letter)s-%(number)i" % r)
    result = source.compute([(label_formula, "new_column")])

    table = result["table1"]
    self.assertEqual(len(table.rows), 3)
    self.assertEqual(len(table.columns), 3)

    expected_types = (self.text_type, self.number_type, self.text_type)
    expected_names = ("letter", "number", "new_column")
    self.assertSequenceEqual(table._column_types, expected_types)
    self.assertSequenceEqual(table._column_names, expected_names)

    for position, row in enumerate([("a", 1, "a-1"), ("a", 3, "a-3"), ("b", 2, "b-2")]):
        self.assertSequenceEqual(table.rows[position], row)

    table = result["table2"]
    for position, row in enumerate([("b", 0, "b-0"), ("a", 2, "a-2"), ("c", 5, "c-5")]):
        self.assertSequenceEqual(table.rows[position], row)

    table = result["table3"]
    for position, row in enumerate([("a", 1, "a-1"), ("a", 2, "a-2"), ("c", 3, "c-3")]):
        self.assertSequenceEqual(table.rows[position], row)
def test_aggregate_key_name(self):
    """Aggregate a TableSet keyed as "test" and check the result's column names and types."""
    keyed = TableSet(self.tables.values(), self.tables.keys(), key_name="test")

    aggregations = [("count", Length())]
    summary = keyed.aggregate(aggregations)

    self.assertIsInstance(summary, Table)
    self.assertColumnNames(summary, ("test", "count"))
    self.assertColumnTypes(summary, [Text, Number])
def test_aggregeate_bad_column(self):
    """Aggregating over the column names "one" and "bad" must each raise KeyError."""
    tableset = TableSet(self.tables.values(), self.tables.keys())

    requests = (
        ("one", "one_sum"),
        ("bad", "bad_sum"),
    )
    for column, alias in requests:
        with self.assertRaises(KeyError):
            tableset.aggregate([(column, Sum(), alias)])
def test_merge_key_name(self):
    """Merge with ``key_name='foo'``; check column names, key column type and remaining types."""
    merged = TableSet(self.tables.values(), self.tables.keys(), key_name='foo').merge()

    self.assertSequenceEqual(merged.column_names, ['foo', 'letter', 'number'])

    # The key column only needs to be *a* Text instance; the other columns
    # are compared against the fixture's own type objects.
    key_column_type = merged.column_types[0]
    self.assertIsInstance(key_column_type, Text)

    remaining_types = merged.column_types[1:]
    self.assertSequenceEqual(remaining_types, [self.text_type, self.number_type])
def test_aggregeate_bad_column(self):
    """Check that summing the columns 'one' and 'bad' raises KeyError in both cases."""
    tableset = TableSet(self.tables.values(), self.tables.keys())

    def aggregate_sum(column, alias):
        # Run a single Sum aggregation of ``column`` stored under ``alias``.
        return tableset.aggregate([(column, Sum(), alias)])

    with self.assertRaises(KeyError):
        aggregate_sum('one', 'one_sum')

    with self.assertRaises(KeyError):
        aggregate_sum('bad', 'bad_sum')
def test_print_structure(self):
    """``print_structure`` output, stripped, is expected to span five lines."""
    tableset = TableSet(self.tables.values(), self.tables.keys())

    buffer = StringIO()
    tableset.print_structure(output=buffer)

    rendered = buffer.getvalue().strip()
    line_count = len(rendered.split('\n'))

    self.assertEqual(line_count, 5)
def test_select(self):
    """Select only 'number' and check every resulting table has that single Number column."""
    source = TableSet(self.tables.values(), self.tables.keys())
    selected = source.select(['number'])

    expected_names = ('number',)
    expected_types = (Number,)

    for _name, table in selected.items():
        self.assertColumnNames(table, expected_names)
        self.assertColumnTypes(table, expected_types)
def test_print_structure(self):
    """``print_structure`` output, stripped, is expected to span seven lines."""
    buffer = StringIO()

    tableset = TableSet(self.tables.values(), self.tables.keys())
    tableset.print_structure(output=buffer)

    printed_lines = buffer.getvalue().strip().split('\n')
    self.assertEqual(len(printed_lines), 7)
def test_aggregate_row_names(self):
    """The aggregated table's row names are expected to be the three table keys."""
    keyed = TableSet(self.tables.values(), self.tables.keys(), key_name='test')

    aggregations = [('count', Count())]
    summary = keyed.aggregate(aggregations)

    expected_row_names = ['table1', 'table2', 'table3']
    self.assertRowNames(summary, expected_row_names)
def test_aggregeate_bad_column(self):
    """Summing the columns 'one' and 'bad' must each raise ColumnDoesNotExistError."""
    tableset = TableSet(self.tables)

    for column, alias in (('one', 'one_sum'), ('bad', 'bad_sum')):
        with self.assertRaises(ColumnDoesNotExistError):
            tableset.aggregate([(column, Sum(), alias)])
def test_aggregate_row_names(self):
    """Aggregated rows are addressable by table key and hold ``[key, 3]``."""
    keyed = TableSet(self.tables.values(), self.tables.keys(), key_name="test")
    summary = keyed.aggregate([("number", Length(), "count")])

    table_keys = ["table1", "table2", "table3"]
    self.assertSequenceEqual(summary.row_names, table_keys)

    for key in table_keys:
        self.assertSequenceEqual(summary.rows[key], [key, 3])
def test_aggregate_sum(self):
    """Aggregate a length and the sum of "number" per table and check the summary Table."""
    tableset = TableSet(self.tables.values(), self.tables.keys())

    aggregations = [
        ("count", Length()),
        ("number_sum", Sum("number")),
    ]
    summary = tableset.aggregate(aggregations)

    self.assertIsInstance(summary, Table)
    self.assertColumnNames(summary, ("group", "count", "number_sum"))
    self.assertColumnTypes(summary, [Text, Number, Number])

    expected_rows = [
        ("table1", 3, 6),
        ("table2", 3, 7),
        ("table3", 3, 6),
    ]
    self.assertRows(summary, expected_rows)
def test_aggregate_max_length(self):
    """Aggregate a length and the max length of "letter" per table and check the summary Table."""
    tableset = TableSet(self.tables.values(), self.tables.keys())

    aggregations = [
        ("count", Length()),
        ("letter_max_length", MaxLength("letter")),
    ]
    summary = tableset.aggregate(aggregations)

    self.assertIsInstance(summary, Table)
    self.assertColumnNames(summary, ("group", "count", "letter_max_length"))
    self.assertColumnTypes(summary, [Text, Number, Number])

    expected_rows = [
        ("table1", 3, 1),
        ("table2", 3, 1),
        ("table3", 3, 1),
    ]
    self.assertRows(summary, expected_rows)
def test_select(self):
    """Select only 'number'; each resulting table keeps 3 rows and that one column."""
    source = TableSet(self.tables.values(), self.tables.keys())
    selected = source.select(['number'])

    for _name, table in selected.items():
        row_count = len(table.rows)
        column_count = len(table.columns)

        self.assertEqual(row_count, 3)
        self.assertEqual(column_count, 1)
        self.assertSequenceEqual(table._column_types, (self.number_type,))
        self.assertSequenceEqual(table._column_names, ('number',))
def test_aggregate_key_name(self):
    """Aggregate a TableSet keyed as 'test'; the key name becomes the first column name."""
    summary = TableSet(
        self.tables.values(),
        self.tables.keys(),
        key_name='test',
    ).aggregate([('count', Count())])

    self.assertIsInstance(summary, Table)

    expected_names = ('test', 'count')
    expected_types = [Text, Number]
    self.assertColumnNames(summary, expected_names)
    self.assertColumnTypes(summary, expected_types)
def test_merge(self):
    """Merge all tables; check the schema, total row count and the first and last rows."""
    merged = TableSet(self.tables.values(), self.tables.keys()).merge()

    self.assertColumnNames(merged, ['group', 'letter', 'number'])
    self.assertColumnTypes(merged, [Text, Text, Number])

    merged_rows = merged.rows
    self.assertEqual(len(merged_rows), 9)

    for index, expected in ((0, ['table1', 'a', 1]), (8, ['table3', 'c', 3])):
        self.assertSequenceEqual(merged.rows[index], expected)
def test_print_structure_row_limit(self):
    """With 25 extra tables added, stripped ``print_structure`` output is expected to be 22 lines."""
    # Same object as ``self.tables``: the extra entries are added to the fixture itself.
    tables = self.tables
    first_table = tables['table1']
    for number in range(25):
        tables[str(number)] = first_table

    tableset = TableSet(tables.values(), tables.keys())

    buffer = StringIO()
    tableset.print_structure(output=buffer)

    printed_lines = buffer.getvalue().strip().split('\n')
    self.assertEqual(len(printed_lines), 22)
def test_print_structure_row_limit(self):
    """With 25 extra tables added, stripped ``print_structure`` output is expected to be 24 lines."""
    # ``tables`` aliases ``self.tables``; the loop grows the fixture mapping in place.
    tables = self.tables
    duplicated = tables['table1']
    for key in map(str, range(25)):
        tables[key] = duplicated

    buffer = StringIO()
    TableSet(tables.values(), tables.keys()).print_structure(output=buffer)

    rendered = buffer.getvalue().strip()
    self.assertEqual(len(rendered.split('\n')), 24)
def test_aggregate_row_names(self):
    """Each aggregated row can be looked up by its table key and equals ``[key, 3]``."""
    tableset = TableSet(self.tables.values(), self.tables.keys(), key_name='test')

    aggregations = [('number', Length(), 'count')]
    summary = tableset.aggregate(aggregations)

    self.assertSequenceEqual(summary.row_names, ['table1', 'table2', 'table3'])

    for key in ('table1', 'table2', 'table3'):
        self.assertSequenceEqual(summary.rows[key], [key, 3])
def test_merge(self):
    """Merge all tables and verify the "group" column, 9 rows, and both boundary rows."""
    tableset = TableSet(self.tables.values(), self.tables.keys())
    merged = tableset.merge()

    expected_names = ["group", "letter", "number"]
    expected_types = [Text, Text, Number]
    self.assertColumnNames(merged, expected_names)
    self.assertColumnTypes(merged, expected_types)

    self.assertEqual(len(merged.rows), 9)

    first_row = merged.rows[0]
    self.assertSequenceEqual(first_row, ["table1", "a", 1])

    last_row = merged.rows[8]
    self.assertSequenceEqual(last_row, ["table3", "c", 3])
def test_aggregate_min(self):
    """Aggregate a length and the minimum of 'number' per table and check the summary Table."""
    tableset = TableSet(self.tables.values(), self.tables.keys())

    aggregations = [
        ('count', Length()),
        ('number_min', Min('number')),
    ]
    summary = tableset.aggregate(aggregations)

    self.assertIsInstance(summary, Table)
    self.assertColumnNames(summary, ('group', 'count', 'number_min'))
    self.assertColumnTypes(summary, [Text, Number, Number])

    expected_rows = [
        ('table1', 3, 1),
        ('table2', 3, 0),
        ('table3', 3, 1),
    ]
    self.assertRows(summary, expected_rows)
def test_merge_groups(self):
    """Merge with explicit ``groups`` and ``group_name``; those values label the merged rows."""
    tableset = TableSet(self.tables.values(), self.tables.keys(), key_name='foo')

    group_labels = ['red', 'blue', 'green']
    merged = tableset.merge(groups=group_labels, group_name='color_code')

    self.assertColumnNames(merged, ['color_code', 'letter', 'number'])
    self.assertColumnTypes(merged, [Text, Text, Number])
    self.assertEqual(len(merged.rows), 9)

    for index, expected in ((0, ['red', 'a', 1]), (8, ['green', 'c', 3])):
        self.assertSequenceEqual(merged.rows[index], expected)
def test_merge_groups(self):
    """Merge using custom group values under "color_code" and check schema and boundary rows."""
    merged = TableSet(
        self.tables.values(),
        self.tables.keys(),
        key_name="foo",
    ).merge(groups=["red", "blue", "green"], group_name="color_code")

    expected_names = ["color_code", "letter", "number"]
    expected_types = [Text, Text, Number]
    self.assertColumnNames(merged, expected_names)
    self.assertColumnTypes(merged, expected_types)

    self.assertEqual(len(merged.rows), 9)

    first_row = merged.rows[0]
    self.assertSequenceEqual(first_row, ["red", "a", 1])

    last_row = merged.rows[8]
    self.assertSequenceEqual(last_row, ["green", "c", 3])
def test_aggregate_sum(self):
    """Aggregate a count and the sum of 'number' per table and check the summary Table."""
    tableset = TableSet(self.tables.values(), self.tables.keys())

    aggregations = [
        ('count', Count()),
        ('number_sum', Sum('number')),
    ]
    summary = tableset.aggregate(aggregations)

    self.assertIsInstance(summary, Table)
    self.assertColumnNames(summary, ('group', 'count', 'number_sum'))
    self.assertColumnTypes(summary, [Text, Number, Number])

    expected_rows = [
        ('table1', 3, 6),
        ('table2', 3, 7),
        ('table3', 3, 6),
    ]
    self.assertRows(summary, expected_rows)
def test_having_simple(self):
    """Filter with ``having`` on a count below 3; only 'table3' is expected to remain."""
    def fewer_than_three(t):
        # Predicate applied to the aggregated values of each table.
        return t['count'] < 3

    tableset = TableSet(self.tables.values(), self.tables.keys(), key_name='test')

    aggregations = [('count', Count())]
    filtered = tableset.having(aggregations, fewer_than_three)

    self.assertIsInstance(filtered, TableSet)
    self.assertSequenceEqual(filtered.keys(), ['table3'])
    # The surviving table must be the very same object, not a copy.
    self.assertIs(filtered.values()[0], tableset['table3'])
    self.assertEqual(filtered.key_name, 'test')
def test_aggregate_key_name(self):
    """Aggregate a TableSet keyed as "test"; check shape, column names and column type classes."""
    keyed = TableSet(self.tables.values(), self.tables.keys(), key_name="test")
    summary = keyed.aggregate([("number", Length(), "count")])

    self.assertIsInstance(summary, Table)
    self.assertEqual(len(summary.rows), 3)
    self.assertEqual(len(summary.columns), 2)
    self.assertSequenceEqual(summary._column_names, ("test", "count"))

    for position, expected_class in enumerate((Text, Number)):
        self.assertIsInstance(summary._column_types[position], expected_class)
def test_merge(self):
    """Merge all tables; check names, the types, the row count and both boundary rows."""
    merged = TableSet(self.tables.values(), self.tables.keys()).merge()

    self.assertSequenceEqual(merged.column_names, ['group', 'letter', 'number'])

    # Only the class of the group column's type is checked; the remaining
    # types are compared with the fixture's own type objects.
    group_column_type = merged.column_types[0]
    self.assertIsInstance(group_column_type, Text)

    data_column_types = merged.column_types[1:]
    self.assertSequenceEqual(data_column_types, [self.text_type, self.number_type])

    self.assertEqual(len(merged.rows), 9)

    for index, expected in ((0, ['table1', 'a', 1]), (8, ['table3', 'c', 3])):
        self.assertSequenceEqual(merged.rows[index], expected)
def test_having_simple(self):
    """``having`` with a count-below-3 predicate is expected to keep just 'table3'."""
    source = TableSet(self.tables.values(), self.tables.keys(), key_name='test')

    result = source.having(
        [('count', Count())],
        lambda t: t['count'] < 3,
    )

    self.assertIsInstance(result, TableSet)

    remaining_keys = result.keys()
    self.assertSequenceEqual(remaining_keys, ['table3'])

    remaining_table = result.values()[0]
    self.assertIs(remaining_table, source['table3'])

    self.assertEqual(result.key_name, 'test')
def test_having_complex(self):
    """Filter with ``having`` on count and sum together; only 'table2' is expected to remain."""
    def enough_rows_and_large_sum(t):
        # Predicate applied to the aggregated values of each table.
        return t['count'] >= 3 and t['number_sum'] > 6

    tableset = TableSet(self.tables.values(), self.tables.keys(), key_name='test')

    aggregations = [
        ('count', Count()),
        ('number_sum', Sum('number')),
    ]
    filtered = tableset.having(aggregations, enough_rows_and_large_sum)

    self.assertIsInstance(filtered, TableSet)
    self.assertSequenceEqual(filtered.keys(), ['table2'])
    # The surviving table must be the very same object, not a copy.
    self.assertIs(filtered.values()[0], tableset['table2'])
    self.assertEqual(filtered.key_name, 'test')
def test_proxy_maintains_key(self):
    """``select`` must return a TableSet carrying the same ``key_name`` and ``key_type``."""
    key_type = Number()
    original = TableSet(
        self.tables.values(),
        self.tables.keys(),
        key_name='foo',
        key_type=key_type,
    )

    self.assertEqual(original.key_name, 'foo')
    self.assertEqual(original.key_type, key_type)

    selected = original.select(['number'])

    self.assertEqual(selected.key_name, 'foo')
    self.assertEqual(selected.key_type, key_type)
def test_aggregate_sum(self):
    """Sum 'number' per table; the result holds group, count and number_sum columns."""
    summary = TableSet(self.tables).aggregate([('number', Sum(), 'number_sum')])

    self.assertIsInstance(summary, Table)
    self.assertEqual(len(summary.rows), 3)
    self.assertEqual(len(summary.columns), 3)
    self.assertSequenceEqual(summary._column_names, ('group', 'count', 'number_sum'))

    expected_rows = (
        ('table1', 3, 6),
        ('table2', 3, 7),
        ('table3', 3, 6),
    )
    for index, expected in enumerate(expected_rows):
        self.assertSequenceEqual(summary.rows[index], expected)
def test_aggregate_key_name(self):
    """Aggregate a TableSet keyed as 'test'; check shape, column names and column type classes."""
    tableset = TableSet(self.tables.values(), self.tables.keys(), key_name='test')

    aggregations = [('number', Length(), 'count')]
    summary = tableset.aggregate(aggregations)

    self.assertIsInstance(summary, Table)

    row_count = len(summary.rows)
    column_count = len(summary.columns)
    self.assertEqual(row_count, 3)
    self.assertEqual(column_count, 2)

    self.assertSequenceEqual(summary._column_names, ('test', 'count'))

    key_type = summary._column_types[0]
    self.assertIsInstance(key_type, Text)
    count_type = summary._column_types[1]
    self.assertIsInstance(count_type, Number)
def test_having_complex(self):
    """``having`` with a combined count/sum predicate is expected to keep just 'table2'."""
    source = TableSet(self.tables.values(), self.tables.keys(), key_name='test')

    result = source.having(
        [
            ('count', Count()),
            ('number_sum', Sum('number')),
        ],
        lambda t: t['count'] >= 3 and t['number_sum'] > 6,
    )

    self.assertIsInstance(result, TableSet)

    remaining_keys = result.keys()
    self.assertSequenceEqual(remaining_keys, ['table2'])

    remaining_table = result.values()[0]
    self.assertIs(remaining_table, source['table2'])

    self.assertEqual(result.key_name, 'test')
def test_aggregate_min(self):
    """Take the minimum of 'number' per table; check shape, column class and rows."""
    summary = TableSet(self.tables).aggregate([('number', Min(), 'number_min')])

    self.assertIsInstance(summary, Table)
    self.assertEqual(len(summary.rows), 3)
    self.assertEqual(len(summary.columns), 3)
    self.assertSequenceEqual(summary._column_names, ('group', 'count', 'number_min'))

    min_column = summary.columns['number_min']
    self.assertIsInstance(min_column, NumberColumn)

    expected_rows = (
        ('table1', 3, 1),
        ('table2', 3, 0),
        ('table3', 3, 1),
    )
    for index, expected in enumerate(expected_rows):
        self.assertSequenceEqual(summary.rows[index], expected)
def test_aggregate_max_length(self):
    """Take the max length of 'letter' per table; check the result's shape and rows."""
    tableset = TableSet(self.tables)

    aggregations = [('letter', MaxLength(), 'letter_max_length')]
    summary = tableset.aggregate(aggregations)

    self.assertIsInstance(summary, Table)
    self.assertEqual(len(summary.rows), 3)
    self.assertEqual(len(summary.columns), 3)
    self.assertSequenceEqual(summary._column_names, ('group', 'count', 'letter_max_length'))

    for index, table_name in enumerate(('table1', 'table2', 'table3')):
        self.assertSequenceEqual(summary.rows[index], (table_name, 3, 1))
def test_aggregate_two_ops(self):
    """Aggregate length, sum and mean of 'number' per table and check the summary Table."""
    tableset = TableSet(self.tables.values(), self.tables.keys())

    aggregations = [
        ('count', Length()),
        ('number_sum', Sum('number')),
        ('number_mean', Mean('number')),
    ]
    summary = tableset.aggregate(aggregations)

    self.assertIsInstance(summary, Table)
    self.assertColumnNames(summary, ('group', 'count', 'number_sum', 'number_mean'))
    self.assertColumnTypes(summary, [Text, Number, Number, Number])

    # table2's mean (7 / 3) is not an integer, so it is spelled as a Decimal quotient.
    expected_rows = [
        ('table1', 3, 6, 2),
        ('table2', 3, 7, Decimal(7) / 3),
        ('table3', 3, 6, 2),
    ]
    self.assertRows(summary, expected_rows)
def test_to_json(self):
    """Write every table to JSON and compare each file with its example counterpart.

    The output directory ``.test-tableset`` is removed in a ``finally``
    clause, so a failing assertion or a missing file no longer leaves it
    behind to interfere with later runs.
    """
    tableset = TableSet(self.tables.values(), self.tables.keys())

    try:
        tableset.to_json('.test-tableset')

        for name in ['table1', 'table2', 'table3']:
            with open('.test-tableset/%s.json' % name) as f:
                contents1 = json.load(f)

            with open('examples/tableset/%s.json' % name) as f:
                contents2 = json.load(f)

            self.assertEqual(contents1, contents2)
    finally:
        # ignore_errors: if to_json failed before creating the directory,
        # the cleanup must not mask the original exception.
        shutil.rmtree('.test-tableset', ignore_errors=True)
def test_to_csv(self):
    """Write every table to CSV and compare each file's text with its example counterpart.

    The output directory ``.test-tableset`` is removed in a ``finally``
    clause, so a failing assertion or a missing file no longer leaves it
    behind to interfere with later runs.
    """
    tableset = TableSet(self.tables)

    try:
        tableset.to_csv('.test-tableset')

        for name in ['table1', 'table2', 'table3']:
            with open('.test-tableset/%s.csv' % name) as f:
                contents1 = f.read()

            with open('examples/tableset/%s.csv' % name) as f:
                contents2 = f.read()

            self.assertEqual(contents1, contents2)
    finally:
        # ignore_errors: if to_csv failed before creating the directory,
        # the cleanup must not mask the original exception.
        shutil.rmtree('.test-tableset', ignore_errors=True)