def main(self):
    """Group the input rows and write the aggregated result as CSV.

    When ``self.args.names_only`` is set, only ``self.print_column_names()``
    is called and nothing else happens. Otherwise the first row of the
    input is taken as the header, the grouping columns and the requested
    aggregations are resolved against it, and every row yielded by
    ``group_rows`` is written to ``self.output_file``.

    An aggregator name that is not a key of ``aggregate_functions`` is
    reported through ``self.argparser.error`` together with the list of
    available names.
    """
    if self.args.names_only:
        self.print_column_names()
        return

    # Read in header and rows
    reader = CSVKitReader(self.input_file, **self.reader_kwargs)
    # The next() builtin works on Python 2.6+ and 3; reader.next() is
    # Python 2 only.
    column_names = next(reader)

    # Determine columns to group by; defaults to no grouping columns
    if self.args.columns is None:
        grouped_columns_ids = []
    else:
        grouped_columns_ids = parse_column_identifiers(
            self.args.columns, column_names, self.args.zero_based)

    # Build one aggregator per (function, column) pair. A plain loop
    # replaces the former ``lambda (f, cols): ...``, whose tuple-parameter
    # syntax no longer exists in Python 3.
    aggregations = []
    for fun, cols in self.args.aggregations:
        column_ids = parse_column_identifiers(
            cols, column_names, self.args.zero_based)
        # Only the name lookup is guarded, so a KeyError raised elsewhere
        # is not misreported as an unknown aggregator function.
        try:
            make_aggregator = aggregate_functions[fun]
        except KeyError:
            # NOTE(review): presumably an argparse parser whose error()
            # terminates the program -- confirm.
            self.argparser.error(
                "Wrong aggregator function. Available: "
                + ', '.join(aggregate_functions))
        aggregations.extend(make_aggregator(col) for col in column_ids)

    # Write the output
    output = CSVKitWriter(self.output_file, **self.writer_kwargs)
    for row in group_rows(column_names, reader, grouped_columns_ids,
                          aggregations):
        output.writerow(row)
def test_group_two(self):
    """Group on column ids [1, 2] with max, min and count aggregators.

    Expects three output rows and checks the second and third of them.
    """
    aggregators = [MaxAggregator(3), MinAggregator(4), CountAggregator(4)]
    grouped = list(group_rows(test_header, test_data, [1, 2], aggregators))

    self.assertEqual(len(grouped), 3)
    self.assertEqual(grouped[1], ['a', 'a', 3, 2, 2])
    self.assertEqual(grouped[2], ['b', 'a', 6, 0, 2])
def test_group_zero(self):
    """Aggregate with an empty list of grouping columns.

    Expects two output rows and checks that the second one holds the
    max, min and count values.
    """
    aggregators = [MaxAggregator(3), MinAggregator(4), CountAggregator(4)]
    grouped = list(group_rows(test_header, test_data, [], aggregators))

    self.assertEqual(len(grouped), 2)
    self.assertEqual(grouped[1], [6, 0, 4])
def test_header(self):
    """Check the first output row when grouping on column id 1.

    With a max aggregator on column 3 and a min aggregator on column 4,
    the first row is expected to be ['h2', 'max(h4)', 'min(h5)'].
    """
    aggregators = [MaxAggregator(3), MinAggregator(4)]
    grouped = list(group_rows(test_header, test_data, [1], aggregators))

    header_row = grouped[0]
    self.assertEqual(header_row, ['h2', 'max(h4)', 'min(h5)'])