def main():
    """
    Merge a number of homogeneous small csv files on a key.

    Small means they all together fit in your computer's memory.

    The csv paths found under ``test_data/things_kinds`` are handed to
    ``sort_merge_group`` keyed on the column at index 1. For every group
    that comes back, the group key is printed on its own line, followed
    by the first column of each row on a tab-indented line and then an
    empty line.
    """
    import pathlib

    KINDS = 1

    input_dir = pathlib.Path('test_data/things_kinds')
    csv_paths = i_walk_csv_paths(str(input_dir))

    # Generator expression: i_get_csv_data is only called for a path
    # when the consumer asks for the next item.
    rows_per_file = (karld.io.i_get_csv_data(csv_path)
                     for csv_path in csv_paths)

    for kind_group in sort_merge_group(rows_per_file, itemgetter(KINDS)):
        # Index 0 holds the group key, index 1 the rows of that group.
        print(kind_group[0])
        for row in kind_group[1]:
            print('\t' + row[0])
        # Empty line after each group.
        print()
def combine_things(iterables):
    """
    Example iterables combiner function.

    The data files contain rows such as::

        pear, fruit
        cat, animal

    The first column is the thing and the second column is its kind.

    The rows are passed to ``sort_merge_group`` keyed on the kind
    column, which sorts the rows of each file by kind, merges them into
    one sorted iterable and groups them by kind. The rows of each group
    are then sorted and yielded one at a time, group after group.

    :param iterables: An iterable of iterable values.
    """
    THING_KIND = 1
    by_kind = itemgetter(THING_KIND)

    for kind_group in sort_merge_group(iterables, key=by_kind):
        # Index 1 of a group holds its rows; materialise them so they
        # can be ordered before being handed on.
        rows = list(kind_group[1])
        rows.sort()
        for row in rows:
            yield row