Example #1
0
    def test_csv_only(self):
        """
        Ensure only the paths of the csv files
        are returned.

        Walks the ``test_data/things_kinds`` directory located next to
        this test module, expects exactly two paths back, and requires
        every one of them to end with the ``.csv`` extension.
        """
        input_path = os.path.join(os.path.dirname(__file__),
                                  "test_data",
                                  "things_kinds")

        csv_paths = list(i_walk_csv_paths(input_path))
        self.assertEqual(2, len(csv_paths))
        for path in csv_paths:
            # A substring check would also accept names such as
            # "notes.csv.bak" or a directory called "old.csv"; only a
            # real ".csv" suffix proves the path is a csv file.
            # The path is passed as the message so a failure names it.
            self.assertTrue(path.endswith('.csv'), path)
def main():
    """
    Treat every csv file yielded for the input directory as one
    continuous sequence of items and print the first two fields
    of each item.
    """
    import pathlib

    source_dir = pathlib.Path('test_data/things_kinds')
    csv_paths = i_walk_csv_paths(str(source_dir))

    # One data iterable per csv path, built with a mapping call.
    # A generator expression would work just as well:
    #     (karld.io.i_get_csv_data(p) for p in csv_paths)
    per_file_rows = imap(karld.io.i_get_csv_data, csv_paths)

    # Flatten the per-file iterables into a single stream.
    for row in chain.from_iterable(per_file_rows):
        print(row[0], row[1])
def main():
    """
    Group the items of several small, homogeneous csv files on a
    shared key field and print each group.

    "Small" means the files, taken all together, fit in
    your computer's memory.
    """
    import pathlib

    # Index of the field the items are grouped on.
    kind_column = 1

    source_dir = pathlib.Path('test_data/things_kinds')

    # One data iterable per csv path found for the directory.
    per_file_rows = (karld.io.i_get_csv_data(csv_path)
                     for csv_path in i_walk_csv_paths(str(source_dir)))

    for group in sort_merge_group(per_file_rows, itemgetter(kind_column)):
        # group[0] is printed as the heading, then the first field of
        # every member of group[1] is printed indented by a tab.
        print(group[0])
        for row in group[1]:
            print('\t' + row[0])
        # Blank separator between groups.
        print()