Example #1
0
    def test_i_walk_dir_for_filepaths_names(self, mock_walk):
        """
        Check that each file name reported by the walk is yielded
        together with its path, in the order the walk reports them.

        :param mock_walk: mock standing in for the directory walk
            (presumably a patched os.walk -- the patch decorator is
            not visible from here).
        """

        def fake_walk(root_dir):
            """
            Produce (dirpath, dirnames, filenames) entries the way
            os.walk does, for a two directory tree.
            """
            tree = (
                ("dir0", ["dir1"], ["cat", "hat", "bat"]),
                ("dir1", [], ["tin", "can"]),
            )
            for entry in tree:
                yield entry

        mock_walk.side_effect = fake_walk

        expected_pairs = [
            ("dir0/cat", "cat"),
            ("dir0/hat", "hat"),
            ("dir0/bat", "bat"),
            ("dir1/tin", "tin"),
            ("dir1/can", "can"),
        ]

        walker = i_walk_dir_for_filepaths_names("fake")

        # Pull results one at a time so each pair is checked in order.
        for expected_pair in expected_pairs:
            self.assertEqual(next(walker), expected_pair)

        mock_walk.assert_called_once_with("fake")
Example #2
0
def i_walk_csv_paths(input_dir):
    """
    Return an iterator over the paths of the csv files found
    by walking the input directory.

    :param input_dir: path to the input directory; it is converted
        with str() before being walked.
    :returns: whatever yield_nth_of produces for item 0 (the path)
        of each (path, name) pair accepted by is_file_csv.
    """
    root = str(input_dir)

    # (path, name) pairs for everything under root, csv files only.
    csv_pairs = ifilter(is_file_csv, i_walk_dir_for_filepaths_names(root))

    # Keep just the first element of every pair: the path.
    return yield_nth_of(0, csv_pairs)
def main():
    """
    Concatenate the csv files under test_data/things_kinds,
    in no particular order, via csv_files_to_file.

    The output directory and file name handed to csv_files_to_file
    are out_data/things_kinds and combined_things.csv, with an
    empty prefix.
    """
    import pathlib

    source_dir = pathlib.Path("test_data/things_kinds")

    # (path, name) pairs for every file below the source directory.
    path_name_pairs = i_walk_dir_for_filepaths_names(str(source_dir))

    # Only the pairs that is_file_csv accepts.
    csv_pairs = ifilter(karld.io.is_file_csv, path_name_pairs)

    target_dir = pathlib.Path("out_data/things_kinds")

    csv_files_to_file(
        chain.from_iterable,  # combiner: rows of all files, back to back
        "",
        str(target_dir),
        "combined_things.csv",
        csv_pairs,
    )
Example #4
0
def serial_run_files_to_files(file_to_file, in_dir, filter_func=None):
    """
    Map the file_to_file function over the files in in_dir
    with the plain builtin map.

    Handy for debugging a file_to_file function before running
    it through a process pool.

    :param file_to_file: callable applied to every item the
        directory walk yields (the same items filter_func sees).
    :param in_dir: path to process all files from.
    :param filter_func: Optional. Takes a tuple of path and base \
    name of a file and returns a bool; items it rejects are skipped.
    :returns: A list of return values from the map.
    """
    path_name_pairs = i_walk_dir_for_filepaths_names(in_dir)

    # A falsy filter_func means every walked item is processed.
    selected = (
        ifilter(filter_func, path_name_pairs)
        if filter_func
        else path_name_pairs
    )

    outcomes = map(file_to_file, selected)
    return list(outcomes)
Example #5
0
def pool_run_files_to_files(file_to_file, in_dir, filter_func=None):
    """
    Map the file_to_file function over the files in in_dir
    using a ProcessPoolExecutor.

    :param file_to_file: callable applied to every item the
        directory walk yields (the same items filter_func sees).
    :param in_dir: path to process all files from.
    :param filter_func: Optional. Takes a tuple of path and base \
    name of a file and returns a bool; items it rejects are skipped.
    :returns: A list of return values from the map.
    """
    from concurrent.futures import ProcessPoolExecutor

    path_name_pairs = i_walk_dir_for_filepaths_names(in_dir)

    # A falsy filter_func means every walked item is processed.
    selected = (
        ifilter(filter_func, path_name_pairs)
        if filter_func
        else path_name_pairs
    )

    with ProcessPoolExecutor() as pool:
        # Collect the results while the pool is still open.
        outcomes = list(pool.map(file_to_file, selected))
    return outcomes
Example #6
0
    def test_sort_merge_csv_files_to_file(self):
        """
        Ensure csv_files_to_file will read multiple
        csv files and write one csv file
        with the contents as yielded from the
        given combiner function.

        Ensure i_walk_dir_for_filepaths_names produces
        the paths and basenames of the files in the
        test_data directory.

        The generated file is removed in a finally block, so a
        failing assertion no longer leaves a uniquely named file
        behind in the temp directory.
        """
        from karld.run_together import csv_files_to_file

        out_dir = os.path.join(tempfile.gettempdir(),
                               "karld_test_sort_merge")

        # Timestamp prefix gives every run its own output file name.
        prefix = str(datetime.now())

        out_filename = "things_combined.csv"
        input_dir = os.path.join(os.path.dirname(__file__),
                                 "test_data",
                                 "things_kinds")

        expected_file = os.path.join(out_dir,
                                     "{}{}".format(prefix, out_filename))

        def remove_expected_file():
            """
            Delete the output file if it is there.
            """
            if os.path.exists(expected_file):
                os.remove(expected_file)

        expected_lines = ['cat,animal',
                          'cheese,dairy',
                          'apple,fruit',
                          'orange,fruit',
                          'peach,fruit',
                          'pear,fruit',
                          'tomato,fruit',
                          'mushroom,fungus',
                          'iron,metal',
                          'titanium,metal',
                          'ruby,mineral',
                          'topaz,mineral',
                          'WĄŻ,utf-8 sample',
                          'dróżką,utf-8 sample',
                          'celery,vegetable']

        # Start from a clean slate in case the file already exists.
        remove_expected_file()

        file_path_names = i_walk_dir_for_filepaths_names(input_dir)

        csv_file_path_names = ifilter(
            is_file_csv,
            file_path_names)

        try:
            csv_files_to_file(
                combine_things,
                prefix,
                out_dir,
                out_filename,
                csv_file_path_names)

            self.assertTrue(os.path.exists(expected_file))

            # NOTE(review): the file is read with the platform default
            # encoding while the expected rows hold non-ASCII text --
            # confirm the default is UTF-8 wherever this test runs.
            with open(expected_file) as result_file:
                contents = result_file.read()

            lines = contents.splitlines()
            self.assertEqual(expected_lines, lines)
        finally:
            # Runs on success, assertion failure and errors alike.
            remove_expected_file()