Example no. 1
0
    def _finish_image(self, paths, exif_result):
        """
        Complete the post-processing steps for a single image.

        Steps performed:

        - Insert the EXIF metadata into the database (only when a database client is active and
          `exif_result` is available).
        - Delete the image's cache file.
        - If `config.delete_input` is set, delete the original input file and prune any empty
          parent folders up to (but not including) the base input folder.

        :param paths: Paths object representing the image file.
        :type paths: src.io.TreeWalker.Paths
        :param exif_result: JSON metadata contents, used for the database insertion when enabled.
        :type exif_result: dict
        """
        # Hand the EXIF data over to the database client, if one is configured.
        if exif_result is not None and self.database_client is not None:
            self.database_client.add_row(exif_result)

        # The image is done, so its cache file is no longer needed.
        paths.remove_cache_file()

        if config.delete_input:
            # Make sure the file is reachable before attempting removal.
            wait_until_path_is_found(paths.input_file)
            os.remove(paths.input_file)
            LOGGER.debug(__name__, f"Input file removed: {paths.input_file}")

            # Prune now-empty directories, stopping at the base input folder.
            remove_empty_folders(start_dir=paths.input_dir, top_dir=paths.base_input_dir)

        self.n_completed += 1
Example no. 2
0
    def _maybe_restart_worker(self, paths, worker):
        """
        Attempt to restart a failed worker.

        The worker is restarted unless it has already been started more than
        `self.max_worker_starts` times, in which case an error is logged (saved and emailed)
        instead.

        :param paths: Paths object representing the image file.
        :type paths: src.io.TreeWalker.Paths
        :param worker: Worker to maybe restart
        :type worker: src.Workers.BaseWorker
        :return: True if worker was restarted, False otherwise
        :rtype: bool
        """
        worker_name = worker.__class__.__name__

        # Too many start attempts already? Give up on this worker and report the failure.
        if worker.n_starts > self.max_worker_starts:
            LOGGER.error(__name__,
                         f"{worker_name} failed for image: {paths.input_file}.",
                         save=True,
                         email=True,
                         email_mode="error")
            return False

        worker.start()
        LOGGER.debug(__name__, f"Restarted {worker_name} for image: {paths.input_file}.")
        return True
Example no. 3
0
    def get(self):
        """
        Get the result from the worker.

        :return: Return value from `self.async_func`.
        :rtype:
        """
        if self.pool is None:
            # Synchronous execution: the result was stored directly in `self.async_worker`.
            result = self.async_worker
        else:
            # Asynchronous execution: fetch the result from the worker, converting any known
            # failure into the error return value via `self.handle_error`.
            try:
                result = self.async_worker.get()
                assert self.result_is_valid(result), f"Invalid result: '{result}'"
            except self.worker_exceptions as err:
                self.handle_error(err)
                return ERROR_RETVAL

        LOGGER.debug(__name__, self.finished_message.format(image_file=self.paths.input_file))
        return result
Example no. 4
0
def clear_cache_file(file_path):
    """
    Remove the output files belonging to the unfinished image whose cache file is located at
    `file_path`, then remove the cache file itself.

    :param file_path: Path to cache file for unfinished image
    :type file_path: str
    """
    try:
        with open(file_path, "r") as f:
            cache_info = json.load(f)
    except json.JSONDecodeError:
        # A decode error almost certainly means the program was killed mid-write. Writing the
        # cache file is the first export step, so no output images exist yet; just drop the
        # partial cache file and return.
        os.remove(file_path)
        return

    # Rebuild the `src.io.TreeWalker.Paths` object for the image from the cached information.
    paths = Paths(base_input_dir=cache_info["base_input_dir"],
                  base_mirror_dirs=cache_info["base_mirror_dirs"],
                  input_dir=cache_info["input_dir"],
                  mirror_dirs=cache_info["mirror_dirs"],
                  filename=cache_info["filename"])

    # Block until the referenced directories are reachable again.
    try:
        wait_until_path_is_found([paths.base_input_dir, *paths.base_mirror_dirs])
    except PathNotReachableError as err:
        raise PathNotReachableError(f"The directories pointed to by the cache file '{file_path}' could not be found. If"
                                    f" they were deleted manually, delete this cache file and run the program again")\
                                   from err

    # Delete whichever of the expected output files actually exist.
    for expected_file in get_expected_files(paths):
        if not os.path.isfile(expected_file):
            LOGGER.debug(__name__,
                         f"Could not find file '{expected_file}' for unfinished image '{paths.input_file}'")
            continue

        os.remove(expected_file)
        LOGGER.info(__name__,
                    f"Removed file '{expected_file}' for unfinished image '{paths.input_file}'")

    # All outputs are cleaned up; the cache file can go too.
    os.remove(file_path)
Example no. 5
0
def remove_empty_folders(start_dir, top_dir):
    """
    Delete empty folders bottom-up, starting at `start_dir`.

    An empty `start_dir` is removed; if its parent directory becomes (or already is) empty it is
    removed as well, and so on. The walk stops at the first non-empty directory, or when the
    current directory equals `top_dir`. (The `top_dir` directory will not be removed.)

    NOTE: Use full paths when using this function, to avoid problems when comparing the current
    directory to `top_dir`.

    :param start_dir: Path to bottom directory to remove if empty.
    :type start_dir: str
    :param top_dir: Top directory. Only folders under this will be deleted.
    :type top_dir:
    """
    # `start_dir` must live inside `top_dir` for the bottom-up walk to make sense.
    assert start_dir.startswith(top_dir), (f"remove_empty_folders: Invalid top directory '{top_dir}' for start "
                                           f"directory '{start_dir}'")
    folder = start_dir
    while not os.listdir(folder) and folder != top_dir:
        os.rmdir(folder)
        LOGGER.debug(__name__, f"Input folder removed: {folder}")
        folder = os.path.dirname(folder)
Example no. 6
0
def clear_db_cache():
    """
    Flush the database cache.

    Every pickled row found in the database cache directory is loaded and inserted into the
    database; the cache files are deleted only after the insertion succeeded.

    :raises DatabaseError: If inserting the cached rows into the database fails.
    """
    # Nothing to do when the cache directory does not exist.
    if not os.path.isdir(DB_CACHE_DIR):
        return

    cached_rows, cache_files = [], []
    for filename in os.listdir(DB_CACHE_DIR):
        # Only pickled row files are relevant.
        if not filename.endswith(".pkl"):
            continue

        cache_file = os.path.join(DB_CACHE_DIR, filename)
        LOGGER.debug(__name__, f"Found database cache file: {cache_file}")
        # Load the cached row, and remember the file path for deletion later.
        with open(cache_file, "rb") as f:
            cached_rows.append(pickle.load(f))
        cache_files.append(cache_file)

    # No cached rows found; nothing to insert.
    if not cached_rows:
        return

    with DatabaseClient() as cli:
        # Bypass the client's usual error checking for this bulk insert.
        cli._ignore_error_check = True
        try:
            cli.insert_or_update_rows(cached_rows)
        except Exception as err:
            raise DatabaseError(f"Got error '{err}' when inserting cached rows into the database.") from err

    # Insertion succeeded, so the cached copies are safe to delete.
    for cache_file in cache_files:
        os.remove(cache_file)
Example no. 7
0
    def _path_is_valid(self, input_dir, mirror_dirs, filename):
        """
        Decide whether `filename` in `input_dir` is an image this walker should process.

        A file is rejected when it does not have the expected extension, when it cannot be read,
        or (if `self.skip_webp` is set) when its webp mask already exists in the first mirror
        directory. The skipped/valid image counters are updated accordingly.
        """
        # Wrong extension: not a file we care about.
        if not filename.endswith(self.ext):
            return False

        input_filepath = os.path.join(input_dir, filename)
        # Unreadable files cannot be processed.
        if not os.access(input_filepath, os.R_OK):
            LOGGER.info(__name__, f"Could not read image file '{input_filepath}'")
            return False

        if self.skip_webp:
            # Skip images whose webp mask was already produced by a previous run.
            webp_path = os.path.join(mirror_dirs[0], self._to_webp(filename))
            if os.path.exists(webp_path):
                LOGGER.debug(__name__, f"Mask already found for '{input_filepath}' at '{webp_path}'.")
                self.n_skipped_images += 1
                return False

        self.n_valid_images += 1
        return True