def _finish_image(self, paths, exif_result):
    """
    Complete the post-processing steps for a single image. Specifically:

    - Optionally write the EXIF data to the database (when a database client is active).
    - Delete the image's cache file.
    - Optionally delete the input image (when `config.delete_input == True`), pruning any
      input folders that became empty as a result (the base input folder is never removed).

    :param paths: Paths object representing the image file.
    :type paths: src.io.TreeWalker.Paths
    :param exif_result: JSON metadata file contents. Will be used to write to the database if
                        database writing is enabled.
    :type exif_result: dict
    """
    # Insert the EXIF row when a database client is configured and we actually have data.
    if exif_result is not None and self.database_client is not None:
        self.database_client.add_row(exif_result)

    # The image is done, so its cache file is no longer needed.
    paths.remove_cache_file()

    if config.delete_input:
        # Make sure the (possibly remote) path is reachable before attempting removal.
        wait_until_path_is_found(paths.input_file)
        os.remove(paths.input_file)
        LOGGER.debug(__name__, f"Input file removed: {paths.input_file}")
        # Prune now-empty parent folders, stopping at the base input folder.
        remove_empty_folders(start_dir=paths.input_dir, top_dir=paths.base_input_dir)

    self.n_completed += 1
def _maybe_restart_worker(self, paths, worker):
    """
    Restart `worker` unless it has already been started more than `self.max_worker_starts`
    times. When the limit is exceeded, log (and email) an error instead of restarting.

    :param paths: Paths object representing the image file.
    :type paths: src.io.TreeWalker.Paths
    :param worker: Worker to maybe restart
    :type worker: src.Workers.BaseWorker
    :return: True if worker was restarted, False otherwise
    :rtype: bool
    """
    # Guard: the worker has exhausted its restart budget — report and give up.
    if worker.n_starts > self.max_worker_starts:
        LOGGER.error(
            __name__,
            f"{worker.__class__.__name__} failed for image: {paths.input_file}.",
            save=True,
            email=True,
            email_mode="error",
        )
        return False

    worker.start()
    LOGGER.debug(
        __name__,
        f"Restarted {worker.__class__.__name__} for image: {paths.input_file}."
    )
    return True
def get(self):
    """
    Fetch the result produced by the worker.

    :return: Return value from `self.async_func`.
    :rtype:
    """
    if self.pool is None:
        # The call ran synchronously, so `self.async_worker` already holds the result.
        result = self.async_worker
    else:
        # Asynchronous execution: collect the result, validate it, and route any expected
        # exception (including a failed validation, if AssertionError is among
        # `self.worker_exceptions`) through the error handler.
        try:
            result = self.async_worker.get()
            assert self.result_is_valid(result), f"Invalid result: '{result}'"
        except self.worker_exceptions as err:
            self.handle_error(err)
            return ERROR_RETVAL

    LOGGER.debug(__name__, self.finished_message.format(image_file=self.paths.input_file))
    return result
def clear_cache_file(file_path):
    """
    Remove the output files belonging to the unfinished image whose cache file is located at
    `file_path`, then remove the cache file itself.

    :param file_path: Path to cache file for unfinished image
    :type file_path: str
    """
    # Load the cached JSON metadata describing the unfinished image.
    try:
        with open(file_path, "r") as f:
            cache_info = json.load(f)
    except json.JSONDecodeError:
        # A decode error most likely means the program was killed before it finished writing
        # this file. Cache-file writing is the first step of exporting output images, so
        # there are no outputs to clean up — just drop the incomplete cache file.
        os.remove(file_path)
        return

    # Rebuild the `src.io.TreeWalker.Paths` object for the image from the cached info.
    paths = Paths(
        base_input_dir=cache_info["base_input_dir"],
        base_mirror_dirs=cache_info["base_mirror_dirs"],
        input_dir=cache_info["input_dir"],
        mirror_dirs=cache_info["mirror_dirs"],
        filename=cache_info["filename"],
    )

    # Block until the relevant directories are reachable again, or fail loudly.
    try:
        wait_until_path_is_found([paths.base_input_dir, *paths.base_mirror_dirs])
    except PathNotReachableError as err:
        raise PathNotReachableError(
            f"The directories pointed to by the cache file '{file_path}' could not be found. If"
            f" they were deleted manually, delete this cache file and run the program again"
        ) from err

    # Delete whichever of the expected output files actually exist.
    for expected_file in get_expected_files(paths):
        if os.path.isfile(expected_file):
            os.remove(expected_file)
            LOGGER.info(
                __name__,
                f"Removed file '{expected_file}' for unfinished image '{paths.input_file}'"
            )
        else:
            LOGGER.debug(
                __name__,
                f"Could not find file '{expected_file}' for unfinished image '{paths.input_file}'"
            )

    # Finally, remove the cache file itself.
    os.remove(file_path)
def remove_empty_folders(start_dir, top_dir):
    """
    Bottom-up removal of empty folders. If `start_dir` is empty it is removed, then its
    parent is considered, and so on, until a non-empty directory is reached or the current
    directory equals `top_dir`. (`top_dir` itself is never removed.)

    NOTE: Use full paths when using this function, to avoid problems when comparing the
    current directory to `top_dir`.

    :param start_dir: Path to bottom directory to remove if empty.
    :type start_dir: str
    :param top_dir: Top directory. Only folders under this will be deleted.
    :type top_dir:
    """
    # NOTE(review): this is a plain string-prefix check, so e.g. top_dir='/a/b' also
    # accepts start_dir='/a/bc/...' — consider os.path.commonpath; confirm callers first.
    assert start_dir.startswith(top_dir), f"remove_empty_folders: Invalid top directory '{top_dir}' for start " \
                                          f"directory '{start_dir}'"
    folder = start_dir
    # Keep deleting while the current folder is empty and is not the protected top folder.
    while not os.listdir(folder) and folder != top_dir:
        os.rmdir(folder)
        LOGGER.debug(__name__, f"Input folder removed: {folder}")
        folder = os.path.dirname(folder)
def clear_db_cache():
    """
    Traverse the database cache directory and insert all cached rows into the database.
    The cache files are deleted only after the insertion succeeds.
    """
    # Nothing to do when the cache directory does not exist.
    if not os.path.isdir(DB_CACHE_DIR):
        return

    rows = []
    loaded_files = []
    for filename in os.listdir(DB_CACHE_DIR):
        # Only pickled row files are cache entries.
        if not filename.endswith(".pkl"):
            continue
        cache_file = os.path.join(DB_CACHE_DIR, filename)
        LOGGER.debug(__name__, f"Found database cache file: {cache_file}")
        # Load the cached row and remember where it came from so it can be removed later.
        with open(cache_file, "rb") as f:
            rows.append(pickle.load(f))
        loaded_files.append(cache_file)

    # No valid cached rows found.
    if not rows:
        return

    # Push all rows to the database in a single batch.
    with DatabaseClient() as cli:
        cli._ignore_error_check = True
        try:
            cli.insert_or_update_rows(rows)
        except Exception as err:
            raise DatabaseError(
                f"Got error '{err}' when inserting cached rows into the database."
            ) from err

    # Insertion succeeded, so the cache files are now redundant.
    for cache_file in loaded_files:
        os.remove(cache_file)
def _path_is_valid(self, input_dir, mirror_dirs, filename):
    """
    Decide whether `filename` in `input_dir` is a valid image to process, updating the
    skipped/valid counters accordingly.

    :param input_dir: Directory containing the candidate file.
    :param mirror_dirs: Mirror (output) directories; the first is checked for an existing webp.
    :param filename: Candidate file name.
    :return: True if the file should be processed, False otherwise.
    :rtype: bool
    """
    # Wrong extension: not a candidate at all.
    if not filename.endswith(self.ext):
        return False

    input_filepath = os.path.join(input_dir, filename)
    # Unreadable files are logged and skipped.
    if not os.access(input_filepath, os.R_OK):
        LOGGER.info(__name__, f"Could not read image file '{input_filepath}'")
        return False

    if self.skip_webp:
        webp_path = os.path.join(mirror_dirs[0], self._to_webp(filename))
        if os.path.exists(webp_path):
            # A mask already exists for this image; count it as skipped.
            LOGGER.debug(
                __name__,
                f"Mask already found for '{input_filepath}' at '{webp_path}'."
            )
            self.n_skipped_images += 1
            return False

    self.n_valid_images += 1
    return True