Beispiel #1
0
def get_results(coco, imgs_dir):
    """
    Run the masker on every image found under `imgs_dir` and collect the results per image ID.

    :param coco: COCO object representing the dataset.
    :type coco: COCO
    :param imgs_dir: Path to base directory with images to use for evaluation.
    :type imgs_dir: str
    :return: Masking results. Image IDs are keys, and masking results (output from `Masker.mask`) are values.
    :rtype: dict
    """
    LOGGER.info(__name__, "Building results.")

    walker = TreeWalker(imgs_dir, [], skip_webp=False, precompute_paths=True)
    image_iterator = iter(get_tf_dataset(walker))

    # Reverse lookup table: file name -> COCO image ID.
    image_id_by_filename = {}
    for image_id, image_info in coco.imgs.items():
        image_id_by_filename[image_info["file_name"]] = image_id

    masker = Masker()
    results = {}

    # NOTE(review): presumably the dataset yields images in the same order as `walker.walk()` - the two are consumed
    # in lockstep below.
    for count, paths in enumerate(walker.walk(), start=1):
        started = time.time()

        image = next(image_iterator)
        mask_results = masker.mask(image)
        image_id = image_id_by_filename[paths.filename]
        results[image_id] = mask_results

        elapsed = time.time() - started
        LOGGER.info(__name__, f"Processed image {count}/{walker.n_valid_images} in {round(elapsed, 2)} s. "
                              f"File: {paths.filename}")
    return results
Beispiel #2
0
def _copy_file(source_file, destination_file):
    """
    Copy `source_file` to `destination_file` with `copy2`. A warning is logged first when the destination exists.

    :param source_file: Path to the file to copy.
    :type source_file: str
    :param destination_file: Path the file is copied to. An existing file at this path is overwritten.
    :type destination_file: str
    """
    destination_exists = os.path.exists(destination_file)
    if destination_exists:
        LOGGER.warning(__name__, f"Archive file {destination_file} already exists. The existing file will be "
                                 f"overwritten.")
    copy2(source_file, destination_file)
def wait_until_path_is_found(paths,
                             retry_interval=config.file_access_retry_seconds,
                             timeout=config.file_access_timeout_seconds):
    """
    Block until every element of `paths` exists, checking again every `retry_interval` seconds.

    The waited time is the sum of the sleep intervals, not wall-clock time. Once that sum exceeds `timeout` while at
    least one path is still missing, a `PathNotReachableError` is raised.

    :param paths: Iterable where each element is a string of paths. The elements can also be `bytes`. Any value which
                  is not a list, tuple or `np.ndarray` is treated as a single path.
    :type paths: list of str | tuple of str | np.ndarray
    :param retry_interval: Number of seconds to wait between each retry.
    :type retry_interval: int
    :param timeout: Total number of seconds to wait.
    :type timeout: int
    :return: 0, if the existence of all paths is confirmed before the timeout is reached.
    :rtype: int
    :raises PathNotReachableError: If the timeout is exceeded before all paths exist.
    """
    # Wrap a single path so that the existence check always receives a collection.
    if not isinstance(paths, (list, tuple, np.ndarray)):
        paths = [paths]

    waited_seconds = 0
    while True:
        if all_exists(paths):
            return 0

        time.sleep(retry_interval)
        waited_seconds += retry_interval

        if waited_seconds > timeout:
            raise PathNotReachableError(f"At least one of the paths in {paths} could not be reached in {timeout}s. "
                                        f"Aborting.")

        LOGGER.warning(__name__, f"At least one of the paths in {paths} could not be reached. Retrying.")
Beispiel #4
0
def initialize():
    """
    Set up logging, parse the command line arguments, and create the objects needed to process JSON files.

    When `args.log_folder` is given, the folder is created if needed and a log file named from
    `config.log_file_name` (timestamp and hostname filled in) is registered with `LOGGER`.

    :return: Tree walker over the `json`-files in `args.input_dir`, and a database client for `args.table_name`.
    :rtype: tuple
    """
    logging.basicConfig(format=LOGGER.fmt, datefmt=LOGGER.datefmt, level=logging.DEBUG)
    set_excepthook([logger_excepthook])

    args = get_args()

    if args.log_folder is not None:
        os.makedirs(args.log_folder, exist_ok=True)
        log_file_name = config.log_file_name.format(datetime=datetime.now().strftime("%Y-%m-%d_%H%M%S"),
                                                    hostname=gethostname())
        LOGGER.set_log_file(os.path.join(args.log_folder, log_file_name))

    walker = TreeWalker(args.input_dir, [], skip_webp=False, precompute_paths=True, ext="json")
    client = DatabaseClient(table_name=args.table_name,
                            max_n_accumulated_rows=config.db_max_n_accumulated_rows,
                            max_n_errors=config.db_max_n_errors,
                            max_cache_size=config.db_max_cache_size,
                            enable_cache=False)
    return walker, client
Beispiel #5
0
def initialize():
    """
    Set up logging, parse the command line arguments, and create the tree walker for the input folder.

    When `args.log_folder` is given, the folder is created if needed and a log file named from
    `config.log_file_name` (timestamp and hostname filled in) is registered with `LOGGER`. The output folder is
    created if it does not exist.

    :return: Tree walker over `args.input_folder`, with `args.output_folder` as its only mirror folder.
    :rtype: TreeWalker
    """
    logging.basicConfig(format=LOGGER.fmt, datefmt=LOGGER.datefmt, level=logging.DEBUG)
    set_excepthook([logger_excepthook])

    args = get_args()

    if args.log_folder is not None:
        log_dir = os.path.abspath(args.log_folder)
        os.makedirs(args.log_folder, exist_ok=True)
        log_file_name = config.log_file_name.format(datetime=datetime.now().strftime("%Y-%m-%d_%H%M%S"),
                                                    hostname=gethostname())
        LOGGER.set_log_file(os.path.join(log_dir, log_file_name))

    # The walker receives absolute paths for both the input and the mirror folder.
    input_dir = os.path.abspath(args.input_folder)
    output_dir = os.path.abspath(args.output_folder)
    os.makedirs(args.output_folder, exist_ok=True)

    return TreeWalker(input_dir, [output_dir], skip_webp=False, precompute_paths=True)
Beispiel #6
0
    def get(self):
        """
        Fetch the worker's result.

        With a pool, the result is retrieved from the asynchronous worker and validated with `self.result_is_valid`.
        Exceptions listed in `self.worker_exceptions` are passed to `self.handle_error`, and `ERROR_RETVAL` is
        returned instead of a result. Without a pool, `self.async_worker` already holds the result.

        :return: Return value from `self.async_func`, or `ERROR_RETVAL` if a worker exception was handled.
        :rtype:
        """
        if self.pool is None:
            # The execution was synchronous, so `self.async_worker` is the result itself.
            result = self.async_worker
        else:
            try:
                result = self.async_worker.get()
                assert self.result_is_valid(result), f"Invalid result: '{result}'"
            except self.worker_exceptions as err:
                self.handle_error(err)
                return ERROR_RETVAL

        LOGGER.debug(__name__, self.finished_message.format(image_file=self.paths.input_file))
        return result
Beispiel #7
0
    def _finish_image(self, paths, exif_result):
        """
        Wrap up the processing of a single image. In order, this function will:

        - Hand `exif_result` to the database client, if a client is active and a result is available.
        - Remove the cache file for the image.
        - If `config.delete_input == True`: wait for the input image to be reachable, delete it, and remove input
          folders which became empty (never the base input folder).
        - Increment the counter of completed images.

        :param paths: Paths object representing the image file.
        :type paths: src.io.TreeWalker.Paths
        :param exif_result: JSON metadata file contents. Will be used to write to the database if database writing is
                            enabled.
        :type exif_result: dict
        """
        has_database_client = self.database_client is not None
        if has_database_client and exif_result is not None:
            self.database_client.add_row(exif_result)

        paths.remove_cache_file()

        if config.delete_input:
            wait_until_path_is_found(paths.input_file)
            os.remove(paths.input_file)
            LOGGER.debug(__name__, f"Input file removed: {paths.input_file}")

            # Prune the now possibly empty input folder(s), stopping at the base input folder.
            remove_empty_folders(start_dir=paths.input_dir, top_dir=paths.base_input_dir)

        self.n_completed += 1
Beispiel #8
0
    def process_image(self, image, paths):
        """
        Mask `image`, and hand the image and its masking results over to the workers.

        :param image: Input image. Must be a 4D color image tensor with shape (1, height, width, 3)
        :type image: tf.python.framework.ops.EagerTensor
        :param paths: Paths object representing the image file.
        :type paths: src.io.TreeWalker.Paths
        """
        # Time only the masking step.
        tic = time.time()
        mask_results = self.masker.mask(image)
        elapsed = format(time.time() - tic, ".3f")
        LOGGER.info(__name__, f"Masked image in {elapsed} s. File: {paths.input_file}")

        # The workers receive a numpy array, so anything else is converted first.
        if not isinstance(image, np.ndarray):
            image = image.numpy()

        # Make room for new workers if the maximum number of workers has been reached.
        worker_limit_reached = len(self.workers) >= self.max_num_async_workers
        if worker_limit_reached:
            self._wait_for_workers()

        self._spawn_workers(paths, image, mask_results)
Beispiel #9
0
def get_mappenavn(image_path, exif):
    """
    Build the folder name (`mappenavn`) for an image by filling in the `config.exif_mappenavn` template.

    The template receives the date parts of `exif["exif_tid"]`, a number of road reference fields from `exif`, and
    the image's directory path relative to `config.exif_top_dir`.

    :param image_path: Path to the image. Split on `os.sep` to locate `config.exif_top_dir`.
    :type image_path: str
    :param exif: Parsed EXIF data. Must contain the `exif_*` keys read below.
    :type exif: dict
    :return: Formatted folder name.
    :rtype: str
    :raises AssertionError: If the formatted folder name still contains `{` or `}`.
    """
    dirs = image_path.split(os.sep)[:-1]
    if config.exif_top_dir in dirs:
        # The relative path is always joined with forward slashes, regardless of the OS separator.
        rel_path = "/".join(dirs[(dirs.index(config.exif_top_dir) + 1):])
    else:
        LOGGER.warning(
            __name__,
            f"Top directory '{config.exif_top_dir}' not found in image path '{image_path}'. "
            f"'rel_path' will be empty")
        rel_path = ""

    timestamp = iso8601.parse_date(exif["exif_tid"])
    format_values = dict(aar=timestamp.year,
                         maaned=timestamp.month,
                         dag=timestamp.day,
                         fylke=str(exif["exif_fylke"]).zfill(2),
                         vegkat=exif["exif_vegkat"],
                         vegstat=exif["exif_vegstat"],
                         vegnr=exif["exif_vegnr"],
                         hp=exif["exif_hp"],
                         meter=exif["exif_meter"],
                         feltkode=exif["exif_feltkode"],
                         strekningreferanse=exif["exif_strekningreferanse"],
                         relative_input_dir=rel_path)
    folder_name = config.exif_mappenavn.format(**format_values)
    # Report the template which was actually formatted (`config.exif_mappenavn`), so that the message matches the
    # failing value.
    assert "{" not in folder_name and "}" not in folder_name, f"Invalid `Mappenavn`: {config.exif_mappenavn} -> " \
                                                              f"{folder_name}."
    return folder_name
Beispiel #10
0
    def _maybe_restart_worker(self, paths, worker):
        """
        Restart `worker` unless it has already been started more than `self.max_worker_starts` times. In that case,
        an error is logged (with `save=True` and `email=True`) and the worker is left alone.

        :param paths: Paths object representing the image file.
        :type paths: src.io.TreeWalker.Paths
        :param worker: Worker to maybe restart
        :type worker: src.Workers.BaseWorker
        :return: True if worker was restarted, False otherwise
        :rtype: bool
        """
        worker_name = worker.__class__.__name__

        if worker.n_starts <= self.max_worker_starts:
            worker.start()
            LOGGER.debug(__name__, f"Restarted {worker_name} for image: {paths.input_file}.")
            return True

        # The start budget is exhausted: give up on this worker and report the failure.
        LOGGER.error(__name__,
                     f"{worker_name} failed for image: {paths.input_file}.",
                     save=True,
                     email=True,
                     email_mode="error")
        return False
Beispiel #11
0
 def remove_cache_file(self):
     """
     Delete the cache file at `self.cache_file`. A warning is logged when no such file exists.
     """
     cache_file_missing = not os.path.isfile(self.cache_file)
     if cache_file_missing:
         LOGGER.warning(__name__, f"Attempted to remove cache file '{self.cache_file}', but is does not exist.")
         return

     os.remove(self.cache_file)
Beispiel #12
0
def ID(json_data):
    """
    Get the image ID for `json_data`.

    The value stored under the key 'bildeid' is used when it is present and not None. Otherwise a warning is logged,
    and the ID is computed from the contents of `json_data` with `get_deterministic_id`.

    :param json_data: JSON metadata for the image.
    :type json_data: dict
    :return: Image ID
    """
    stored_id = json_data.get("bildeid")
    if stored_id is not None:
        return stored_id

    # No usable 'bildeid' in the JSON data, so the ID is derived from the contents instead.
    LOGGER.warning(__name__, "Could not find 'bildeid' in JSON data. The ID will be created from the contents of "
                             "the JSON data instead.")
    return get_deterministic_id(json_data)
Beispiel #13
0
    def _init_model(self):
        """
        Load the saved model and store its `serving_default` signature in `self.model`.

        If `<config.MODEL_PATH>/saved_model` does not exist, the model is downloaded with `download_model` first.
        """
        saved_model_path = os.path.join(config.MODEL_PATH, "saved_model")

        model_is_missing = not os.path.exists(saved_model_path)
        if model_is_missing:
            LOGGER.info(__name__, "Could not find the model graph file. Downloading...")
            download_model(config.DOWNLOAD_BASE, config.MODEL_NAME, config.MODEL_PATH, extract_all=True)
            LOGGER.info(__name__, "Model graph file downloaded.")

        loaded_model = tf.saved_model.load(saved_model_path)
        self.model = loaded_model.signatures["serving_default"]
Beispiel #14
0
def send_mail(message_type, etype=None, ex=None, tb=None, msg=None):
    """
    Send an email of type `message_type`. The sender, receiver(s) and smtp-server are read from `email_config`.

    Failures while sending are logged as errors and do not propagate.

    :param message_type: Type of message. This determines the subject and contents of the message. Must be one of

                         - `critical`: This is suitable for critical errors which cause the program to exit abnormally.
                                       A critical message requires `etype`, `ex` and `tb` to be specified, and will
                                       include the exception type in the subject, and the traceback in the contents.
                                       Any `msg` given is replaced by the formatted traceback.
                         - `error`: This message is suitable for processing errors which do not cause the program to
                                    exit.
                         - `finished`: This message type should be used when the program exits normally.

    :type message_type: str
    :param etype: Exception type
    :type etype: type | None
    :param ex: Exception instance
    :type ex: BaseException | None
    :param tb: Traceback object
    :type tb: traceback.traceback | None
    :param msg: Message to include in the contents of the email.
    :type msg: str | None
    :raises ValueError: If `message_type` is not one of the types listed above.
    """
    # Reject unknown message types before doing any work.
    if message_type not in ("critical", "error", "finished"):
        raise ValueError(f"Function `email.send_mail` got invalid message type: {message_type}")

    if message_type == "critical":
        msg = "".join(traceback.format_exception(etype, ex, tb))
        subject = CRITICAL_SUBJECT.format(etype=etype.__name__, hostname=gethostname())
    elif message_type == "error":
        subject = ERROR_SUBJECT.format(hostname=gethostname())
    else:
        subject = FINISHED_SUBJECT.format(hostname=gethostname())

    message = create_base_message(subject, msg)

    # Sending is best-effort: a failure is logged, and execution continues.
    try:
        with smtplib.SMTP(email_config.smtp_host, email_config.port) as smtp:
            smtp.sendmail(from_addr=email_config.from_address, to_addrs=email_config.to_addresses, msg=message)
    except Exception as err:
        LOGGER.error(__name__, f"Got error '{str(err)}' when attempting to send e-mail.")
Beispiel #15
0
def clear_cache_file(file_path):
    """
    Clear the output files for the unfinished image whose cache file is located at `file_path`, and then remove the
    cache file itself.

    :param file_path: Path to cache file for unfinished image
    :type file_path: str
    :raises PathNotReachableError: If the base directories named in the cache file cannot be reached.
    """
    try:
        with open(file_path, "r") as cache_fp:
            cache_info = json.load(cache_fp)
    except json.JSONDecodeError:
        # A cache file which cannot be decoded was most likely cut short because the program was killed while writing
        # it. Since cache file writing is the first step when exporting the output images, there are no output images
        # to clean up. The (incomplete) cache file is removed, and we are done.
        os.remove(file_path)
        return

    # Rebuild the `src.io.TreeWalker.Paths` object for the image from the cached fields.
    path_fields = ("base_input_dir", "base_mirror_dirs", "input_dir", "mirror_dirs", "filename")
    paths = Paths(**{field: cache_info[field] for field in path_fields})

    # The base directories must be reachable before the output files can be checked.
    try:
        wait_until_path_is_found([paths.base_input_dir, *paths.base_mirror_dirs])
    except PathNotReachableError as err:
        message = (f"The directories pointed to by the cache file '{file_path}' could not be found. If"
                   f" they were deleted manually, delete this cache file and run the program again")
        raise PathNotReachableError(message) from err

    # Remove the expected output files which are present.
    for expected_file in get_expected_files(paths):
        if not os.path.isfile(expected_file):
            LOGGER.debug(__name__, f"Could not find file '{expected_file}' for unfinished image '{paths.input_file}'")
            continue

        os.remove(expected_file)
        LOGGER.info(__name__, f"Removed file '{expected_file}' for unfinished image '{paths.input_file}'")

    os.remove(file_path)
Beispiel #16
0
    def _update_rows(self, cursor, rows):
        """
        Run `self.table.update_sql` for all `rows`, and add the number of updated rows to `self.total_updated`.

        :param cursor: Database cursor used to execute the statement.
        :param rows: Rows to update.
        :type rows: list
        :return: Batch errors reported by the cursor for the rows where the update failed.
        :rtype: list
        """
        LOGGER.info(__name__, f"Attempting to update {len(rows)} row(s) in the database.")

        # With `batcherrors=True`, the valid rows are updated normally, and the failures are collected afterwards.
        cursor.executemany(self.table.update_sql, rows, batcherrors=True)
        errors = list(cursor.getbatcherrors())

        # Every row without a batch error counts as updated.
        n_updated = len(rows) - len(errors)
        self.total_updated += n_updated

        LOGGER.info(__name__, f"Successfully updated {n_updated} row(s) in the database.")
        return errors
Beispiel #17
0
def clear_cache():
    """
    Clear the cache directory. Each JSON file in the cache directory is expected to represent an image for which the
    export process was aborted due to a critical error. Every such file is passed to `clear_cache_file`.
    """
    cache_dir = config.CACHE_DIRECTORY

    # A missing cache directory probably means that the application has not been run on this machine before, so
    # there is nothing to clear.
    if not os.path.exists(cache_dir):
        return

    LOGGER.info(__name__, "Clearing cache files")

    cache_filenames = [name for name in os.listdir(cache_dir) if name.endswith(".json")]
    for name in cache_filenames:
        clear_cache_file(os.path.join(cache_dir, name))

    LOGGER.info(__name__, f"Found and cleared {len(cache_filenames)} cache file(s)")
Beispiel #18
0
def get_exif(img, image_path):
    """
    Parse the EXIF data from `img` into a copy of `EXIF_TEMPLATE`.

    When the image has no EXIF data, a warning is logged and the metadata is reconstructed from `image_path` (if it
    is not None). In both cases the keys `bildeid` and `mappenavn` are added at the end.

    :param img: Input image
    :type img: PIL.Image
    :param image_path: Path to input image. Used to recreate metadata when EXIF-header is missing
    :type image_path: str
    :return: EXIF data
    :rtype: dict
    :raises AssertionError: If EXIF data is present, but has no `ImageProperties` entry.
    """
    # Values from the EXIF header are inserted into a copy, so that the template itself is not modified here.
    parsed_exif = EXIF_TEMPLATE.copy()

    raw_exif = img._getexif()

    if raw_exif is None:
        LOGGER.warning(__name__, "No EXIF data found for image. Attempting to reconstruct data from image path.")
        if image_path is not None:
            get_metadata_from_path(image_path, parsed_exif)
    else:
        # Convert the integer keys in the exif dict to text
        labeled = label_exif(raw_exif)

        # The `ImageProperties` XML is required.
        image_properties_xml = labeled.get("ImageProperties")
        assert image_properties_xml is not None, "Unable to get key 40055:`ImageProperties` from EXIF."
        process_image_properties(image_properties_xml, parsed_exif)

        # The `ReflinkInfo` XML is passed on even when it is missing (None).
        process_reflink_info(labeled.get("ReflinkInfo"), parsed_exif)

        # Title of image.
        parsed_exif["exif_xptitle"] = labeled.get("XPTitle", b"").decode("utf16")

    # Get a deterministic ID from the exif data, then insert the folder name.
    parsed_exif["bildeid"] = get_deterministic_id(parsed_exif)
    parsed_exif["mappenavn"] = get_mappenavn(image_path, parsed_exif)
    return parsed_exif
Beispiel #19
0
    def _insert_rows(self, cursor, rows):
        """
        Run `self.table.insert_sql` for all `rows`, and add the number of inserted rows to `self.total_inserted`.

        :param cursor: Database cursor used to execute the statement.
        :param rows: Rows to insert.
        :type rows: list
        :return: Batch errors reported by the cursor for the rows where the insertion failed.
        :rtype: list
        """
        LOGGER.info(__name__, f"Attempting to insert {len(rows)} row(s) into the database.")

        # With `batcherrors=True`, the rows which do not violate the unique constraint are inserted normally. The
        # rows which do violate the constraint are not inserted, and are reported as batch errors instead.
        cursor.executemany(self.table.insert_sql, rows, batcherrors=True)
        errors = list(cursor.getbatcherrors())

        # Every row without a batch error counts as inserted.
        n_inserted = len(rows) - len(errors)
        self.total_inserted += n_inserted

        LOGGER.info(__name__, f"Successfully inserted {n_inserted} row(s) into the database.")
        return errors
Beispiel #20
0
    def handle_errors(self, errors, rows, action="writing to"):
        """
        Count and log the errors caused when running `cursor.executemany`.

        :param errors: Errors from `cursor.getbatcherrors`
        :type errors: list
        :param rows: Rows which caused the errors. Not used by this method.
        :type rows: list of dict
        :param action: Optional database action for the error message.
        :type action: str
        """
        n_errors = len(errors)
        self.total_errors += n_errors

        # One header line, followed by one line per error message.
        header = f"Got {n_errors} error(s) while {action} the database:\n"
        msg = header + "\n".join(err.message for err in errors)

        LOGGER.error(__name__, msg, save=False, email=True, email_mode="error")
Beispiel #21
0
def check_all_files_written(paths):
    """
    Check that all expected output files for a given image exist, as reported by `find_missing_files`.

    Missing files are passed on to `_handle_missing_files`. When nothing is missing, an info message is logged.

    :param paths: Paths object representing the input image
    :type paths: src.io.TreeWalker.Paths
    :return: True if all expected files were found. False otherwise
    :rtype: bool
    """
    missing_files = find_missing_files(paths)

    if not missing_files:
        LOGGER.info(__name__, f"All output files written for image: {paths.input_file}")
        return True

    _handle_missing_files(paths, missing_files)
    return False
Beispiel #22
0
def masker_category_to_annotation_category(masker_cat, coco):
    """
    Translate a masker category to the annotation category with the same name.

    :param masker_cat: Masker category
    :type masker_cat: int
    :param coco: COCO object representing the dataset
    :type coco: COCO
    :return: Annotation category, or None if no annotation category has the masker category's name.
    :rtype: int | None
    """
    category_name = LABEL_MAP[int(masker_cat)]

    # The first annotation category with a matching name wins.
    for annotation_id, annotation_cat in coco.cats.items():
        if annotation_cat["name"] != category_name:
            continue
        return annotation_id

    LOGGER.info(__name__, f"Category {masker_cat} ({category_name}) not found in annotations. This detection will be "
                          f"ignored.")
    return None
Beispiel #23
0
def remove_empty_folders(start_dir, top_dir):
    """
    Bottom-up removal of empty folders. If `start_dir` is empty, it will be removed. If `start_dir`'s parent directory
    is empty after removing `start_dir`, it too will be removed. This process is continued until a parent is non-empty,
    or the current directory is equal to `top_dir`. (The `top_dir` directory will not be removed).

    Both paths are normalized with `os.path.normpath` before they are compared. A trailing separator or a redundant
    path component in one of the arguments can therefore not make the walk miss `top_dir` and continue removing
    folders above it. The containment check works on whole path components, so a sibling such as `/a/bc` is not
    accepted as being inside `/a/b`.

    NOTE: Normalization does not make relative paths absolute, and does not resolve symbolic links. Use full paths
    when using this function.

    :param start_dir: Path to bottom directory to remove if empty.
    :type start_dir: str
    :param top_dir: Top directory. Only folders under this will be deleted.
    :type top_dir: str
    :raises AssertionError: If `start_dir` is neither `top_dir` itself, nor located below it.
    """
    top_dir = os.path.normpath(top_dir)
    current_dir = os.path.normpath(start_dir)

    # `os.path.join(top_dir, "")` appends exactly one separator, so the prefix test matches whole components only.
    top_prefix = os.path.join(top_dir, "")
    # Raised explicitly instead of using `assert`, so that this safety check is not stripped when running with `-O`.
    if current_dir != top_dir and not current_dir.startswith(top_prefix):
        raise AssertionError(f"remove_empty_folders: Invalid top directory '{top_dir}' for start "
                             f"directory '{start_dir}'")

    # The cheap comparison comes first, which means that `top_dir` itself is never listed.
    while current_dir != top_dir and not os.listdir(current_dir):
        os.rmdir(current_dir)
        LOGGER.debug(__name__, f"Input folder removed: {current_dir}")
        current_dir = os.path.dirname(current_dir)
Beispiel #24
0
def check_config(args):
    """
    Check that the specified configuration variables are valid.

    :param args: Parsed command line arguments. Only `args.archive_folder` is read here.
    :raises ValueError: If `archive_json` or `archive_mask` is enabled without the corresponding `remote_*` option.
    :raises AssertionError: If `delete_input` is enabled without an archive folder, or if `config.log_level` is not
                            a valid log level.
    """
    # Archiving a file type requires that the same file type is written remotely.
    for file_type in ("json", "mask"):
        archive_enabled = getattr(config, f"archive_{file_type}")
        if archive_enabled and not getattr(config, f"remote_{file_type}"):
            raise ValueError(f"Parameter 'archive_{file_type}' requires remote_{file_type}=True.")

    if config.delete_input:
        LOGGER.warning(__name__, "Parameter 'delete_input' is enabled. This will permanently delete the original"
                                 " image from the input directory!")
        assert args.archive_folder, "Argument 'delete_input' requires a valid archive directory to be specified."

    email_requested = config.uncaught_exception_email or config.processing_error_email or config.finished_email
    if email_requested:
        # Importing the email_sender module checks if the `email_config.py` file is present. If it is not, the import
        # raises an exception prompting the user to create the file.
        import src.email_sender

    valid_log_levels = ["DEBUG", "INFO", "WARNING", "ERROR"]
    assert config.log_level in valid_log_levels, f"config.log_level must be one of {valid_log_levels}"
Beispiel #25
0
    def create_row(self, json_dict):
        """
        Build a database row from `json_dict`, with one entry per column in `self.columns`.

        A column whose `get_value` raises gets the value None, and a warning is logged.

        :param json_dict: EXIF data
        :type json_dict: dict
        :return: Dict representing the database row.
        :rtype: dict
        """
        row = {}
        for column in self.columns:
            # None is the fallback which is kept when the value cannot be extracted.
            value = None
            try:
                value = column.get_value(json_dict)
            except Exception as err:
                LOGGER.warning(__name__, f"Got error '{type(err).__name__}: {err}' while getting value for database "
                                         f"column {column.name}. Value will be set to None")
            row[column.name] = value
        return row
Beispiel #26
0
    def __init__(self,
                 input_folder,
                 mirror_folders,
                 skip_webp=True,
                 precompute_paths=True,
                 ext="jpg"):
        """
        Store the search configuration and, when `precompute_paths` is set, collect all paths immediately.

        :param input_folder: Folder to search for files in.
        :param mirror_folders: Mirror folders; stored as given.
        :param skip_webp: Stored as `self.skip_webp`.
        :param precompute_paths: If true, `self.paths` is filled from `self._walk()` right away. Otherwise it is None.
        :param ext: File extension to search for.
        """
        LOGGER.info(__name__, f"Searching for {ext}-files in '{input_folder}'.")

        self.input_folder = input_folder
        self.mirror_folders = mirror_folders
        self.skip_webp = skip_webp
        self.precompute_paths = precompute_paths
        self.ext = ext

        # Counters reported below after precomputing the paths.
        self.n_valid_images = 0
        self.n_skipped_images = 0

        if not self.precompute_paths:
            self.paths = None
            return

        self.paths = list(self._walk())
        LOGGER.info(__name__, f"Found {self.n_valid_images} valid {ext}-files.")
        if self.n_skipped_images > 0:
            LOGGER.info(__name__, f"Found {self.n_skipped_images} files with associated webp-files. "
                                  f"These will be skipped.")
Beispiel #27
0
    def _path_is_valid(self, input_dir, mirror_dirs, filename):
        """
        Decide whether the file `filename` in `input_dir` should be processed, and update the file counters.

        A file is rejected when it does not end with `self.ext`, when it is not readable, or (only with
        `self.skip_webp`) when the corresponding webp-file already exists in the first mirror directory.

        :param input_dir: Directory containing the file.
        :type input_dir: str
        :param mirror_dirs: Mirror directories. Only the first element is inspected.
        :type mirror_dirs: list of str
        :param filename: Name of the file.
        :type filename: str
        :return: True if the file is valid, False otherwise.
        :rtype: bool
        """
        has_expected_extension = filename.endswith(self.ext)
        if not has_expected_extension:
            return False

        input_filepath = os.path.join(input_dir, filename)
        is_readable = os.access(input_filepath, os.R_OK)
        if not is_readable:
            LOGGER.info(__name__, f"Could not read image file '{input_filepath}'")
            return False

        if self.skip_webp:
            webp_path = os.path.join(mirror_dirs[0], self._to_webp(filename))
            mask_exists = os.path.exists(webp_path)
            if mask_exists:
                LOGGER.debug(__name__, f"Mask already found for '{input_filepath}' at '{webp_path}'.")
                self.n_skipped_images += 1
                return False

        self.n_valid_images += 1
        return True
Beispiel #28
0
def clear_db_cache():
    """
    Load all cached rows (`.pkl`-files) from the database cache directory and insert them into the database. The
    cache files are deleted afterwards. If the insertion raises, the cache files are kept.

    :raises DatabaseError: If inserting the cached rows into the database fails.
    """
    if not os.path.isdir(DB_CACHE_DIR):
        return

    cached_rows = []
    cache_files = []
    for filename in os.listdir(DB_CACHE_DIR):
        if filename.endswith(".pkl"):
            cache_file = os.path.join(DB_CACHE_DIR, filename)
            LOGGER.debug(__name__, f"Found database cache file: {cache_file}")

            # NOTE(review): `pickle.load` can execute arbitrary code. This is only safe as long as the cache
            # directory is written exclusively by this application - confirm.
            with open(cache_file, "rb") as cache_fp:
                row = pickle.load(cache_fp)

            cached_rows.append(row)
            cache_files.append(cache_file)

    # Nothing to insert.
    if not cached_rows:
        return

    with DatabaseClient() as cli:
        cli._ignore_error_check = True
        try:
            cli.insert_or_update_rows(cached_rows)
        except Exception as err:
            raise DatabaseError(f"Got error '{err}' when inserting cached rows into the database.") from err

    # The rows were handed to the database without an exception, so the cache files can be removed.
    for cache_file in cache_files:
        os.remove(cache_file)
Beispiel #29
0
def main():
    """
    Create the JSON file for every image found by the tree walker.

    Each image is handled by an `EXIFWorker`. Exceptions listed in `PROCESSING_EXCEPTIONS` are logged as errors, and
    processing continues with the next image.
    """
    tree_walker = initialize()

    for i, paths in enumerate(tree_walker.walk()):
        count_str = f"{i + 1} of {tree_walker.n_valid_images}"
        LOGGER.info(__name__, LOG_SEP)
        LOGGER.info(__name__, f"Iteration: {count_str}.")
        LOGGER.info(__name__, f"Processing file {paths.input_file}")

        try:
            worker = EXIFWorker(None, paths, None)
            worker.get()
        except PROCESSING_EXCEPTIONS as err:
            # `LOGGER` methods take the logger name as their first argument, as in every other call in this file.
            # Without it, the message would be passed in place of the name.
            LOGGER.error(
                __name__,
                f"Got error '{type(err).__name__}: {str(err)}' when creating JSON from image. "
                f"File: {paths.input_file}")
Beispiel #30
0
    def insert_accumulated_rows(self):
        """
        Insert all accumulated rows into the database.

        On success, the list of accumulated rows is reset. When `self.enable_cache` is set, the cache files listed in
        `self.cached_rows` are deleted as well. A warning is logged for each listed cache file which does not exist.

        :raises DatabaseError: If a `cx_Oracle.DatabaseError` is raised. The original error is attached as the cause.
        """
        try:
            # Insert the rows
            self.insert_or_update_rows(self.accumulated_rows)
            # Clear the list of accumulated rows
            self.accumulated_rows = []

            if self.enable_cache:
                # Each entry is popped before its file is touched, so the list only holds unprocessed files if
                # an error interrupts the loop.
                while self.cached_rows:
                    cache_file = self.cached_rows.pop(0)
                    if os.path.exists(cache_file):
                        os.remove(cache_file)
                    else:
                        LOGGER.warning(
                            __name__,
                            f"Could not find cache file to remove: {cache_file}"
                        )

        except cxo.DatabaseError as err:
            # Chain explicitly, so that the traceback marks the driver error as the direct cause.
            raise DatabaseError(f"cx_Oracle.DatabaseError: {str(err)}") from err