Example 1
    def process_image(self, image, paths):
        """
        Run the processing pipeline for `image`.

        :param image: Input image. Must be a 4D color image tensor with shape (1, height, width, 3)
        :type image: tf.python.framework.ops.EagerTensor
        :param paths: Paths object representing the image file.
        :type paths: src.io.TreeWalker.Paths
        """
        start_time = time.time()
        # Compute the detected objects and their masks.
        mask_results = self.masker.mask(image)
        time_delta = "{:.3f}".format(time.time() - start_time)
        LOGGER.info(
            __name__,
            f"Masked image in {time_delta} s. File: {paths.input_file}")

        # Convert the image to a numpy array
        if not isinstance(image, np.ndarray):
            image = image.numpy()

        # If we have reached the maximum number of workers, wait for them to finish.
        if len(self.workers) >= self.max_num_async_workers:
            self._wait_for_workers()
        # Create workers for the current image.
        self._spawn_workers(paths, image, mask_results)
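
For context, a minimal driver sketch showing how `process_image` is typically invoked. This is a hedged illustration: `Masker`, `TreeWalker`, `get_tf_dataset` and `ImageProcessor` are taken from the other examples here, while the directory paths and worker count are placeholder assumptions.

# Hypothetical usage sketch; directory paths and worker count are placeholders.
masker = Masker()
tree_walker = TreeWalker("/data/in", ["/data/out"])
dataset_iterator = iter(get_tf_dataset(tree_walker))
image_processor = ImageProcessor(masker=masker, max_num_async_workers=2)

for paths in tree_walker.walk():
    img = next(dataset_iterator)  # 4D color image tensor with shape (1, height, width, 3)
    image_processor.process_image(img, paths)

image_processor.close()  # wait for the remaining async workers to finish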
Example 2
def get_results(coco, imgs_dir):
    """
    Get the masking results for all images in `imgs_dir`.

    :param coco: COCO object representing the dataset.
    :type coco: COCO
    :param imgs_dir: Path to base directory with images to use for evaluation.
    :type imgs_dir: str
    :return: Masking results. Image IDs are keys, and masking results (output from `Masker.mask`) are values.
    :rtype: dict
    """
    LOGGER.info(__name__, "Building results.")

    tree_walker = TreeWalker(imgs_dir, [], skip_webp=False, precompute_paths=True)
    dataset = get_tf_dataset(tree_walker)
    dataset_iterator = iter(dataset)

    filename_to_image_id = {img_dict["file_name"]: _id for _id, img_dict in coco.imgs.items()}
    masker = Masker()
    results = {}

    for i, paths in enumerate(tree_walker.walk()):
        tic = time.time()

        img = next(dataset_iterator)
        mask_results = masker.mask(img)
        image_id = filename_to_image_id[paths.filename]
        results[image_id] = mask_results

        dt = time.time() - tic
        LOGGER.info(__name__, f"Processed image {i+1}/{tree_walker.n_valid_images} in {round(dt, 2)} s. "
                              f"File: {paths.filename}")
    return results
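
A hedged usage sketch of `get_results`, assuming `coco` is a pycocotools `COCO` instance (the annotation-file and image-directory paths are placeholders):

# Hypothetical usage; the paths are placeholders.
from pycocotools.coco import COCO

coco = COCO("annotations/instances_val.json")
results = get_results(coco, "data/val_images")
# `results` maps each image ID to the output of `Masker.mask` for that image.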
Example 3
    def _init_model(self):
        """
        Initialize the TensorFlow graph.
        """
        saved_model_path = os.path.join(config.MODEL_PATH, "saved_model")

        # Download and extract the model if it is not already present on disk
        if not os.path.exists(saved_model_path):
            LOGGER.info(__name__, "Could not find the model graph file. Downloading...")
            download_model(config.DOWNLOAD_BASE, config.MODEL_NAME, config.MODEL_PATH, extract_all=True)
            LOGGER.info(__name__, "Model graph file downloaded.")

        model = tf.saved_model.load(saved_model_path)
        self.model = model.signatures["serving_default"]
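
The `serving_default` signature loaded above is callable as a concrete function. A minimal sketch of invoking it, assuming the surrounding class is the `Masker` from the other examples and that the exported model takes a single uint8 image batch (the input shape and output keys are assumptions):

# Hypothetical call into the loaded signature; input shape and output keys are assumptions.
import numpy as np
import tensorflow as tf

masker = Masker()
image = np.zeros((1, 480, 640, 3), dtype=np.uint8)  # placeholder input batch
detections = masker.model(tf.convert_to_tensor(image))
# For TF object detection exports the result is a dict, e.g. detections["detection_boxes"].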
Example 4
def clear_cache_file(file_path):
    """
    Clear the output files for the unfinished image whose cache file is located at `file_path`.

    :param file_path: Path to cache file for unfinished image
    :type file_path: str
    """
    # Read the JSON file
    try:
        with open(file_path, "r") as f:
            cache_info = json.load(f)
    except json.JSONDecodeError:
        # If we got a JSONDecodeError, it was most likely because the program was killed before it finished writing the
        # file. Since cache file writing is the first step when exporting the output images, we have no output images to
        # clean up. We therefore remove the (incomplete) cache file and continue.
        os.remove(file_path)
        return

    # Create a `src.io.TreeWalker.Paths` object representing the image
    paths = Paths(base_input_dir=cache_info["base_input_dir"],
                  base_mirror_dirs=cache_info["base_mirror_dirs"],
                  input_dir=cache_info["input_dir"],
                  mirror_dirs=cache_info["mirror_dirs"],
                  filename=cache_info["filename"])
    # Wait for the directories if they cannot be reached
    try:
        wait_until_path_is_found(
            [paths.base_input_dir, *paths.base_mirror_dirs])
    except PathNotReachableError as err:
        raise PathNotReachableError(
            f"The directories pointed to by the cache file '{file_path}' could not be found. If they were "
            f"deleted manually, delete this cache file and run the program again."
        ) from err

    # Remove any expected output files if they are present
    for expected_file in get_expected_files(paths):
        if os.path.isfile(expected_file):
            os.remove(expected_file)
            LOGGER.info(
                __name__,
                f"Removed file '{expected_file}' for unfinished image '{paths.input_file}'"
            )
        else:
            LOGGER.debug(
                __name__,
                f"Could not find file '{expected_file}' for unfinished image '{paths.input_file}'"
            )
    # Remove the cache file
    os.remove(file_path)
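
For reference, a cache file as read above would contain roughly the following (a hypothetical example: the values are placeholders, but the keys are exactly the ones consumed when building the `Paths` object):

# Hypothetical contents of a cache file, shown as the parsed `cache_info` dict.
cache_info = {
    "base_input_dir": "/data/in",
    "base_mirror_dirs": ["/data/out", "/data/archive"],
    "input_dir": "/data/in/trip_001",
    "mirror_dirs": ["/data/out/trip_001", "/data/archive/trip_001"],
    "filename": "image_0001.jpg",
}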
Example 5
    def _update_rows(self, cursor, rows):
        LOGGER.info(
            __name__,
            f"Attempting to update {len(rows)} row(s) in the database.")
        # Attempt to update the rows. When we have `batcherrors = True`, the valid rows will be updated normally.
        cursor.executemany(self.table.update_sql, rows, batcherrors=True)
        # Get the errors caused by the rows where the update failed.
        errors = cursor.getbatcherrors()

        # Add number of updated rows to total counter
        n_updated = len(rows) - len(errors)
        self.total_updated += n_updated

        LOGGER.info(
            __name__,
            f"Successfully updated {n_updated} row(s) in the database.")
        return errors
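
The error objects returned by cx_Oracle's `getbatcherrors()` expose `offset` (the index of the failing row within the batch) and `message`, so a caller can map failures back to the offending rows. A hedged caller-side sketch:

        # Hypothetical caller-side handling of the returned batch errors.
        errors = self._update_rows(cursor, rows)
        for error in errors:
            failed_row = rows[error.offset]  # `offset` indexes into `rows`
            LOGGER.error(__name__, f"Update failed: {error.message}. Row: {failed_row}")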
Example 6
def clear_cache():
    """
    Clear the cache directory. Each JSON file in the cache directory is expected to represent an image for which the
    export process was aborted due to a critical error. This function will clear the output files written for the
    unfinished image, and then delete the cache file.
    """
    # Return if we couldn't find a cache directory. This probably means that this is the first time the application is
    # run on this machine, so the cache directory has not been created yet.
    if not os.path.exists(config.CACHE_DIRECTORY):
        return

    LOGGER.info(__name__, "Clearing cache files")
    count = 0
    for filename in os.listdir(config.CACHE_DIRECTORY):
        if filename.endswith(".json"):
            clear_cache_file(os.path.join(config.CACHE_DIRECTORY, filename))
            count += 1
    LOGGER.info(__name__, f"Found and cleared {count} cache file(s)")
Example 7
    def _insert_rows(self, cursor, rows):
        LOGGER.info(
            __name__,
            f"Attempting to insert {len(rows)} row(s) into the database.")
        # Attempt to insert the rows into the database. When we have `batcherrors = True`, the rows which do not
        # violate the unique constraint will be inserted normally. The rows which do violate the constraint will
        # not be inserted.
        cursor.executemany(self.table.insert_sql, rows, batcherrors=True)
        # Get the errors for the rows where the insertion failed.
        errors = cursor.getbatcherrors()

        # Add number of inserted rows to total counter
        n_inserted = len(rows) - len(errors)
        self.total_inserted += n_inserted

        LOGGER.info(
            __name__,
            f"Successfully inserted {n_inserted} row(s) into the database.")
        return errors
Example 8
def masker_category_to_annotation_category(masker_cat, coco):
    """
    Convert from masker category to annotation category, using the category name.

    :param masker_cat: Masker category
    :type masker_cat: int
    :param coco: COCO object representing the dataset
    :type coco: COCO
    :return: Annotation category
    :rtype: int
    """
    masker_cat_name = LABEL_MAP[int(masker_cat)]
    for _id, cat_dict in coco.cats.items():
        if cat_dict["name"] == masker_cat_name:
            return _id

    LOGGER.info(__name__, f"Category {masker_cat} ({masker_cat_name}) not found in annotations. This detection will be "
                          f"ignored.")
    return None
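
A hedged illustration of the lookup above, using a stub in place of a real COCO object (all IDs and names are made up):

# Hypothetical illustration with a stub dataset; real IDs and names will differ.
from types import SimpleNamespace

stub_coco = SimpleNamespace(cats={7: {"name": "car"}})
# If LABEL_MAP maps masker category 3 to "car", the lookup returns 7; otherwise
# the detection is logged and None is returned.
annotation_cat = masker_category_to_annotation_category(3, stub_coco)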
Example 9
def check_all_files_written(paths):
    """
    Check that all files for a given image have been saved correctly. The list of checked files is determined by the
    File I/O parameters in `config`. If all expected output files exist, the cache file will be deleted; if
    `config.delete_input` is also True, the input image will be deleted as well.

    :param paths: Paths object representing the input image
    :type paths: src.io.TreeWalker.Paths
    :return: True if all expected files were found. False otherwise
    :rtype: bool
    """
    missing_files = find_missing_files(paths)
    if missing_files:
        _handle_missing_files(paths, missing_files)
        return False
    else:
        LOGGER.info(__name__,
                    f"All output files written for image: {paths.input_file}")
        return True
Example 10
    def __init__(self,
                 input_folder,
                 mirror_folders,
                 skip_webp=True,
                 precompute_paths=True,
                 ext="jpg"):
        LOGGER.info(__name__,
                    f"Searching for {ext}-files in '{input_folder}'.")
        self.input_folder = input_folder
        self.mirror_folders = mirror_folders
        self.skip_webp = skip_webp
        self.precompute_paths = precompute_paths
        self.ext = ext
        self.n_valid_images = self.n_skipped_images = 0

        if self.precompute_paths:
            self.paths = list(self._walk())
            LOGGER.info(__name__,
                        f"Found {self.n_valid_images} valid {ext}-files.")
            if self.n_skipped_images > 0:
                LOGGER.info(
                    __name__,
                    f"Found {self.n_skipped_images} files with associated webp-files. "
                    f"These will be skipped.")
        else:
            self.paths = None
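
The two modes above trade up-front scanning for laziness: with `precompute_paths=True` the whole tree is scanned in the constructor, so `n_valid_images` is known before iteration starts, while `precompute_paths=False` (enabled via `config.lazy_paths` in Example 15) defers discovery to `walk()`. A hedged sketch (directories are placeholders):

# Hypothetical usage; directories are placeholders.
lazy_walker = TreeWalker("/data/in", ["/data/out"], precompute_paths=False)
print(lazy_walker.paths)  # None; paths are generated on the fly by walk()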
Example 11
    def _path_is_valid(self, input_dir, mirror_dirs, filename):
        if not filename.endswith(self.ext):
            return False

        input_filepath = os.path.join(input_dir, filename)
        if not os.access(input_filepath, os.R_OK):
            LOGGER.info(__name__,
                        f"Could not read image file '{input_filepath}'")
            return False

        if self.skip_webp:
            webp_path = os.path.join(mirror_dirs[0], self._to_webp(filename))
            if os.path.exists(webp_path):
                LOGGER.debug(
                    __name__,
                    f"Mask already found for '{input_filepath}' at '{webp_path}'."
                )
                self.n_skipped_images += 1
                return False

        self.n_valid_images += 1
        return True
Example 12
def main():
    tree_walker = initialize()

    for i, paths in enumerate(tree_walker.walk()):
        count_str = f"{i + 1} of {tree_walker.n_valid_images}"
        LOGGER.info(__name__, LOG_SEP)
        LOGGER.info(__name__, f"Iteration: {count_str}.")
        LOGGER.info(__name__, f"Processing file {paths.input_file}")

        try:
            worker = EXIFWorker(None, paths, None)
            worker.get()
        except PROCESSING_EXCEPTIONS as err:
            LOGGER.error(
                __name__,
                f"Got error '{type(err).__name__}: {str(err)}' when creating JSON from image. "
                f"File: {paths.input_file}")
Example 13
def main():
    tree_walker, database_client = initialize()
    start_datetime = datetime.now()

    for i, paths in enumerate(tree_walker.walk()):
        count_str = f"{i + 1} of {tree_walker.n_valid_images}"
        LOGGER.info(__name__, LOG_SEP)
        LOGGER.info(__name__, f"Iteration: {count_str}.")
        LOGGER.info(__name__, f"Processing file {paths.input_file}")

        try:
            json_dict = load_json(paths)
            database_client.add_row(json_dict)
        except PROCESSING_EXCEPTIONS as err:
            LOGGER.error(
                __name__,
                f"Got error '{type(err).__name__}: {str(err)}' when writing JSON to Database. "
                f"File: {paths.input_file}")

    LOGGER.info(__name__, LOG_SEP)
    LOGGER.info(__name__, "Writing remaining files to Database")
    database_client.close()

    summary_str = get_summary(tree_walker, database_client, start_datetime)
    LOGGER.info(__name__, LOG_SEP)
    LOGGER.info(__name__, summary_str)
Example 14
def main():
    """Run the masking."""
    # Initialize
    start_datetime = datetime.now()
    args, tree_walker, image_processor, dataset_iterator = initialize()
    n_imgs = "?" if config.lazy_paths else (tree_walker.n_valid_images +
                                            tree_walker.n_skipped_images)

    # Mask images
    time_at_iter_start = time.time()
    for i, paths in enumerate(tree_walker.walk()):
        count_str = f"{tree_walker.n_skipped_images + i + 1} of {n_imgs}"
        start_time = time.time()
        LOGGER.set_state(paths)
        LOGGER.info(__name__, LOG_SEP)
        LOGGER.info(__name__, f"Iteration: {count_str}.")

        # Catch potential exceptions raised while processing the image
        try:
            # Get the image
            img = next(dataset_iterator)
            # Do the processing
            image_processor.process_image(img, paths)
        except PROCESSING_EXCEPTIONS as err:
            error_msg = f"'{str(err)}'. File: {paths.input_file}"
            LOGGER.error(__name__,
                         error_msg,
                         save=True,
                         email=True,
                         email_mode="error")
            continue

        est_done = get_estimated_done(time_at_iter_start, n_imgs, i + 1)
        iter_time_delta = "{:.3f}".format(time.time() - start_time)
        LOGGER.info(__name__, f"Iteration finished in {iter_time_delta} s.")
        LOGGER.info(__name__, f"Estimated completion: {est_done}")

    # Close the image_processor. This will make sure that all exports are finished before we continue.
    LOGGER.info(__name__, LOG_SEP)
    LOGGER.info(__name__, f"Writing output files for the remaining images.")
    image_processor.close()

    # Summary
    summary_str = get_summary(tree_walker, image_processor, start_datetime)
    LOGGER.info(__name__, LOG_SEP)
    LOGGER.info(__name__, summary_str, email=True, email_mode="finished")
Example 15
def initialize():
    """
    Get command line arguments, and initialize the TreeWalker and Masker.

    :return: Command line arguments, an instance of `TreeWalker` initialized at the specified directories, and an
             instance of `Masker` ready for masking.
    :rtype: argparse.Namespace, TreeWalker, Masker
    """
    # Register the logging excepthook
    except_hooks = [logger_excepthook]

    if config.uncaught_exception_email:
        # Register a custom excepthook which sends an email on uncaught exceptions.
        from src.email_sender import email_excepthook
        except_hooks.append(email_excepthook)

    # Set the exception hook(s)
    set_excepthook(except_hooks)

    # Get arguments
    args = get_args()
    # Check that the config and command line arguments are valid
    check_config(args)

    # Configure logger
    logging.basicConfig(level=getattr(logging, config.log_level),
                        format=LOGGER.fmt,
                        datefmt=LOGGER.datefmt)

    # Set log file
    if args.log_folder is not None:
        os.makedirs(args.log_folder, exist_ok=True)
        log_file_name = config.log_file_name.format(
            datetime=datetime.now().strftime("%Y-%m-%d_%H%M%S"),
            hostname=gethostname())
        log_file = os.path.join(args.log_folder, log_file_name)
        LOGGER.set_log_file(log_file)

    # Log the call
    LOGGER.info(__name__, f"Call: {' '.join(sys.argv)}")
    # Log the current config.
    LOGGER.info(__name__, "\n" + config_string())

    if args.clear_cache:
        # Clear any cached files
        clear_cache()
        # Clear the database cache if database writing is enabled
        if config.write_exif_to_db:
            from src.db.DatabaseClient import clear_db_cache
            clear_db_cache()

    # Get the absolute path of the directories
    base_input_dir = os.path.abspath(args.input_folder)
    base_output_dir = os.path.abspath(args.output_folder)
    mirror_dirs = [base_output_dir]
    # Make the output directory
    os.makedirs(base_output_dir, exist_ok=True)

    if args.archive_folder is not None:
        base_archive_dir = os.path.abspath(args.archive_folder)
        mirror_dirs.append(base_archive_dir)
        # Make the archive directory
        os.makedirs(base_archive_dir, exist_ok=True)

    # Make the cache directory
    os.makedirs(config.CACHE_DIRECTORY, exist_ok=True)

    # Configure the logger
    LOGGER.base_input_dir = base_input_dir
    LOGGER.base_output_dir = base_output_dir

    # Initialize the walker
    tree_walker = TreeWalker(base_input_dir,
                             mirror_dirs,
                             skip_webp=(not config.force_remask),
                             precompute_paths=(not config.lazy_paths))
    # Initialize the masker
    masker = Masker(mask_dilation_pixels=config.mask_dilation_pixels,
                    max_num_pixels=config.max_num_pixels)
    # Create the TensorFlow dataset
    dataset_iterator = iter(get_tf_dataset(tree_walker))
    # Initialize the ImageProcessor
    image_processor = ImageProcessor(
        masker=masker, max_num_async_workers=config.max_num_async_workers)
    return args, tree_walker, image_processor, dataset_iterator
Example 16
def process_image_properties(contents, parsed_exif):
    """
    Process the `ImageProperties` XML from the EXIF header

    :param contents: XML-contents
    :type contents: bytes
    :param parsed_exif: Dictionary to hold the extracted values
    :type parsed_exif: dict
    :return: Relevant information extracted from `contents`
    :rtype: dict
    """
    contents = to_pretty_xml(contents)
    contents = redact_image_properties(contents)
    image_properties = xmltodict.parse(contents)["ImageProperties"]

    # Set a "default" quality. This will be adjusted if we encounter missing values
    quality = EXIF_QUALITIES["good"]

    # Position
    geo_tag = image_properties.get("GeoTag", None)
    if geo_tag is not None:
        ewkt = f"srid=4326;POINT Z( {geo_tag['dLongitude']} {geo_tag['dLatitude']} {geo_tag['dAltitude']} )"
    else:
        ewkt = None
        quality = EXIF_QUALITIES["missing_values"]

    # Speed and heading
    heading = image_properties.get("Heading", None)
    if heading == "NaN":
        heading = None
    speed = image_properties.get("Speed", None)
    if speed == "NaN":
        speed = None

    # Nicely formatted directory names
    mappenavn = re.sub(r"\\", "/", image_properties["ImageName"])
    mapper = mappenavn.split("/")

    timestamp = image_properties["@Date"]
    date = timestamp.split("T")[0]
    exif_veg = image_properties["VegComValues"]["VCRoad"]

    if len(exif_veg) >= 3:
        exif_vegnr = exif_veg[2:].lstrip("0")
        exif_vegstat = exif_veg[1]
        exif_vegkat = exif_veg[0]
    else:
        exif_vegnr = exif_veg.lstrip("0")
        exif_vegstat = None
        exif_vegkat = None

    if exif_vegstat not in LOVLIG_VEGSTATUS or exif_vegkat not in LOVLIG_VEGKATEGORI:
        LOGGER.info(
            __name__,
            f"VCRoad={exif_veg} følger ikke KAT+STAT+vegnr syntaks: {mappenavn}"
        )

    hp, strekning, delstrekning, ankerpunkt, kryssdel, sideanleggsdel = process_strekning_and_kryss(
        vchp=image_properties["VegComValues"]["VCHP"], filename=mapper[-1])

    # Set values
    parsed_exif["exif_tid"] = timestamp
    parsed_exif["exif_dato"] = date
    parsed_exif["exif_speed"] = speed
    parsed_exif["exif_heading"] = heading
    parsed_exif["exif_gpsposisjon"] = ewkt
    parsed_exif["exif_strekningsnavn"] = image_properties["VegComValues"][
        "VCArea"]
    parsed_exif["exif_fylke"] = image_properties["VegComValues"]["VCCountyNo"]
    parsed_exif["exif_vegkat"] = exif_vegkat
    parsed_exif["exif_vegstat"] = exif_vegstat
    parsed_exif["exif_vegnr"] = exif_vegnr
    parsed_exif["exif_hp"] = hp
    parsed_exif["exif_strekning"] = strekning
    parsed_exif["exif_delstrekning"] = delstrekning
    parsed_exif["exif_ankerpunkt"] = ankerpunkt
    parsed_exif["exif_kryssdel"] = kryssdel
    parsed_exif["exif_sideanleggsdel"] = sideanleggsdel
    parsed_exif["exif_meter"] = image_properties["VegComValues"]["VCMeter"]
    parsed_exif["exif_feltkode"] = image_properties["VegComValues"]["VCLane"]
    parsed_exif["exif_mappenavn"] = "/".join(mapper[0:-1])
    parsed_exif["exif_filnavn"] = mapper[-1]
    parsed_exif["exif_strekningreferanse"] = "/".join(mapper[-4:-2])
    parsed_exif["exif_imageproperties"] = contents
    parsed_exif["exif_kvalitet"] = quality