Example #1
0
    def display_predictions(self, file, img_filename):
        """Collect the labels and probabilities predicted for one image.

        Parses the prediction file and, for every row whose file name
        (column 1) matches ``img_filename``, gathers the label and the
        probability stored in each of the top-20 prediction cells
        (columns 2 onwards).

        Args:
            file: path of the prediction file handed to ``parse``.
            img_filename: image to look up; it is passed through
                ``get_filename_from_path`` before being compared.

        Returns:
            ``[labels, probabilities]`` -- two parallel lists, e.g.
            ``[['church', 'house'], [0.9967, 0.8909]]``. Both are empty when
            no row matches or the matching row has no valid prediction.
        """
        log("[INFO][FLASK] Start parsing prediction file {}".format(file),
            self.verbose)

        # read/parse the file
        df = parse(file)
        top_k = 20
        img_filename = get_filename_from_path(img_filename)

        labels = []
        probabilities = []
        for idx, row in df.iterrows():
            if get_filename_from_path(row[1]) != img_filename:
                continue
            if not isvalid_prediction(row[2]):
                continue

            # prediction columns start from 2 onwards
            for p in range(2, top_k + 2):
                pred_arr = parse_prediction(row[p])
                # parse_prediction returns [] for a cell it cannot parse
                # (e.g. an empty cell); skip such cells instead of failing
                # with an IndexError on pred_arr[1] / pred_arr[2]
                if len(pred_arr) < 3:
                    continue
                print(pred_arr[1], pred_arr[2])  # label and probability
                labels.append(pred_arr[1])
                probabilities.append(pred_arr[2])

        # expected: data = [['church','house'],[0.9967,0.8909]];
        return [labels, probabilities]
Example #2
0
    def _load_model(self):
        """Instantiate the network class stored in ``self.model``.

        Returns:
            The object produced by calling ``self.model`` with
            ``weights=self.weight``.
        """
        # NOTE: the message is emitted before the network is instantiated
        log("[INFO] Model and weights loaded...", self.verbose)

        return self.model(weights=self.weight)
Example #3
0
    def search_gun(self, file, top_k=20, probability_threshold=0.50):
        """Parse a prediction file and run the gun search on it.

        Args:
            file: path of the prediction file handed to ``parse``.
            top_k: forwarded unchanged to ``_search_gun``.
            probability_threshold: forwarded unchanged to ``_search_gun``.

        Returns:
            Whatever ``_search_gun`` returns for the parsed file.
        """
        log("[INFO] Start parsing prediction file {}".format(file),
            self.verbose)

        predictions_df = parse(file)
        return self._search_gun(predictions_df, file, top_k,
                                probability_threshold)
Example #4
0
def parse_prediction(pred):
    """Convert the text of one prediction cell into a list.

    Args:
        pred: the cell content, expected to be the text form of a list.

    Returns:
        The evaluated list, or an empty list when ``pred`` cannot be
        evaluated or does not evaluate to a list.
    """
    arr = []
    try:
        # NOTE(review): eval() on file content is unsafe even with
        # __builtins__ disabled. If cells are always plain literals,
        # ast.literal_eval would be a safer replacement -- confirm the
        # file format before switching.
        pred_arr = eval(pred, {"__builtins__": None})
    except Exception:
        # Exception rather than a bare except, so that KeyboardInterrupt
        # and SystemExit still propagate to the caller.
        log("[ERROR] Prediction file parsing error. File is does not follow the format."
            )
    else:
        if isinstance(pred_arr, list):
            arr = pred_arr

    return arr
Example #5
0
def parse_for_report(df, verbose, top_k=20, threshold=None, ascending=False):
    """Build a label/count table from a parsed prediction frame.

    The counting itself is delegated to ``_parse_for_report``; its result
    is turned into a two-column DataFrame and sorted by ``count``.

    Args:
        df: parsed prediction data, forwarded to ``_parse_for_report``.
        verbose: when truthy, the first 20 rows of the result are logged.
        top_k: forwarded to ``_parse_for_report``.
        threshold: forwarded to ``_parse_for_report``.
        ascending: sort direction for the ``count`` column.

    Returns:
        A DataFrame with the columns ``label`` and ``count``.
    """
    label_counts = _parse_for_report(df, verbose, top_k, threshold, ascending)

    results = pd.DataFrame(
        list(label_counts.items()),
        columns=['label', 'count'],
    ).sort_values(['count'], ascending=ascending)

    if not verbose:
        return results

    log("[INFO] Top-20 labels sorted in descending order (highest first)",
        verbose)
    log(results.head(20), verbose)
    return results
Example #6
0
def parse_dir():
    """Count the files of each extension in every folder under ``dataset``.

    Each sub-folder of ``dataset`` is walked recursively. Files are tallied
    by lower-cased extension; files without an extension are counted as
    ``none`` and extensions missing from ``EXTENSIONS_SUPPORTED`` as
    ``others``.

    Returns:
        Three parallel lists ``(folders, extensions, counts)`` with one
        entry per (folder, extension) pair.
    """
    DATASET_DIR = "dataset"
    OUTPUT_DIR = "output"

    counts_by_folder = {}
    for entry in os.listdir(DATASET_DIR):
        folder_path = os.path.join(DATASET_DIR, entry)
        # only sub-folders of the dataset directory are scanned
        if not os.path.isdir(folder_path):
            continue

        ext_counts = {}
        for path in Path(folder_path).glob('**/*'):
            if not os.path.isfile(path):
                continue

            ext = os.path.splitext(str(path))[1].replace(".", "").lower()
            if ext == '':
                # some files have no extension, e.g. carved images
                key = 'none'
            elif ext.upper() not in EXTENSIONS_SUPPORTED:
                key = 'others'
            else:
                key = ext
            ext_counts[key] = ext_counts.get(key, 0) + 1

        # TODO: add more columns to make the graph look nicer if at least one has less than x keys
        counts_by_folder[get_filename_from_path(folder_path)] = ext_counts

    # flatten the nested mapping into three parallel columns
    arr_folders, arr_images, arr_values = [], [], []
    for folder, ext_counts in counts_by_folder.items():
        for ext, count in ext_counts.items():
            arr_folders.append(folder)
            arr_images.append(ext)
            arr_values.append(count)

    df = pd.DataFrame({'x': arr_folders, 'y': arr_images, 'value': arr_values})

    heatmap_file = os.path.join(OUTPUT_DIR,
                                "file_extensions_" + get_timestamp() + ".csv")

    # NOTE: the CSV export is currently disabled, so nothing is written to
    # heatmap_file even though the message below is still logged.
    # df.to_csv(heatmap_file, index=False)
    log("[INFO] File created/saved: {}".format(heatmap_file), True)

    return arr_folders, arr_images, arr_values
Example #7
0
def parse_exif(file, image_path_list, verbose):
    """Return the exif rows that belong to the given image paths.

    Args:
        file: path of the exif file handed to ``parse``.
        image_path_list: image paths to look up.
        verbose: forwarded to ``log``.

    Returns:
        A list of the rows whose ``FileName`` matches one of the paths
        after both sides went through ``convert_to_compare_path``. The
        file is not parsed at all when ``image_path_list`` is empty.
    """
    log("[INFO] Parse exif from {}..".format(file), verbose)

    if len(image_path_list) == 0:
        return []

    exif_df = parse(file)
    wanted_paths = [convert_to_compare_path(p) for p in image_path_list]

    return [
        row for _, row in exif_df.iterrows()
        if convert_to_compare_path(row.FileName) in wanted_paths
    ]
Example #8
0
    def load(self, image_path, model="inception"):
        """Select the network and build the list of paths to analyse.

        Sets ``self.model``, ``self.weight``, ``self.input_shape``,
        ``self.preprocess`` and ``self.image_list``.

        Args:
            image_path: a single image path, or a directory that is walked
                recursively (every entry found is listed).
            model: one of ``vgg16``, ``inception``, ``xception``,
                ``resnet``.

        Raises:
            KeyError: if ``model`` is not one of the known names.
        """
        # ----- select the model -----
        model_classes = {
            "vgg16": VGG16,
            "inception": InceptionV3,
            "xception": Xception,  # TensorFlow ONLY
            "resnet": ResNet50
        }
        weight_files = {
            "vgg16": 'vgg16_weights_tf_dim_ordering_tf_kernels.h5',
            "inception": 'inception_v3_weights_tf_dim_ordering_tf_kernels.h5',
            "xception": 'xception_weights_tf_dim_ordering_tf_kernels.h5',  # TensorFlow ONLY
            "resnet": 'resnet50_weights_tf_dim_ordering_tf_kernels.h5'
        }
        # all weight files live in <index_path>/models
        models_dir = os.path.join(self.index_path, 'models')

        self.model = model_classes[model]
        self.weight = os.path.join(models_dir, weight_files[model])

        self.input_shape, self.preprocess = self._preprocess_data(model)

        log("[INFO] {} model used.".format(model), self.verbose)
        log("[INFO] Weight {} used".format(self.weight), self.verbose)

        # ----- build the image list -----
        if os.path.isdir(image_path):
            # everything below the directory, sub-folders included;
            # glob yields Path objects, so convert them to strings
            found = [str(p) for p in Path(image_path).glob('**/*')]
        else:
            found = [image_path]  # single image path

        self.image_list = found

        log("[INFO] Analyzing directory {}...".format(image_path),
            self.verbose)
        log("[INFO] Number of files found for analysis: {}".format(len(found)),
            self.verbose)
Example #9
0
def extract_exif(image_path, verbose):
    """Read the exif information of one image.

    Args:
        image_path: path of the image file to open.
        verbose: forwarded to ``log``.

    Returns:
        The mapping produced by ``get_exif_data`` with ``FileName`` set to
        ``image_path``. When the image carries no exif data only
        ``ExifImageWidth`` / ``ExifImageHeight`` are added (taken from the
        image size); otherwise ``GPSLatitude`` / ``GPSLongitude`` are added
        from ``get_lat_lon``.
    """
    log("[INFO] Extract exif information for {}..".format(image_path), verbose)

    # Open the file once and close it deterministically: this function is
    # called for every image of a batch, so unclosed handles pile up.
    with Image.open(image_path) as image:
        exif_data = get_exif_data(image)

        # if exif_data is empty, just add resolution (width and height)
        if len(exif_data) == 0:
            exif_data['ExifImageWidth'], exif_data['ExifImageHeight'] = \
                image.size
        else:
            lat, lon = get_lat_lon(exif_data)
            exif_data['GPSLatitude'] = lat
            exif_data['GPSLongitude'] = lon

    # image full path instead of fname only
    exif_data['FileName'] = image_path

    return exif_data
Example #10
0
    def search_list(self,
                    file,
                    find_me_list,
                    top_k=20,
                    probability_threshold=0.50):
        """Search a prediction file for a list of items.

        Gun entries are handled by ``_search_gun``; whatever
        ``extract_non_gun_items`` leaves over is handled by
        ``_search_list``.

        Args:
            file: path of the prediction file handed to ``parse``.
            find_me_list: the items to search for.
            top_k: forwarded to both search helpers.
            probability_threshold: forwarded to both search helpers.

        Returns:
            The gun results followed by the results for the other items.
        """
        log("[INFO] Start parsing prediction file {}".format(file),
            self.verbose)

        predictions_df = parse(file)

        # if there is a gun in the list, reuse the gun search function
        if is_gun_in_list(find_me_list):
            gun_matches = self._search_gun(predictions_df, file, top_k,
                                           probability_threshold)
        else:
            gun_matches = []

        # everything that is not a gun
        other_items = extract_non_gun_items(find_me_list)
        if other_items:
            other_matches = self._search_list(predictions_df, other_items,
                                              top_k, probability_threshold)
        else:
            other_matches = []

        return gun_matches + other_matches
Example #11
0
def report():
    """Command-line entry point that summarises a prediction file.

    Parses the command line, builds the label/count table with
    ``parse_for_report`` and saves it through ``save`` in the folder that
    contains the prediction file.

    Raises:
        AssertionError: if ``--prediction_file`` is not an existing file.
    """
    # --- argument list --- #
    parser = argparse.ArgumentParser()
    parser.add_argument("-prediction", "--prediction_file", required=True,
                        help="path to the prediction file to be parsed")
    parser.add_argument('-v', '--verbose', action='store_true',
                        help="Print lots of debugging statements")
    parser.add_argument("-k", "--top_k", type=int, default=20,
                        help="retrieve the top-k predictions, default is 20")
    parser.add_argument(
        "-t", "--threshold", type=float, default=0.50,
        help="probability threshold value in decimals ex. 0.75, default is 0.50")

    args = vars(parser.parse_args())

    # --- validation --- #
    pred_file = args["prediction_file"]
    if not os.path.isfile(pred_file):
        raise AssertionError(
            "The --prediction_file command line argument should exist and should be write-able."
        )

    verbose = True if args["verbose"] else False
    # falsy values (0, 0.0) fall back to the defaults below
    threshold = args["threshold"] or None
    top_k = args["top_k"] or 20

    always_verbose = True
    # show argument list
    arg_lines = ("-->{}: {}".format(key, value) for key, value in args.items())
    log("[INFO] Argument List:\n" + "\n".join(arg_lines),
        always_verbose)  # always display

    log("[INFO] Parsing file {} for report".format(pred_file), always_verbose)
    results = parse_for_report(parse(pred_file), verbose, top_k, threshold)

    # save in the same folder as the prediction file
    output_path = os.path.dirname(os.path.abspath(pred_file))
    filenames = get_filenames_in_csv(output_path, ["summary_predictions"],
                                     get_timestamp())
    save([results], output_path, filenames)

    log("[INFO] Total labels:{}".format(len(results['label'])), always_verbose)
    log("[INFO] Completed Report ", always_verbose)
Example #12
0
    def process(self):
        """Classify every path in ``self.image_list`` and collect the results.

        For each path the top-20 decoded predictions and the exif data are
        gathered. A path that is not an image, or whose processing raises,
        is recorded as unprocessed and gets the placeholder prediction
        ``[0, "---", 0]``.

        The result is stored in ``self.data`` as three DataFrames:
        predictions, exif data and unprocessed paths. The decoded
        predictions of the last classified image are kept in
        ``self.decoded_predictions``.
        """
        # process top-20 predictions
        k = 20

        # load the model and its weights
        model = self._load_model()

        img_list = self.image_list

        predictions = [None] * len(img_list)
        exif_list = []
        keys_list = []
        unprocessed = []

        for i, image_path in enumerate(img_list):

            # get filename
            img_fname = os.path.normpath(image_path).split(os.sep)[-1]

            # image full path instead of fname only
            predictions[i] = [image_path]
            try:
                # allow only valid images
                if not is_image(image_path):
                    raise ValueError("not an image: {}".format(image_path))

                image = self._load_image(image_path)
                # classify the image
                log("[INFO] Classifying image {}".format(img_fname),
                    self.verbose)

                # predict the image
                preds = model.predict(image)
                P = imagenet_utils.decode_predictions(preds, top=k)

                # for unit test
                self.decoded_predictions = P

                # save predictions
                for (j, (imagenetID, label, prob)) in enumerate(P[0]):
                    predictions[i].append([imagenetID, label, prob])
                    # print in terminal only, do not log because this is
                    # already in the predictions file
                    if self.verbose:
                        print("{}. {}: {:.2f}%".format(
                            j + 1, label, prob * 100))

                # extract the exif information
                exif_data = extract_exif(image_path, self.verbose)

                # store exif data per image
                exif_list.append(exif_data)

                # needed to know which key has the max columns for the df
                keys_list.append(len(exif_data.keys()))
            except Exception:
                # Exception rather than a bare except, so that Ctrl-C
                # (KeyboardInterrupt) can still stop a long batch.
                # unprocessed, separate into its own file
                unprocessed.append(image_path)
                predictions[i].append([0, "---", 0])
                # error in prediction
                log("[ERROR] Cannot process image {}".format(img_fname),
                    self.verbose)

        # convert to df to be saved
        df1 = pd.DataFrame(predictions)
        if exif_list:
            # we dont know which entry has the most exif columns, so take
            # the one with the max number of keys as the column reference
            max_k = get_key_of_max_value(keys_list)
            df2 = pd.DataFrame(exif_list, columns=exif_list[max_k].keys())
        else:
            # nothing could be processed: there is no entry to take the
            # columns from, so fall back to an empty frame
            df2 = pd.DataFrame()
        df3 = pd.DataFrame(unprocessed)
        self.data = [df1, df2, df3]
Example #13
0
def predict():
    """Command-line entry point that classifies an image or a directory.

    Parses the command line, then drives a ``Loader`` through ``load``,
    ``process`` and ``save_predictions``.

    Raises:
        AssertionError: if ``--model`` is not a known model name or if
            ``--image_path`` is neither an existing directory nor an
            existing file.
    """
    # --- argument list --- #
    parser = argparse.ArgumentParser()
    parser.add_argument("-i", "--image_path", required=True,
                        help="path to the input image or image directory")
    parser.add_argument(
        "-model", "--model", type=str, default="inception",
        choices=['inception', 'vgg16', 'xception', 'resnet'],
        help="name of pre-trained network to use(not implemented)")
    parser.add_argument(
        "-o", "--output_folder", type=str,
        help="folder name saved in cbis/output/ where prediction files will be saved")
    parser.add_argument('-v', '--verbose', action='store_true',
                        help="Print lots of debugging statements")
    args = vars(parser.parse_args())

    # --- validation --- #
    model = args["model"]
    img_path = args["image_path"]

    # ensure a valid model name was supplied via command line argument
    if model not in ("vgg16", "inception", "xception", "resnet"):
        raise AssertionError(
            "The --model command line argument should be one from this list [vgg16,inception,xception,resnet]."
        )

    # the path must be an existing directory or an existing file
    if not (os.path.isdir(img_path) or os.path.isfile(img_path)):
        raise AssertionError(
            "The --image_path command line argument should exist and should be write-able."
        )

    # if empty it will take the dataset foldername
    folder_out = args["output_folder"]
    verbose = True if args["verbose"] else False

    always_verbose = True
    # show argument list
    arg_lines = ("-->{}: {}".format(key, value) for key, value in args.items())
    log("[INFO] Argument List:\n" + "\n".join(arg_lines),
        always_verbose)  # always display

    log("[INFO] Starting to load and index the path {} ...".format(img_path),
        always_verbose)

    # define the folder name
    ts = get_timestamp()
    output_dir = get_output_directory(ts, img_path, folder_out)

    # load the model and index the results
    loader = Loader(index_path=INDEX_PATH,
                    output=output_dir,
                    timestamp=ts,
                    verbose=verbose)
    loader.load(image_path=img_path, model=model)
    loader.process()
    loader.save_predictions()

    log("[INFO] Completed Loading and Indexing of Results", always_verbose)
Example #14
0
def search():
    """Command-line entry point that searches a prediction file.

    Parses the command line, runs the gun search or the item-list search
    of ``Searcher`` depending on the mode returned by
    ``validate_search_items``, then looks up the exif rows of the images
    that were found.

    Raises:
        AssertionError: if ``--prediction_file`` or ``--exif_file`` is not
            an existing file.
    """
    # --- argument list --- #
    # construct the argument parse and parse the arguments
    ap = argparse.ArgumentParser()
    ap.add_argument("-prediction", "--prediction_file", required=True,
                    help="path to the prediction file to be parsed")
    ap.add_argument("-exif", "--exif_file", required=True,
                    help="path to the exif file to be parsed")
    # mode to search for guns or just objects
    ap.add_argument("-s", "--search_list", type=str, default="gun",
                    help="list of search items delimited by comma")
    ap.add_argument('-v', '--verbose', action='store_true',
                    help="Print lots of debugging statements")
    ap.add_argument("-k", "--top_k", type=int, default=20,
                    help="retrieve the top-k predictions, default is 20")
    # The default is declared explicitly so that it matches the help text;
    # without it None was passed on and replaced the 0.50 default of the
    # Searcher methods.
    ap.add_argument("-t", "--threshold", type=float, default=0.50,
                    help="probability threshold value in decimals ex. 0.75, default is 0.50")

    args = vars(ap.parse_args())

    # --- validation --- #
    # ensure that the arguments supplied are pathnames
    if not os.path.isfile(args["prediction_file"]):
        raise AssertionError("The --prediction_file command line argument should exist and should be write-able.")

    if not os.path.isfile(args["exif_file"]):
        raise AssertionError("The --exif_file command line argument should exist and should be write-able.")

    pred_file = args["prediction_file"]
    exif_file = args["exif_file"]
    verbose = bool(args["verbose"])

    # a falsy value (e.g. "-t 0") is still passed on as None, as before
    threshold = args["threshold"] or None
    top_k = args["top_k"] or 20

    always_verbose = True
    # show argument list
    arg_summary = "[INFO] Argument List:\n" + "\n".join(
        "-->{}: {}".format(x, args[x]) for x in args)
    log(arg_summary, always_verbose)  # always display

    # TODO - list suggestion from imagenet 1000 classes
    mode, search_list = validate_search_items(args["search_list"])

    log("[INFO] Starting to load prediction file {} and exif file {} ...".format(pred_file, exif_file), always_verbose)
    index_path = os.path.dirname(__file__)
    searcher = Searcher(index_path, verbose)
    image_list = []
    image_path_list = []

    ## --------------- prediction --------------- ##
    if 1 == mode:  # guns
        image_list = searcher.search_gun(pred_file, top_k, threshold)

    elif 2 == mode:  # others
        image_list = searcher.search_list(pred_file, search_list, top_k, threshold)

    else:
        print("[INFO] Dont waste my time, nothing to search so no results found")

    if len(image_list) > 0:
        # the first element of every result is the image path
        image_path_list = [img[0] for img in image_list]
        if verbose:
            for img in image_list:
                log("\t{} | {} | {:.2f}%".format(img[0], img[1], float(img[2]) * 100), verbose)

    log("[INFO] Total images found: {}".format(len(image_path_list)), always_verbose)

    ## --------------- exif info --------------- ##
    exif_info = parse_exif(exif_file, image_path_list, verbose)
    log("[INFO] Total exif information: {}".format(len(exif_info)), always_verbose)

    # TODO: Convert to json results

    log("[INFO] Completed Search ", always_verbose)