def display_predictions(self, file, img_filename):
    log("[INFO][FLASK] Start parsing prediction file {}".format(file), self.verbose)
    # read/parse the file
    df = parse(file)
    top_k = 20
    img_filename = get_filename_from_path(img_filename)
    labels = []
    probabilities = []
    for idx, row in df.iterrows():
        filename = get_filename_from_path(row[1])
        if img_filename == filename:
            if isvalid_prediction(row[2]):
                for p in range(2, top_k + 2):  # prediction columns start at index 2
                    pred_arr = parse_prediction(row[p])  # [imagenetID, label, probability]
                    if len(pred_arr) == 3:  # skip cells that failed to parse
                        labels.append(pred_arr[1])
                        probabilities.append(pred_arr[2])
    # expected shape: [['church', 'house'], [0.9967, 0.8909]]
    return [labels, probabilities]
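# Usage sketch (hypothetical, not part of the module): how a caller such as a
# Flask view might consume the [labels, probabilities] pair returned above.
# The helper name below is illustrative only.
def _predictions_to_chart_data(labels, probabilities):
    """Pair each label with its probability for a chart payload, e.g.
    (['church', 'house'], [0.9967, 0.8909]) -> [('church', 0.9967), ('house', 0.8909)]."""
    return list(zip(labels, probabilities))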
def _load_model(self):
    # load the model - for now use InceptionV3 because testing shows this
    # model has better performance than ResNet, Xception and VGG16
    Network = self.model
    log("[INFO] Model and weights loaded...", self.verbose)
    return Network(weights=self.weight)
def search_gun(self, file, top_k=20, probability_threshold=0.50):
    log("[INFO] Start parsing prediction file {}".format(file), self.verbose)
    # read/parse the file
    df = parse(file)
    gun_list = self._search_gun(df, file, top_k, probability_threshold)
    return gun_list
def parse_prediction(pred):
    arr = []
    try:
        # restricted eval: no builtins are exposed to the evaluated expression
        pred_arr = eval(pred, {"__builtins__": None})
        if isinstance(pred_arr, list):
            arr = pred_arr
    except Exception:
        log("[ERROR] Prediction file parsing error. File does not follow the expected format.")
    return arr
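# A minimal, safer alternative sketch: ast.literal_eval only accepts Python
# literals, so it cannot execute arbitrary expressions the way eval can.
# This helper is illustrative and is not used elsewhere in the module.
import ast

def _parse_prediction_literal(pred):
    # a prediction cell is assumed to look like "['n03028079', 'church', 0.9967]"
    try:
        arr = ast.literal_eval(pred)
        return arr if isinstance(arr, list) else []
    except (ValueError, SyntaxError):
        return []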
def parse_for_report(df, verbose, top_k=20, threshold=None, ascending=False):
    content = _parse_for_report(df, verbose, top_k, threshold, ascending)
    results = pd.DataFrame(list(content.items()), columns=['label', 'count'])
    results = results.sort_values(['count'], ascending=ascending)
    if verbose:
        log("[INFO] Top-20 labels sorted by count ({} first)".format(
            "lowest" if ascending else "highest"), verbose)
        log(results.head(20), verbose)
    return results
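# Illustration of the expected report shape, assuming _parse_for_report returns
# a {label: count} dict; the labels and counts below are made up for demonstration.
#   content = {'church': 12, 'house': 7, 'rifle': 3}
#   pd.DataFrame(list(content.items()), columns=['label', 'count'])
#   ->      label  count
#      0   church     12
#      1    house      7
#      2    rifle      3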
def parse_dir():
    DATASET_DIR = "dataset"
    OUTPUT_DIR = "output"
    folders = os.listdir(DATASET_DIR)  # get folders only
    arr = {}
    # read only from the dataset folders
    for f in folders:
        image_path = os.path.join(DATASET_DIR, f)
        if os.path.isdir(image_path):
            arr_ext = {}
            pathlist = Path(image_path).glob('**/*')
            for path in pathlist:
                if os.path.isfile(path):
                    filename, file_extension = os.path.splitext(str(path))
                    ext = file_extension.replace(".", "").lower()
                    if ext == '':
                        # some files, such as carved images, might not have extensions
                        ext = 'NONE'
                    elif ext.upper() not in EXTENSIONS_SUPPORTED:
                        ext = 'OTHERS'
                    if ext.lower() in arr_ext:
                        # the extension already exists in the tally, so increment
                        arr_ext[ext.lower()] = arr_ext[ext.lower()] + 1
                    else:
                        # first occurrence of this extension
                        arr_ext[ext.lower()] = 1
            # TODO: add more columns to make the graph look nicer if at least one has less than x keys
            arr[get_filename_from_path(image_path)] = arr_ext
    # flatten {folder: {extension: count}} into three parallel lists
    arr_folders = []
    arr_images = []
    arr_values = []
    for k in arr:
        for i in arr[k]:
            arr_folders.append(k)
            arr_images.append(i)
            arr_values.append(arr[k].get(i))
    d = {'x': arr_folders, 'y': arr_images, 'value': arr_values}
    df = pd.DataFrame(d)
    ts = get_timestamp()
    heatmap_file = os.path.join(OUTPUT_DIR, "file_extensions_" + ts + ".csv")
    # save the heatmap data so the displayed file is available for download
    df.to_csv(heatmap_file, index=False)
    log("[INFO] File created/saved: {}".format(heatmap_file), True)
    return arr_folders, arr_images, arr_values
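# Standalone sketch of the flattening step above: {folder: {ext: count}}
# becomes three parallel lists suitable for a heatmap DataFrame.
# The folder names and counts are made up for illustration.
def _flatten_extension_counts(arr):
    xs, ys, values = [], [], []
    for folder, ext_counts in arr.items():
        for ext, count in ext_counts.items():
            xs.append(folder)
            ys.append(ext)
            values.append(count)
    return xs, ys, values

# _flatten_extension_counts({'case01': {'jpg': 5, 'png': 2}})
# -> (['case01', 'case01'], ['jpg', 'png'], [5, 2])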
def parse_exif(file, image_path_list, verbose):
    log("[INFO] Parse exif from {}..".format(file), verbose)
    exif_data_list = []
    if len(image_path_list) > 0:
        # read/parse the file
        df = parse(file)
        image_dir = [convert_to_compare_path(x) for x in image_path_list]
        # find the exif rows whose file name matches one of the image paths
        for idx, row in df.iterrows():
            if convert_to_compare_path(row.FileName) in image_dir:
                exif_data_list.append(row)
    return exif_data_list
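# Assumption: convert_to_compare_path normalizes paths so rows written on one
# platform match paths supplied on another. A plausible sketch, purely
# illustrative of the comparison above; the real helper may differ.
def _convert_to_compare_path_sketch(p):
    import os
    # normalize separators and case so "a\\b.JPG" and "a/b.jpg" compare equal
    return os.path.normpath(str(p)).replace("\\", "/").lower()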
def load(self, image_path, model="inception"):
    index_path = self.index_path

    # ----- load model -----
    # define a dictionary that maps model names to their classes
    models = {
        "vgg16": VGG16,
        "inception": InceptionV3,
        "xception": Xception,  # TensorFlow ONLY
        "resnet": ResNet50
    }
    weights = {
        "vgg16": os.path.join(index_path, 'models', 'vgg16_weights_tf_dim_ordering_tf_kernels.h5'),
        "inception": os.path.join(index_path, 'models', 'inception_v3_weights_tf_dim_ordering_tf_kernels.h5'),
        "xception": os.path.join(index_path, 'models', 'xception_weights_tf_dim_ordering_tf_kernels.h5'),  # TensorFlow ONLY
        "resnet": os.path.join(index_path, 'models', 'resnet50_weights_tf_dim_ordering_tf_kernels.h5')
    }
    self.model = models[model]
    self.weight = weights[model]
    self.input_shape, self.preprocess = self._preprocess_data(model)
    log("[INFO] {} model used.".format(model), self.verbose)
    log("[INFO] Weight {} used".format(self.weight), self.verbose)

    # ----- load the image list -----
    img_list = [image_path]  # single image path
    if os.path.isdir(image_path):
        # directory of images for prediction: iterate through everything,
        # including subfolders
        img_list = []
        pathlist = Path(image_path).glob('**/*')
        for path in pathlist:
            img_list.append(str(path))  # path is a Path object, not a string
    self.image_list = img_list
    log("[INFO] Analyzing directory {}...".format(image_path), self.verbose)
    log("[INFO] Number of files found for analysis: {}".format(len(img_list)), self.verbose)
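# Note: Path.glob('**/*') also yields directories; process() later skips
# non-images via is_image. A files-only sketch of the same walk would be:
#   from pathlib import Path
#   img_list = [str(p) for p in Path(image_path).glob('**/*') if p.is_file()]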
def extract_exif(image_path, verbose):
    log("[INFO] Extract exif information for {}..".format(image_path), verbose)
    exif_data = get_exif_data(Image.open(image_path))
    if len(exif_data) == 0:
        # if exif_data is empty, just add the resolution (width and height)
        exif_data['ExifImageWidth'], exif_data['ExifImageHeight'] = Image.open(image_path).size
    else:
        lat, lon = get_lat_lon(exif_data)
        exif_data['GPSLatitude'] = lat
        exif_data['GPSLongitude'] = lon
    # store the image full path instead of the file name only
    exif_data['FileName'] = image_path
    return exif_data
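# Minimal sketch of what get_exif_data is assumed to do, using Pillow's
# standard EXIF API; tag handling in the real helper may differ.
def _get_exif_data_sketch(img):
    from PIL.ExifTags import TAGS
    exif = img.getexif()  # public Pillow API; empty mapping when no EXIF
    # map numeric EXIF tag ids to human-readable names
    return {TAGS.get(tag_id, tag_id): value for tag_id, value in exif.items()}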
def search_list(self, file, find_me_list, top_k=20, probability_threshold=0.50):
    log("[INFO] Start parsing prediction file {}".format(file), self.verbose)
    # read/parse the file
    df = parse(file)
    gun_list = []
    if is_gun_in_list(find_me_list):
        # there is a gun in the list, so reuse the gun search function
        gun_list = self._search_gun(df, file, top_k, probability_threshold)
        # keep only the non-gun items for the generic search below
        find_me_list = extract_non_gun_items(find_me_list)
    img_list = []
    if find_me_list:  # not empty
        img_list = self._search_list(df, find_me_list, top_k, probability_threshold)
    return gun_list + img_list
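# Assumed behavior of the helpers used above (illustrative only; the real
# implementations live elsewhere in the module):
#   is_gun_in_list(['gun', 'van'])        -> True
#   extract_non_gun_items(['gun', 'van']) -> ['van']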
def report():
    # --- argument list --- #
    # construct the argument parser and parse the arguments
    ap = argparse.ArgumentParser()
    ap.add_argument("-prediction", "--prediction_file", required=True,
                    help="path to the prediction file to be parsed")
    ap.add_argument('-v', '--verbose', action='store_true',
                    help="print lots of debugging statements")
    ap.add_argument("-k", "--top_k", type=int, default=20,
                    help="retrieve the top-k predictions, default is 20")
    ap.add_argument("-t", "--threshold", type=float, default=0.50,
                    help="probability threshold value in decimals e.g. 0.75, default is 0.50")
    args = vars(ap.parse_args())

    # --- validation --- #
    # ensure that the argument supplied is an existing file
    if not os.path.isfile(args["prediction_file"]):
        raise AssertionError(
            "The --prediction_file command line argument should point to an existing, readable file.")

    verbose = args["verbose"]
    threshold = args["threshold"] if args["threshold"] else None
    top_k = args["top_k"] if args["top_k"] else 20
    pred_file = args["prediction_file"]
    always_verbose = True

    # show argument list
    s = "[INFO] Argument List:\n" + "\n".join(
        [("-->{}: {}".format(x, args[x])) for x in args])
    log(s, always_verbose)  # always display
    log("[INFO] Parsing file {} for report".format(pred_file), always_verbose)

    # read/parse the file
    df = parse(pred_file)
    results = parse_for_report(df, verbose, top_k, threshold)

    # save the report in the same folder as the prediction file
    output_path = os.path.dirname(os.path.abspath(pred_file))
    filenames = get_filenames_in_csv(output_path, ["summary_predictions"], get_timestamp())
    save([results], output_path, filenames)

    log("[INFO] Total labels: {}".format(len(results['label'])), always_verbose)
    log("[INFO] Completed Report", always_verbose)
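# Example invocation (hypothetical script name and file path):
#   python report.py --prediction_file output/case01/predictions.csv -v -k 10 -t 0.75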
def process(self):
    # process top-20 predictions
    k = 20
    # load the model and its weights
    model = self._load_model()
    img_list = self.image_list
    predictions = [None] * len(img_list)
    exif_list = []
    keys_list = []
    unprocessed = []
    for i, image_path in enumerate(img_list):
        # get the file name from the path
        img_fname = os.path.basename(os.path.normpath(image_path))
        # save the image full path instead of the file name only
        predictions[i] = [image_path]
        try:
            # allow only valid images
            if not is_image(image_path):
                raise Exception("not a valid image")
            image = self._load_image(image_path)
            # classify the image
            log("[INFO] Classifying image {}".format(img_fname), self.verbose)
            preds = model.predict(image)
            P = imagenet_utils.decode_predictions(preds, top=k)
            self.decoded_predictions = P  # kept for unit tests
            # save predictions
            for (j, (imagenetID, label, prob)) in enumerate(P[0]):
                predictions[i].append([imagenetID, label, prob])
                # display the rank-k predictions and probabilities in the
                # terminal only; do not log, they are already in the predictions file
                if self.verbose:
                    print("{}. {}: {:.2f}%".format(j + 1, label, prob * 100))
            # extract and store the exif information per image
            exif_data = extract_exif(image_path, self.verbose)
            exif_list.append(exif_data)
            # needed to know which entry has the most columns for the df
            keys_list.append(len(exif_data.keys()))
        except Exception:
            # unprocessed images are separated into their own file
            unprocessed.append(image_path)
            predictions[i].append([0, "---", 0])  # error in prediction
            log("[ERROR] Cannot process image {}".format(img_fname), self.verbose)
    # we don't know upfront which image has the most exif columns,
    # so take the entry with the max count and use its keys as reference
    max_k = get_key_of_max_value(keys_list)
    # convert to dataframes to be saved
    df1 = pd.DataFrame(predictions)
    df2 = pd.DataFrame(exif_list, columns=exif_list[max_k].keys())
    df3 = pd.DataFrame(unprocessed)
    self.data = [df1, df2, df3]
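# decode_predictions returns one list per input image, each a list of
# (imagenet_id, label, probability) tuples sorted by probability, e.g.:
#   P[0][:2] -> [('n03028079', 'church', 0.9967), ('n02825657', 'bell_cote', 0.0012)]
# (values shown are illustrative only)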
def predict():
    # --- argument list --- #
    # construct the argument parser and parse the arguments
    ap = argparse.ArgumentParser()
    ap.add_argument("-i", "--image_path", required=True,
                    help="path to the input image or image directory")
    ap.add_argument("-model", "--model", type=str, default="inception",
                    choices=['inception', 'vgg16', 'xception', 'resnet'],
                    help="name of the pre-trained network to use")
    ap.add_argument("-o", "--output_folder", type=str,
                    help="folder name under cbis/output/ where prediction files will be saved")
    ap.add_argument('-v', '--verbose', action='store_true',
                    help="print lots of debugging statements")
    args = vars(ap.parse_args())

    model_list = ["vgg16", "inception", "xception", "resnet"]

    # --- validation --- #
    # ensure a valid model name was supplied via command line argument
    if args["model"] not in model_list:
        raise AssertionError(
            "The --model command line argument should be one of [vgg16, inception, xception, resnet].")
    # ensure that the path exists
    if not (os.path.isdir(args["image_path"]) or os.path.isfile(args["image_path"])):
        raise AssertionError(
            "The --image_path command line argument should point to an existing file or directory.")

    model = args["model"]
    img_path = args["image_path"]
    folder_out = args["output_folder"]  # if empty, the dataset folder name is used
    verbose = args["verbose"]
    always_verbose = True

    # show argument list
    s = "[INFO] Argument List:\n" + "\n".join(
        [("-->{}: {}".format(x, args[x])) for x in args])
    log(s, always_verbose)  # always display
    log("[INFO] Starting to load and index the path {} ...".format(img_path), always_verbose)

    # define the output folder name
    ts = get_timestamp()
    output_dir = get_output_directory(ts, img_path, folder_out)

    # load the model and index the results
    loader = Loader(index_path=INDEX_PATH, output=output_dir, timestamp=ts, verbose=verbose)
    loader.load(image_path=img_path, model=model)
    loader.process()
    loader.save_predictions()
    log("[INFO] Completed Loading and Indexing of Results", always_verbose)
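# Example invocation (hypothetical script name, paths and folder name):
#   python predict.py -i dataset/case01 -model inception -o case01_run -v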
def search():
    # --- argument list --- #
    # construct the argument parser and parse the arguments
    ap = argparse.ArgumentParser()
    ap.add_argument("-prediction", "--prediction_file", required=True,
                    help="path to the prediction file to be parsed")
    ap.add_argument("-exif", "--exif_file", required=True,
                    help="path to the exif file to be parsed")
    # mode to search for guns or just objects
    ap.add_argument("-s", "--search_list", type=str, default="gun",
                    help="list of search items delimited by commas")
    ap.add_argument('-v', '--verbose', action='store_true',
                    help="print lots of debugging statements")
    ap.add_argument("-k", "--top_k", type=int, default=20,
                    help="retrieve the top-k predictions, default is 20")
    ap.add_argument("-t", "--threshold", type=float,
                    help="probability threshold value in decimals e.g. 0.75, default is 0.50")
    args = vars(ap.parse_args())

    # --- validation --- #
    # ensure that the arguments supplied point to existing files
    if not os.path.isfile(args["prediction_file"]):
        raise AssertionError("The --prediction_file command line argument should point to an existing, readable file.")
    if not os.path.isfile(args["exif_file"]):
        raise AssertionError("The --exif_file command line argument should point to an existing, readable file.")

    pred_file = args["prediction_file"]
    exif_file = args["exif_file"]
    verbose = args["verbose"]
    threshold = args["threshold"] if args["threshold"] else None
    top_k = args["top_k"] if args["top_k"] else 20
    always_verbose = True

    # show argument list
    s = "[INFO] Argument List:\n" + "\n".join(
        [("-->{}: {}".format(x, args[x])) for x in args])
    log(s, always_verbose)  # always display

    # TODO - list suggestion from imagenet 1000 classes
    mode, search_list = validate_search_items(args["search_list"])
    log("[INFO] Starting to load prediction file {} and exif file {} ...".format(pred_file, exif_file), always_verbose)

    index_path = os.path.dirname(__file__)
    searcher = Searcher(index_path, verbose)
    image_list = []
    image_path_list = []

    ## --------------- prediction --------------- ##
    if 1 == mode:  # guns
        image_list = searcher.search_gun(pred_file, top_k, threshold)
    elif 2 == mode:  # others
        image_list = searcher.search_list(pred_file, search_list, top_k, threshold)
    else:
        print("[INFO] Nothing to search, so no results found")

    if len(image_list) > 0:
        image_path_list = list(map(lambda x: x[0], image_list))
        if verbose:
            for img in image_list:
                log("\t{} | {} | {:.2f}%".format(img[0], img[1], float(img[2]) * 100), verbose)
    log("[INFO] Total images found: {}".format(len(image_path_list)), always_verbose)

    ## --------------- exif info --------------- ##
    exif_info = parse_exif(exif_file, image_path_list, verbose)
    log("[INFO] Total exif information: {}".format(len(exif_info)), always_verbose)

    # TODO: Convert to json results
    log("[INFO] Completed Search", always_verbose)
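# Example invocation (hypothetical script name and paths):
#   python search.py -prediction output/case01/predictions.csv \
#       -exif output/case01/exif.csv -s "gun,van" -k 10 -t 0.75 -v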