class FCOSEvaluator(BaseEvaluator):
    """Evaluator for an FCOS detector on the Wildtrack multi-camera dataset.

    Runs the detector over every sampled frame of the seven camera views
    (C1-C7), loads the matching ground-truth boxes from the JSON annotation
    files, and feeds both to the BaseEvaluator hooks
    (``classify_detections`` / ``calculate_final_stats``).
    """

    # Per-class confidence thresholds (80 COCO classes), computed by
    # maximizing per-class f-measure on the precision-recall curve —
    # see compute_thresholds_for_classes() in coco_eval.py.
    # Constant and shared by all instances, so kept as a class attribute.
    THRESHOLDS_FOR_CLASSES = [
        0.3, 0.4928510785102844, 0.5040897727012634, 0.4912887513637543,
        0.5016880631446838, 0.5278812646865845, 0.5351834893226624,
        0.5003424882888794, 0.4955945909023285, 0.43564629554748535,
        0.6089804172515869, 0.666087806224823, 0.5932040214538574,
        0.48406165838241577, 0.4062422513961792, 0.5571075081825256,
        0.5671307444572449, 0.5268378257751465, 0.5112953186035156,
        0.4647842049598694, 0.5324517488479614, 0.5795850157737732,
        0.5152440071105957, 0.5280804634094238, 0.4791383445262909,
        0.5261335372924805, 0.4906163215637207, 0.523737907409668,
        0.47027698159217834, 0.5103300213813782, 0.4645252823829651,
        0.5384289026260376, 0.47796186804771423, 0.4403403103351593,
        0.5101461410522461, 0.5535093545913696, 0.48472103476524353,
        0.5006796717643738, 0.5485560894012451, 0.4863888621330261,
        0.5061569809913635, 0.5235867500305176, 0.4745445251464844,
        0.4652363359928131, 0.4162440598011017, 0.5252017974853516,
        0.42710989713668823, 0.4550687372684479, 0.4943239390850067,
        0.4810051918029785, 0.47629663348197937, 0.46629616618156433,
        0.4662836790084839, 0.4854755401611328, 0.4156557023525238,
        0.4763634502887726, 0.4724511504173279, 0.4915047585964203,
        0.5006274580955505, 0.5124194622039795, 0.47004589438438416,
        0.5374764204025269, 0.5876904129981995, 0.49395060539245605,
        0.5102297067642212, 0.46571290493011475, 0.5164387822151184,
        0.540651798248291, 0.5323763489723206, 0.5048757195472717,
        0.5302401781082153, 0.48333442211151123, 0.5109739303588867,
        0.4077408015727997, 0.5764586925506592, 0.5109297037124634,
        0.4685552418231964, 0.5148998498916626, 0.4224434792995453,
        0.4998510777950287,
    ]

    def __init__(self, iou_thresh, detector_name, weights, config=None):
        """Forward the evaluator configuration to BaseEvaluator.

        :param iou_thresh: IoU threshold used to match detections to
            ground truth (consumed by BaseEvaluator)
        :param detector_name: [str] human-readable detector name
        :param weights: [str] path to the trained model weights
        :param config: [str] path to the detector config file
        """
        super().__init__(iou_thresh, detector_name, weights, config=config)

    def read_json(self, filename):
        """Decode a JSON annotation file and return its content.

        :param filename: [str] name of the JSON file
        :return: [list] list of the annotation dictionaries

        Raises:
            FileNotFoundError: file not found
            ValueError: failed to decode the JSON file
            TypeError: the type of decoded content differs from the
                expected (list of dictionaries)
        """
        if not os.path.exists(filename):
            raise FileNotFoundError("File %s not found." % filename)
        try:
            with open(filename, 'r') as _f:
                _data = json.load(_f)
        except json.JSONDecodeError as err:
            # Fix: the original message was an f-string with no placeholder
            # ("Failed to decode (unknown).") — name the offending file and
            # chain the decoder error for debuggability.
            raise ValueError(f"Failed to decode {filename}.") from err
        if not isinstance(_data, list):
            raise TypeError(f"Decoded content is {type(_data)}. Expected list.")
        if len(_data) > 0 and not isinstance(_data[0], dict):
            raise TypeError(f"Decoded content is {type(_data[0])}. Expected dict.")
        return _data

    def load_ground_truth_coordinates(self, filename, view=None):
        """Get ground-truth bbox corners for one camera view from a JSON file.

        :param filename: [str] annotation file; name format 0000XXXX.json,
            XXXX being a timestamp
        :param view: [int] camera view, 1-7 (maps to C1-C7)
        :return: [list] of [(xmin, ymin), (xmax, ymax)] pairs, one per
            person visible in this view

        Raises:
            TypeError: view is None
        """
        if view is None:  # fix: identity comparison, not == None
            raise TypeError("view cannot be None")
        coordinates = []
        for annotation in self.read_json(filename):
            # 'views' is 0-indexed, representing cameras C1 - C7
            bbox = annotation['views'][view - 1]
            corners = (bbox['xmin'], bbox['ymin'], bbox['xmax'], bbox['ymax'])
            if corners == (-1, -1, -1, -1):
                continue  # person not present in this view
            xmin, ymin, xmax, ymax = corners
            coordinates.append([(xmin, ymin), (xmax, ymax)])
        return coordinates

    def prepare_detector(self):
        """Build the COCODemo detector from self.config / self.weights.

        Stores the ready detector in ``self.detector``; must be called
        before ``load_detections_and_confidence``.
        """
        cfg.merge_from_file(self.config)
        cfg.merge_from_list([])
        cfg.MODEL.WEIGHT = self.weights
        cfg.freeze()
        self.detector = COCODemo(
            cfg,
            confidence_thresholds_for_classes=self.THRESHOLDS_FOR_CLASSES,
            min_image_size=800)

    def load_detections_and_confidence(self, image):
        """Run the detector on one image.

        :param image: decoded image (as returned by cv2.imread)
        :return: (detections, confidence) as produced by
            COCODemo.get_person_detections
        """
        return self.detector.get_person_detections(image)

    def evaluate_one_set(self, view):
        """Evaluate every 5th frame (timestamps 0..1995) of one camera view.

        :param view: [int] camera view, 1-7
        """
        for time_stamp in range(0, 1996, 5):
            # Zero-pad the timestamp to four digits (replaces the old
            # four-branch if/elif chain).
            stamp = f"{time_stamp:04d}"
            image_name = f"C{view}0000{stamp}.png"
            print("Processing " + image_name)
            img = cv2.imread(
                f"../../Wildtrack_dataset/Image_subsets/C{view}/0000{stamp}.png")
            detections, confidence = self.load_detections_and_confidence(img)
            ground_truth_coordinates = self.load_ground_truth_coordinates(
                f"../../Wildtrack_dataset/annotations_positions/0000{stamp}.json",
                view=view)
            self.classify_detections(
                detections, confidence, ground_truth_coordinates, image_name)

    def evaluate(self):
        """Evaluate all seven camera views sequentially, then report stats."""
        self.prepare_detector()
        for view in range(1, 8):  # examine 1 video (camera view) at a time
            self.evaluate_one_set(view)
        self.calculate_final_stats()
# NOTE(review): top-level fragment that duplicates FCOSEvaluator.evaluate_one_set
# in an older, script-style form (0-based view index, pandas per-image dataframe).
# It references names whose definitions are not visible here (coco_demo,
# load_ground_truth_coordinates, total_gt_boxes, pd), and the final list literal
# is left unterminated in this view, so the fragment is syntactically incomplete
# as shown — confirm against the full file; presumably dead code superseded by
# the class above and safe to delete once confirmed.
for view in range(7): # examine 1 video at a time
    for time_stamp in range(0, 1996, 5): # examine each frame
        if int(time_stamp / 10) == 0: # single digit 000X
            stamp = "000" + str(time_stamp)
        elif int(time_stamp / 100) == 0: # two digits 00XX
            stamp = "00" + str(time_stamp)
        elif int(time_stamp / 1000) == 0: # three digits 0XXX
            stamp = "0" + str(time_stamp)
        else: # XXXX
            stamp = str(time_stamp)
        print("Processing C" + str(view + 1) + "0000" + stamp + ".png")
        # prepare object that handles inference plus adds predictions on top of image
        img = cv2.imread("../../Wildtrack_dataset/Image_subsets/C" + str(view + 1) + "/0000" + stamp + ".png")
        detections, confidence = coco_demo.get_person_detections(img)
        ground_truth_coordinates = load_ground_truth_coordinates(
            view, "../../Wildtrack_dataset/annotations_positions/0000" + stamp + ".json")
        num_ground_truth = len(ground_truth_coordinates)
        total_gt_boxes += num_ground_truth # add to number of ground-truth bboxes
        # create dataframe for each image, storing information regarding each prediction it contains
        # Later appended to the "overall" dataframe
        image_dataframe = pd.DataFrame(columns=("ImageID", "coordinate", "confidence", "TP", "FP"))
        image_dataframe["ImageID"] = [ "C" + str(view + 1) + "0000" + stamp
def main():
    """Run FCOS person detection on every image in a directory.

    Parses CLI arguments, builds the detector from the given config and
    weights, then for each image in --images-dir prints the person
    detections and writes the annotated composite to demo_output/<name>.
    """
    parser = argparse.ArgumentParser(
        description="PyTorch Object Detection Webcam Demo")
    parser.add_argument(
        "--config-file",
        default="configs/fcos/fcos_imprv_R_50_FPN_1x.yaml",
        metavar="FILE",
        help="path to config file",
    )
    parser.add_argument(
        "--weights",
        default="weights/FCOS_imprv_R_50_FPN_1x.pth",
        metavar="FILE",
        help="path to the trained model",
    )
    parser.add_argument(
        "--images-dir",
        default="demo/images",
        metavar="DIR",
        help="path to demo images directory",
    )
    parser.add_argument(
        "--min-image-size",
        type=int,
        default=800,
        help="Smallest size of the image to feed to the model. "
             "Model was trained with 800, which gives best results",
    )
    parser.add_argument(
        "opts",
        help="Modify model config options using the command-line",
        default=None,
        nargs=argparse.REMAINDER,
    )
    args = parser.parse_args()
    print(args.opts)

    # load config from file and command-line arguments
    cfg.merge_from_file(args.config_file)
    cfg.merge_from_list(args.opts)
    cfg.MODEL.WEIGHT = args.weights
    cfg.freeze()

    # The following per-class thresholds are computed by maximizing
    # per-class f-measure in their precision-recall curve.
    # Please see compute_thresholds_for_classes() in coco_eval.py for details.
    thresholds_for_classes = [
        0.3, 0.4928510785102844, 0.5040897727012634, 0.4912887513637543,
        0.5016880631446838, 0.5278812646865845, 0.5351834893226624,
        0.5003424882888794, 0.4955945909023285, 0.43564629554748535,
        0.6089804172515869, 0.666087806224823, 0.5932040214538574,
        0.48406165838241577, 0.4062422513961792, 0.5571075081825256,
        0.5671307444572449, 0.5268378257751465, 0.5112953186035156,
        0.4647842049598694, 0.5324517488479614, 0.5795850157737732,
        0.5152440071105957, 0.5280804634094238, 0.4791383445262909,
        0.5261335372924805, 0.4906163215637207, 0.523737907409668,
        0.47027698159217834, 0.5103300213813782, 0.4645252823829651,
        0.5384289026260376, 0.47796186804771423, 0.4403403103351593,
        0.5101461410522461, 0.5535093545913696, 0.48472103476524353,
        0.5006796717643738, 0.5485560894012451, 0.4863888621330261,
        0.5061569809913635, 0.5235867500305176, 0.4745445251464844,
        0.4652363359928131, 0.4162440598011017, 0.5252017974853516,
        0.42710989713668823, 0.4550687372684479, 0.4943239390850067,
        0.4810051918029785, 0.47629663348197937, 0.46629616618156433,
        0.4662836790084839, 0.4854755401611328, 0.4156557023525238,
        0.4763634502887726, 0.4724511504173279, 0.4915047585964203,
        0.5006274580955505, 0.5124194622039795, 0.47004589438438416,
        0.5374764204025269, 0.5876904129981995, 0.49395060539245605,
        0.5102297067642212, 0.46571290493011475, 0.5164387822151184,
        0.540651798248291, 0.5323763489723206, 0.5048757195472717,
        0.5302401781082153, 0.48333442211151123, 0.5109739303588867,
        0.4077408015727997, 0.5764586925506592, 0.5109297037124634,
        0.4685552418231964, 0.5148998498916626, 0.4224434792995453,
        0.4998510777950287,
    ]

    demo_im_names = os.listdir(args.images_dir)

    # prepare object that handles inference plus adds predictions on top of image
    coco_demo = COCODemo(
        cfg,
        confidence_thresholds_for_classes=thresholds_for_classes,
        min_image_size=args.min_image_size)

    # Fix: cv2.imwrite fails silently when the output directory is missing,
    # so make sure it exists before the loop.
    os.makedirs("demo_output", exist_ok=True)

    print("starting...")
    for im_name in demo_im_names:
        print(im_name)
        img = cv2.imread(os.path.join(args.images_dir, im_name))
        if img is None:
            continue  # skip unreadable / non-image files
        start_time = time.time()
        composite = coco_demo.run_on_opencv_image(img)
        coordinates, scores = coco_demo.get_person_detections(img)
        print("coordinates:", coordinates, "scores:", scores)
        print("{}\tinference time: {:.2f}s".format(
            im_name, time.time() - start_time))
        print("saving...")
        cv2.imwrite("demo_output/" + im_name, composite)