def calibration_fn():
    """Yield calibration images one by one for the builder function.

    The builder uses the yielded examples to optimize the network. Every
    entry listed in ``IMAGES_PATH`` (sorted by name) is read with
    ``imgutils.read_image_from_cv2``, resized to
    ``(INPUT_SIZE, INPUT_SIZE)``, given a leading batch axis and cast to
    ``uint8``.

    Yields:
        tuple: a one-element tuple ``(final_image,)`` holding a single
        image with a leading batch axis.
    """
    print("[CALIBRATION] Starting calibration process...")
    images_found = sorted(os.listdir(IMAGES_PATH))
    print("[CALIBRATION] Obtaining calibration images from {}"
          .format(images_found))
    print("[CALIBRATION] Done! Found {} images for calibration"
          .format(len(images_found)))
    print("[CALIBRATION] Starting image yielding...")
    # Timer for the whole process. It is kept separate from the per-image
    # timer below so the final figure covers every image and not only the
    # time elapsed since the last yield.
    start_calibration = time.time()
    for image_path in images_found:
        input_image = imgutils.read_image_from_cv2(IMAGES_PATH + image_path)
        resized_image = imgutils.resize_image(input_image,
                                              (INPUT_SIZE, INPUT_SIZE))
        # add the batch axis in front of the image
        final_image = resized_image[np.newaxis, ...].astype("uint8")
        print("[CALIBRATION] Yielding image from {}".format(image_path))
        start_yielding = time.time()
        yield (final_image,)
        # execution resumes here once the consumer asks for the next image,
        # so this measures how long the consumer held the yielded image
        print("[CALIBRATION] Image yielding Done, it took {} ms"
              .format((time.time() - start_yielding) * 1000))
    # NOTE(review): this assignment creates a local; if a module-level
    # CALIBRATION_TIME was meant to be updated a `global` statement is
    # needed - confirm against the rest of the file.
    CALIBRATION_TIME = (time.time() - start_calibration) * 1000
    print("[CALIBRATION] Calibration procces finished, it took {} ms"
          .format(CALIBRATION_TIME))
def visualize_detections(output_path, detections_path, frames_path,
                         save_results=False):
    """Draw the detections stored in a CSV file over their frames.

    Frames are globbed as ``frames_path + "*.png"`` and processed in the
    order given by ``numericalSort``; the n-th frame (1-based) is matched
    with the CSV rows whose ``frame`` column equals n.

    Args:
        output_path: forwarded to ``imgutils.save_frame`` when saving.
        detections_path: CSV file read with ``pandas.read_csv``. The columns
            ``frame``, ``ymin``, ``xmin``, ``ymax``, ``xmax``,
            ``detection_class``, ``detection_score``, ``width`` and
            ``height`` are read.
        frames_path: prefix concatenated with ``"*.png"``, so it is expected
            to end with a path separator.
        save_results: when true every drawn frame is also saved.
    """
    warmup(20)
    # load detections data
    detections = pd.read_csv(detections_path)
    # get frame paths
    frame_paths = sorted(glob.glob(frames_path + "*.png"), key=numericalSort)
    for frame_count, frame_path in enumerate(frame_paths, start=1):
        # load the frame and get its dimensions
        frame = imgutils.read_image_from_cv2(frame_path)
        height, width, _ = frame.shape
        # get the detections that belong to this frame
        frame_detections = detections.loc[detections["frame"] == frame_count]
        # copy the image to draw on it
        drawed_frame = frame.copy()

        # The dimensions the detections were computed on are the same for
        # every row of the frame, so look them up once per frame.
        det_width = det_height = None
        needs_resize = False
        if not frame_detections.empty:
            det_width = frame_detections["width"].unique()[0]
            det_height = frame_detections["height"].unique()[0]
            # if frame dimensions differ from the detections dimensions,
            # inference was performed on a network-input-sized image and
            # the boxes must be resized to the original frame size
            needs_resize = det_width != width or det_height != height

        for _, row in frame_detections.iterrows():
            # prepare bbox detections
            bbox = [row.ymin, row.xmin, row.ymax, row.xmax]
            # stop at the first invalid detection of the frame
            if not valid_detection(bbox):
                break
            print("[VIS-DETECTIONS] Detection is valid!")
            # get label params
            label = row.detection_class
            score = float(row.detection_score)
            print("[VIS-DETECTIONS] Label: {}".format(label))
            print("[VIS-DETECTIONS] Score: {}".format(score))
            if needs_resize:
                bbox = imgutils.resize_bounding_box(
                    frame.shape, (det_height, det_width, 3), bbox)
            drawed_frame = imgutils.draw_bounding_box(drawed_frame, bbox,
                                                      label, score)

        imgutils.display_frame(drawed_frame, scaled=True)
        if save_results:
            imgutils.save_frame(drawed_frame, output_path, detections_path,
                                frame_count)
def batched_calibration_fn():
    """Yield all calibration images as one batch for the builder function.

    The builder uses the yielded examples to optimize the network. Every
    entry listed in ``IMAGES_PATH`` (sorted by name) is read, run through
    ``imgutils.preprocess_image`` with ``(INPUT_SIZE, INPUT_SIZE)``, cast to
    ``uint8`` and stored in a single
    ``(len(images), INPUT_SIZE, INPUT_SIZE, 3)`` ``uint8`` array.

    Yields:
        tuple: a one-element tuple ``(batched_input,)`` with the full batch.
    """
    print("[CALIBRATION] Starting calibration process...")
    images_found = sorted(os.listdir(IMAGES_PATH))
    print("[CALIBRATION] Obtaining calibration images from {}"
          .format(images_found))
    print("[CALIBRATION] Done! Found {} images for calibration"
          .format(len(images_found)))
    print("[CALIBRATION] Starting image yielding...")
    start_calibration = time.time()
    batched_input = np.zeros((len(images_found), INPUT_SIZE, INPUT_SIZE, 3),
                             dtype=np.uint8)
    # start the timer before the loop so it measures the adding step
    start_adding = time.time()
    for index, image_name in enumerate(images_found):
        # read and preprocess the image
        input_image = imgutils.read_image_from_cv2(IMAGES_PATH + image_name)
        image_data = imgutils.preprocess_image(input_image,
                                               (INPUT_SIZE, INPUT_SIZE))
        # NOTE(review): the cast to uint8 assumes preprocess_image keeps
        # pixel values in the 0-255 range; if it normalizes to [0, 1] the
        # batch would be truncated to zeros - confirm.
        batched_input[index] = np.asarray(image_data).astype("uint8")
        print("[CALIBRATION] Adding image {} from {}".format(index + 1,
                                                             image_name))
    print("[CALIBRATION] Image adding step Done, it took {} ms"
          .format((time.time() - start_adding) * 1000))
    print("[CALIBRATION] Yielding batched input")
    start_yielding = time.time()
    # yield the whole batch (not only the last image that was read)
    yield (batched_input,)
    print("[CALIBRATION] Image yielding Done, it took {} ms"
          .format((time.time() - start_yielding) * 1000))
    # NOTE(review): this assignment creates a local; add `global` if a
    # module-level CALIBRATION_TIME is meant to be updated - confirm.
    CALIBRATION_TIME = (time.time() - start_calibration) * 1000
    print("[CALIBRATION] Calibration procces finished, it took {} ms"
          .format(CALIBRATION_TIME))
def run_inference_on_images(self, images_path, warmup_iters=0, model_dir=None,
                            labels=None, threshold=0.3, iou=0.45):
    """Run detection on every ``*.png`` under ``images_path`` and save results.

    Optionally runs ``warmup_iters`` warmup inferences on random images.
    Then each image (sorted by the numbers embedded in its path) is loaded,
    resized to the network input size, divided by 255, fed to the loaded
    model and post-processed with combined non-max suppression. Bounding-box
    and timing results are collected and saved under
    ``<images_path>results/<MODEL_NAME>/opencv-backend``.

    Args:
        images_path: prefix concatenated with ``"*.png"`` to find images and
            with ``"results/..."`` to build the output path, so it is
            expected to end with a path separator.
        warmup_iters: number of warmup inferences; ``0`` disables warmup.
        model_dir: when given, the model is obtained through
            ``self.get_func_from_saved_model(model_dir)``; otherwise
            ``self.attributes["saved_model_loaded"]`` is used.
        labels: class labels handed to the drawing helper. When ``None``
            ``self.attributes["LABELS"]`` is used.
        threshold: score threshold used by non-max suppression.
        iou: IoU threshold used by non-max suppression.
    """
    input_size = int(self.attributes["INPUT_SIZE"])
    # honour an explicit `labels` argument instead of always overwriting it
    if labels is None:
        labels = self.attributes["LABELS"]

    if model_dir is not None:
        saved_model_loaded = self.get_func_from_saved_model(model_dir)
    else:
        saved_model_loaded = self.attributes["saved_model_loaded"]

    def build_input(single_image):
        """Wrap one image into a float32 batch of one and its tensor."""
        images_data = np.asarray([single_image]).astype(np.float32)
        return images_data, tf.constant(images_data)

    def postprocess(detections):
        """Split model outputs into boxes/scores and run combined NMS.

        Returns the ``[boxes, scores, classes, valid_detections]`` numpy
        list and the elapsed post-processing time in milliseconds.
        """
        print("[INFERENCE] Preprocessing network outputs...")
        start_output = time.time()
        # extract output tensors metadata: boxes, confidence scores
        print("[INFERENCE] Extracting output tensors metadata...")
        keyval_start = time.time()
        # when the model exposes several outputs the last one wins
        for value in detections.values():
            boxes = value[:, :, 0:4]
            pred_conf = value[:, :, 4:]
        print("[INFERENCE] Done extracting metadata, it took {}".format(
            (time.time() - keyval_start) * 1000))
        print("[INFERENCE] Performing NMS to output...")
        nms_start = time.time()
        # perform non-max suppression to retrieve valid detections only
        boxes, scores, classes, valid_detections = \
            tf.image.combined_non_max_suppression(
                boxes=tf.reshape(boxes, (tf.shape(boxes)[0], -1, 1, 4)),
                scores=tf.reshape(
                    pred_conf,
                    (tf.shape(pred_conf)[0], -1, tf.shape(pred_conf)[-1])),
                max_output_size_per_class=50,
                max_total_size=50,
                iou_threshold=iou,
                score_threshold=threshold)
        results = [
            boxes.numpy(),
            scores.numpy(),
            classes.numpy(),
            valid_detections.numpy()
        ]
        print("[INFERENCE] NMS done, it took {} ms".format(
            (time.time() - nms_start) * 1000))
        total_output = (time.time() - start_output) * 1000
        print("[INFERENCE] Done procesing output!, it took {}ms".format(
            total_output))
        return results, total_output

    # warmup
    if warmup_iters > 0:
        print("[INFERENCE] Starting warmup on {} iterations...".format(
            warmup_iters))
        warmup_start = time.time()
        # create a set of random images and perform inference
        for i in range(warmup_iters):
            print("[INFERENCE] Generating image {} with dims {}x{}".format(
                i + 1, input_size, input_size))
            # create random image
            image = np.random.normal(
                size=(input_size, input_size, 3)).astype(np.float32) / 255.
            # resize image to network input dimensions
            resized_image = imgutils.resize_image(image,
                                                  (input_size, input_size))
            _, input_tensor = build_input(resized_image)
            # get the detections
            print("[WARMUP] Now performing warmup inference...")
            inference_start_time = time.time()
            detections = saved_model_loaded(input_tensor)
            print("[WARMUP] Warmup inference took {} ms".format(
                (time.time() - inference_start_time) * 1000))
            results, _ = postprocess(detections)
            _ = imgutils.draw_yolo_bounding_boxes(resized_image, results,
                                                  labels)
        # display results in ms
        print("[WARMUP] Warmup finished, it took {} ms".format(
            (time.time() - warmup_start) * 1000))

    # helper to read the images sorted by numerical index
    numbers = re.compile(r'(\d+)')

    def numericalSort(value):
        parts = numbers.split(value)
        parts[1::2] = map(int, parts[1::2])
        return parts

    # read image sorted by numerical order
    image_paths = sorted(glob.glob(images_path + "*.png"), key=numericalSort)
    print("[INFERENCE] Found {} images in {} ...".format(
        len(image_paths), images_path))

    # objects that accumulate the results
    bbox_results = BboxResults()
    performance_results = YoloPerformanceResults()

    # iterate over all images, perform inference and update results
    for frame_count, image_path in enumerate(image_paths, start=1):
        print("[INFERENCE] Processing frame/image {} from {}".format(
            frame_count, image_path))
        image_filename = image_path.split('/')[-1]
        # init metadata
        bbox_results.init_frame_metadata(image_filename, frame_count)
        performance_results.init_frame_metadata(image_filename, frame_count)

        print("[INFERENCE] Loading image with opencv backend...")
        image_loading_start = time.time()
        image = imgutils.read_image_from_cv2(image_path)
        total_image_loading = (time.time() - image_loading_start) * 1000
        image = image.astype(np.float32)

        # preprocess image to work on tf
        print("[INFERENCE] Preprocessing image...")
        start_preprocessing = time.time()
        # NOTE(review): no BGR->RGB conversion is applied here although the
        # single-image run_inference does one - confirm which is intended.
        # resize image to network input dimensions
        resized_image = imgutils.resize_image(image,
                                              (input_size, input_size))
        resized_image = resized_image / 255.
        images_data, input_tensor = build_input(resized_image)
        total_preprocessing = (time.time() - start_preprocessing) * 1000
        print("[INFERENCE] Preprocessing done!, it took {}ms".format(
            total_preprocessing))
        print("[INFERENCE] Images data: {} - shape: {} - dtype {}".format(
            images_data, images_data.shape, images_data.dtype))
        print("[INFERENCE] Input tensor: {} - shape: {} - dtype {}".format(
            input_tensor, input_tensor.shape, input_tensor.dtype))

        print("[INFERENCE] Now performing inference...")
        inference_start_time = time.time()
        # get the detections
        detections = saved_model_loaded(input_tensor)
        total_inference = (time.time() - inference_start_time) * 1000
        print("[INFERENCE] Inference took {} ms".format(total_inference))

        results, total_output = postprocess(detections)

        # draw results on image and record the boxes
        drawing_time = imgutils.draw_yolo_bounding_boxes(
            resized_image, results, labels, bbox_results=bbox_results)
        height, width, _ = image.shape
        # add new performance results to object
        performance_results.add_new_result(width, height, total_image_loading,
                                           total_preprocessing,
                                           total_inference, total_output,
                                           drawing_time)
        print("[INFERENCE] Image/frame {} processed".format(frame_count))

    print("[INFERENCE] All frames procesed!")
    model_name = self.attributes["MODEL_NAME"]
    precision = self.attributes["precision"]
    output_path = "{}results/{}".format(images_path, model_name)
    output_path += "/opencv-backend"
    # save results obtained from performance and detections to output
    bbox_results.save_results(output_path, model_name, precision, threshold,
                              resize=True, opencv=True)
    performance_results.save_results(output_path, model_name, precision,
                                     threshold, resize=True, opencv=True)
def run_inference(self, image_path=None, warmup_iters=0, threshold=0.5,
                  iou=0.45, model_dir=None):
    """Run detection on a single image and draw/save the result.

    Optionally runs ``warmup_iters`` warmup inferences on random images.
    The image at ``image_path`` is then loaded, converted from BGR to RGB,
    resized to the network input size, divided by 255, fed to the loaded
    model and post-processed with combined non-max suppression. The
    detections are drawn with ``imgutils.draw_yolo_bounding_boxes(...,
    save=True)``.

    Args:
        image_path: path of the image to process.
        warmup_iters: number of warmup inferences; ``0`` disables warmup.
        threshold: score threshold used by non-max suppression.
        iou: IoU threshold used by non-max suppression.
        model_dir: when given, the model is obtained through
            ``self.get_func_from_saved_model(model_dir)``; otherwise
            ``self.attributes["saved_model_loaded"]`` is used.

    Raises:
        ValueError: if ``image_path`` is ``None`` (raised after warmup).
    """
    input_size = int(self.attributes["INPUT_SIZE"])
    labels = self.attributes["LABELS"]

    if model_dir is not None:
        saved_model_loaded = self.get_func_from_saved_model(model_dir)
    else:
        saved_model_loaded = self.attributes["saved_model_loaded"]

    def build_input(single_image):
        """Wrap one image into a float32 batch of one and its tensor."""
        images_data = np.asarray([single_image]).astype(np.float32)
        return images_data, tf.constant(images_data)

    def postprocess(detections):
        """Split model outputs into boxes/scores and run combined NMS.

        Returns the ``[boxes, scores, classes, valid_detections]`` numpy
        list.
        """
        print("[INFERENCE] Preprocessing network outputs...")
        start_output = time.time()
        # extract output tensors metadata: boxes, confidence scores
        print("[INFERENCE] Extracting output tensors metadata...")
        keyval_start = time.time()
        # when the model exposes several outputs the last one wins
        for value in detections.values():
            boxes = value[:, :, 0:4]
            pred_conf = value[:, :, 4:]
        print("[INFERENCE] Done extracting metadata, it took {}".format(
            (time.time() - keyval_start) * 1000))
        print("[INFERENCE] Performing NMS to output...")
        nms_start = time.time()
        # perform non-max suppression to retrieve valid detections only
        boxes, scores, classes, valid_detections = \
            tf.image.combined_non_max_suppression(
                boxes=tf.reshape(boxes, (tf.shape(boxes)[0], -1, 1, 4)),
                scores=tf.reshape(
                    pred_conf,
                    (tf.shape(pred_conf)[0], -1, tf.shape(pred_conf)[-1])),
                max_output_size_per_class=50,
                max_total_size=50,
                iou_threshold=iou,
                score_threshold=threshold)
        results = [
            boxes.numpy(),
            scores.numpy(),
            classes.numpy(),
            valid_detections.numpy()
        ]
        print("[INFERENCE] NMS done, it took {} ms".format(
            (time.time() - nms_start) * 1000))
        total_output = (time.time() - start_output) * 1000
        print("[INFERENCE] Done procesing output!, it took {}ms".format(
            total_output))
        return results

    # warmup
    if warmup_iters > 0:
        print("[INFERENCE] Starting warmup on {} iterations...".format(
            warmup_iters))
        warmup_start = time.time()
        # create a set of random images and perform inference
        for i in range(warmup_iters):
            print("[INFERENCE] Generating image {} with dims {}x{}".format(
                i + 1, input_size, input_size))
            # create random image (numpy/cv backend)
            resized_image = np.random.normal(
                size=(input_size, input_size, 3)).astype(np.float32) / 255.
            _, input_tensor = build_input(resized_image)
            # get the detections
            print("[WARMUP] Now performing warmup inference...")
            inference_start_time = time.time()
            detections = saved_model_loaded(input_tensor)
            print("[WARMUP] Warmup inference took {} ms".format(
                (time.time() - inference_start_time) * 1000))
            results = postprocess(detections)
            # draw dummy results
            _ = imgutils.draw_yolo_bounding_boxes(resized_image, results,
                                                  labels)
        # display results in ms
        print("[WARMUP] Warmup finished, it took {} ms".format(
            (time.time() - warmup_start) * 1000))

    # fail with a clear message instead of deep inside the image loader
    if image_path is None:
        raise ValueError("run_inference requires an image_path")

    print("[INFERENCE] Loading image with opencv backend...")
    image = imgutils.read_image_from_cv2(image_path)
    image = image.astype(np.float32)

    # preprocess image to work on tf
    print("[INFERENCE] Preprocessing image...")
    start_preprocessing = time.time()
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    # resize image to network input dimensions
    resized_image = imgutils.resize_image(image, (input_size, input_size))
    resized_image = resized_image / 255.
    images_data, input_tensor = build_input(resized_image)
    total_preprocessing = (time.time() - start_preprocessing) * 1000
    print("[INFERENCE] Preprocessing done!, it took {}ms".format(
        total_preprocessing))
    print("[INFERENCE] Images data: {} - shape: {} - dtype {}".format(
        images_data, images_data.shape, images_data.dtype))
    print("[INFERENCE] Input tensor: {} - shape: {} - dtype {}".format(
        input_tensor, input_tensor.shape, input_tensor.dtype))

    print("[INFERENCE] Now performing inference...")
    inference_start_time = time.time()
    # get the detections
    detections = saved_model_loaded(input_tensor)
    total_inference = (time.time() - inference_start_time) * 1000
    print("[INFERENCE] Inference took {} ms".format(total_inference))

    results = postprocess(detections)

    # draw results and save
    _ = imgutils.draw_yolo_bounding_boxes(resized_image, results, labels,
                                          save=True)
def convert_to_xml(detections_path, images_path):
    """Write one annotation XML file per image from a detections table.

    Images are globbed as ``images_path + "*.png"`` in ``numericalSort``
    order. For each image the rows whose ``filename`` matches are turned
    into ``<object>`` entries of an ``<annotation>`` tree (folder, filename,
    path, source, size, segmented, then the objects), which is written to
    the directory returned by ``get_output_path``. Images whose rows contain
    the value ``"N/D"`` are skipped.

    Args:
        detections_path: passed to ``get_output_path`` and
            ``get_detections_csv``.
        images_path: prefix concatenated with ``"*.png"``.
    """

    def add_text_node(parent, tag, text):
        """Append a child element named ``tag`` holding ``text``."""
        node = ET.SubElement(parent, tag)
        node.text = text
        return node

    # where the xml files go and the table they are built from
    output_path = get_output_path(detections_path, images_path)
    detections = get_detections_csv(detections_path)
    image_paths = sorted(glob.glob(images_path + "*.png"), key=numericalSort)

    for image_path in image_paths:
        path_parts = image_path.split('/')
        filename = path_parts[-1]
        folder = path_parts[-2]
        image = imgutils.read_image_from_cv2(image_path)
        # real image dimensions, compared below with the detection ones
        height, width, depth = image.shape
        print("[CONVERT-TO-XML] Processing file {} from folder {}".format(filename, folder))
        image_detections = detections.loc[detections["filename"] == filename]
        if "N/D" in image_detections.values:
            continue

        # file level annotations
        root = ET.Element('annotation')
        add_text_node(root, "folder", folder)
        add_text_node(root, "filename", filename)
        add_text_node(root, "path", image_path)

        # source annotations
        source_anno = ET.SubElement(root, "source")
        add_text_node(source_anno, "database", "Unknown")

        # size annotations
        size_anno = ET.SubElement(root, "size")
        add_text_node(size_anno, "width", str(width))
        add_text_node(size_anno, 'height', str(height))
        add_text_node(size_anno, "depth", str(depth))

        # segmented flag
        add_text_node(root, "segmented", '0')

        for _, row in image_detections.iterrows():
            detection_class = row.detection_class
            det_width, det_height = row.width, row.height
            # box as [ymin, xmin, ymax, xmax]
            bbox = np.array([row.ymin, row.xmin, row.ymax, row.xmax]).astype(int)
            # rescale the box when detections were computed on another size
            dims_differ = (width != det_width) | (height != det_height)
            if dims_differ:
                bbox = imgutils.resize_bounding_box((height, width, 3),
                                                    (det_height, det_width, 3),
                                                    bbox)
            ymin, xmin, ymax, xmax = bbox[0], bbox[1], bbox[2], bbox[3]

            object_anno = ET.SubElement(root, "object")
            add_text_node(object_anno, "name", detection_class)
            add_text_node(object_anno, "pose", "Unspecified")
            add_text_node(object_anno, "truncated", '0')
            add_text_node(object_anno, "difficult", '0')
            bbox_anno = ET.SubElement(object_anno, "bndbox")
            for tag, coord in (("xmin", xmin), ("ymin", ymin),
                               ("xmax", xmax), ("ymax", ymax)):
                add_text_node(bbox_anno, tag, str(coord))

        xml_name = get_filename_only(filename) + ".xml"
        print("[CSV-TO-XML] Saving file to {}{}".format(output_path, xml_name))
        ET.ElementTree(root).write(output_path + xml_name)
def track_objects(detections_path, frames_path, output_path,
                  display_results=False, save_results=False):
    """Feed per-frame 'pig' detections to a SORT tracker and record results.

    Frames are globbed as ``frames_path + "*.png"`` in ``numericalSort``
    order; the n-th frame (1-based) is matched with the CSV rows whose
    ``frame`` column equals n and whose ``detection_class`` is ``'pig'``.
    Frames without such rows are skipped. Every valid detection of the frame
    is passed to ``tracker.update``, the tracked objects are drawn and
    stored, and timing information is collected per frame.

    Args:
        detections_path: CSV file with the detections (its first column is
            dropped after loading).
        frames_path: prefix concatenated with ``"*.png"``.
        output_path: where frames (optional) and result files are saved.
        display_results: show each drawn frame on screen.
        save_results: save each drawn frame.

    Returns:
        The tracker's ``total_trackers`` value after the last frame.
    """
    # perform warmup to visualize results in "real time"
    warmup(20, display_results=display_results)
    # objects that accumulate tracker and timing results
    sort_results = SortResults()
    sort_performance = SortPerformanceResults()
    # read the file given as argument rather than the global `args`
    detections = pd.read_csv(detections_path).iloc[:, 1:]
    # get image dims to set boundaries
    width = detections["width"].unique()[0]
    height = detections["height"].unique()[0]
    # instantiate the tracker with given boundaries
    tracker = Sort(dims=(width, height))
    # read frames sorted by numerical order
    frame_paths = sorted(glob.glob(frames_path + "*.png"), key=numericalSort)

    for frame_count, frame_path in enumerate(frame_paths, start=1):
        # init results metadata (image and frame count)
        sort_results.init_frame_metadata(frame_path, frame_count)
        sort_performance.init_frame_metadata(frame_path, frame_count)
        # get detections of current frame
        pig_detections_data = detections.loc[
            (detections["frame"] == frame_count)
            & (detections["detection_class"] == 'pig')]
        if pig_detections_data.shape[0] == 0:
            print("[SORT] No detections for pigs here, skipping frame {}"
                  .format(frame_count))
            continue
        print(pig_detections_data.head())

        # get frame and its dimensions
        print("[SORT] Loading image with opencv...")
        start_loading = time.time()
        frame = imgutils.read_image_from_cv2(frame_path)
        total_loading = (time.time() - start_loading) * 1000
        print("[SORT] Done!, it took {}ms".format(total_loading))
        frame_height, frame_width, _ = frame.shape
        # get detections results image dims
        width = pig_detections_data["width"].unique()[0]
        height = pig_detections_data["height"].unique()[0]

        print("[SORT] Starting image preprocessing...")
        start_preprocessing = time.time()
        # resize image to match detections results
        if (frame_height != height) or (frame_width != width):
            print("[SORT] Frame dims are not the same from the detections")
            print("[SORT] Resizing...")
            resized_frame = imgutils.resize_image(frame, (width, height))
            print("[SORT] Done!")
        else:
            print("[SORT] Frame dims are the same from the detections")
            resized_frame = frame
        total_preprocessing = (time.time() - start_preprocessing) * 1000
        print("[SORT] Done!, it took {}ms".format(total_preprocessing))

        print("[SORT] Preparing detections for tracker...")
        start_preparing = time.time()
        valid_bboxes = []
        for _, row in pig_detections_data.iterrows():
            # prepare bbox detections to format required by sort tracker
            x1 = row.xmin
            x2 = row.xmax
            y1 = row.ymin
            y2 = row.ymax
            # NOTE(review): coordinates are passed as [x1, x2, y1, y2];
            # confirm this is the order valid_detection expects.
            if valid_detection([x1, x2, y1, y2]):
                print("[SORT] Detection is valid!")
                # format detection for sort input
                bbox = np.array([[x1, y1, x2, y2]]).astype(int)
                print("[SORT] Bbox: ", bbox)
                valid_bboxes.append(bbox)
            else:
                print("[SORT] Detection is invalid!, skipping...")
        # Stack every valid box. np.append returns a new array, so its
        # result must be kept or all boxes after the first one are lost.
        if valid_bboxes:
            current_bboxes = np.concatenate(valid_bboxes, axis=0)
        else:
            current_bboxes = np.array([[]]).astype(int)
        total_preparing = (time.time() - start_preparing) * 1000
        print("[SORT] Done!, it took {}ms".format(total_preparing))

        # send bboxes to the tracker
        print("[SORT] Updating trackers...")
        start_update = time.time()
        objects_tracked = tracker.update(current_bboxes)
        total_update = (time.time() - start_update) * 1000
        print("[SORT] Done!, it took {}ms".format(total_update))
        print("[SORT] Actual trackers: ", tracker.total_trackers)
        print("[SORT] Objects tracked: ", objects_tracked)

        # get tracking results
        print("[SORT] Getting tracker results...")
        start_results = time.time()
        # set a copy of the current frame to display results
        drawed_frame = np.copy(resized_frame)
        for object_tracked in objects_tracked:
            # get trackers info like state and time since update
            tracker_id = object_tracked.id
            tracker_state = "active" if object_tracked.active else "inactive"
            time_since_update = object_tracked.time_since_update
            initialized_in_roi = object_tracked.initialized_in_ROI
            # get the bbox returned as [x1, y1, x2, y2]
            bbox = object_tracked.get_state().astype(int)
            first_centroid = (object_tracked.first_centroid[0],
                              object_tracked.first_centroid[1])
            last_centroid = (object_tracked.last_centroid[0],
                             object_tracked.last_centroid[1])
            # get x-y coords
            xmin = bbox[0]
            ymin = bbox[1]
            xmax = bbox[2]
            ymax = bbox[3]
            print("[SORT] Bbox from tracker: ", bbox)
            print("[SORT] Active?: ", tracker_state)
            print("[SORT] Time since update: ", time_since_update)
            print("[SORT] Initialized in roi?: ", initialized_in_roi)
            # draw bboxes and label on frame
            drawed_frame = imgutils.draw_trackers_bounding_box(
                drawed_frame, "PIG", object_tracked)
            # add a new result to trackers results
            sort_results.add_new_result(width, height, tracker_id,
                                        tracker_state, time_since_update,
                                        initialized_in_roi, first_centroid,
                                        last_centroid,
                                        [xmin, xmax, ymin, ymax])
        total_results = (time.time() - start_results) * 1000
        print("[SORT] Tracker results done!, it took {}ms".format(total_results))

        # update trackers state to active/inactive depending on position
        print("[SORT] Updating state to trackers")
        start_update_state = time.time()
        tracker.update_trackers_state()
        total_update_state = (time.time() - start_update_state) * 1000
        print("[SORT] States update!, it took {}ms".format(total_update_state))

        # draw trackers info on the image
        drawed_frame = imgutils.draw_tracker_info(drawed_frame, "PIG", tracker)
        # display results on screen, scaled to the screen dimensions
        if display_results:
            imgutils.display_frame(drawed_frame, scaled=True,
                                   message="Tracker")
        # save results to a given path
        if save_results:
            imgutils.save_frame(drawed_frame, output_path, detections_path,
                                frame_count, "tracker_frames")
        total_trackers = tracker.total_trackers
        sort_performance.add_new_result(width, height, total_loading,
                                        total_preprocessing, total_preparing,
                                        total_update, total_results,
                                        total_update_state, total_trackers)

    # save tracker results on given path
    sort_results.save_results(output_path, detections_path)
    # save performance results on given path
    sort_performance.save_results(output_path, detections_path)
    return tracker.total_trackers
def run_inference(self, image_path=None, warmup_iters=0, threshold=0.5, iou=0.45, model_dir=None, bbox_results=None, performance_results=None, opencv=False, resize=False): input_size = int(self.attributes["INPUT_SIZE"]) labels = self.attributes["LABELS"] # get a copy of the graph func #graph_func = self.attributes["graph_func"] # Load the saved model model_loaded = tf.saved_model.load(model_dir, tags=[tag_constants.SERVING]) saved_model_loaded = model_loaded.signatures['serving_default'] #warmup if warmup_iters > 0: print("[INFERENCE] Starting warmup on {} iterations...".format( warmup_iters)) warmup_start = time.time() # get input size from model attributes input_size = int(self.attributes["INPUT_SIZE"]) # create a set of random images and perform inference for i in range(warmup_iters): print("[INFERENCE] Generating image {} with dims {}x{}".format( i + 1, input_size, input_size)) # working with numpy/cv backend if opencv: # create random image resized_image = np.random.normal(size=( input_size, input_size, 3)).astype(np.float32) / 255. # conventional conversion (use with opencv option) # The input needs to be a tensor, convert it using `tf.convert_to_tensor`. #input_tensor = tf.convert_to_tensor(resized_image) #input_tensor = tf.convert_to_tensor(image) # The model expects a batch of images, so add an axis with `tf.newaxis`. #input_tensor = input_tensor[tf.newaxis, ...] 
images_data = [] for i in range(1): images_data.append(resized_image) images_data = np.asarray(images_data).astype(np.float32) input_tensor = tf.constant(images_data) # working with tf backend for image else: dataset, _, _ = imgutils.get_dataset_tf( tensor_type='float', input_size=input_size) """print("[WARMUP] Creating features...") features = np.random.normal(loc=112, scale=70, size=(1, input_size, input_size, 3)).astype(np.float32) features = np.clip(features, 0.0, 255.0).astype(np.float32) features = tf.convert_to_tensor(value=tf.compat.v1.get_variable( "features", initializer=tf.constant(features))) print("[WARMUP] Creating dataset from features...") dataset = tf.data.Dataset.from_tensor_slices([features]) dataset = dataset.repeat(count=1)""" print("[WARMUP] Retrieving image and input tensor...") dataset_enum = enumerate(dataset) # get input tensor and cast to image (np) input_tensor = list(dataset_enum)[0][1] resized_image = input_tensor.numpy()[0] images_data = [] for i in range(1): images_data.append(resized_image) images_data = np.asarray(images_data).astype(np.float32) input_tensor = tf.constant(images_data) # get the detections print("[WARMUP] Now performing warmup inference...") inference_start_time = time.time() detections = saved_model_loaded(input_tensor) print("[WARMUP] Warmup inference took {} ms".format( (time.time() - inference_start_time) * 1000)) print("[INFERENCE] Preprocessing network outputs...") start_output = time.time() # extract output tensors metadata: boxes, confidence scores print("[INFERENCE] Extracting output tensors metadata...") keyval_start = time.time() for key, value in detections.items(): #print("[INFERENCE-DEBUG] key {}: value {}".format(key, value)) boxes = value[:, :, 0:4] pred_conf = value[:, :, 4:] #print("[INFERENCE-DEBUG] boxes ", boxes, type(boxes)) #print("[INFERENCE-DEBUG] confidence ", pred_conf, type(pred_conf)) print( "[INFERENCE] Done extracting metadata, it took {}".format( (time.time() - keyval_start) * 1000)) 
print("[INFERENCE] Performing NMS to output...") nms_start = time.time() # perform non-max supression to retrieve valid detections only boxes, scores, classes, valid_detections = tf.image.combined_non_max_suppression( boxes=tf.reshape(boxes, (tf.shape(boxes)[0], -1, 1, 4)), scores=tf.reshape( pred_conf, (tf.shape(pred_conf)[0], -1, tf.shape(pred_conf)[-1])), max_output_size_per_class=50, max_total_size=50, iou_threshold=iou, score_threshold=threshold) results = [ boxes.numpy(), scores.numpy(), classes.numpy(), valid_detections.numpy() ] print("[INFERENCE] NMS done, it took {} ms".format( (time.time() - nms_start) * 1000)) total_output = (time.time() - start_output) * 1000 print( "[INFERENCE] Done procesing output!, it took {}ms".format( total_output)) _ = imgutils.draw_yolo_bounding_boxes(resized_image, results, labels) # display results in ms print("[WARMUP] Warmup finished, it took {} ms".format( (time.time() - warmup_start) * 1000)) # case inference if opencv: print("[INFERENCE] Loading image with opencv backend...") # opencv option image_loading_start = time.time() image = imgutils.read_image_from_cv2(image_path) image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB) total_image_loading = (time.time() - image_loading_start) * 1000 # preprocess image to work on tf print("[INFERENCE] Preprocessing image...") start_preprocessing = time.time() # resize image to netwrk input dimensions resized_image = imgutils.resize_image(image, (input_size, input_size)) resized_image = resized_image / 255. # conventional conversion (use with opencv option) # The input needs to be a tensor, convert it using `tf.convert_to_tensor`. #input_tensor = tf.convert_to_tensor(resized_image) #input_tensor = tf.convert_to_tensor(image) # The model expects a batch of images, so add an axis with `tf.newaxis`. #input_tensor = input_tensor[tf.newaxis, ...] 
images_data = [] for i in range(1): images_data.append(resized_image) images_data = np.asarray(images_data).astype(np.float32) input_tensor = tf.constant(images_data) total_preprocessing = (time.time() - start_preprocessing) * 1000 print("[INFERENCE] Preprocessing done!, it took {}ms".format( total_preprocessing)) # case tf backend to manipulate images else: print("[INFERENCE] Loading image with tf backend...") # dataset option, yolo models require resizing to input size dataset, total_image_loading, total_preprocessing = imgutils.get_dataset_tf( image_path=image_path, input_size=input_size, tensor_type='float') #print("[INFERENCE] dataset {}".format(dataset)) # take the batched image dataset_enum = enumerate(dataset) input_tensor = list(dataset_enum)[0][1] # take image as np and convert to rgb image_bgr = input_tensor.numpy()[0] resized_image = image_bgr[..., ::-1].copy() / 255. images_data = [] for i in range(1): images_data.append(resized_image) images_data = np.asarray(images_data).astype(np.float32) input_tensor = tf.constant(images_data) print( "[INFERENCE] Images data: {} - shape: {} - dtype {} - type {}". 
format(images_data, images_data.shape, images_data.dtype, type(images_data))) print( "[INFERENCE] Input tensor: {} - shape: {} - dtype {} - type {}" .format(input_tensor, input_tensor.shape, input_tensor.dtype, type(input_tensor))) print("[INFERENCE] Now performing inference...") inference_start_time = time.time() #print("[INFERENCE] Input tensor: {} - dtype {}" # .format(input_tensor, input_tensor.dtype)) # get the detections detections = saved_model_loaded(input_tensor) total_inference = (time.time() - inference_start_time) * 1000 print("[INFERENCE] Inference took {} ms".format(total_inference)) print("[INFERENCE] Preprocessing network outputs...") start_output = time.time() # extract output tensors metadata: boxes, confidence scores print("[INFERENCE] Extracting output tensors metadata...") keyval_start = time.time() for key, value in detections.items(): #print("[INFERENCE-DEBUG] key {}: value {}".format(key, value)) boxes = value[:, :, 0:4] pred_conf = value[:, :, 4:] #print("[INFERENCE-DEBUG] boxes ", boxes, type(boxes)) #print("[INFERENCE-DEBUG] confidence ", pred_conf, type(pred_conf)) print("[INFERENCE] Done extracting metadata, it took {}".format( (time.time() - keyval_start) * 1000)) print("[INFERENCE] Performing NMS to output...") nms_start = time.time() # perform non-max supression to retrieve valid detections only boxes, scores, classes, valid_detections = tf.image.combined_non_max_suppression( boxes=tf.reshape(boxes, (tf.shape(boxes)[0], -1, 1, 4)), scores=tf.reshape( pred_conf, (tf.shape(pred_conf)[0], -1, tf.shape(pred_conf)[-1])), max_output_size_per_class=50, max_total_size=50, iou_threshold=iou, score_threshold=threshold) results = [ boxes.numpy(), scores.numpy(), classes.numpy(), valid_detections.numpy() ] print("[INFERENCE] NMS done, it took {} ms".format( (time.time() - nms_start) * 1000)) total_output = (time.time() - start_output) * 1000 print("[INFERENCE] Done procesing output!, it took {}ms".format( total_output)) # draw results on image 
if (bbox_results is not None) and (performance_results is not None): drawing_time = imgutils.draw_yolo_bounding_boxes( resized_image, results, labels, bbox_results=bbox_results) height, width, _ = image.shape # add new performance results to object performance_results.add_new_result(width, height, total_image_loading, total_preprocessing, total_inference, total_output, drawing_time) else: _ = imgutils.draw_yolo_bounding_boxes(resized_image, results, labels, save=True) # debugging info """print("[INFERENCE-DEBUG] tf-boxes ", boxes, type(boxes))