def detect_fall(YoloV3, img, input_size=416, CLASSES=YOLO_COCO_CLASSES,
                score_threshold=0.3, iou_threshold=0.45, rectangle_colors=''):
    """Detect a fallen person in a single frame.

    Runs YOLOv3 on ``img``, keeps the boxes that ``check_fall`` classifies as
    a fall (based on class index and the box's width/height), draws them and
    saves the annotated frame to 'fall-detection.jpg'.

    :param YoloV3: loaded YOLO model exposing ``predict``.
    :param img: frame as read by OpenCV (BGR channel order assumed —
        TODO confirm with callers).
    :param input_size: square network input size.
    :param CLASSES: class-names file forwarded to ``check_fall``.
    :param score_threshold: minimum confidence kept by ``postprocess_boxes``.
    :param iou_threshold: IoU threshold used by non-max suppression.
    :param rectangle_colors: forwarded to ``draw_bbox``.
    :return: True if at least one fall box was found (and the image saved),
        otherwise False.
    :raises ValueError: if ``img`` cannot be processed as an image.
    """
    try:
        # NOTE(review): applying COLOR_BGR2RGB twice swaps the channels back
        # to the original BGR order; kept as-is to preserve the exact input
        # the model was used with — confirm intent before simplifying.
        original_image = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        original_image = cv2.cvtColor(original_image, cv2.COLOR_BGR2RGB)
    except Exception as err:
        # Bug fix: the original did `raise ('Invalid image!')`, which itself
        # raises a TypeError because exceptions must derive from BaseException.
        raise ValueError('Invalid image!') from err

    image_data = image_preprocess(np.copy(original_image), [input_size, input_size])
    image_data = tf.expand_dims(image_data, 0)

    t1 = time.time()
    pred_bbox = YoloV3.predict(image_data)
    t2 = time.time()

    # Flatten the per-scale YOLO outputs into one (N, 5 + num_classes) tensor.
    pred_bbox = [tf.reshape(x, (-1, tf.shape(x)[-1])) for x in pred_bbox]
    pred_bbox = tf.concat(pred_bbox, axis=0)

    bboxes = postprocess_boxes(pred_bbox, original_image, input_size, score_threshold)
    bboxes = nms(bboxes, iou_threshold, method='nms')

    ms = (t2 - t1) * 1000
    fps = 1000 / ms
    print('Time: {:.2f}ms, {:.1f} FPS'.format(ms, fps))

    fall_bboxes = []
    for bbox in bboxes:  # unused enumerate index removed
        coor = np.array(bbox[:4], dtype=np.int32)
        class_ind = int(bbox[5])
        (x1, y1), (x2, y2) = (coor[0], coor[1]), (coor[2], coor[3])
        # A box counts as a fall based on its class and its width/height.
        if check_fall(CLASSES, class_ind, x2 - x1, y2 - y1):
            fall_bboxes.append(bbox)

    if len(fall_bboxes) > 0:
        image = draw_bbox(original_image, fall_bboxes, rectangle_colors=rectangle_colors)
        cv2.imwrite('fall-detection.jpg', image)
        return True
    else:
        return False
def Object_tracking(Yolo, video_path, output_path, input_size=416, show=False, CLASSES=YOLO_COCO_CLASSES, score_threshold=0.3, iou_threshold=0.45, rectangle_colors='', Track_only = []):
    """Track objects through a video with YOLO detections fed into Deep SORT.

    Besides tracking, maintains a MOG2 background model of the scene: the
    final background estimate is written to ``ts.out_bg_img`` and every
    confirmed track box is persisted per frame via ``ts.save``.

    :param Yolo: loaded YOLO model (``predict`` for TF, callable for TRT).
    :param video_path: input video path; a falsy value opens the webcam.
    :param output_path: annotated output video path ('' disables writing).
    :param input_size: square network input size.
    :param show: display annotated frames live; press 'q' to quit.
    :param CLASSES: class-names file for the detector.
    :param score_threshold: minimum detection confidence.
    :param iou_threshold: NMS IoU threshold.
    :param rectangle_colors: accepted for interface compatibility; unused here.
    :param Track_only: class names to keep; empty list keeps everything.
        NOTE(review): mutable default argument — shared across calls.
    """
    # Definition of the parameters
    max_cosine_distance = 0.7
    nn_budget = None

    #initialize deep sort object
    model_filename = 'model_data/mars-small128.pb'
    encoder = gdet.create_box_encoder(model_filename, batch_size=1)
    metric = nn_matching.NearestNeighborDistanceMetric("cosine", max_cosine_distance, nn_budget)
    tracker = Tracker(metric)

    times, times_2 = [], []

    if video_path:
        vid = cv2.VideoCapture(video_path) # detect on video
    else:
        vid = cv2.VideoCapture(0) # detect from webcam

    # by default VideoCapture returns float instead of int
    length = int(vid.get(cv2.CAP_PROP_FRAME_COUNT))
    width = int(vid.get(cv2.CAP_PROP_FRAME_WIDTH))
    height = int(vid.get(cv2.CAP_PROP_FRAME_HEIGHT))
    fps = int(vid.get(cv2.CAP_PROP_FPS))
    print("VIDEO PROPERTIES:FrameCount:{}\tWidth:{}\tHeight:{}\tFps:{}\t".format(length,width,height,fps))
    codec = cv2.VideoWriter_fourcc(*'XVID')
    out = cv2.VideoWriter(output_path, codec, fps, (width, height)) # output_path must be .mp4

    NUM_CLASS = read_class_names(CLASSES)
    key_list = list(NUM_CLASS.keys())
    val_list = list(NUM_CLASS.values())

    #1.BACKGROUND DETECTION
    backSub = cv2.createBackgroundSubtractorMOG2(history = 400, varThreshold = 16, detectShadows = False)
    bgMask=None
    frame_no=0
    while True:
        _, frame = vid.read()
        frame_no=frame_no+1
        try:
            # NOTE(review): COLOR_BGR2RGB applied twice swaps channels back to
            # BGR. The try/except doubles as the end-of-stream check: a None
            # frame makes cvtColor raise, exiting the loop.
            original_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            original_frame = cv2.cvtColor(original_frame, cv2.COLOR_BGR2RGB)
        except:
            break

        #1.1 BACKGROUND Update
        fgMask = backSub.apply(original_frame)
        bgMask = backSub.getBackgroundImage()
        if frame_no % 100==0:
            # progress heartbeat every 100 frames
            print(frame_no)

        image_data = image_preprocess(np.copy(original_frame), [input_size, input_size])
        #image_data = tf.expand_dims(image_data, 0)
        image_data = image_data[np.newaxis, ...].astype(np.float32)

        t1 = time.time()
        if YOLO_FRAMEWORK == "tf":
            pred_bbox = Yolo.predict(image_data)
        elif YOLO_FRAMEWORK == "trt":
            batched_input = tf.constant(image_data)
            result = Yolo(batched_input)
            pred_bbox = []
            for key, value in result.items():
                value = value.numpy()
                pred_bbox.append(value)
        #t1 = time.time()
        #pred_bbox = Yolo.predict(image_data)
        t2 = time.time()

        # Flatten the per-scale outputs into one (N, 5 + num_classes) tensor.
        pred_bbox = [tf.reshape(x, (-1, tf.shape(x)[-1])) for x in pred_bbox]
        pred_bbox = tf.concat(pred_bbox, axis=0)

        bboxes = postprocess_boxes(pred_bbox, original_frame, input_size, score_threshold)
        bboxes = nms(bboxes, iou_threshold, method='nms')

        # extract bboxes to boxes (x, y, width, height), scores and names
        boxes, scores, names = [], [], []
        for bbox in bboxes:
            # keep the box when Track_only is empty or its class is listed
            if len(Track_only) !=0 and NUM_CLASS[int(bbox[5])] in Track_only or len(Track_only) == 0:
                boxes.append([bbox[0].astype(int), bbox[1].astype(int), bbox[2].astype(int)-bbox[0].astype(int), bbox[3].astype(int)-bbox[1].astype(int)])
                scores.append(bbox[4])
                names.append(NUM_CLASS[int(bbox[5])])

        # Obtain all the detections for the given frame.
        boxes = np.array(boxes)
        names = np.array(names)
        scores = np.array(scores)
        features = np.array(encoder(original_frame, boxes))
        detections = [Detection(bbox, score, class_name, feature) for bbox, score, class_name, feature in zip(boxes, scores, names, features)]

        # Pass detections to the deepsort object and obtain the track information.
        tracker.predict()
        tracker.update(detections)

        # Obtain info from the tracks
        tracked_bboxes = []
        for track in tracker.tracks:
            if not track.is_confirmed() or track.time_since_update > 5:
                continue
            bbox = track.to_tlbr() # Get the corrected/predicted bounding box
            class_name = track.get_class() #Get the class name of particular object
            tracking_id = track.track_id # Get the ID for the particular track
            index = key_list[val_list.index(class_name)] # Get predicted object index by object name
            tracked_bboxes.append(bbox.tolist() + [tracking_id, index]) # Structure data, that we could use it with our draw_bbox function

            #Save to File
            box_item=bbox.tolist() + [tracking_id, index,frame_no]
            ts.save(box_item)

        # draw detection on frame
        image = draw_bbox(original_frame, tracked_bboxes, CLASSES=CLASSES, tracking=True)

        t3 = time.time()
        times.append(t2-t1)
        times_2.append(t3-t1)
        # sliding window of the last 20 timings for the FPS estimate
        times = times[-20:]
        times_2 = times_2[-20:]

        ms = sum(times)/len(times)*1000
        fps = 1000 / ms
        fps2 = 1000 / (sum(times_2)/len(times_2)*1000)

        image = cv2.putText(image, "Time: {:.1f} FPS".format(fps), (0, 30), cv2.FONT_HERSHEY_COMPLEX_SMALL, 1, (0, 0, 255), 2)

        # draw original yolo detection
        #image = draw_bbox(image, bboxes, CLASSES=CLASSES, show_label=False, rectangle_colors=rectangle_colors, tracking=True)

        print("Time: {:.2f}ms, Detection FPS: {:.1f}, total FPS: {:.1f}".format(ms, fps, fps2))
        if output_path != '':
            out.write(image)
        if show:
            cv2.imshow('output', image)
            if cv2.waitKey(25) & 0xFF == ord("q"):
                cv2.destroyAllWindows()
                break

    # persist the final background estimate produced by the MOG2 subtractor
    cv2.imwrite(ts.out_bg_img,bgMask)
    cv2.destroyAllWindows()
def Object_tracking(Yolo, video_path, output_path, input_size=416, show=False, CLASSES=YOLO_COCO_CLASSES, score_threshold=0.3, iou_threshold=0.45, rectangle_colors='', Track_only = [], custom_yolo=None, custom_classes=YOLO_CUSTOM_CLASSES, Custom_track_only=[]):
    """Track players with Deep SORT and score a basketball game from video.

    Two detectors run per frame: an optional custom YOLO (``custom_yolo``) for
    'basketball' and 'made-basket' classes, and the main ``Yolo`` for people.
    Ball position inside a player's box drives a weighted possession average
    (0 = Light team, 1 = Dark team); three consecutive 'made-basket' frames
    count a basket for the team currently in possession.

    :param Yolo: main person-detection model.
    :param video_path: input video path; a falsy value opens the webcam.
    :param output_path: annotated output video path ('' disables writing).
    :param custom_yolo: optional basketball-specific model; custom block is
        skipped entirely when None.
    :param custom_classes: class-names file for the custom model.
    :param Custom_track_only: class names to keep from the custom model.
    :param Track_only: class names to keep from the main model.
        NOTE(review): mutable default arguments here — shared across calls.
    :return: dict with per-team basket counts and made-basket frame numbers.
    """
    # Definition of the parameters
    max_cosine_distance = 0.7
    nn_budget = None

    #initialize deep sort object
    model_filename = 'model_data/mars-small128.pb'
    encoder = gdet.create_box_encoder(model_filename, batch_size=1)
    metric = nn_matching.NearestNeighborDistanceMetric("cosine", max_cosine_distance, nn_budget)
    tracker = Tracker(metric)

    times, times_2 = [], []

    if video_path:
        vid = cv2.VideoCapture(video_path) # detect on video
    else:
        vid = cv2.VideoCapture(0) # detect from webcam

    # by default VideoCapture returns float instead of int
    width = int(vid.get(cv2.CAP_PROP_FRAME_WIDTH))
    height = int(vid.get(cv2.CAP_PROP_FRAME_HEIGHT))
    fps = int(vid.get(cv2.CAP_PROP_FPS))
    codec = cv2.VideoWriter_fourcc(*'XVID')
    out = cv2.VideoWriter(output_path, codec, fps, (width, height)) # output_path must be .mp4

    NUM_CLASS = read_class_names(CLASSES)
    key_list = list(NUM_CLASS.keys())
    val_list = list(NUM_CLASS.values())

    # set a bunch of flags and variables for made baskets and possessions
    possession = None
    possession_list = []
    combined_possession_avg = 0.5
    total_basket_count=0
    basket_frame_list = []
    baskets_dict = {"Dark": 0, "Light": 0}
    made_basket_first_frame = 0
    made_basket_frames = 0
    basket_marked = False

    if custom_yolo:
        NUM_CUSTOM_CLASS = read_class_names(custom_classes)
        custom_key_list = list(NUM_CUSTOM_CLASS.keys())
        custom_val_list = list(NUM_CUSTOM_CLASS.values())

    frame_counter = 0
    # loop through each frame in video
    while True:
        _, frame = vid.read()
        try:
            # NOTE(review): BGR2RGB applied twice yields BGR again; a None
            # frame (end of stream) raises here and ends the loop.
            first_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            original_frame = cv2.cvtColor(first_frame, cv2.COLOR_BGR2RGB)
            frame_counter += 1
        except:
            break
        image_data = image_preprocess(np.copy(first_frame), [input_size, input_size])
        #image_data = tf.expand_dims(image_data, 0)
        image_data = image_data[np.newaxis, ...].astype(np.float32)

        t1 = time.time()

        # CUSTOM BLOCK FOR BASKETBALL
        # NOTE(review): basketball_center is only assigned inside this block;
        # if custom_yolo is falsy (or YOLO_FRAMEWORK != "tf") the reference in
        # the tracking loop below raises NameError — confirm intended usage.
        if custom_yolo:
            if YOLO_FRAMEWORK == "tf":
                # use yolo model to make prediction on the image data
                custom_pred_bbox = custom_yolo.predict(image_data)
                # reshape our data to be in correct form for processing
                custom_pred_bbox = [tf.reshape(x, (-1, tf.shape(x)[-1])) for x in custom_pred_bbox]
                custom_pred_bbox = tf.concat(custom_pred_bbox, axis=0)
                # get boxes based on threshhold
                custom_bboxes = postprocess_boxes(custom_pred_bbox, original_frame, input_size, 0.3)
                # custom_bboxes = nms(custom_bboxes, iou_threshold, method='nms')

                # extract bboxes to boxes (x, y, width, height), scores and names
                custom_boxes, custom_scores, custom_names = [], [], []
                for bbox in custom_bboxes:
                    if len(Custom_track_only) !=0 and NUM_CUSTOM_CLASS[int(bbox[5])] in Custom_track_only or len(Custom_track_only) == 0:
                        custom_boxes.append([bbox[0].astype(int), bbox[1].astype(int), bbox[2].astype(int)-bbox[0].astype(int), bbox[3].astype(int)-bbox[1].astype(int)])
                        custom_scores.append(bbox[4])
                        custom_names.append(NUM_CUSTOM_CLASS[int(bbox[5])])

                # Obtain all the detections for the given frame.
                custom_boxes = np.array(custom_boxes)
                custom_names = np.array(custom_names)
                custom_scores = np.array(custom_scores)

                # take note of the highest "scoring" made basket and basketball obj in each frame
                highest_scoring_basketball = 0
                basketball_box = None
                basketball_center = None
                highest_scoring_made_basket = 0
                made_basket_box = None
                for i, bbox in enumerate(custom_bboxes):
                    # loop through each bounding box to get the "best one" of the frame
                    # we do this because sometimes our model will detect two, and we know there can only be one
                    name = custom_names[i]
                    score = round(custom_scores[i], 3)
                    if name == 'basketball':
                        if score > highest_scoring_basketball:
                            highest_scoring_basketball = score
                            basketball_box = bbox
                    if name == 'made-basket':
                        # made baskets must clear a hard 0.85 confidence bar
                        if score > .85 and score > highest_scoring_made_basket:
                            highest_scoring_made_basket = score
                            made_basket_box = bbox

                # if it sees a basketball, put a box on it and note the center (for possession)
                if basketball_box is not None:
                    cv2.rectangle(original_frame, (int(basketball_box[0]), int(basketball_box[1])), (int(basketball_box[2]), int(basketball_box[3])), (0,0,255), 1)
                    cv2.rectangle(original_frame, (int(basketball_box[0]), int(basketball_box[1]-30)), (int(basketball_box[0])+(10)*17, int(basketball_box[1])), (0,0,255), -1)
                    cv2.putText(original_frame, "basketball" + "-" + str(highest_scoring_basketball),(int(basketball_box[0]), int(basketball_box[1]-10)),0, 0.5, (255,255,255),1)
                    basketball_center = ( (basketball_box[2]+basketball_box[0])/2, (basketball_box[3]+basketball_box[1])/2 )

                if made_basket_box is not None:
                    # if theres a made basket put the box on it
                    cv2.rectangle(original_frame, (int(made_basket_box[0]), int(made_basket_box[1])), (int(made_basket_box[2]), int(made_basket_box[3])), (0,255,0), 1)
                    cv2.rectangle(original_frame, (int(made_basket_box[0]), int(made_basket_box[1]-30)), (int(made_basket_box[0])+(15)*17, int(made_basket_box[1])), (0,255,0), -1)
                    cv2.putText(original_frame, "made-basket" + " " + str(highest_scoring_made_basket),(int(made_basket_box[0]), int(made_basket_box[1]-10)),0, 0.6, (0,0,0),1)
                    if made_basket_frames == 0:
                        # if this is the first frame in the sequence
                        made_basket_first_frame = frame_counter
                    # increment a counter for made basket frames
                    made_basket_frames += 1
                    # if there were 3 consecuative frames AND we havnt marked the basket yet then lets count it!
                    if made_basket_frames >= 3 and not basket_marked:
                        basket_marked = True
                        basket_frame_list.append(made_basket_first_frame)
                        if possession:
                            # record which "team" scored the basket
                            baskets_dict[possession] += 1
                # if no made basket make sure the made basket counter is at zero
                else: # no made basket
                    made_basket_frames = 0

                # 60 frames after a made basket we can reset the "marked basket" flag to False
                # in essence this means we start looking for made baskets again
                if basket_marked and frame_counter > basket_frame_list[-1] + 60:
                    basket_marked = False
        # END CUSTOM BLOCK

        # PRESON PREDICTION and TRACKING BLOCK
        if YOLO_FRAMEWORK == "tf":
            pred_bbox = Yolo.predict(image_data)
        elif YOLO_FRAMEWORK == "trt":
            batched_input = tf.constant(image_data)
            result = Yolo(batched_input)
            pred_bbox = []
            for key, value in result.items():
                value = value.numpy()
                pred_bbox.append(value)

        t2 = time.time()

        pred_bbox = [tf.reshape(x, (-1, tf.shape(x)[-1])) for x in pred_bbox]
        pred_bbox = tf.concat(pred_bbox, axis=0)

        bboxes = postprocess_boxes(pred_bbox, original_frame, input_size, score_threshold)
        bboxes = nms(bboxes, iou_threshold, method='nms')

        # extract bboxes to boxes (x, y, width, height), scores and names
        boxes, scores, names = [], [], []
        for bbox in bboxes:
            if len(Track_only) !=0 and NUM_CLASS[int(bbox[5])] in Track_only or len(Track_only) == 0:
                w = bbox[2].astype(int)-bbox[0].astype(int)
                h = bbox[3].astype(int)-bbox[1].astype(int)
                # plausibility filter: discard boxes too large for a player
                # (taller than 1/3 frame or wider than 1/4 frame) or too short
                if h < height/3 and w < width/4:
                    if h > 120:
                        boxes.append([bbox[0].astype(int), bbox[1].astype(int), w, h])
                        scores.append(bbox[4])
                        names.append(NUM_CLASS[int(bbox[5])])

        # Obtain all the detections for the given frame.
        boxes = np.array(boxes)
        names = np.array(names)
        scores = np.array(scores)

        # detect jersey color using the tracked persons bounding box
        patches = [gdet.extract_image_patch(frame, box, [box[3], box[2]]) for box in boxes]
        color_ratios = [find_color(patch) for patch in patches]

        features = np.array(encoder(original_frame, boxes))
        # mark the detection
        detections = [Detection(bbox, score, class_name, feature, color_ratio) for bbox, score, class_name, feature, color_ratio in zip(boxes, scores, names, features, color_ratios)]

        # Pass detections to the deepsort object and obtain the track information.
        tracker.predict()
        tracker.update(detections)

        # Obtain info from the tracks
        tracked_bboxes = []
        color_ratio_list = []
        check_possession = False
        for track in tracker.tracks:
            if not track.is_confirmed() or track.time_since_update > 5:
                continue
            color_ratio = track.get_color_ratio()
            color_ratio_list.append(color_ratio)
            bbox = track.to_tlbr() # Get the corrected/predicted bounding box
            class_name = track.get_class() #Get the class name of particular object
            tracking_id = track.track_id # Get the ID for the particular track
            index = key_list[val_list.index(class_name)] # Get predicted object index by object name
            tracked_bboxes.append(bbox.tolist() + [tracking_id, index]) # Structure data, that we could use it with our draw_bbox function

            # if there is a basketball in the frame, and its "in" a ersons bounding box, check what box it is in for psosession
            if basketball_center:
                if basketball_center[0] >= bbox[0] and basketball_center[0] <= bbox[2]:
                    if basketball_center[1] >= bbox[1] and basketball_center[1] <= bbox[3]:
                        check_possession = True
                        if color_ratio <= .2: # light team
                            possession_list.append(0)
                        else: # dark team
                            possession_list.append(1)
            else: # no basketball in frame
                # possession_list.append(-1)
                # test_list.pop(0)
                pass

        # if the ball is in a bounding box, update out possession tracker
        if check_possession:
            if len(possession_list) > 60:
                # this function takes an average of the last 60 posessions marked to determine current position
                # it weights the most recent detections more
                # this algo is a WIP
                possession_list = possession_list[-60:]
                # full_avg = sum(possession_list)/len(possession)
                last_60_avg = sum(possession_list[-60:])/60
                last_30_avg = sum(possession_list[-30:])/30
                last_15_avg = sum(possession_list[-15:])/15
                last_5_avg = sum(possession_list[-5:])/5
                combined_possession_avg = round((last_60_avg + last_30_avg + last_15_avg + last_5_avg)/4,3)
                #most_common_possession = stats.mode(possession_list)[0]
            else:
                combined_possession_avg = round(sum(possession_list)/len(possession_list),3)

            # use our possession average to determine who has the ball right now
            if combined_possession_avg < 0.5:
                possession = "Light"
            elif combined_possession_avg > 0.5:
                possession = "Dark"

        # draw detection on frame
        image = draw_bbox(original_frame, tracked_bboxes, color_ratios=color_ratio_list, CLASSES=CLASSES, tracking=True)

        t3 = time.time()
        times.append(t2-t1)
        times_2.append(t3-t1)
        # sliding window of the last 20 timings for the FPS estimate
        times = times[-20:]
        times_2 = times_2[-20:]

        ms = sum(times)/len(times)*1000
        fps = 1000 / ms
        fps2 = 1000 / (sum(times_2)/len(times_2)*1000)

        if possession == "Light":
            image = cv2.putText(image, "Posession: {}".format(possession), (width-400, 30), cv2.FONT_HERSHEY_COMPLEX_SMALL, 1, (50, 255, 255), 2)
        else:
            image = cv2.putText(image, "Posession: {}".format(possession), (width-400, 30), cv2.FONT_HERSHEY_COMPLEX_SMALL, 1, (0, 0, 255), 2)
        # image = cv2.putText(image, "Light: {} Dark: {} None: {}".format(possession_list.count(0), possession_list.count(1), possession_list.count(-1)), (400, 30), cv2.FONT_HERSHEY_COMPLEX_SMALL, 1, (0, 0, 255), 2)
        image = cv2.putText(image, "Posession Avg: {}".format(combined_possession_avg), (400, 30), cv2.FONT_HERSHEY_COMPLEX_SMALL, 1, (0, 0, 255), 2)
        image = cv2.putText(image, "Time: {:.1f} FPS".format(fps), (0, 30), cv2.FONT_HERSHEY_COMPLEX_SMALL, 1, (0, 0, 255), 2)

        # draw original yolo detection
        #image = draw_bbox(image, bboxes, CLASSES=CLASSES, show_label=False, rectangle_colors=rectangle_colors, tracking=True)

        print("Time: {:.2f}ms, Detection FPS: {:.1f}, total FPS: {:.1f}".format(ms, fps, fps2))
        if output_path != '':
            out.write(image)
        if show:
            cv2.imshow('output', image)
            if cv2.waitKey(25) & 0xFF == ord("q"):
                cv2.destroyAllWindows()
                break

    cv2.destroyAllWindows()
    return_data = {"baskets_dict": baskets_dict, "basket_frame_list": basket_frame_list}
    print("video saved to {}".format(output_path))
    return(return_data)
def Object_tracking(YoloV3, video_path, output_path, input_size=416, show=False, CLASSES=YOLO_COCO_CLASSES, score_threshold=0.3, iou_threshold=0.45, rectangle_colors='', Track_only=[]):
    """Run YOLOv3 detection and Deep SORT tracking over a video stream.

    Reads frames from ``video_path`` (or the webcam), detects objects with
    ``YoloV3``, associates them across frames with Deep SORT, draws the
    confirmed tracks and optionally writes/shows the annotated video.

    :param YoloV3: loaded YOLOv3 model exposing ``predict``.
    :param video_path: input video path; a falsy value opens the webcam.
    :param output_path: annotated output video path ('' disables writing).
    :param input_size: square network input size.
    :param show: display annotated frames live; press 'q' to quit.
    :param CLASSES: class-names file for the detector.
    :param score_threshold: minimum detection confidence.
    :param iou_threshold: NMS IoU threshold.
    :param rectangle_colors: accepted for interface compatibility; unused here.
    :param Track_only: class names to keep; empty list keeps everything.
        NOTE(review): mutable default argument — shared across calls.
    """
    # Definition of the parameters
    max_cosine_distance = 0.7
    nn_budget = None

    #initialize deep sort object
    model_filename = 'model_data/mars-small128.pb'
    encoder = gdet.create_box_encoder(model_filename, batch_size=1)
    metric = nn_matching.NearestNeighborDistanceMetric("cosine", max_cosine_distance, nn_budget)
    tracker = Tracker(metric)

    times = []

    if video_path:
        vid = cv2.VideoCapture(video_path)  # detect on video
    else:
        vid = cv2.VideoCapture(0)  # detect from webcam

    # by default VideoCapture returns float instead of int
    width = int(vid.get(cv2.CAP_PROP_FRAME_WIDTH))
    height = int(vid.get(cv2.CAP_PROP_FRAME_HEIGHT))
    fps = int(vid.get(cv2.CAP_PROP_FPS))
    codec = cv2.VideoWriter_fourcc(*'XVID')
    out = cv2.VideoWriter(output_path, codec, fps, (width, height))  # output_path must be .mp4

    NUM_CLASS = read_class_names(CLASSES)
    key_list = list(NUM_CLASS.keys())
    val_list = list(NUM_CLASS.values())

    while True:
        _, img = vid.read()
        try:
            # NOTE(review): BGR2RGB applied twice swaps channels back to BGR;
            # a None frame (end of stream) raises here and ends the loop.
            original_image = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
            original_image = cv2.cvtColor(original_image, cv2.COLOR_BGR2RGB)
        except:
            break

        image_data = image_preprocess(np.copy(original_image), [input_size, input_size])
        image_data = tf.expand_dims(image_data, 0)

        t1 = time.time()
        pred_bbox = YoloV3.predict(image_data)
        t2 = time.time()

        # sliding window of the last 20 timings for the FPS estimate
        times.append(t2 - t1)
        times = times[-20:]

        # Flatten the per-scale outputs into one (N, 5 + num_classes) tensor.
        pred_bbox = [tf.reshape(x, (-1, tf.shape(x)[-1])) for x in pred_bbox]
        pred_bbox = tf.concat(pred_bbox, axis=0)

        bboxes = postprocess_boxes(pred_bbox, original_image, input_size, score_threshold)
        bboxes = nms(bboxes, iou_threshold, method='nms')

        # extract bboxes to boxes (x, y, width, height), scores and names
        boxes, scores, names = [], [], []
        for bbox in bboxes:
            # keep the box when Track_only is empty or its class is listed
            if len(Track_only) != 0 and NUM_CLASS[int(bbox[5])] in Track_only or len(Track_only) == 0:
                boxes.append([bbox[0].astype(int), bbox[1].astype(int),
                              bbox[2].astype(int) - bbox[0].astype(int),
                              bbox[3].astype(int) - bbox[1].astype(int)])
                scores.append(bbox[4])
                names.append(NUM_CLASS[int(bbox[5])])

        # Obtain all the detections for the given frame.
        boxes = np.array(boxes)
        names = np.array(names)
        scores = np.array(scores)
        features = np.array(encoder(original_image, boxes))
        detections = [Detection(bbox, score, class_name, feature)
                      for bbox, score, class_name, feature in zip(boxes, scores, names, features)]

        # Pass detections to the deepsort object and obtain the track information.
        tracker.predict()
        tracker.update(detections)

        # Obtain info from the tracks
        tracked_bboxes = []
        for track in tracker.tracks:
            if not track.is_confirmed() or track.time_since_update > 5:
                continue
            bbox = track.to_tlbr()  # Get the corrected/predicted bounding box
            class_name = track.get_class()  # Get the class name of particular object
            tracking_id = track.track_id  # Get the ID for the particular track
            index = key_list[val_list.index(class_name)]  # Get predicted object index by object name
            tracked_bboxes.append(bbox.tolist() + [tracking_id, index])  # Structure data, that we could use it with our draw_bbox function

        ms = sum(times) / len(times) * 1000
        fps = 1000 / ms

        # draw detection on frame
        image = draw_bbox(original_image, tracked_bboxes, CLASSES=CLASSES, tracking=True)
        image = cv2.putText(image, "Time: {:.1f} FPS".format(fps), (0, 30), cv2.FONT_HERSHEY_COMPLEX_SMALL, 1, (0, 0, 255), 2)

        # draw original yolo detection
        #image = draw_bbox(image, bboxes, CLASSES=CLASSES, show_label=False, rectangle_colors=rectangle_colors, tracking=True)
        #print("Time: {:.2f}ms, {:.1f} FPS".format(ms, fps))

        if output_path != '':
            out.write(image)
        if show:
            cv2.imshow('output', image)
            if cv2.waitKey(25) & 0xFF == ord("q"):
                cv2.destroyAllWindows()
                break

    # Bug fix: the capture and writer were never released, which can leave the
    # output container unfinalized and leaks the device handle.
    vid.release()
    out.release()
    cv2.destroyAllWindows()
print(f'found box : {len(pred_bbox)}') # %% bboxes = postprocess_boxes(pred_bbox, original_image, input_size, score_threshold=0.3) print(f'after post-process found box : {len(bboxes)}') bboxes = nms(bboxes, iou_threshold=0.45, method='nms') print(f'after nms found box : {len(bboxes)}') # bboxes # %% image = draw_bbox(original_image, bboxes, CLASSES='../' + YOLO_COCO_CLASSES, rectangle_colors='') # %% display(Image.fromarray(image)) # plt.figure() # plt.imshow(image) # plt.colorbar() # plt.grid(False) # plt.show()
continue bbox = track.to_tlbr( ) # Get the corrected/predicted bounding box #`(min x, miny, max x,max y)`. class_name = track.get_class( ) #Get the class name of particular object tracking_id = track.track_id # Get the ID for the particular track index = key_list[val_list.index( class_name)] # Get predicted object index by object name tracked_bboxes.append( bbox.tolist() + [tracking_id, index] ) # Structure data, that we could use it with our draw_bbox function # draw detection on frame image = draw_bbox(image, tracked_bboxes, CLASSES=CLASSES, tracking=True) #indices = cv2.dnn.NMSBoxes(boxes_l, confidences_l, 0.1, 0.1) centroids = [] coordinates = [] if tracked_bboxes != []: for box in tracked_bboxes: coords = calc_coord_trackbox(box) #print(coords) centr = calculate_centr(coords) centroids.append(centr) coordinates.append(coords) #if class_ids[i]==0:
def track_object(Yolo, video_path, vid_output_path, text_output_path, input_size=416, show=False, CLASSES=YOLO_COCO_CLASSES, score_threshold=0.3, iou_threshold=0.45, rectangle_colors='', tracking=True, track_only=[], tracker_max_age=30, passenger_det=False, face_score_threshold=0.3, color="bincount"):
    """
    Do detection on video

    :param Yolo: <model_obj> YOLO model for vehicle detection
    :param video_path: <str> Path to video file. Leave empty to use camera
    :param vid_output_path: <str> Path to save processed video. Leave empty to not save
    :param text_output_path: <str> Path to the CSV file detections are appended to. Leave empty to not save
    :param input_size: <int> YOLO model input size
    :param show: <bool> True if you want to see processing live
    :param CLASSES: <obj> YOLO model classed. By default they are taken from the config file
    :param score_threshold: <float> minimum confidence for vehicle detection
    :param iou_threshold: <float> minimum bounding box overlap for them to be counted as same object
    :param rectangle_colors: bounding box colors. Currently does nothing
    :param tracking: whether to use vehicle tracking
    :param track_only: <list> List of objects to track if detector detects more
    :param tracker_max_age: <int> number of missed before track is deleted
    :param passenger_det: <bool> whether to initialize face detection
    :param face_score_threshold: <float> minimum confidence for face detection
    :param color: <str> Color detection method to use. None if neither one
    :return:
    """
    if not Yolo:
        Yolo = load_yolo_model()
    if passenger_det:
        # the bool flag is replaced by the detector instance (or None)
        passenger_det = FaceDetector()
    else:
        passenger_det = None
    if text_output_path:
        # write the CSV header row once up front
        write_csv([[
            "x1", "y1", "x2", "y2", "id", "class", "probability",
            "color" if color else None,
            "passengers" if passenger_det else None
        ]], text_output_path)

    # Definition of the deep sort parameters
    max_cosine_distance = 0.7
    nn_budget = None

    # initialize deep sort object
    model_filename = 'model_data/mars-small128.pb'
    encoder = gdet.create_box_encoder(model_filename, batch_size=1)
    metric = nn_matching.NearestNeighborDistanceMetric("cosine", max_cosine_distance, nn_budget)
    tracker = Tracker(metric, max_age=tracker_max_age)

    times, times_2 = [], []

    if video_path:
        vid = cv2.VideoCapture(video_path)  # detect on video
    else:
        vid = cv2.VideoCapture(0)  # detect from webcam

    # by default VideoCapture returns float instead of int
    width = int(vid.get(cv2.CAP_PROP_FRAME_WIDTH))
    height = int(vid.get(cv2.CAP_PROP_FRAME_HEIGHT))
    fps = int(vid.get(cv2.CAP_PROP_FPS))
    codec = cv2.VideoWriter_fourcc(*'XVID')
    out = cv2.VideoWriter(vid_output_path, codec, fps, (width, height))  # vid_output_path must be .mp4

    NUM_CLASS = read_class_names(CLASSES)
    key_list = list(NUM_CLASS.keys())
    val_list = list(NUM_CLASS.values())

    while True:
        _, frame = vid.read()
        try:
            # NOTE(review): BGR2RGB applied twice swaps channels back to BGR;
            # a None frame (end of stream) raises here and ends the loop.
            original_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            original_frame = cv2.cvtColor(original_frame, cv2.COLOR_BGR2RGB)
        except:
            break

        image_data = image_preprocess(np.copy(original_frame), [input_size, input_size])
        # image_data = tf.expand_dims(image_data, 0)
        image_data = image_data[np.newaxis, ...].astype(np.float32)

        t1 = time.time()
        if YOLO_FRAMEWORK == "tf":
            pred_bbox = Yolo.predict(image_data)
        elif YOLO_FRAMEWORK == "trt":
            batched_input = tf.constant(image_data)
            result = Yolo(batched_input)
            pred_bbox = []
            for key, value in result.items():
                value = value.numpy()
                pred_bbox.append(value)
        t2 = time.time()

        # Flatten the per-scale outputs into one (N, 5 + num_classes) tensor.
        pred_bbox = [tf.reshape(x, (-1, tf.shape(x)[-1])) for x in pred_bbox]
        pred_bbox = tf.concat(pred_bbox, axis=0)

        bboxes = postprocess_boxes(pred_bbox, original_frame, input_size, score_threshold)
        bboxes = nms(bboxes, iou_threshold, method='nms')

        # extract bboxes to boxes (x, y, width, height), scores and names
        boxes, scores, names = [], [], []
        for bbox in bboxes:
            # keep the box when track_only is empty or its class is listed
            if len(track_only) != 0 and NUM_CLASS[int(bbox[5])] in track_only or len(track_only) == 0:
                boxes.append([bbox[0].astype(int), bbox[1].astype(int),
                              bbox[2].astype(int) - bbox[0].astype(int),
                              bbox[3].astype(int) - bbox[1].astype(int)])
                scores.append(bbox[4])
                names.append(NUM_CLASS[int(bbox[5])])

        # Obtain all the detections for the given frame.
        boxes = np.array(boxes)
        names = np.array(names)
        scores = np.array(scores)
        features = np.array(encoder(original_frame, boxes))
        detections = [Detection(bbox, score, class_name, feature)
                      for bbox, score, class_name, feature in zip(boxes, scores, names, features)]
        # if score >= confidence_threshold]

        # Pass detections to the deep sort object and obtain the track information.
        tracker.predict()
        tracker.update(detections)

        # Obtain info from the tracks
        tracked_bboxes = []
        for track in tracker.tracks:
            if not track.is_confirmed() or track.time_since_update > 5:
                continue
            bbox = track.to_tlbr()  # Get the corrected/predicted bounding box
            class_name = track.get_class()  # Get the class name of particular object
            tracking_id = track.track_id  # Get the ID for the particular track
            index = key_list[val_list.index(class_name)]  # Get predicted object index by object name
            tracked_bboxes.append(bbox.tolist() + [tracking_id, index, track.class_confidence])  # Structure data, that we could use it with our draw_bbox function

        # draw detection on frame (also handles CSV logging and passenger detection)
        image = draw_bbox(original_frame, tracked_bboxes, CLASSES=CLASSES, tracking=True, color=color, text_output_path=text_output_path, passenger_detector=passenger_det, passenger_threshold=face_score_threshold)

        t3 = time.time()
        times.append(t2 - t1)
        times_2.append(t3 - t1)
        # sliding window of the last 20 timings for the FPS estimate
        times = times[-20:]
        times_2 = times_2[-20:]

        ms = sum(times) / len(times) * 1000
        fps = 1000 / ms
        fps2 = 1000 / (sum(times_2) / len(times_2) * 1000)

        image = cv2.putText(image, "Time: {:.1f} FPS".format(fps), (0, 30), cv2.FONT_HERSHEY_COMPLEX_SMALL, 1, (0, 0, 255), 2)
        # print("Time: {:.2f}ms, Detection FPS: {:.1f}, total FPS: {:.1f}".format(ms, fps, fps2))

        if vid_output_path != '':
            out.write(image)
        if show:
            cv2.imshow('output', image)
            if cv2.waitKey(25) & 0xFF == ord("q"):
                cv2.destroyAllWindows()
                break

    # Bug fix: the capture and writer were never released, which can leave the
    # output container unfinalized and leaks the device handle.
    vid.release()
    out.release()
    cv2.destroyAllWindows()
def Object_tracking(Yolo, video_path, output_path, input_size=416, show=True,
                    CLASSES=YOLO_COCO_CLASSES, score_threshold=0.3,
                    iou_threshold=0.45, rectangle_colors='', Track_only=None):
    """Run YOLO detection + Deep SORT tracking over a video and log results.

    Each frame is passed through the YOLO model, detections are associated
    across frames by Deep SORT, the tracked boxes are drawn on the frame,
    written to ``output_path`` (when non-empty) and appended as rows to a
    CSV annotation file.

    Args:
        Yolo: loaded YOLO model (Keras model or TF-TRT function; which call
            path is used is selected by the global ``YOLO_FRAMEWORK``).
        video_path: input video file; a falsy value opens webcam 0 instead.
        output_path: destination video file (mp4v codec, so it must be .mp4);
            '' disables writing.
        input_size: square network input resolution.
        show: display a live OpenCV preview window ('q' quits).
        CLASSES: class-names file used to map class indices to names.
        score_threshold: minimum detection confidence kept after postprocess.
        iou_threshold: IoU threshold for non-max suppression.
        rectangle_colors: unused in this variant; kept for call compatibility.
        Track_only: optional list of class names to track; None or [] tracks
            every detected class.
    """
    # Avoid the mutable-default-argument pitfall while keeping the original
    # calling convention (callers may still pass a list or omit the argument).
    if Track_only is None:
        Track_only = []

    # NOTE(review): hard-coded absolute Windows paths — these belong in
    # configuration; values preserved to keep behavior unchanged.
    output_file = "D:/PELUSO/ITSligo/lectures_MENG/4-Symulation and Testing/assignments/assignment 2 - group/simV5_anotation.CSV"
    model_filename = "D:/PELUSO/ITSligo/lectures_MENG/4-Symulation and Testing/assignments/assignment 2 - group/TensorFlow-2.x-YOLOv3-master/model_data/mars-small128.pb"

    # newline='' is required by the csv module to avoid blank rows on Windows.
    csv_file = open(output_file, mode='a', newline='')
    results_csv = csv.writer(csv_file, delimiter=',', quotechar='"',
                             quoting=csv.QUOTE_MINIMAL)
    results_csv.writerow(['Frame_index', 'Score', 'Confidence', 'Pixel_Area',
                          'X1', 'Y1', 'X2', 'Y2', 'ClassID'])

    # Deep SORT parameters and objects.
    max_cosine_distance = 0.9  # appearance-matching gate
    nn_budget = None           # unlimited appearance-feature gallery
    encoder = gdet.create_box_encoder(model_filename, batch_size=1)
    metric = nn_matching.NearestNeighborDistanceMetric("cosine",
                                                       max_cosine_distance,
                                                       nn_budget)
    tracker = Tracker(metric)

    times, times_2 = [], []  # rolling windows of detection / total latency

    # Detect on a video file, or on webcam 0 when no path is given.
    vid = cv2.VideoCapture(video_path) if video_path else cv2.VideoCapture(0)

    # VideoCapture reports floats by default; the writer needs ints.
    width = int(vid.get(cv2.CAP_PROP_FRAME_WIDTH))
    height = int(vid.get(cv2.CAP_PROP_FRAME_HEIGHT))
    fps = int(vid.get(cv2.CAP_PROP_FPS))
    codec = cv2.VideoWriter_fourcc(*'mp4v')  # output_path must be .mp4
    out = cv2.VideoWriter(output_path, codec, fps, (width, height))
    print("FPS:::::" + str(fps))

    NUM_CLASS = read_class_names(CLASSES)
    key_list = list(NUM_CLASS.keys())
    val_list = list(NUM_CLASS.values())

    frame_idx = 0
    try:
        while True:
            ret, frame = vid.read()
            # BUGFIX: the original relied on a bare `except:` around cvtColor
            # to detect end-of-stream; test the read result explicitly.
            if not ret or frame is None:
                break
            # NOTE(review): BGR2RGB applied twice is a channel-swap round
            # trip, so original_frame stays in BGR order — TODO confirm this
            # is intended; preserved from the original.
            original_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            original_frame = cv2.cvtColor(original_frame, cv2.COLOR_BGR2RGB)

            image_data = image_preprocess(np.copy(original_frame),
                                          [input_size, input_size])
            image_data = image_data[np.newaxis, ...].astype(np.float32)

            t1 = time.time()
            if YOLO_FRAMEWORK == "tf":
                pred_bbox = Yolo.predict(image_data)
            elif YOLO_FRAMEWORK == "trt":
                batched_input = tf.constant(image_data)
                result = Yolo(batched_input)
                pred_bbox = [value.numpy() for value in result.values()]
            t2 = time.time()

            pred_bbox = [tf.reshape(x, (-1, tf.shape(x)[-1])) for x in pred_bbox]
            pred_bbox = tf.concat(pred_bbox, axis=0)

            bboxes = postprocess_boxes(pred_bbox, original_frame, input_size,
                                       score_threshold)
            bboxes = nms(bboxes, iou_threshold, method='nms', sigma=0.4)
            # Debug output preserved from the original: raw per-prediction
            # class argmax before NMS filtering.
            print(np.argmax(pred_bbox[:, 5:], axis=-1))

            # Convert [x1, y1, x2, y2] detections into Deep SORT's
            # (x, y, w, h) layout, filtered by Track_only when given.
            boxes, scores, names = [], [], []
            for bbox in bboxes:
                class_name = NUM_CLASS[int(bbox[5])]
                # Equivalent to the original and/or precedence expression:
                # an empty Track_only list means "track everything".
                if not Track_only or class_name in Track_only:
                    boxes.append([bbox[0].astype(int),
                                  bbox[1].astype(int),
                                  bbox[2].astype(int) - bbox[0].astype(int),
                                  bbox[3].astype(int) - bbox[1].astype(int)])
                    scores.append(bbox[4])
                    names.append(class_name)

            # Obtain all the detections for the given frame.
            boxes = np.array(boxes)
            names = np.array(names)
            scores = np.array(scores)
            features = np.array(encoder(original_frame, boxes))
            detections = [Detection(bbox, score, class_name, feature)
                          for bbox, score, class_name, feature
                          in zip(boxes, scores, names, features)]

            # Pass detections to the Deep SORT object and update the tracks.
            tracker.predict()
            tracker.update(detections)

            tracked_bboxes = []
            tracked_scores = []
            for track in tracker.tracks:
                # Ignore unconfirmed or stale (>5 frames unmatched) tracks.
                if not track.is_confirmed() or track.time_since_update > 5:
                    continue
                bbox = track.to_tlbr()          # corrected/predicted box
                class_name = track.get_class()
                tracking_id = track.track_id
                # Map the class name back to its predicted index.
                index = key_list[val_list.index(class_name)]
                # Layout consumed by draw_bbox: [x1, y1, x2, y2, id, index].
                tracked_bboxes.append(bbox.tolist() + [tracking_id, index])
                # BUGFIX: narrowed from a bare except — `Confidence` is a
                # custom attribute that is not present on every track.
                try:
                    tracked_scores.append(track.Confidence)
                except AttributeError:
                    print("skip")

            # BUGFIX: removed a stray draw_bbox(image, bboxes, ...) call that
            # referenced `image` before assignment (NameError on first frame).
            frame_idx += 1
            print("------frame_idx-------" + str(frame_idx))
            image = draw_bbox(original_frame, results_csv, tracked_bboxes,
                              frame_idx, tracked_scores, show_label=True,
                              show_confidence=True, CLASSES=CLASSES,
                              tracking=True)

            t3 = time.time()
            times.append(t2 - t1)
            times_2.append(t3 - t1)
            times = times[-20:]        # keep a 20-frame rolling average
            times_2 = times_2[-20:]
            ms = sum(times) / len(times) * 1000
            fps = 1000 / ms
            fps2 = 1000 / (sum(times_2) / len(times_2) * 1000)
            print(
                "Time: {:.2f}ms, Detection FPS: {:.1f}, total FPS: {:.1f}".format(
                    ms, fps, fps2))

            if output_path != '':
                out.write(image)
            if show:
                cv2.imshow('output', image)
                if cv2.waitKey(25) & 0xFF == ord("q"):
                    cv2.destroyAllWindows()
                    break
    finally:
        # BUGFIX: the original called results_csv.close(), but csv.writer has
        # no close() — the underlying file handle must be closed instead.
        # Also release the capture/writer so the output file is finalized.
        csv_file.close()
        vid.release()
        out.release()
        cv2.destroyAllWindows()
# NOTE(review): orphaned fragment — this appears to be a duplicate of the
# tracking-loop body from the Object_tracking variants elsewhere in this file.
# It references names that are not defined at this scope (`detections`,
# `original_frame`, `times`, `times_2`, `tStart`, `tEnd`, `key_list`,
# `val_list`) and would raise NameError if executed here. Code preserved
# verbatim; confirm with the author whether it can be deleted.
tracker.predict()
tracker.update(detections)
tracked_bboxes = []
for track in tracker.tracks:
    # Skip tracks that are unconfirmed or stale (no update for >5 frames).
    if not track.is_confirmed() or track.time_since_update > 5:
        continue
    bbox = track.to_tlbr()          # corrected/predicted bounding box
    class_name = track.get_class()
    tracking_id = track.track_id
    index = key_list[val_list.index(class_name)]  # class name -> class index
    tracked_bboxes.append(bbox.tolist() + [tracking_id, index])
image = draw_bbox(original_frame, tracked_bboxes, CLASSES=YOLO_COCO_CLASSES,
                  tracking=True)
t3 = time.time()
# NOTE(review): `tEnd`/`tStart` are undefined in this fragment — TODO confirm
# which timing variables were intended (other variants use t1/t2).
times.append(tEnd - tStart)
times_2.append(t3 - tStart)
# NOTE(review): this first assignment mixes len(times_2) into the average and
# is immediately overwritten by the next line — looks like leftover code.
ms = sum(times) / len(times_2) * 1000
ms = sum(times) / len(times) * 1000
fps = 1000 / ms
fps2 = 1000 / (sum(times_2) / len(times_2) * 1000)
image = cv2.putText(image, "Time: {:.1f} FPS".format(fps), (0, 30),
                    cv2.FONT_HERSHEY_COMPLEX_SMALL, 1, (0, 0, 255), 2)
print("Time:{:.2f}ms, Detection FPS: {:.1f}, total FPS: {:.1f}".format(
    ms, fps, fps2))
def Object_tracking(Yolo, video_path, input_size=416, CLASSES=YOLO_COCO_CLASSES, score_threshold=0.3, iou_threshold=0.45, Track_only=["person"]): # Definition of the parameters max_cosine_distance = 0.7 nn_budget = None #initialize deep sort object model_filename = 'data/model_data/mars-small128.pb' encoder = gdet.create_box_encoder(model_filename, batch_size=1) metric = nn_matching.NearestNeighborDistanceMetric("cosine", max_cosine_distance, nn_budget) tracker = Tracker(metric) times, times_2 = [], [] vid = cv2.VideoCapture(video_path) # detect on video # by default VideoCapture returns float instead of int width = int(vid.get(cv2.CAP_PROP_FRAME_WIDTH)) height = int(vid.get(cv2.CAP_PROP_FRAME_HEIGHT)) fps = int(vid.get(cv2.CAP_PROP_FPS)) codec = cv2.VideoWriter_fourcc(*'XVID') NUM_CLASS = read_class_names(CLASSES) key_list = list(NUM_CLASS.keys()) val_list = list(NUM_CLASS.values()) for x in range(120): _, frame = vid.read() try: original_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB) original_frame = cv2.cvtColor(original_frame, cv2.COLOR_BGR2RGB) except: break image_data = image_preprocess(np.copy(original_frame), [input_size, input_size]) #image_data = tf.expand_dims(image_data, 0) image_data = image_data[np.newaxis, ...].astype(np.float32) t1 = time.time() if YOLO_FRAMEWORK == "tf": pred_bbox = Yolo.predict(image_data) elif YOLO_FRAMEWORK == "trt": batched_input = tf.constant(image_data) result = Yolo(batched_input) pred_bbox = [] for key, value in result.items(): value = value.numpy() pred_bbox.append(value) #t1 = time.time() #pred_bbox = Yolo.predict(image_data) t2 = time.time() pred_bbox = [tf.reshape(x, (-1, tf.shape(x)[-1])) for x in pred_bbox] pred_bbox = tf.concat(pred_bbox, axis=0) bboxes = postprocess_boxes(pred_bbox, original_frame, input_size, score_threshold) bboxes = nms(bboxes, iou_threshold, method='nms') # extract bboxes to boxes (x, y, width, height), scores and names boxes, scores, names = [], [], [] for bbox in bboxes: if len(Track_only) != 
0 and NUM_CLASS[int( bbox[5])] in Track_only or len(Track_only) == 0: boxes.append([ bbox[0].astype(int), bbox[1].astype(int), bbox[2].astype(int) - bbox[0].astype(int), bbox[3].astype(int) - bbox[1].astype(int) ]) scores.append(bbox[4]) names.append(NUM_CLASS[int(bbox[5])]) # Obtain all the detections for the given frame. boxes = np.array(boxes) names = np.array(names) scores = np.array(scores) features = np.array(encoder(original_frame, boxes)) detections = [ Detection(bbox, score, class_name, feature) for bbox, score, class_name, feature in zip( boxes, scores, names, features) ] # Pass detections to the deepsort object and obtain the track information. tracker.predict() tracker.update(detections) # Obtain info from the tracks tracked_bboxes = [] for track in tracker.tracks: if not track.is_confirmed() or track.time_since_update > 5: continue bbox = track.to_tlbr() # Get the corrected/predicted bounding box class_name = track.get_class( ) #Get the class name of particular object tracking_id = track.track_id # Get the ID for the particular track index = key_list[val_list.index( class_name)] # Get predicted object index by object name tracked_bboxes.append( bbox.tolist() + [tracking_id, index] ) # Structure data, that we could use it with our draw_bbox function # draw detection on frame image = draw_bbox(original_frame, tracked_bboxes, CLASSES=CLASSES, tracking=True) t3 = time.time() times.append(t2 - t1) times_2.append(t3 - t1) times = times[-20:] times_2 = times_2[-20:] ms = sum(times) / len(times) * 1000 fps = 1000 / ms fps2 = 1000 / (sum(times_2) / len(times_2) * 1000) image = cv2.putText(image, "Time: {:.1f} FPS".format(fps), (0, 30), cv2.FONT_HERSHEY_COMPLEX_SMALL, 1, (0, 0, 255), 2) # draw original yolo detection #image = draw_bbox(image, bboxes, CLASSES=CLASSES, show_label=False, rectangle_colors=rectangle_colors, tracking=True) out_tracked_bboxes.append(tracked_bboxes) print( "Time: {:.2f}ms, Detection FPS: {:.1f}, total FPS: {:.1f}".format( ms, fps, fps2))