def main(yolo):
    # Definition of the parameters
    max_cosine_distance = 0.3
    nn_budget = None
    nms_max_overlap = 1.0

    # Deep SORT
    model_filename = 'model_data/mars-small128.pb'
    encoder = gdet.create_box_encoder(model_filename, batch_size=1)

    metric = nn_matching.NearestNeighborDistanceMetric("cosine", max_cosine_distance, nn_budget)
    tracker = Tracker(metric)

    writeVideo_flag = True
    asyncVideo_flag = False

    file_path = 'video.webm'
    if asyncVideo_flag:
        video_capture = VideoCaptureAsync(file_path)
    else:
        video_capture = cv2.VideoCapture(file_path)

    if asyncVideo_flag:
        video_capture.start()

    if writeVideo_flag:
        if asyncVideo_flag:
            w = int(video_capture.cap.get(3))
            h = int(video_capture.cap.get(4))
        else:
            w = int(video_capture.get(3))
            h = int(video_capture.get(4))
        fourcc = cv2.VideoWriter_fourcc(*'XVID')
        out = cv2.VideoWriter('output_yolov3.avi', fourcc, 30, (w, h))
        frame_index = -1

    fps = 0.0
    fps_imutils = imutils.video.FPS().start()

    while True:
        ret, frame = video_capture.read()  # frame shape 640*480*3
        if ret != True:
            break

        t1 = time.time()

        image = Image.fromarray(frame[..., ::-1])  # bgr to rgb
        # Run detection once and unpack boxes and confidences
        # (the original called detect_image twice, doubling inference time).
        boxs, confidence = yolo.detect_image(image)[:2]
        features = encoder(frame, boxs)
        detections = [Detection(bbox, confidence, feature)
                      for bbox, confidence, feature in zip(boxs, confidence, features)]

        # Run non-maxima suppression.
        boxes = np.array([d.tlwh for d in detections])
        scores = np.array([d.confidence for d in detections])
        indices = preprocessing.non_max_suppression(boxes, nms_max_overlap, scores)
        detections = [detections[i] for i in indices]

        # Call the tracker
        tracker.predict()
        tracker.update(detections)

        for track in tracker.tracks:
            if not track.is_confirmed() or track.time_since_update > 1:
                continue
            bbox = track.to_tlbr()
            cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), (255, 255, 255), 2)
            cv2.putText(frame, str(track.track_id), (int(bbox[0]), int(bbox[1])), 0, 5e-3 * 200, (0, 255, 0), 2)

        for det in detections:
            bbox = det.to_tlbr()
            score = "%.2f" % round(det.confidence * 100, 2)
            cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), (255, 0, 0), 2)
            cv2.putText(frame, score + '%', (int(bbox[0]), int(bbox[3])), 0, 5e-3 * 130, (0, 255, 0), 2)

        cv2.imshow('', frame)

        if writeVideo_flag:  # and not asyncVideo_flag:
            # save a frame
            out.write(frame)
            frame_index = frame_index + 1

        fps_imutils.update()

        fps = (fps + (1. / (time.time() - t1))) / 2
        print("FPS = %f" % (fps))

        # Press Q to stop!
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

    fps_imutils.stop()
    print('imutils FPS: {}'.format(fps_imutils.fps()))

    if asyncVideo_flag:
        video_capture.stop()
    else:
        video_capture.release()

    if writeVideo_flag:
        out.release()

    cv2.destroyAllWindows()
def main(_argv):
    # Definition of the parameters
    max_cosine_distance = 0.4
    nn_budget = None
    nms_max_overlap = 1.0

    # initialize deep sort
    model_filename = 'model_data/mars-small128.pb'
    encoder = gdet.create_box_encoder(model_filename, batch_size=1)
    # calculate cosine distance metric
    metric = nn_matching.NearestNeighborDistanceMetric("cosine", max_cosine_distance, nn_budget)
    # initialize tracker
    tracker = Tracker(metric)

    # load configuration for object detector
    config = ConfigProto()
    config.gpu_options.allow_growth = True
    session = InteractiveSession(config=config)
    STRIDES, ANCHORS, NUM_CLASS, XYSCALE = utils.load_config(FLAGS)
    input_size = FLAGS.size
    video_path = FLAGS.video

    # load tflite model if flag is set
    if FLAGS.framework == 'tflite':
        interpreter = tf.lite.Interpreter(model_path=FLAGS.weights)
        interpreter.allocate_tensors()
        input_details = interpreter.get_input_details()
        output_details = interpreter.get_output_details()
        print(input_details)
        print(output_details)
    # otherwise load standard tensorflow saved model
    else:
        saved_model_loaded = tf.saved_model.load(FLAGS.weights, tags=[tag_constants.SERVING])
        infer = saved_model_loaded.signatures['serving_default']

    ## Retrieve the object moving sequence from the text file generated by record_positions.py
    object_move_pos_strings = []
    object_move_pos = []

    # Retrieve object positions from the .txt file (the with-block closes the file automatically).
    with open("/home/pierre/MasterThesisObjectTracking/positions.txt", 'r') as object_pos_file:
        for line in object_pos_file:
            pos = str.split(line)
            object_move_pos_strings.append(pos)

    # Convert positions to integers
    for element in object_move_pos_strings:
        ints = [int(item) for item in element]
        object_move_pos.append(ints)

    # Boolean for when the object mover sequence is still running
    still_moving = True
    # Boolean that turns True when the object has finished moving
    end_trial = False
    moving_trigger = 0

    # Dynamixel setup: connecting
    Ax12.open_port()
    Ax12.set_baudrate()

    # Declare servomotors for pan and tilt
    camera_panning_motor = Ax12(1)
    camera_tilt_motor = Ax12(2)

    # Declare servomotors for the object
    object_pan1 = Ax12(3)
    object_tilt1 = Ax12(4)
    object_tilt2 = Ax12(5)
    object_pan2 = Ax12(6)

    start_pan = 500
    start_tilt = 230

    # define video codec
    fourcc = cv2.VideoWriter_fourcc(*'XVID')

    bound_box = [0, 0, 0, 0]
    frames_per_second = 0

    time.sleep(0.2)

    # Set start position
    camera_panning_motor.set_position(start_pan)
    camera_tilt_motor.set_position(start_tilt)

    # Set start position for the object
    move_object_motors(object_move_pos[0], speed=500)

    camera_panning_motor.set_torque_limit(1023)
    camera_tilt_motor.set_torque_limit(1023)
    camera_panning_motor.set_moving_speed(1000)
    camera_tilt_motor.set_moving_speed(1000)

    one_degree = int(camera_panning_motor.get_torque_limit() / 300)

    # Create class instance of second counter
    main_count = timer_sec.SecondCounter()
    # Create class instance of second counter for when the tracker is in the ROI
    roi_count = timer_sec.Counter_tread()

    roi_seconds = 0
    returned_roi_seconds = 0
    in_roi = False
    been_in_roi = False
    first_entry = True
    start_count = True
    left_roi = False
    roi_seconds_start = 0
    roi_seconds_stop = 0
    last_move_time = 0
    roi = 35
    margin = 70
    tracksuccess = False
    track_lost = False
    refound = 0
    first_detection = True

    # Counter for iterating over the object position list
    next_object_pos = 0
    object_speed = 100
    move_trigger = 0

    # Function that returns the center of the ROI
    def goalPosition(bbox):
        x, y = int(bbox[0]), int(bbox[1])
        w = int(bbox[2]) - int(bbox[0])
        h = int(bbox[3]) - int(bbox[1])
        center_x = int(x + w / 2)
        center_y = int(y + h / 2)
        center = [center_x, center_y]
        return center

    # Function that calculates the distance between the center of the frame and the center of the ROI
    def calculateDistance(cam_center, track_center):
        ROI = False
        x_diff = cam_center[0] - track_center[0]
        y_diff = cam_center[1] - track_center[1]
        difference = [x_diff, y_diff]
        if abs(x_diff) <= roi and abs(y_diff) <= roi:
            ROI = True
        return difference, ROI

    # begin video capture
    try:
        vid = cv2.VideoCapture(int(video_path))
    except:
        vid = cv2.VideoCapture(video_path)

    out = None

    # get video ready to save locally if flag is set
    if FLAGS.output:
        # by default VideoCapture returns float instead of int
        width = int(vid.get(cv2.CAP_PROP_FRAME_WIDTH))
        height = int(vid.get(cv2.CAP_PROP_FRAME_HEIGHT))
        fps = int(vid.get(cv2.CAP_PROP_FPS))
        codec = cv2.VideoWriter_fourcc(*FLAGS.output_format)
        out = cv2.VideoWriter(FLAGS.output, codec, fps, (width, height))

    import os
    import shutil
    output_dir = "{}/outputs".format(os.getcwd())
    if os.path.exists(output_dir):
        shutil.rmtree(output_dir)
    os.makedirs(output_dir)
    frames_dir = "{}/frames".format(output_dir)
    text_dir = "{}/txt_files".format(output_dir)
    video_dir = "{}/video".format(output_dir)
    os.makedirs(frames_dir)
    os.makedirs(text_dir)
    os.makedirs(video_dir)

    # Create output
    return_value, frame = vid.read()
    videOutput = cv2.VideoWriter("{}/output.avi".format(video_dir), fourcc, 30,
                                 (frame.shape[1], frame.shape[0]))
    videoOutput_no_box = cv2.VideoWriter("{}/output_no_graphics.avi".format(video_dir), fourcc, 30,
                                         (frame.shape[1], frame.shape[0]))

    frame_num = 0

    # start the main count of seconds
    main_count.start()

    # while video is running
    while True:
        return_value, frame = vid.read()
        if return_value:
            frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            image = Image.fromarray(frame)
        else:
            print('Video has ended or failed, try a different video format!')
            break
        frame_num += 1
        print('Frame #: ', frame_num)
        frame_size = frame.shape[:2]
        image_data = cv2.resize(frame, (input_size, input_size))
        image_data = image_data / 255.
        image_data = image_data[np.newaxis, ...].astype(np.float32)
        start_time = time.time()

        no_graphics = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)
        videoOutput_no_box.write(no_graphics)
        cv2.imwrite("{}/frame_{}.jpg".format(frames_dir, frame_num), no_graphics)

        # Check whether there are still positions left in the object moving sequence
        if next_object_pos < len(object_move_pos):
            print(f'object_move_pos length = {len(object_move_pos)}')
            still_moving = True
        else:
            print("Object moving sequence is finished")
            if still_moving:
                move_finished_time = main_count.peek()
            still_moving = False

        if still_moving:
            move_object_motors(object_move_pos[next_object_pos], speed=object_speed)

        # run detections on tflite if flag is set
        if FLAGS.framework == 'tflite':
            interpreter.set_tensor(input_details[0]['index'], image_data)
            interpreter.invoke()
            pred = [interpreter.get_tensor(output_details[i]['index'])
                    for i in range(len(output_details))]
            # run detections using yolov3 if flag is set
            if FLAGS.model == 'yolov3' and FLAGS.tiny == True:
                boxes, pred_conf = filter_boxes(pred[1], pred[0], score_threshold=0.25,
                                                input_shape=tf.constant([input_size, input_size]))
            else:
                boxes, pred_conf = filter_boxes(pred[0], pred[1], score_threshold=0.25,
                                                input_shape=tf.constant([input_size, input_size]))
        else:
            batch_data = tf.constant(image_data)
            pred_bbox = infer(batch_data)
            for key, value in pred_bbox.items():
                boxes = value[:, :, 0:4]
                pred_conf = value[:, :, 4:]

        boxes, scores, classes, valid_detections = tf.image.combined_non_max_suppression(
            boxes=tf.reshape(boxes, (tf.shape(boxes)[0], -1, 1, 4)),
            scores=tf.reshape(pred_conf, (tf.shape(pred_conf)[0], -1, tf.shape(pred_conf)[-1])),
            max_output_size_per_class=50,
            max_total_size=50,
            iou_threshold=FLAGS.iou,
            score_threshold=FLAGS.score
        )

        # convert data to numpy arrays and slice out unused elements
        num_objects = valid_detections.numpy()[0]
        bboxes = boxes.numpy()[0]
        bboxes = bboxes[0:int(num_objects)]
        scores = scores.numpy()[0]
        scores = scores[0:int(num_objects)]
        classes = classes.numpy()[0]
        classes = classes[0:int(num_objects)]

        # format bounding boxes from normalized ymin, xmin, ymax, xmax ---> xmin, ymin, width, height
        original_h, original_w, _ = frame.shape
        bboxes = utils.format_boxes(bboxes, original_h, original_w)

        # store all predictions in one parameter for simplicity when calling functions
        pred_bbox = [bboxes, scores, classes, num_objects]

        # read in all class names from config
        class_names = utils.read_class_names(cfg.YOLO.CLASSES)

        # by default allow all classes in .names file
        # allowed_classes = list(class_names.values())

        # custom allowed classes (uncomment line below to customize tracker for only people)
        allowed_classes = ['cell phone', 'cup']

        # loop through objects and use class index to get class name, allow only classes in allowed_classes list
        names = []
        deleted_indx = []
        for i in range(num_objects):
            class_indx = int(classes[i])
            class_name = class_names[class_indx]
            if class_name not in allowed_classes:
                deleted_indx.append(i)
            else:
                names.append(class_name)
        names = np.array(names)
        count = len(names)
        if FLAGS.count:
            cv2.putText(frame, "Objects being tracked: {}".format(count), (5, 35),
                        cv2.FONT_HERSHEY_COMPLEX_SMALL, 2, (0, 255, 0), 2)
            print("Objects being tracked: {}".format(count))

        # delete detections that are not in allowed_classes
        bboxes = np.delete(bboxes, deleted_indx, axis=0)
        scores = np.delete(scores, deleted_indx, axis=0)

        # encode yolo detections and feed to tracker
        features = encoder(frame, bboxes)
        detections = [Detection(bbox, score, class_name, feature)
                      for bbox, score, class_name, feature in zip(bboxes, scores, names, features)]

        # initialize color map
        cmap = plt.get_cmap('tab20b')
        colors = [cmap(i)[:3] for i in np.linspace(0, 1, 20)]

        # run non-maxima suppression
        boxs = np.array([d.tlwh for d in detections])
        scores = np.array([d.confidence for d in detections])
        classes = np.array([d.class_name for d in detections])
        indices = preprocessing.non_max_suppression(boxs, classes, nms_max_overlap, scores)
        detections = [detections[i] for i in indices]

        # Call the tracker
        tracker.predict()
        tracker.update(detections)

        # Draw a circle in the center of the frame
        camera_center = [int(frame.shape[1] / 2), int(frame.shape[0] / 2)]
        cv2.circle(frame, (camera_center[0], camera_center[1]), radius=1, color=(0, 255, 0), thickness=10)

        distance = [0, 0]

        # update tracks
        for track in tracker.tracks:
            if not track.is_confirmed() or track.time_since_update > 1:
                track_lost = True
                tracksuccess = False
                bound_box = [0, 0, 0, 0]
                continue
            bbox = track.to_tlbr()
            class_name = track.get_class()
            tracksuccess = True
            if tracksuccess and track_lost:
                refound += 1
                track_lost = False
            first_detection = False

            # draw bbox on screen
            color = colors[int(track.track_id) % len(colors)]
            color = [i * 255 for i in color]
            cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), color, 2)
            cv2.rectangle(frame, (int(bbox[0]), int(bbox[1] - 30)),
                          (int(bbox[0]) + (len(class_name) + len(str(track.track_id))) * 17, int(bbox[1])),
                          color, -1)
            cv2.putText(frame, class_name + "-" + str(track.track_id), (int(bbox[0]), int(bbox[1] - 10)),
                        0, 0.75, (255, 255, 255), 2)

            # Calculate center of bounding box and draw circle
            center = goalPosition(bbox)
            cv2.circle(frame, (center[0], center[1]), radius=0, color=color, thickness=10)
            print("Bbox: {0}".format(bbox))

            # Store value for the output.csv file
            bound_box = bbox

            # difference in x-coordinates respectively y-coordinates
            distance, in_roi = calculateDistance(camera_center, center)
            moveMotors(distance[0], distance[1], camera_panning_motor, camera_tilt_motor, roi, margin)

            # if enable info flag then print details about each track
            if FLAGS.info:
                print("Tracker ID: {}, Class: {}, BBox Coords (xmin, ymin, xmax, ymax): {}".format(
                    str(track.track_id), class_name,
                    (int(bbox[0]), int(bbox[1]), int(bbox[2]), int(bbox[3]))))

        # calculate frames per second of running detections
        fps = 1.0 / (time.time() - start_time)
        print("FPS: %.2f" % fps)
        frames_per_second = fps

        # FPS text
        cv2.putText(frame, "FPS {0}".format(str(int(fps))), (75, 50),
                    cv2.FONT_HERSHEY_COMPLEX, 1, (0, 0, 255), 2)

        result = np.asarray(frame)
        result = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)

        # moving interval in seconds
        moving_interval = 3
        if still_moving:
            if main_count.peek() > 1 and main_count.peek() < 3:
                move_trigger = main_count.peek()
            elif main_count.peek() > move_trigger + 0.02:
                move_trigger += moving_interval
                next_object_pos += 1
        else:
            print("Object move choreography is finished")
            if main_count.peek() > move_finished_time + 2:
                end_trial = True

        ## Start the timer when the ROI is centered
        if in_roi and tracksuccess:
            if start_count:
                roi_count.start()
                start_count = False
            if first_entry:
                roi_seconds = roi_count.peek()
            if left_roi:
                roi_seconds_start = roi_count.peek()
            been_in_roi = True
            left_roi = False

        ## Stop the timer when the ROI is not centered and accumulate time in the counter
        if been_in_roi and tracksuccess and in_roi is False:
            print("roi_stop: {}".format(roi_seconds_stop))
            if first_entry:
                roi_seconds_stop = 0
            else:
                roi_seconds_stop = roi_count.peek() - roi_seconds_start
            roi_seconds += roi_seconds_stop
            been_in_roi = False
            first_entry = False
            left_roi = True

        ## Graphics to show timer counters
        cv2.putText(result, "Sec {0}".format(str(int(main_count.peek()))), (500, 50),
                    cv2.FONT_HERSHEY_COMPLEX, 1, (0, 255, 255), 2)
        cv2.putText(result, "Sec in ROI {0}".format(str(int(roi_seconds))), (400, 85),
                    cv2.FONT_HERSHEY_COMPLEX, 1, (254, 0, 255), 2)

        # cv2.imshow("Frame", frame)
        videOutput.write(result)

        mean_roi_sec = round((roi_seconds / main_count.peek()) * 100, 2)
        camera_pos = [camera_panning_motor.get_position(), camera_tilt_motor.get_position()]
        object_pos = [Ax12(3).get_position(), Ax12(4).get_position(),
                      Ax12(5).get_position(), Ax12(6).get_position()]

        # Create a csv to store results (the with-block closes the file automatically)
        with open('outputs/output.csv', 'a', newline='') as csvfile:
            fieldnames = ['Frame', 'FPS', 'Distance_x', 'Distance_y', 'bbox_xmin', 'bbox_ymin',
                          'bbox_xmax', 'bbox_ymax', 'Prop_roi(%)', 'Time', 'Roi_time', 'In_roi',
                          'Tracking_success', 'Refound_tracking', 'camera_pan', 'camera_tilt',
                          'object_pan1', 'object_tilt1', 'object_tilt2', 'object_pan2',
                          'img_center_x', 'img_center_y']
            csv_writer = csv.DictWriter(csvfile, fieldnames=fieldnames)

            # Fill in values in the csv file
            if frame_num == 1:
                csv_writer.writeheader()
            csv_writer.writerow({'Frame': str(frame_num),
                                 'FPS': str(int(frames_per_second)),
                                 'Distance_x': str(distance[0]),
                                 'Distance_y': str(distance[1]),
                                 'bbox_xmin': str(int(bound_box[0])),
                                 'bbox_ymin': str(int(bound_box[1])),
                                 'bbox_xmax': str(int(bound_box[2])),
                                 'bbox_ymax': str(int(bound_box[3])),
                                 'Prop_roi(%)': str(mean_roi_sec),
                                 'Time': str(round(main_count.peek(), 2)),
                                 'Roi_time': str(round(roi_seconds, 2)),
                                 'Tracking_success': str(tracksuccess),
                                 'Refound_tracking': str(refound - 1),
                                 'camera_pan': str(camera_pos[0]),
                                 'camera_tilt': str(camera_pos[1]),
                                 'object_pan1': str(object_pos[0]),
                                 'object_tilt1': str(object_pos[1]),
                                 'object_tilt2': str(object_pos[2]),
                                 'object_pan2': str(object_pos[3]),
                                 'img_center_x': str(camera_center[0]),
                                 'img_center_y': str(camera_center[1])})

        if not FLAGS.dont_show:
            cv2.imshow("Output Video", result)

        # if output flag is set, save video file
        if FLAGS.output:
            out.write(result)
        if cv2.waitKey(1) & 0xFF == ord('q') or end_trial:
            break

    cv2.destroyAllWindows()
    videOutput.release()
    videoOutput_no_box.release()
    Ax12.close_port()

    # stop the count and get elapsed time
    main_seconds = main_count.finish()
    roi_count.finish()
def main(yolo):
    # Definition of the parameters
    max_cosine_distance = 0.3
    nn_budget = None
    nms_max_overlap = 0.7

    # deep_sort
    model_filename = 'model_data/mars-small128.pb'
    encoder = gdet.create_box_encoder(model_filename, batch_size=1)

    metric = nn_matching.NearestNeighborDistanceMetric("cosine", max_cosine_distance, nn_budget)
    tracker = Tracker(metric)

    writeVideo_flag = True
    webcam_flag = False

    # some links from earthcam:
    #   https://github.com/Crazycook/Working/blob/master/Webcams.txt
    #   https://www.vlcm3u.com/web-cam-live/
    video_url = 'https://videos3.earthcam.com/fecnetwork/9974.flv/chunklist_w1421640637.m3u8'  # NYC
    # video_url = 'https://videos3.earthcam.com/fecnetwork/hdtimes10.flv/chunklist_w48750913.m3u8'

    if webcam_flag:
        video_capture = cv2.VideoCapture(0)
    else:
        video_capture = cv2.VideoCapture()
        video_capture.set(cv2.CAP_PROP_BUFFERSIZE, 2)
        video_capture.open(video_url)

    if writeVideo_flag:
        # Define the codec and create VideoWriter object
        w = int(video_capture.get(3))
        h = int(video_capture.get(4))
        fourcc = cv2.VideoWriter_fourcc(*'MJPG')
        out = cv2.VideoWriter('output.avi', fourcc, 15, (w, h))
        list_file = open('detection.txt', 'w')
        frame_index = -1

    fps = 0.0
    while True:
        ret, frame = video_capture.read()  # frame shape 640*480*3
        if ret != True:
            break
        t1 = time.time()

        # image = Image.fromarray(frame)
        image = Image.fromarray(frame[..., ::-1])  # bgr to rgb
        boxs = yolo.detect_image(image)
        # print("box_num", len(boxs))
        features = encoder(frame, np.array(boxs)[:, 0:4].tolist())

        # score to 1.0 here).
        detections = [Detection(bbox, 1.0, feature) for bbox, feature in zip(boxs, features)]

        # Run non-maxima suppression.
        boxes = np.array([d.tlwh for d in detections])
        scores = np.array([d.confidence for d in detections])
        indices = preprocessing.non_max_suppression(boxes, nms_max_overlap, scores)
        detections = [detections[i] for i in indices]

        # Call the tracker
        tracker.predict()
        tracker.update(detections)

        for track in tracker.tracks:
            if not track.is_confirmed() or track.time_since_update > 1:
                continue
            bbox = track.to_tlbr()
            cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), (255, 255, 255), 2)
            cv2.putText(frame, str(track.track_id), (int(bbox[0]), int(bbox[1])), 0, 5e-3 * 200, (0, 255, 0), 2)

        for det in detections:
            bbox = det.to_tlbr()
            cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), (255, 0, 0), 2)
            # Detection stores its score in .confidence
            # (the original referenced a non-existent .score attribute).
            cv2.putText(frame, str(det.confidence), (int(bbox[0]), int(bbox[1])), 0, 5e-3 * 200, (0, 255, 0), 2)

        cv2.imshow('', frame)

        if writeVideo_flag:
            # save a frame
            out.write(frame)
            frame_index = frame_index + 1
            list_file.write(str(frame_index) + ' ')
            if len(boxs) != 0:
                for i in range(0, len(boxs)):
                    list_file.write(str(boxs[i][0]) + ' ' + str(boxs[i][1]) + ' ' +
                                    str(boxs[i][2]) + ' ' + str(boxs[i][3]) + ' ')
            list_file.write('\n')

        fps = (fps + (1. / (time.time() - t1))) / 2
        print("fps= %f" % (fps))

        # Press Q to stop!
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

    video_capture.release()
    if writeVideo_flag:
        out.release()
        list_file.close()
    cv2.destroyAllWindows()
def main(yolo):
    start = time.time()

    # Definition of the parameters
    max_cosine_distance = 0.5  # 0.9, threshold controlling the cosine distance
    nn_budget = None
    nms_max_overlap = 0.3  # non-maximum suppression threshold

    counter = []

    # deep_sort
    model_filename = 'model_data/market1501.pb'
    encoder = gdet.create_box_encoder(model_filename, batch_size=1)

    metric = nn_matching.NearestNeighborDistanceMetric("cosine", max_cosine_distance, nn_budget)
    tracker = Tracker(metric)

    writeVideo_flag = True
    # video_path = "../../yolo_dataset/t1_video/test_video/det_t1_video_00025_test.avi"
    video_capture = cv2.VideoCapture(args["input"])

    if writeVideo_flag:
        # Define the codec and create VideoWriter object
        w = int(video_capture.get(3))
        h = int(video_capture.get(4))
        fourcc = cv2.VideoWriter_fourcc(*'MJPG')
        out = cv2.VideoWriter('./output/' + args["input"][43:57] + "_" + args["class"] + '_output.avi',
                              fourcc, 15, (w, h))
        list_file = open('detection.txt', 'w')
        frame_index = -1

    fps = 0.0

    while True:
        ret, frame = video_capture.read()  # frame shape 640*480*3
        if ret != True:
            break
        t1 = time.time()

        # image = Image.fromarray(frame)
        image = Image.fromarray(frame[..., ::-1])  # bgr to rgb
        boxs, class_names = yolo.detect_image(image)
        features = encoder(frame, boxs)
        # score to 1.0 here).
        detections = [Detection(bbox, 1.0, feature) for bbox, feature in zip(boxs, features)]

        # Run non-maxima suppression.
        boxes = np.array([d.tlwh for d in detections])
        scores = np.array([d.confidence for d in detections])
        indices = preprocessing.non_max_suppression(boxes, nms_max_overlap, scores)
        detections = [detections[i] for i in indices]

        # Call the tracker
        tracker.predict()
        tracker.update(detections)

        i = int(0)
        indexIDs = []
        c = []
        boxes = []
        for det in detections:
            bbox = det.to_tlbr()
            cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), (255, 255, 255), 2)

        for track in tracker.tracks:
            if not track.is_confirmed() or track.time_since_update > 1:
                continue
            # boxes.append([track[0], track[1], track[2], track[3]])
            indexIDs.append(int(track.track_id))
            counter.append(int(track.track_id))
            bbox = track.to_tlbr()
            color = [int(c) for c in COLORS[indexIDs[i] % len(COLORS)]]

            cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), (color), 3)
            cv2.putText(frame, str(track.track_id), (int(bbox[0]), int(bbox[1] - 50)), 0, 5e-3 * 150, (color), 2)
            if len(class_names) > 0:
                class_name = class_names[0]
                cv2.putText(frame, str(class_names[0]), (int(bbox[0]), int(bbox[1] - 20)), 0, 5e-3 * 150, (color), 2)

            i += 1
            # bbox_center_point(x, y)
            center = (int(((bbox[0]) + (bbox[2])) / 2), int(((bbox[1]) + (bbox[3])) / 2))
            # track_id[center]
            pts[track.track_id].append(center)
            thickness = 5
            # center point
            cv2.circle(frame, (center), 1, color, thickness)

            # draw motion path
            for j in range(1, len(pts[track.track_id])):
                if pts[track.track_id][j - 1] is None or pts[track.track_id][j] is None:
                    continue
                thickness = int(np.sqrt(64 / float(j + 1)) * 2)
                cv2.line(frame, (pts[track.track_id][j - 1]), (pts[track.track_id][j]), (color), thickness)
                # cv2.putText(frame, str(class_names[j]), (int(bbox[0]), int(bbox[1] - 20)), 0, 5e-3 * 150, (255, 255, 255), 2)

        count = len(set(counter))
        cv2.putText(frame, "Total Object Counter: " + str(count), (int(20), int(120)), 0, 5e-3 * 200, (0, 255, 0), 2)
        cv2.putText(frame, "Current Object Counter: " + str(i), (int(20), int(80)), 0, 5e-3 * 200, (0, 255, 0), 2)
        cv2.putText(frame, "FPS: %f" % (fps), (int(20), int(40)), 0, 5e-3 * 200, (0, 255, 0), 3)
        cv2.namedWindow("YOLO3_Deep_SORT", 0)
        cv2.resizeWindow('YOLO3_Deep_SORT', 1024, 768)
        cv2.imshow('YOLO3_Deep_SORT', frame)

        if writeVideo_flag:
            # save a frame
            out.write(frame)
            frame_index = frame_index + 1
            list_file.write(str(frame_index) + ' ')
            if len(boxs) != 0:
                for i in range(0, len(boxs)):
                    list_file.write(str(boxs[i][0]) + ' ' + str(boxs[i][1]) + ' ' +
                                    str(boxs[i][2]) + ' ' + str(boxs[i][3]) + ' ')
            list_file.write('\n')

        fps = (fps + (1. / (time.time() - t1))) / 2
        # print(set(counter))

        # Press Q to stop!
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

    print(" ")
    print("[Finish]")
    end = time.time()

    # Only report a result if at least one object was ever counted
    # (the original tested `len(pts[track.track_id]) != None`, which is always
    # True and crashes when no track was ever created).
    if count != 0:
        print(args["input"][43:57] + ": " + str(count) + " " + str(class_name) + ' Found')
    else:
        print("[No Found]")

    video_capture.release()
    if writeVideo_flag:
        out.release()
        list_file.close()
    cv2.destroyAllWindows()
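# Hedged sketch of the two module-level globals the function above assumes
# (`pts` and `COLORS`); the names match the usage above, but the exact sizes
# are assumptions taken from common variants of this script.
from collections import deque

# one bounded history of center points per possible track id, used to draw motion paths
pts = [deque(maxlen=30) for _ in range(9999)]
# a fixed random palette indexed by track id
COLORS = np.random.randint(0, 255, size=(200, 3), dtype="uint8")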
def main(_argv):
    # Definition of the parameters
    rospy.init_node('tracker', anonymous=True)
    rospy.Subscriber("/new_image_raw", Image, callback)

    max_cosine_distance = 0.5
    nn_budget = None
    nms_max_overlap = 1.0

    # initialize deep sort
    model_filename = 'model_data/mars-small128.pb'
    encoder = gdet.create_box_encoder(model_filename, batch_size=1)
    metric = nn_matching.NearestNeighborDistanceMetric("cosine", max_cosine_distance, nn_budget)
    tracker = Tracker(metric)

    physical_devices = tf.config.experimental.list_physical_devices('GPU')
    if len(physical_devices) > 0:
        tf.config.experimental.set_memory_growth(physical_devices[0], True)

    if FLAGS.tiny:
        yolo = YoloV3Tiny(classes=FLAGS.num_classes)
    else:
        yolo = YoloV3(classes=FLAGS.num_classes)

    yolo.load_weights(FLAGS.weights)
    logging.info('weights loaded')

    class_names = [c.strip() for c in open(FLAGS.classes).readlines()]
    logging.info('classes loaded')

    # vid = cv2.VideoCapture(0)
    # vid = cv_image
    # cv_image = cv2.VideoCapture(FLAGS.video)

    out = None

    if FLAGS.output:
        # by default VideoCapture returns float instead of int
        width = 720
        height = 862
        fps = 2
        codec = cv2.VideoWriter_fourcc(*FLAGS.output_format)
        out = cv2.VideoWriter(FLAGS.output, codec, fps, (width, height))
        list_file = open('detection.txt', 'w')
        frame_index = -1

    fps = 0.0
    count = 0
    while True:
        # _, img = vid.read()
        img = cv_image
        # cv2.imshow("loading image", cv_image)  # image is coming over here fine

        if img is None:
            logging.warning("Empty Frame")
            time.sleep(0.1)
            count += 1
            if count < 3:
                continue
            else:
                break

        print("shape = {} , ".format(img.shape))
        print("dtype = {} , ".format(img.dtype))
        # img = np.array(img, dtype=np.uint16)

        img_in = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        img_in = tf.expand_dims(img_in, 0)
        img_in = transform_images(img_in, FLAGS.size)

        t1 = time.time()
        boxes, scores, classes, nums = yolo.predict(img_in, steps=1)
        classes = classes[0]
        names = []
        for i in range(len(classes)):
            names.append(class_names[int(classes[i])])
        names = np.array(names)
        converted_boxes = convert_boxes(img, boxes[0])
        features = encoder(img, converted_boxes)
        detections = [Detection(bbox, score, class_name, feature)
                      for bbox, score, class_name, feature in zip(converted_boxes, scores[0], names, features)]

        # initialize color map
        cmap = plt.get_cmap('tab20b')
        colors = [cmap(i)[:3] for i in np.linspace(0, 1, 20)]

        # run non-maxima suppression
        boxs = np.array([d.tlwh for d in detections])
        scores = np.array([d.confidence for d in detections])
        classes = np.array([d.class_name for d in detections])
        indices = preprocessing.non_max_suppression(boxs, classes, nms_max_overlap, scores)
        detections = [detections[i] for i in indices]

        # Call the tracker
        tracker.predict()
        tracker.update(detections)

        for track in tracker.tracks:
            if not track.is_confirmed() or track.time_since_update > 1:
                continue
            bbox = track.to_tlbr()
            class_name = track.get_class()
            color = colors[int(track.track_id) % len(colors)]
            color = [i * 255 for i in color]
            cv2.rectangle(img, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), color, 2)
            cv2.rectangle(img, (int(bbox[0]), int(bbox[1] - 30)),
                          (int(bbox[0]) + (len(class_name) + len(str(track.track_id))) * 17, int(bbox[1])),
                          color, -1)
            cv2.putText(img, class_name + "-" + str(track.track_id), (int(bbox[0]), int(bbox[1] - 10)),
                        0, 0.75, (255, 255, 255), 2)

        ### UNCOMMENT BELOW IF YOU WANT CONSTANTLY CHANGING YOLO DETECTIONS TO BE SHOWN ON SCREEN
        # for det in detections:
        #     bbox = det.to_tlbr()
        #     cv2.rectangle(img, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), (255, 0, 0), 2)

        # print fps on screen
        fps = (fps + (1. / (time.time() - t1))) / 2
        cv2.putText(img, "FPS: {:.2f}".format(fps), (0, 30), cv2.FONT_HERSHEY_COMPLEX_SMALL, 1, (0, 0, 255), 2)
        # cv2.imshow("output", img.astype('float32'))
        cv2.imshow('output', img)

        if FLAGS.output:
            out.write(img)
            frame_index = frame_index + 1
            list_file.write(str(frame_index) + ' ')
            if len(converted_boxes) != 0:
                for i in range(0, len(converted_boxes)):
                    list_file.write(str(converted_boxes[i][0]) + ' ' + str(converted_boxes[i][1]) + ' ' +
                                    str(converted_boxes[i][2]) + ' ' + str(converted_boxes[i][3]) + ' ')
            list_file.write('\n')

        # press q to quit
        if cv2.waitKey(1) == ord('q'):
            break

    # vid.release()
    if FLAGS.output:
        out.release()
        list_file.close()
    cv2.destroyAllWindows()
## If an error occurs here, an if statement is needed to handle the zero-detection case.
# (Fragment: assumes frame, frameIdx, personDetected_box, det_classes, features,
#  nms_max_overlap, tracker and preprocessing are defined by the surrounding script.)
detections = [Detection(box, det_class, feature)
              for box, det_class, feature in zip(personDetected_box, det_classes, features)]

detections_box = np.array([dlc.tlwh for dlc in detections])
detections_score = np.array([dlc.confidence for dlc in detections])
detections_indices = preprocessing.non_max_suppression(detections_box, nms_max_overlap, detections_score)
detections = [detections[i] for i in detections_indices]

# tracker.predict()
tracker.update(detections)

cv2.putText(frame, str(frameIdx), (0, 30), 0, 5e-3 * 200, (0, 255, 0), 2)  # To check frame number

''' Support Visualization '''
for track in tracker.tracks:
    bbox = [max(0, int(x)) for x in track.to_tlbr()]
    cv2.rectangle(frame, (bbox[0], bbox[1]), (bbox[2], bbox[3]), (0, 0, 255), 3)
    cv2.putText(frame, str(track.track_id), (bbox[0], bbox[1] + 30), 0, 5e-3 * 200, (0, 255, 0), 2)

# frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
class VideoStructor(object):
    def __init__(self, detector, extractor, cfg):
        self.detector = detector
        self.extractor = extractor
        self.cfg = cfg
        self.tracker = Tracker(self.cfg.tracktor.metric)
        self.results = {}

    def init_tracker(self):
        self.tracker = Tracker(self.cfg.tracktor.metric)
        self.results = {}

    def __call__(self):
        pass

    def step(self, frame):
        img = cv2.imread(frame.filename)
        det_cfg = self.cfg.tracktor.detection
        if det_cfg.default:
            bbox_result = frame.detections[:, 2:7]
            bbox_result[:, 2:4] += bbox_result[:, :2]
        else:
            bbox_result, other_cls_result = self.get_bboxes(img)
        valid_inds = bbox_result[:, 4] > det_cfg.min_conf
        bbox_result = bbox_result[valid_inds]
        _, keep_inds = nms(bbox_result, det_cfg.nms_iou_thr)
        bbox_result = bbox_result[keep_inds]
        features = self.get_features(img, bbox_result)
        bbox_result[:, 2:4] -= bbox_result[:, :2]
        detections = []
        for bbox, feat in zip(bbox_result, features):
            detections.append(Detection(bbox[0:4], bbox[4], feat))
        self.tracker.predict()
        self.tracker.update(detections)
        frame_results = self.tracker.get_results()
        frame_results = frame_results if frame_results else np.zeros((0, 5))
        self.results[frame.frame_idx] = np.array(frame_results)

    def get_bboxes(self, img):
        bbox_result = self.detector(img)
        needed_labels = self.cfg.detection.labels
        dets_for_reid = bbox_result[needed_labels[0] - 1]
        other_dets = []
        for i in needed_labels[1:]:
            other_dets.append(bbox_result[i - 1])
        return dets_for_reid, tuple(other_dets)

    def get_features(self, img, bbox):
        if bbox.shape[1] <= 5:
            features = self.extractor(img, bbox[:, 0:4]).cpu().numpy()
        else:
            features = bbox[:, 5:]
        return features

    def get_results(self):
        return self.results
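# A minimal usage sketch for VideoStructor. It is hedged: the `frames` items are
# assumed to expose .filename, .frame_idx and optional precomputed .detections,
# matching how step() reads them above, and detector/extractor/cfg are whatever
# objects the surrounding pipeline constructs.
def run_sequence(detector, extractor, cfg, frames):
    structor = VideoStructor(detector, extractor, cfg)
    structor.init_tracker()  # reset tracker state before each sequence
    for f in frames:
        structor.step(f)     # detect, extract re-ID features, update the tracker
    # returns {frame_idx: np.ndarray of per-track rows}, as built in step()
    return structor.get_results()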
def main(video_path=0, save_path=None):
    # Definition of the parameters
    max_cosine_distance = 0.5
    nn_budget = None

    # openpose
    model = 'mobilenet_thin'
    e = TfPoseEstimator(get_graph_path(model), target_size=(432, 368))

    # deep_sort
    model_filename = 'model_data/mars-small128.pb'
    encoder = gdet.create_box_encoder(model_filename, batch_size=1)
    metric = nn_matching.NearestNeighborDistanceMetric("cosine", max_cosine_distance, nn_budget)

    video = Video(video_path)
    # video_capture = cv2.VideoCapture()  # 'data/person.mp4'
    w = video.w
    h = video.h
    tracker = Tracker(metric, img_shape=(w, h), max_eu_dis=0.1 * np.sqrt((w ** 2 + h ** 2)))

    fourcc = cv2.VideoWriter_fourcc(*'MJPG')
    out = cv2.VideoWriter(save_path, fourcc, 7, (w, h))
    list_file = open(output_dir + 'detection.txt', 'w')
    frame_index = -1

    fps = 0.0
    while True:
        ret, frame = video.video_capture.read()  # frame shape 640*480*3
        if ret is not True or frame is None:
            break
        t1 = time.time()

        image = Image.fromarray(frame)
        # boxes, scores, _ = yolo.detect_image(image)
        humans = get_keypoints(frame, e)
        boxes = []
        for human in humans:
            bbox = human.get_upper_body_box(w, h, 1)
            if bbox is not None:
                bbox['w'] = max(bbox['w'], 6)
                bbox['h'] = max(bbox['h'], 6)
                boxes.append([bbox['x'], bbox['y'], bbox['w'], bbox['h']])

        features = encoder(frame, boxes)  # extract a feature for each box
        # wrap each (box, feature) pair in a Detection object
        detections = [Detection(bbox_and_feature[0], 1, bbox_and_feature[1])
                      for idx, bbox_and_feature in enumerate(zip(boxes, features))]

        # Call the tracker
        tracker.predict()
        tracker.update(detections, np.asarray(image))

        frame = draw_humans(frame, humans, imgcopy=False)

        for index, track in enumerate(tracker.tracks):
            if not track.is_confirmed() or track.time_since_update > 1:
                continue
            bbox = track.to_tlbr()
            cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), (255, 255, 255), 2)
            cv2.putText(frame, "{}".format(track.track_id), (int(bbox[0]), int(bbox[1])), 0,
                        5e-3 * 200, track.color, 2)

        # for det in detections:
        #     bbox = det.to_tlbr()
        #     cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), (255, 0, 0), 2)

        cv2.imshow('', frame)

        if save_path is not None:
            # Define the codec and create VideoWriter object
            # save a frame
            out.write(frame)
            frame_index = frame_index + 1
            list_file.write(str(frame_index) + ' ')
            if len(boxes) != 0:
                for i in range(0, len(boxes)):
                    list_file.write(str(boxes[i][0]) + ' ' + str(boxes[i][1]) + ' ' +
                                    str(boxes[i][2]) + ' ' + str(boxes[i][3]) + ' ')
            list_file.write('\n')

        fps = (fps + (1. / (time.time() - t1))) / 2
        # print("fps= %f" % fps)

        # Press Q to stop!
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

    video.video_capture.release()
    if save_path is not None:
        out.release()
        list_file.close()
    cv2.destroyAllWindows()
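# Hedged sketch of the two openpose helpers the function above assumes
# (get_keypoints, draw_humans). The wrapper names and the upsample_size default
# are assumptions; the underlying calls (TfPoseEstimator.inference and the
# static TfPoseEstimator.draw_humans) exist in the tf-pose-estimation package.
def get_keypoints(frame, estimator, upsample_size=4.0):
    # run pose inference on a BGR frame and return the detected humans
    return estimator.inference(frame, resize_to_default=True, upsample_size=upsample_size)

def draw_humans(frame, humans, imgcopy=False):
    # overlay the detected skeletons on the frame
    return TfPoseEstimator.draw_humans(frame, humans, imgcopy=imgcopy)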
def main(_argv):
    # Definition of the parameters
    max_cosine_distance = 0.4
    nn_budget = None
    nms_max_overlap = 1.0

    # initialize deep sort
    model_filename = os.getenv('HOME') + '/st_mini/src/scout_mini_ros/scout_bringup/model_data/mars-small128.pb'
    encoder = gdet.create_box_encoder(model_filename, batch_size=1)
    # calculate cosine distance metric
    metric = nn_matching.NearestNeighborDistanceMetric("cosine", max_cosine_distance, nn_budget)
    # initialize tracker
    tracker = Tracker(metric)

    # load configuration for object detector
    config = ConfigProto()
    config.gpu_options.allow_growth = True
    session = InteractiveSession(config=config)
    STRIDES, ANCHORS, NUM_CLASS, XYSCALE = utils.load_config(FLAGS)
    input_size = FLAGS.size

    if use_webcam:
        video_path = FLAGS.video

    # load tflite model if flag is set
    if FLAGS.framework == 'tflite':
        interpreter = tf.lite.Interpreter(model_path=FLAGS.weights)
        interpreter.allocate_tensors()
        input_details = interpreter.get_input_details()
        output_details = interpreter.get_output_details()
        print(input_details)
        print(output_details)
    # otherwise load standard tensorflow saved model
    else:
        saved_model_loaded = tf.saved_model.load(FLAGS.weights, tags=[tag_constants.SERVING])
        infer = saved_model_loaded.signatures['serving_default']

    # begin video capture
    if use_webcam:
        try:
            vid = cv2.VideoCapture(int(video_path))
        except:
            vid = cv2.VideoCapture(video_path)

    # Initial values for robot motor control
    x = 0
    y = 0
    z = 0
    th = 0
    speed = 0.7
    turn = 1

    # Additional variables
    cx, cy, h = 0, 0, 0
    frame_num = 0

    # Load the depth camera class
    if not use_webcam:
        dc = DepthCamera()

    # Get the default values for the obstacle region
    default = Default_dist()

    # ROS class init
    go = scout_pub_basic()
    rate = rospy.Rate(60)

    # Initial target id (the id of the person carrying the sub object is assigned below)
    target_id = False

    # while video is running
    while not rospy.is_shutdown():
        if use_webcam:
            return_value, frame = vid.read()
        else:
            # use the depth camera
            return_value, depth_frame, frame = dc.get_frame()

        if return_value:
            frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            image = Image.fromarray(frame)
        else:
            print('Video has ended or failed, try a different video format!')
            break
        frame_num += 1

        if not use_webcam:
            # Set the default ROI for obstacle avoidance (currently only the first 10 frames)
            if frame_num < 11:
                default.default_update(depth_frame)
                continue

        print('Frame #: ', frame_num)
        # frame_size = frame.shape[:2]
        image_data = cv2.resize(frame, (input_size, input_size))
        image_data = image_data / 255.
        image_data = image_data[np.newaxis, ...].astype(np.float32)
        start_time = time.time()

        # run detections on tflite if flag is set
        if FLAGS.framework == 'tflite':
            interpreter.set_tensor(input_details[0]['index'], image_data)
            interpreter.invoke()
            pred = [interpreter.get_tensor(output_details[i]['index'])
                    for i in range(len(output_details))]
            # run detections using yolov3 if flag is set
            if FLAGS.model == 'yolov3' and FLAGS.tiny == True:
                boxes, pred_conf = filter_boxes(pred[1], pred[0], score_threshold=0.25,
                                                input_shape=tf.constant([input_size, input_size]))
            else:
                boxes, pred_conf = filter_boxes(pred[0], pred[1], score_threshold=0.25,
                                                input_shape=tf.constant([input_size, input_size]))
        else:
            batch_data = tf.constant(image_data)
            pred_bbox = infer(batch_data)
            for key, value in pred_bbox.items():
                boxes = value[:, :, 0:4]
                pred_conf = value[:, :, 4:]

        boxes, scores, classes, valid_detections = tf.image.combined_non_max_suppression(
            boxes=tf.reshape(boxes, (tf.shape(boxes)[0], -1, 1, 4)),
            scores=tf.reshape(pred_conf, (tf.shape(pred_conf)[0], -1, tf.shape(pred_conf)[-1])),
            max_output_size_per_class=50,
            max_total_size=50,
            iou_threshold=FLAGS.iou,
            score_threshold=FLAGS.score)

        # convert data to numpy arrays and slice out unused elements
        num_objects = valid_detections.numpy()[0]
        bboxes = boxes.numpy()[0]
        bboxes = bboxes[0:int(num_objects)]
        scores = scores.numpy()[0]
        scores = scores[0:int(num_objects)]
        classes = classes.numpy()[0]
        classes = classes[0:int(num_objects)]

        # format bounding boxes from normalized ymin, xmin, ymax, xmax ---> xmin, ymin, width, height
        original_h, original_w, _ = frame.shape
        bboxes = utils.format_boxes(bboxes, original_h, original_w)

        # store all predictions in one parameter for simplicity when calling functions
        pred_bbox = [bboxes, scores, classes, num_objects]

        # read in all class names from config
        class_names = utils.read_class_names(cfg.YOLO.CLASSES)

        # by default allow all classes in .names file
        # allowed_classes = list(class_names.values())

        # custom allowed classes (uncomment line below to customize tracker for only people)
        allowed_classes = ['person', 'cell phone', 'tie', 'stop sign']

        # loop through objects and use class index to get class name, allow only classes in allowed_classes list
        names = []
        deleted_indx = []
        for i in range(num_objects):
            class_indx = int(classes[i])
            class_name = class_names[class_indx]
            if class_name not in allowed_classes:
                deleted_indx.append(i)
            else:
                names.append(class_name)
        names = np.array(names)
        count = len(names)
        if FLAGS.count:
            cv2.putText(frame, "Objects being tracked: {}".format(count), (5, 35),
                        cv2.FONT_HERSHEY_COMPLEX_SMALL, 2, (0, 255, 0), 2)
            print("Objects being tracked: {}".format(count))

        # delete detections that are not in allowed_classes
        bboxes = np.delete(bboxes, deleted_indx, axis=0)
        scores = np.delete(scores, deleted_indx, axis=0)

        # encode yolo detections and feed to tracker
        features = encoder(frame, bboxes)
        detections = [Detection(bbox, score, class_name, feature)
                      for bbox, score, class_name, feature in zip(bboxes, scores, names, features)]

        # initialize color map
        cmap = plt.get_cmap('tab20b')
        colors = [cmap(i)[:3] for i in np.linspace(0, 1, 20)]

        # run non-maxima suppression
        boxs = np.array([d.tlwh for d in detections])
        scores = np.array([d.confidence for d in detections])
        classes = np.array([d.class_name for d in detections])
        indices = preprocessing.non_max_suppression(boxs, classes, nms_max_overlap, scores)
        detections = [detections[i] for i in indices]

        # Call the tracker
        tracker.predict()
        tracker.update(detections)

        # <Publisher code for controlling the st-mini>
        go.update(x, y, z, th, speed, turn)
        go.sendMsg()

        # Set the left/right turn limits
        left_limit = frame.shape[1] // 2 - 100
        right_limit = frame.shape[1] // 2 + 100

        # # Set the zones where the left/right turn speed increases
        # left_max = frame.shape[1] // 4
        # right_max = (frame.shape[1] // 4) * 3

        # Set the robot's maximum and minimum speeds
        # <!-- linear velocity --!>
        max_speed = 0.8
        min_speed = 0.5
        # <!-- angular velocity --!>
        max_turn = 1.3
        min_turn = 0.8

        # <!-┌---------- assigning a person id using the sub object ----------┐!>
        # build the track id list
        track_id_list = []

        # build the person-id-to-bbox dict
        person_id_to_bbox = {}
        """Data in the following format:
        person_id_to_bbox = {
            1: [100, 200, 300, 400]  # id: bbox
        }
        """

        # build the recognition bbox list
        recognition_bbox_list = []

        # Recognition object (if the recognition object lies inside a person's bbox,
        # that person's id keeps being tracked; if the target is later lost, it can
        # be re-identified the same way)
        recognition_object = 'tie'

        # build the track id list
        for track in tracker.tracks:
            class_name = track.get_class()
            track_id = track.track_id
            bbox = track.to_tlbr()

            # build the track id list
            track_id_list.append(track.track_id)

            if class_name == 'person':
                person_id_to_bbox[track_id] = bbox
            elif class_name == recognition_object:
                recognition_bbox_list.append(bbox)

        print('person_id_to_bbox: {}, recognition_bbox_list: {}'.format(person_id_to_bbox, recognition_bbox_list))

        # If the recognition object's bbox lies inside a person's bbox, assign that
        # person's id as target_id so that person keeps being tracked
        if target_id == False:  # if False, a person id still has to be assigned
            # try:
            for person_id, person_bbox in person_id_to_bbox.items():
                # person_bbox = list(map(int, person_bbox))
                for rec_bbox in recognition_bbox_list:
                    # rec_bbox = list(map(int, rec_bbox))
                    # if person_bbox[0] < 0: person_bbox[0] = 0
                    # elif person_bbox[1] < 0: person_bbox[1] = 0
                    # elif person_bbox[2] > frame.shape[0]: person_bbox[2] = frame.shape[0]
                    # elif person_bbox[3] > frame.shape[1]: person_bbox[3] = frame.shape[1]
                    if rec_bbox[0] >= person_bbox[0] and rec_bbox[1] >= person_bbox[1] \
                            and rec_bbox[2] <= person_bbox[2] and rec_bbox[3] <= person_bbox[3]:
                        target_id = person_id
                        print('target id assignment succeeded')
                    else:
                        target_id = False
                        print('target id assignment failed')
        print('target_id: ', target_id)
        # <!-└---------- assigning a person id using the sub object ----------┘!>

        # Tracking algorithm
        if len(tracker.tracks) == 0:
            # stop if there is nothing to track
            key = 'stop'
            print('There are no objects to track.')
        else:
            # act if there is something to track
            for track in tracker.tracks:
                if not track.is_confirmed() or track.time_since_update > 1:
                    continue
                track_id = track.track_id
                bbox = track.to_tlbr()
                class_name = track.get_class()

                if target_id == track_id and class_name == 'person':
                    # compute cx, cy
                    w, h = int(bbox[2] - bbox[0]), int(bbox[3] - bbox[1])
                    cx, cy = int(w / 2 + bbox[0]), int(h / 2 + bbox[1])

                    # distance between the person and the robot
                    person_distance = person_dist(depth_frame, cx, cy, h)
                    print('Distance to person: ', person_distance)

                    # max/min of the safe straight-driving range
                    stable_max_dist = 2500
                    stable_min_dist = 2000

                    if person_distance < stable_min_dist:
                        print('Too Close')
                        key = 'stop'
                    else:
                        """
                        Using the depth value:
                        1. Distance to the target (driving forward)
                           1) adequate distance: 1.5m ~ 2.0m --> linear.x = 1
                           2) danger distance:   ~1.5m       --> linear.x = 0
                           3) chasing distance:  2.0m~       --> linear.x += 0.2 (until the distance is adequate)
                        2. Turning left/right using the target's center point
                           1) the center point cx, cy is computed as:
                              width = bbox[2] - bbox[0]
                              height = bbox[3] - bbox[1]
                              cx = int(width / 2 + bbox[0])
                              cy = int(height / 2 + bbox[1])
                              Only cx is used, since this is a left/right decision.
                           2) caution when setting cx:
                              as the target leaves the frame, the bbox's top-left x coordinate can
                              become negative or exceed the frame width (here 640); i.e. when bbox[0]
                              is negative or above 640, a left/right turn must be made immediately.
                        The depth camera first checks for obstacles; if there are none, the distance
                        and direction to the target are measured and the final published values decided.
                        """
                        # Set the zones where the left/right turn speed increases
                        left_max = frame.shape[1] // 4
                        right_max = (frame.shape[1] // 4) * 3

                        max_speed = 1.5
                        min_speed = 1.2

                        """
                        Stop condition (3/3): when there is an obstacle; strictly speaking this is
                        an avoidance maneuver rather than a stop. obstacle_detect only assigns a
                        value to key when an obstacle is present.
                        """
                        # get the 'left'/'right' string from the obstacle-avoidance ROI distances
                        # if person_distance > stable_max_dist:
                        #     key = obstacle_detect(default, depth_frame)

                        cv2.rectangle(frame, (cx + 10, cy - (h // 5) + 10), (cx - 10, cy - (h // 5) - 10),
                                      (255, 0, 0), 5)

                        # follow the person if there is no obstacle
                        # if key == None:
                        key, speed, turn = drive(cx, left_limit, right_limit, turn, speed)
                        # key, speed, turn = drive2(cx, left_limit, right_limit, turn, frame, speed, max_speed, max_turn)
                        # key, speed, turn = drive3(cx, left_limit, right_limit, turn, frame, speed, max_speed, min_speed, max_turn, stable_min_dist, stable_max_dist, person_distance, start_speed_down=300)

                # draw bbox on screen
                color = colors[int(track.track_id) % len(colors)]
                color = [i * 255 for i in color]
                cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), color, 2)
                cv2.rectangle(frame, (int(bbox[0]), int(bbox[1] - 30)),
                              (int(bbox[0]) + (len(class_name) + len(str(track.track_id))) * 17, int(bbox[1])),
                              color, -1)
                cv2.putText(frame, class_name + "-" + str(track.track_id), (int(bbox[0]), int(bbox[1] - 10)),
                            0, 0.75, (255, 255, 255), 2)

        # Pass the speed/direction produced by the driving algorithm (drive) to the robot
        x, y, z, th, speed, turn = key_move(key, x, y, z, th, speed, turn)

        print('key: ', key)
        print('key_type: ', type(key))
        print('x: {}, y: {}, th: {}, speed: {}, turn: {}'.format(x, y, th, speed, turn))

        # mark the center of the frame
        cv2.circle(frame, (320, 240), 10, (255, 255, 255))

        # draw the left/right turn dividing lines
        cv2.line(frame, (left_limit, 0), (left_limit, frame.shape[0]), (255, 0, 0))
        cv2.line(frame, (right_limit, 0), (right_limit, frame.shape[0]), (255, 0, 0))

        # ROS Rate sleep
        rate.sleep()

        print('track id list: ', track_id_list)

        '''
        box_center_roi = np.array((depth_frame[cy-10:cy+10, cx-10:cx+10]), dtype=np.float64)
        cv2.rectangle(frame, (cx-10, cy+10), (cx+10, cy-10), (255, 255, 255), 2)
        '''

        safe_roi = np.array([[400, 400], [240, 400], [160, 480], [480, 480]])
        # safe_roi = np.array([[240, 420], [400, 420], [480, 160], [480, 480]])
        cv2.polylines(frame, [safe_roi], True, (255, 255, 255), 2)
        cv2.rectangle(frame, (205, 445), (195, 435), (255, 0, 0), 5)
        cv2.rectangle(frame, (245, 405), (235, 395), (255, 0, 0), 5)
        cv2.rectangle(frame, (405, 405), (395, 395), (255, 0, 0), 5)
        cv2.rectangle(frame, (445, 445), (435, 435), (255, 0, 0), 5)

        # calculate frames per second of running detections
        fps = 1.0 / (time.time() - start_time)
        print("FPS: %.2f" % fps)
        result = np.asarray(frame)
        result = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)

        # to view the depth map in color
        # depth_colormap = cv2.applyColorMap(cv2.convertScaleAbs(depth_frame, alpha=0.03), cv2.COLORMAP_JET)

        if not FLAGS.dont_show:
            cv2.imshow("Output Video", result)

        # if output flag is set, save video file
        # NOTE: this snippet never creates `out`; a cv2.VideoWriter must be set up
        # elsewhere before FLAGS.output is used.
        if FLAGS.output:
            out.write(result)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

    dc.release()
    cv2.destroyAllWindows()
def main():
    # Definition of the parameters
    max_cosine_distance = 0.3
    nn_budget = None
    nms_max_overlap = 1.0

    # Deep SORT
    model_filename = 'model_data/mars-small128.pb'
    encoder = gdet.create_box_encoder(model_filename, batch_size=1)

    metric = nn_matching.NearestNeighborDistanceMetric("cosine", max_cosine_distance, nn_budget)
    tracker = Tracker(metric)

    writeVideo_flag = True
    asyncVideo_flag = False

    file_path = 'model_data/input_video.avi'
    if asyncVideo_flag:
        video_capture = VideoCaptureAsync(file_path)
    else:
        video_capture = cv2.VideoCapture(file_path)

    if asyncVideo_flag:
        video_capture.start()

    if writeVideo_flag:
        if asyncVideo_flag:
            w = int(video_capture.cap.get(3))
            h = int(video_capture.cap.get(4))
        else:
            w = int(video_capture.get(3))
            h = int(video_capture.get(4))
        fourcc = cv2.VideoWriter_fourcc(*'XVID')
        out = cv2.VideoWriter('model_data/output_yolov3.mp4', fourcc, 25, (w, h))
        frame_index = -1

    fps = 0.0
    fps_imutils = imutils.video.FPS().start()

    net = load_net(b"model_data/yolov3-tiny.cfg", b"model_data/yolov3-tiny.weights", 0)
    meta = load_meta(b"model_data/coco.data")

    while True:
        ret, frame = video_capture.read()  # frame shape 640*480*3
        if ret != True:
            break

        t1 = time.time()

        # frame = cv2.resize(frame, (416, 416))
        boxes, confidences = detect(net, meta, frame)
        print(boxes)
        # print(boxes, confidences)
        boxes = [convertBack(float(box[0]), float(box[1]), float(box[2]), float(box[3])) for box in boxes]

        # boxes, confidences = yolo.detect_image(image)
        # print(boxes)
        # print(confidences)
        # boxes = np.array(boxes)
        # confidences = np.array(confidences)
        #
        # # Run non-maxima suppression.
        # indices = preprocessing.non_max_suppression(boxes, nms_max_overlap, confidences)
        #
        # boxes = [boxes[i] for i in indices]
        # confidences = [confidences[i] for i in indices]

        # time1 = time.time()

        features = encoder(frame, boxes)

        # time2 = time.time()

        detections = [Detection(bbox, confidence, feature)
                      for bbox, confidence, feature in zip(boxes, confidences, features)]

        # Call the tracker
        tracker.predict()
        tracker.update(detections)

        # time3 = time.time()

        for track in tracker.tracks:
            if not track.is_confirmed() or track.time_since_update > 1:
                continue
            bbox = track.to_tlbr()
            cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), (255, 255, 255), 2)
            cv2.putText(frame, str(track.track_id), (int(bbox[0]), int(bbox[1])), 0, 5e-3 * 200, (0, 255, 0), 2)

        for det in detections:
            bbox = det.to_tlbr()
            score = "%.2f" % round(det.confidence * 100, 2)
            cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), (255, 0, 0), 2)
            cv2.putText(frame, score + '%', (int(bbox[0]), int(bbox[3])), 0, 5e-3 * 130, (0, 255, 0), 2)

        cv2.imshow('', frame)

        if writeVideo_flag:  # and not asyncVideo_flag:
            # save a frame
            out.write(frame)
            frame_index = frame_index + 1

        fps_imutils.update()

        fps = (fps + (1. / (time.time() - t1))) / 2
        print("FPS = %f" % (fps))

        # time4 = time.time()
        #
        # time_sum = time4 - t1
        # print("time:", (time1-t1)/time_sum, (time2-time1)/time_sum, (time3-time2)/time_sum, (time4-time3)/time_sum)

        # Press Q to stop!
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

    fps_imutils.stop()
    print('imutils FPS: {}'.format(fps_imutils.fps()))

    if asyncVideo_flag:
        video_capture.stop()
    else:
        video_capture.release()

    if writeVideo_flag:
        out.release()

    cv2.destroyAllWindows()
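# Hedged sketch of the convertBack helper the function above assumes. darknet's
# detect() returns boxes as (x_center, y_center, width, height), while the Deep
# SORT encoder and Detection expect top-left (x, y, width, height), so a
# plausible conversion is the one below; the exact original may differ.
def convertBack(x, y, w, h):
    # center-based box -> top-left-based box (tlwh), as Deep SORT expects
    xmin = int(round(x - w / 2))
    ymin = int(round(y - h / 2))
    return xmin, ymin, int(w), int(h)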
def main(_argv):
    # Definition of the parameters
    max_cosine_distance = 0.4
    nn_budget = None
    nms_max_overlap = 1.0

    # initialize deep sort
    model_filename = 'model_data/mars-small128.pb'
    encoder = gdet.create_box_encoder(model_filename, batch_size=1)
    metric = nn_matching.NearestNeighborDistanceMetric("cosine", max_cosine_distance, nn_budget)
    tracker = Tracker(metric)

    # load configuration for object detector
    input_size = FLAGS.size
    video_path = FLAGS.video

    saved_model_loaded = tf.saved_model.load(FLAGS.weights, tags=[tag_constants.SERVING])
    infer = saved_model_loaded.signatures['serving_default']

    # begin video capture: try a webcam index first, then fall back to a file path
    try:
        vid = cv2.VideoCapture(int(video_path))
    except ValueError:
        vid = cv2.VideoCapture(video_path)

    out = None
    if FLAGS.output:
        width = int(vid.get(cv2.CAP_PROP_FRAME_WIDTH))
        height = int(vid.get(cv2.CAP_PROP_FRAME_HEIGHT))
        fps = int(vid.get(cv2.CAP_PROP_FPS))
        codec = cv2.VideoWriter_fourcc(*'XVID')  # fourcc takes four separate chars; the original was missing the `*`
        out = cv2.VideoWriter("demo.mp4", codec, fps, (width, height))
    frame_index = -1

    # swimming re-id
    model = load_model('best_model_triplet.hdf5', compile=False)
    le = pickle.loads(open('le.pickle', "rb").read())
    data = pickle.loads(open('recognizer.pikle', "rb").read())
    img_count = 0

    # while video is running
    while True:
        return_value, frame = vid.read()
        if return_value:
            frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        else:
            print('Video has ended or failed, try a different video format!')
            break

        image_data = cv2.resize(frame, (input_size, input_size))
        image_data = image_data / 255.
        image_data = image_data[np.newaxis, ...].astype(np.float32)
        start_time = time.time()

        batch_data = tf.constant(image_data)
        pred_bbox = infer(batch_data)
        for key, value in pred_bbox.items():
            boxes = value[:, :, 0:4]
            pred_conf = value[:, :, 4:]

        boxes, scores, classes, valid_detections = tf.image.combined_non_max_suppression(
            boxes=tf.reshape(boxes, (tf.shape(boxes)[0], -1, 1, 4)),
            scores=tf.reshape(pred_conf, (tf.shape(pred_conf)[0], -1, tf.shape(pred_conf)[-1])),
            max_output_size_per_class=50,
            max_total_size=50,
            iou_threshold=FLAGS.iou,
            score_threshold=FLAGS.score
        )

        # convert data to numpy arrays and slice out unused elements
        num_objects = valid_detections.numpy()[0]
        bboxes = boxes.numpy()[0]
        bboxes = bboxes[0:int(num_objects)]
        scores = scores.numpy()[0]
        scores = scores[0:int(num_objects)]
        classes = classes.numpy()[0]
        classes = classes[0:int(num_objects)]

        # format bounding boxes from normalized ymin, xmin, ymax, xmax ---> xmin, ymin, width, height
        original_h, original_w, _ = frame.shape
        bboxes = utils.format_boxes(bboxes, original_h, original_w)

        # read in all class names from config
        class_names = utils.read_class_names(cfg.YOLO.CLASSES)

        # by default allow all classes in .names file
        allowed_classes = list(class_names.values())

        # loop through objects and use class index to get class name,
        # allow only classes in allowed_classes list
        names = []
        deleted_indx = []
        for i in range(num_objects):
            class_indx = int(classes[i])
            class_name = class_names[class_indx]
            if class_name not in allowed_classes:
                deleted_indx.append(i)
            else:
                names.append(class_name)
        names = np.array(names)

        # delete detections that are not in allowed_classes
        bboxes = np.delete(bboxes, deleted_indx, axis=0)
        scores = np.delete(scores, deleted_indx, axis=0)

        # encode yolo detections and feed to tracker
        features = encoder(frame, bboxes)
        detections = [Detection(bbox, score, class_name, feature)
                      for bbox, score, class_name, feature in zip(bboxes, scores, names, features)]

        # initialize color map
        cmap = plt.get_cmap('tab20b')
        colors = [cmap(i)[:3] for i in np.linspace(0, 1, 20)]

        # run non-maxima suppression
        boxs = np.array([d.tlwh for d in detections])
        scores = np.array([d.confidence for d in detections])
        classes = np.array([d.class_name for d in detections])
        indices = preprocessing.non_max_suppression(boxs, classes, nms_max_overlap, scores)
        detections = [detections[i] for i in indices]

        # Call the tracker
        tracker.predict()
        tracker.update(detections)

        flag_ok = 0
        # map_reid = {}
        # refresh the re-identification map during the first 30 frames, then once every 30 frames
        if img_count < 30 or img_count % 30 == 0:
            map_reid = reid(tracker, data, frame, model, le)
        img_count += 1

        # update tracks
        color = (0, 0, 0)  # fallback so the except branch below always has a color to draw with
        for track in tracker.tracks:
            if not track.is_confirmed() or track.time_since_update > 1:
                continue
            bbox = track.to_tlbr()
            class_name = track.get_class()
            try:
                # fixed color per re-identified swimmer ID
                if map_reid[track.track_id] == 0:
                    color = (0, 0, 0)
                elif map_reid[track.track_id] == 1:
                    color = (255, 0, 0)
                elif map_reid[track.track_id] == 2:
                    color = (255, 240, 0)
                elif map_reid[track.track_id] == 3:
                    color = (0, 255, 0)
                elif map_reid[track.track_id] == 4:
                    color = (0, 255, 255)
                elif map_reid[track.track_id] == 5:
                    color = (0, 0, 255)
                elif map_reid[track.track_id] == 6:
                    color = (127, 0, 255)
                elif map_reid[track.track_id] == 7:
                    color = (255, 182, 190)
                else:
                    color = (0, 0, 0)
                cv2.rectangle(frame, (int(bbox[0]), int(bbox[1] - 30)), (int(bbox[2]), int(bbox[3] + 50)), color, 2)
                cv2.putText(frame, class_name + " ReID-" + str(map_reid[track.track_id]),
                            (int(bbox[0]), int(bbox[1] - 55)), 0, 0.7, color, 2)
                flag_ok = 1
            except Exception:
                # the track has no entry in the re-id map yet: rebuild the map and redraw the labels
                map_reid = reid(tracker, data, frame, model, le)
                for track in tracker.tracks:
                    if not track.is_confirmed() or track.time_since_update > 1:
                        continue
                    bbox = track.to_tlbr()
                    class_name = track.get_class()
                    cv2.putText(frame, class_name + " ReID-" + str(map_reid[track.track_id]),
                                (int(bbox[0]), int(bbox[1] - 55)), 0, 0.7, color, 2)
                    print("after", map_reid, track.track_id)

        # --------Threading------------#
        if flag_ok == 1:
            thread_num = 32
            threads = [None] * thread_num
            original_frame = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)
            f = original_frame
            n_t = len(tracker.tracks)
            chunk = max(n_t // thread_num, 1)
            s = 0
            e = chunk
            for i in range(thread_num):
                if i == thread_num - 1:
                    # the last worker takes the remainder (the original passed tracker.tracks[:-1] here)
                    threads[i] = threading.Thread(target=crop_and_recognize,
                                                  args=(tracker.tracks[s:], original_frame, map_reid,))
                else:
                    threads[i] = threading.Thread(target=crop_and_recognize,
                                                  args=(tracker.tracks[s:e], original_frame, map_reid,))
                s = e
                e += chunk  # slide the window (the original `e += e` doubled it instead)
            for t in threads:
                t.start()
            for t in threads:
                t.join()  # wait for recognition to finish before drawing the style labels below

            for i in range(len(tracker.tracks)):
                track = tracker.tracks[i]
                bbox = track.to_tlbr()
                try:
                    ID = map_reid[track.track_id]
                except Exception:
                    continue  # no re-id for this track yet (the original `pass` silently reused a stale ID)
                if not track.is_confirmed() or track.time_since_update > 1:
                    continue
                # color = colors[ID % len(colors)]
                # color = [i * 255 for i in color]
                # cv2.rectangle(f, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), color, 2)
                if swimmer_style.get(ID) is not None:
                    (sstyle, conf) = swimmer_style[ID]
                    cv2.putText(f, str(sstyle) + ": " + str(int(round(conf, 2) * 100)) + "%",
                                (int(bbox[0]), int(bbox[1] - 35)), 0, 0.7, (255, 255, 255), 2)
            frame = cv2.cvtColor(f, cv2.COLOR_BGR2RGB)
        else:
            print("NOT OKAY", "map_reid, track.track_id", map_reid, track.track_id,
                  "len(tracker.tracks)", len(tracker.tracks))

        # calculate frames per second of running detections
        fps = 1.0 / (time.time() - start_time)
        print("FPS: %.2f" % fps)
        result = np.asarray(frame)
        result = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)

        if not FLAGS.dont_show:
            cv2.imshow("Output Video", result)

        # if output flag is set, save video file
        if FLAGS.output:
            out.write(result)
            frame_index = frame_index + 1

        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

    cv2.destroyAllWindows()
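# A minimal alternative sketch for the hand-rolled 32-thread fan-out above: chunk the
# track list evenly and let an executor schedule the work. Assumes the same
# crop_and_recognize(tracks, frame, map_reid) signature used above; the worker count
# is illustrative.
from concurrent.futures import ThreadPoolExecutor

def recognize_in_parallel(tracks, frame, map_reid, workers=8):
    # split tracks into contiguous chunks (the last chunk takes the remainder)
    chunk = max(len(tracks) // workers, 1)
    chunks = [tracks[s:s + chunk] for s in range(0, len(tracks), chunk)]
    with ThreadPoolExecutor(max_workers=workers) as pool:
        # submit one job per chunk and wait for all of them before drawing labels
        futures = [pool.submit(crop_and_recognize, c, frame, map_reid) for c in chunks]
        for fut in futures:
            fut.result()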
def main(yolo):
    # Definition of the parameters
    max_cosine_distance = 0.3
    nn_budget = None
    nms_max_overlap = 1.0

    # Deep SORT
    model_filename = 'model_data/mars-small128.pb'
    encoder = gdet.create_box_encoder(model_filename, batch_size=1)

    # Calculate cosine distance metric
    metric = nn_matching.NearestNeighborDistanceMetric("cosine", max_cosine_distance, nn_budget)
    tracker = Tracker(metric)

    # Flags for the process
    tracking = True          # Set False if you only want to do detection
    writeVideo_flag = True   # Set False if you don't want to write frames locally
    asyncVideo_flag = False  # Asynchronous processing for better FPS. Warning: stuttering problem

    # Video file path
    file_path = './Input/video1.avi'

    # Check if the asyncVideo flag is set to True
    if asyncVideo_flag:
        video_capture = VideoCaptureAsync(file_path)
    else:
        video_capture = cv2.VideoCapture(file_path)

    if asyncVideo_flag:
        video_capture.start()

    if writeVideo_flag:
        if asyncVideo_flag:
            w = int(video_capture.cap.get(3))
            h = int(video_capture.cap.get(4))
        else:
            w = int(video_capture.get(3))
            h = int(video_capture.get(4))
        fourcc = cv2.VideoWriter_fourcc(*'XVID')
        output_file = os.path.basename(file_path)[:-4]
        out = cv2.VideoWriter('./Output/' + output_file + "_output.mp4", fourcc, 30, (w, h))
        frame_index = -1

    fps = 0.0
    fps_imutils = imutils.video.FPS().start()

    while True:
        ret, frame = video_capture.read()  # Capture frames
        if ret != True:
            break

        t1 = time.time()

        # BGR to RGB frame conversion
        image = Image.fromarray(frame[..., ::-1])

        # YOLOv4 detection
        boxes, confidence, classes = yolo.detect_image(image)

        if tracking:
            # Encode the frame and boxes for Deep SORT
            features = encoder(frame, boxes)
            # Deep SORT detections
            detections = [Detection(bbox, confidence, cls, feature)
                          for bbox, confidence, cls, feature in zip(boxes, confidence, classes, features)]
        else:
            # YOLOv4 detection only
            detections = [Detection_YOLO(bbox, confidence, cls)
                          for bbox, confidence, cls in zip(boxes, confidence, classes)]

        # Run non-maxima suppression.
        boxes = np.array([d.tlwh for d in detections])
        scores = np.array([d.confidence for d in detections])
        indices = preprocessing.non_max_suppression(boxes, nms_max_overlap, scores)
        detections = [detections[i] for i in indices]

        if tracking:
            # Call the tracker
            tracker.predict()
            tracker.update(detections)

            for track in tracker.tracks:
                if not track.is_confirmed() or track.time_since_update > 1:
                    continue
                bbox = track.to_tlbr()
                # Draw white bbox for Deep SORT
                cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), (255, 255, 255), 2)
                cv2.putText(frame, "ID: " + str(track.track_id), (int(bbox[0]), int(bbox[1])),
                            cv2.FONT_HERSHEY_COMPLEX_SMALL, 1.5, (0, 255, 0), 1)

        for det in detections:
            bbox = det.to_tlbr()
            score = "%.2f" % round(det.confidence * 100, 2)
            # Check the class for a colored bbox
            if len(classes) > 0:
                cls = det.cls
                if str(cls) == 'person':
                    # Draw blue bbox for YOLOv4 person detection
                    cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), (219, 152, 52), 2)
                elif str(cls) in ('backpack', 'handbag', 'suitcase'):
                    # Draw orange bbox for YOLOv4 backpack, handbag and suitcase detection
                    # (the original `== 'backpack' or 'handbag' or 'suitcase'` test was always true)
                    cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), (65, 176, 245), 2)

        if not asyncVideo_flag:
            fps = (fps + (1. / (time.time() - t1))) / 2
            print("FPS = %f" % (fps))
            cv2.putText(frame, "GPU: NVIDIA GEFORCE GTX 960M", (5, 70),
                        cv2.FONT_HERSHEY_COMPLEX_SMALL, 0.8, (0, 255, 0), 1)
            cv2.putText(frame, "FPS: %.2f" % fps, (5, 50),
                        cv2.FONT_HERSHEY_COMPLEX_SMALL, 0.8, (0, 255, 0), 1)

        # Draw the timestamp on the frame
        timestamp = datetime.datetime.now()
        ts = timestamp.strftime("%d/%m/%Y, %H:%M:%S")
        cv2.putText(frame, ts, (5, 30), cv2.FONT_HERSHEY_COMPLEX_SMALL, 0.8, (0, 255, 0), 1)

        cv2.imshow('', frame)

        if writeVideo_flag:  # and not asyncVideo_flag:
            # save a frame
            out.write(frame)
            frame_index = frame_index + 1

        fps_imutils.update()

        # Press Q to stop!
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

    fps_imutils.stop()
    print('imutils FPS: {}'.format(fps_imutils.fps()))

    if asyncVideo_flag:
        video_capture.stop()
    else:
        video_capture.release()

    if writeVideo_flag:
        out.release()

    cv2.destroyAllWindows()
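# Why the class test above needed fixing: in Python, `x == 'a' or 'b'` parses as
# `(x == 'a') or 'b'`, and a non-empty string literal is always truthy. A quick check:
assert ('person' == 'backpack' or 'handbag')                 # always True: the 'handbag' literal wins
assert 'person' not in ('backpack', 'handbag', 'suitcase')   # the membership test behaves as intended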
class ObjectTracking(object):
    def __init__(self, name):
        self.max_iou_distance = rospy.get_param('max_iou_distance', 0.7)
        self.max_age = rospy.get_param('max_age', 30)
        self.n_init = rospy.get_param('n_init', 3)
        self.metric = nn_matching.NearestNeighborDistanceMetric("cosine", 0.2, None)
        self.tracker = Tracker(self.metric, self.max_iou_distance, self.max_age, self.n_init)
        rospy.Subscriber("image_object", ObjectArray, self.__callback1)
        self.bridge = CvBridge()
        self.image_sub = rospy.Subscriber("image_raw", Image, self.__image_callback)
        self.pub = rospy.Publisher('object_tracked', ObjectArray, queue_size=10)
        self.image_pub = rospy.Publisher("object_tracked_image", Image, queue_size=10)
        self.cv_image = None
        self.visualizer = visualization.Visualization((960, 540), update_ms=100)
        self.mot_tracker = sort.Sort(iou_th=0.1)
        self.seedling_id_list = []
        self.seedling_lifetime = dict()

    # Deep SORT based callback (currently not subscribed; __callback1 is used instead)
    def __callback(self, data):
        def create_detections(objects):
            detection_list = []
            for obj in objects:
                bbox = np.array([obj.x_offset, obj.y_offset, obj.width, obj.height])
                confidence = obj.score
                detection_list.append(Detection(bbox, confidence, []))
            return detection_list

        print("Received detection results")
        # Load image and generate detections.
        detections = create_detections(data.objects)
        # Update tracker.
        self.tracker.predict()
        new_ids = self.tracker.update(detections)
        print(new_ids)
        for i in range(data.size):
            data.objects[i].id = new_ids[i]
        self.pub.publish(data)
        self.visualizer.set_image(self.cv_image)
        self.visualizer.draw_objects(data.objects)
        try:
            self.image_pub.publish(self.bridge.cv2_to_imgmsg(self.cv_image, "bgr8"))
        except CvBridgeError as e:
            print(e)

    def __callback1(self, data):
        def create_detections(objects, width, height):
            dets = []
            for obj in objects:
                det = [
                    obj.x_offset / width,
                    obj.y_offset / height,
                    (obj.x_offset + obj.width) / width,
                    (obj.y_offset + obj.height) / height,
                    obj.score
                ]
                dets.append(det)
            return np.array(dets)

        # width is shape[1], height is shape[0] (the original passed shape[0] for both)
        dets = create_detections(data.objects, self.cv_image.shape[1], self.cv_image.shape[0])

        # update trackers based on the current detection result
        new_ids, trackers = self.mot_tracker.update(dets)
        print("new_ids")
        print(new_ids)

        # update the tracker list
        print(trackers[:, 4])
        for d_index, d in enumerate(trackers[:, 4]):
            if d not in self.seedling_id_list:
                self.seedling_id_list.append(d)
            # max_cls_id = len(seedling_id_list)
            cur_d = self.seedling_id_list.index(d) + 1
            if cur_d not in list(self.seedling_lifetime.keys()):
                self.seedling_lifetime[cur_d] = 0
            self.seedling_lifetime[cur_d] = self.seedling_lifetime[cur_d] + 1

        for i in range(data.size):
            data.objects[i].id = new_ids[i]
        self.pub.publish(data)
        self.visualizer.set_image(self.cv_image)
        self.visualizer.draw_objects(data.objects)
        try:
            self.image_pub.publish(self.bridge.cv2_to_imgmsg(self.cv_image, "bgr8"))
        except CvBridgeError as e:
            print(e)
        print("============================")

    def __image_callback(self, image):
        try:
            self.cv_image = self.bridge.imgmsg_to_cv2(image, image.encoding)
        except CvBridgeError as e:
            print(e)
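# A minimal node bootstrap for the class above, assuming it lives in a ROS1 package;
# the node name is illustrative.
if __name__ == '__main__':
    rospy.init_node('object_tracking', anonymous=True)
    ObjectTracking('object_tracking')
    rospy.spin()  # hand control to the subscriber callbacks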
def YOLO():
    global metaMain, netMain, altNames
    configPath = "./cfg/yolov3-spp.cfg"
    weightPath = "./yolov3-spp.weights"
    metaPath = "./cfg/coco.data"
    if not os.path.exists(configPath):
        raise ValueError("Invalid config path `" + os.path.abspath(configPath) + "`")
    if not os.path.exists(weightPath):
        raise ValueError("Invalid weight path `" + os.path.abspath(weightPath) + "`")
    if not os.path.exists(metaPath):
        raise ValueError("Invalid data file path `" + os.path.abspath(metaPath) + "`")
    if netMain is None:
        netMain = darknet.load_net_custom(configPath.encode("ascii"),
                                          weightPath.encode("ascii"), 0, 1)  # batch size = 1
    if metaMain is None:
        metaMain = darknet.load_meta(metaPath.encode("ascii"))
    if altNames is None:
        try:
            with open(metaPath) as metaFH:
                metaContents = metaFH.read()
                import re
                match = re.search("names *= *(.*)$", metaContents, re.IGNORECASE | re.MULTILINE)
                if match:
                    result = match.group(1)
                else:
                    result = None
                try:
                    if os.path.exists(result):
                        with open(result) as namesFH:
                            namesList = namesFH.read().strip().split("\n")
                            altNames = [x.strip() for x in namesList]
                except TypeError:
                    pass
        except Exception:
            pass

    #cap = cv2.VideoCapture(0)
    # create overlay for computing the heatmap
    overlay_empty = np.zeros((608, 608, 1), np.uint8)

    cap = cv2.VideoCapture("video_test/01.avi")  # IMPORTANT
    cap.set(3, 1280)
    cap.set(4, 720)
    #print("width", cap.get(3))
    #print("height", cap.get(4))
    """
    out = cv2.VideoWriter(
        "video_output/modded_2-5f_124_pytorch_MGN_thres00965_dthres0375_x30y160_age7000_cbox20-10w-5h-160.mp4",
        cv2.VideoWriter_fourcc(*"MP4V"), 20,
        (darknet.network_width(netMain), darknet.network_height(netMain)))
    print("Starting the YOLO loop...")
    """
    out = cv2.VideoWriter(
        "txt_output_n50_0960/01.avi",
        cv2.VideoWriter_fourcc(*"MP4V"), 20,  # IMPORTANT
        (darknet.network_width(netMain), darknet.network_height(netMain)))
    print("Starting the YOLO loop...")

    # Create an image we reuse for each detect
    darknet_image = darknet.make_image(darknet.network_width(netMain),
                                       darknet.network_height(netMain), 3)

    # ================= Definition of the parameters
    max_cosine_distance = 0.096  # IMPORTANT PARAMETER
    maxAge = 100                 # IMPORTANT PARAMETER
    nn_budget = None
    nms_max_overlap = 1.0

    # ================= Models location  # IMPORTANT
    #model_filename = 'model_data/mars-small128.pb'  # Deep Cosine Metric model
    model_filename = 'model_data/model_MGN.pt'       # MGN model

    # ========================= Init MGN feature extractor
    extractor = Extractor(model_filename, use_cuda=True)  # IMPORTANT
    # ========================= Init Deep Cosine feature extractor
    #encoder = gdet.create_box_encoder(model_filename, batch_size=1)  # IMPORTANT

    metric = nn_matching.NearestNeighborDistanceMetric("cosine", max_cosine_distance, nn_budget)
    tracker = Tracker(metric, max_age=maxAge)

    fps = 0.0
    counts = 1
    ids = dict()
    b = set()
    fi = open("txt_output_n50_0960/01.txt", "w")  # must stay open: fi.write() is called below (it was commented out)

    while True:
        prev_time = time.time()
        ret, frame_read = cap.read()
        if ret != True:
            break
        if counts % 1 == 0:
            print("========FRAME========: ", counts)

        frame_rgb = cv2.cvtColor(frame_read, cv2.COLOR_BGR2RGB)
        frame_resized = cv2.resize(frame_rgb,
                                   (darknet.network_width(netMain), darknet.network_height(netMain)),
                                   interpolation=cv2.INTER_LINEAR)
        height, width, channels = frame_resized.shape
        #print("Shape of frame: ", frame_resized.shape)

        darknet.copy_image_from_bytes(darknet_image, frame_resized.tobytes())
        boxs = darknet.detect_image(netMain, metaMain, darknet_image, thresh=0.375)

        boxes = []
        for detection in boxs:
            if detection[0].decode() != "person":
                continue
            x, y, w, h = detection[2][0], detection[2][1], detection[2][2], detection[2][3]
            #xmin = int(round(float(x) - (float(w) / 2)))
            #ymin = int(round(float(y) - (float(h) / 2)))
            xmin, ymin, xmax, ymax = convertBack(int(x), int(y), int(w), int(h))

            # ========================= generate HEATMAP values ====================
            x_heat = int(round((xmin + xmax) / 2))
            overlay_empty[int(ymax) - 10:int(ymax) + 10, x_heat - 10:x_heat + 10] += 5
            # ======================================================================

            cv2.rectangle(frame_resized, (xmin, ymin), (xmax, ymax), (255, 0, 255), 1)
            if (xmin > 100 and ymin > 10) and (xmax < width - 5 and ymax < height - 120):
                if (xmax - xmin) > 30 and (ymax - ymin) > 160:
                    boxes.append([xmin, ymin, w, h])

        pt1 = (100, 10)
        pt2 = (width - 5, height - 120)
        cv2.rectangle(frame_resized, pt1, pt2, (0, 255, 0), 1)

        # ==================== EXTRACT FEATURES ====================  # IMPORTANT
        #features = encoder(frame_resized, boxes)  # Deep Cosine Metric extractor
        features = get_features(boxes, frame_resized, extractor, width, height)  # MGN extractor
        # ============================================================

        detections = [Detection(bbox, 1.0, feature) for bbox, feature in zip(boxes, features)]

        boxes = np.array([d.tlwh for d in detections])
        scores = np.array([d.confidence for d in detections])
        indices = preprocessing.non_max_suppression(boxes, nms_max_overlap, scores)
        detections = [detections[i] for i in indices]

        # update tracker
        tracker.predict()
        tracker.update(detections)

        # ====================== Fix the ID jumping problem ======================
        if counts == 1:
            count = 1
            for track in tracker.tracks:
                ids[track.track_id] = count
                count += 1
        else:
            count = 0
            for track in tracker.tracks:
                if not track.is_confirmed() or track.time_since_update > 1:
                    continue
                if track.track_id in ids:
                    continue
                if len(list(ids.values())) != 0:
                    count = max(list(ids.values()))
                ids[track.track_id] = count + 1
        #print(ids)
        # =========================================================================

        cv2.putText(frame_resized, "FrameID: " + str(counts), (350, 30), 0, 5e-3 * 150, (255, 255, 0), 2)

        for track in tracker.tracks:
            if not track.is_confirmed() or track.time_since_update > 1:
                continue
            #print("=======ID at the frame=======: ", track.track_id)
            bbox = track.to_tlbr()
            cv2.rectangle(frame_resized, (int(bbox[0]), int(bbox[1])),
                          (int(bbox[2]), int(bbox[3])), (255, 255, 0), 2)
            if track.track_id in ids:
                cv2.putText(frame_resized, str(ids[track.track_id]),
                            (int(bbox[2]), int(bbox[3])), 0, 5e-3 * 250, (255, 255, 0), 2)
                if counts % 2 == 0:
                    fi.write(str(counts) + " " + str(int(bbox[2])) + " " + str(int(bbox[3])) +
                             " " + str(ids[track.track_id]) + " " + "\n")
            b.add(track.track_id)
            #if (np.allclose(overlay_empty[int(bbox[3])-5:int(bbox[3])+5, x_heat-5:x_heat+5, 0], 255)): continue
        #print(b)

        if len(list(ids.values())) != 0:
            text = "Num people: {}".format(max(list(ids.values())))
            cv2.putText(frame_resized, text, (1, 30), 0, 5e-3 * 200, (255, 0, 0), 2)

        # clamp saturated heatmap cells so the colormap does not overflow
        overlay_empty[np.where((overlay_empty[:, :, 0] > 200))] = 230
        """
        for det in detections:
            bbox = det.to_tlbr()
            cv2.rectangle(frame_resized, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), (255, 0, 0), 2)
        """

        # ============== BLENDING HEATMAP VALUES TO FRAME ==============
        im_color = cv2.applyColorMap(overlay_empty, cv2.COLORMAP_JET)
        im_color = cv2.blur(im_color, (10, 10))
        #im_color[:,:,0] = 0
        image = cv2.cvtColor(frame_resized, cv2.COLOR_BGR2RGB)
        #image = cvDrawBoxes(detections, frame_resized)
        heat = cv2.addWeighted(image, 0.7, im_color, 0.3, 0)
        # ===============================================================

        out.write(heat)
        print("fps: ", 1 / (time.time() - prev_time))
        print("\n")
        #fps = (fps + (1. / (time.time() - prev_time))) / 2
        #print("fps= %f" % (fps))
        #cv2.imshow('Demo', image)
        #cv2.imshow('', heat)
        #cv2.imshow('', cv2.resize(image, (1024, 600)))
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
        counts = counts + 1
        fps = fps + (1 / (time.time() - prev_time))

    print("average fps: ", fps / counts)
    fi.close()
    cap.release()
    out.release()
    cv2.destroyAllWindows()
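# convertBack() is called above but not defined in this excerpt. A minimal sketch,
# assuming it converts darknet's center-based (x, y, w, h) box to corner coordinates:
def convertBack(x, y, w, h):
    xmin = int(round(x - (w / 2)))
    xmax = int(round(x + (w / 2)))
    ymin = int(round(y - (h / 2)))
    ymax = int(round(y + (h / 2)))
    return xmin, ymin, xmax, ymax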
def main(_argv):
    # Definition of the parameters
    max_cosine_distance = 0.5
    nn_budget = None
    nms_max_overlap = 1.0
    inputIndex = 0

    # initialize deep sort
    model_filename = 'model_data/mars-small128.pb'
    encoder = gdet.create_box_encoder(model_filename, batch_size=1)
    metric = nn_matching.NearestNeighborDistanceMetric("cosine", max_cosine_distance, nn_budget)
    tracker = Tracker(metric)

    physical_devices = tf.config.experimental.list_physical_devices('GPU')
    if len(physical_devices) > 0:
        tf.config.experimental.set_memory_growth(physical_devices[0], True)

    if FLAGS.tiny:
        yolo = YoloV3Tiny(classes=FLAGS.num_classes)
    else:
        yolo = YoloV3(classes=FLAGS.num_classes)

    yolo.load_weights(FLAGS.weights)
    logging.info('weights loaded')

    class_names = [c.strip() for c in open(FLAGS.classes).readlines()]
    logging.info('classes loaded')

    try:
        vid = cv2.VideoCapture(int(FLAGS.video))
    except ValueError:
        vid = cv2.VideoCapture(FLAGS.video)

    out = None
    if FLAGS.output:
        # by default VideoCapture returns float instead of int
        width = int(vid.get(cv2.CAP_PROP_FRAME_WIDTH))
        height = int(vid.get(cv2.CAP_PROP_FRAME_HEIGHT))
        fps = int(vid.get(cv2.CAP_PROP_FPS))
        codec = cv2.VideoWriter_fourcc(*FLAGS.output_format)
        out = cv2.VideoWriter(FLAGS.output, codec, fps, (width, height))
    list_file = open('detection.txt', 'w')
    frame_index = -1

    fps = 0.0
    count = 0
    while True:
        _, img = vid.read()
        if img is None:
            logging.warning("Empty Frame")
            time.sleep(0.1)
            count += 1
            if count < 3:
                continue
            else:
                break

        img_in = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        img_in = tf.expand_dims(img_in, 0)
        img_in = transform_images(img_in, FLAGS.size)

        t1 = time.time()
        boxes, scores, classes, nums = yolo.predict(img_in)
        classes = classes[0]
        names = []
        for i in range(len(classes)):
            names.append(class_names[int(classes[i])])
        names = np.array(names)
        converted_boxes = convert_boxes(img, boxes[0])
        features = encoder(img, converted_boxes)
        detections = [Detection(bbox, score, class_name, feature)
                      for bbox, score, class_name, feature in zip(converted_boxes, scores[0], names, features)]

        # initialize color map; cmap expects values in [0, 1]
        # (the original np.linspace(0, 20, 30) clamps almost every entry to the same color)
        cmap = plt.get_cmap('tab20b')
        colors = [cmap(i)[:3] for i in np.linspace(0, 1, 20)]

        # run non-maxima suppression
        boxs = np.array([d.tlwh for d in detections])
        scores = np.array([d.confidence for d in detections])
        classes = np.array([d.class_name for d in detections])
        indices = preprocessing.non_max_suppression(boxs, classes, nms_max_overlap, scores)
        detections = [detections[i] for i in indices]

        # Call the tracker
        tracker.predict()
        tracker.update(detections)

        for track in tracker.tracks:
            if not track.is_confirmed() or track.time_since_update > 1:
                continue
            bbox = track.to_tlbr()
            class_name = track.get_class()
            if class_name == "Person" or class_name == "person":
                inputIndex += 1
            color = colors[int(track.track_id) % len(colors)]
            color = [i * 255 for i in color]
            cv2.rectangle(img, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), color, 2)
            # crop of the current person, used by the commented-out histogram/plot code below
            im = img[int(bbox[1]):int(bbox[3]), int(bbox[0]):int(bbox[2])]
            cv2.rectangle(img, (int(bbox[0]), int(bbox[1] - 30)),
                          (int(bbox[0]) + (len(class_name) + len(str(track.track_id))) * 17, int(bbox[1])),
                          color, -1)
            #cv2.imwrite("C:\Yolov3DeepSortPersonID\yolov3_deepsort\data\Cropped" + str(inputIndex) + ".png", im)
            color = ('b', 'g', 'r')
            cv2.putText(img, class_name + "-" + str(track.track_id),
                        (int(bbox[0]), int(bbox[1] - 10)), 0, 0.75, (255, 255, 255), 2)
            ## for channel, col in enumerate(color):  # for histogram
            ##     ax1 = plt.subplot(1, 1, 1)
            ##     #ax2 = plt.subplot(1, 2, 2)
            ##     histr = cv2.calcHist([im], [channel], None, [256], [0, 256])
            ##     #plt.plot(histr, color=col)
            ##     #plt.xlim([0, 256])
            ##     #plt.title('Histogram for color scale picture')
            ##     plt.axis('off')
            ##     ax1.imshow(im)
            ##     #ax2.plot(histr)
            ##     plt.savefig("C:\Yolov3DeepSortPersonID\yolov3_deepsort\data\APlot" + str(inputIndex) + ".png")
            #cv2.imwrite("C:\Yolov3DeepSortPersonID\yolov3_deepsort\data\Final" + str(inputIndex) + ".png", im)

        ### UNCOMMENT BELOW IF YOU WANT CONSTANTLY CHANGING YOLO DETECTIONS TO BE SHOWN ON SCREEN
        #for det in detections:
        #    bbox = det.to_tlbr()
        #    cv2.rectangle(img, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), (255, 0, 0), 2)

        # print fps on screen
        fps = (fps + (1. / (time.time() - t1))) / 2
        cv2.putText(img, "FPS: {:.2f}".format(fps), (0, 30), cv2.FONT_HERSHEY_COMPLEX_SMALL, 1, (0, 0, 255), 2)
        cv2.imshow('output', img)

        if FLAGS.output:
            out.write(img)
            frame_index = frame_index + 1
            list_file.write(str(frame_index) + ' ')
            if len(converted_boxes) != 0:
                for i in range(0, len(converted_boxes)):
                    list_file.write(str(converted_boxes[i][0]) + ' ' + str(converted_boxes[i][1]) + ' ' +
                                    str(converted_boxes[i][2]) + ' ' + str(converted_boxes[i][3]) + ' ')
            list_file.write('\n')

        # press q to quit
        if cv2.waitKey(1) == ord('q'):
            break

    vid.release()
    if FLAGS.output:  # fixed typo: the original tested FLAGS.ouput, which would raise at shutdown
        out.release()
    list_file.close()
    cv2.destroyAllWindows()
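# convert_boxes() is called above but not defined in this excerpt. A minimal sketch,
# assuming it maps YOLO's normalized (xmin, ymin, xmax, ymax) output to the pixel
# (top-left x, top-left y, width, height) format Deep SORT's Detection expects:
def convert_boxes(image, boxes):
    converted = []
    h, w = image.shape[0], image.shape[1]
    for box in boxes:
        xmin, ymin, xmax, ymax = box[0] * w, box[1] * h, box[2] * w, box[3] * h
        if xmax - xmin <= 0 or ymax - ymin <= 0:
            continue  # skip the zero-padded entries YOLO returns for empty detection slots
        converted.append([int(xmin), int(ymin), int(xmax - xmin), int(ymax - ymin)])
    return converted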
def main(args, hyp):
    physical_devices = tf.config.experimental.list_physical_devices('GPU')
    if len(physical_devices) > 0:
        tf.config.experimental.set_memory_growth(physical_devices[0], True)

    max_cosine_distance = 0.3
    nn_budget = None
    nms_max_overlap = 1.0
    counter = []

    # deep_sort
    model_filename = 'saved_model/market1501'
    encoder = gdet.create_box_encoder(model_filename, batch_size=1)
    metric = nn_matching.NearestNeighborDistanceMetric("cosine", max_cosine_distance, nn_budget)
    tracker = Tracker(metric)

    # select yolo
    YOLO, input_details, output_details, saved_model_loaded = select_yolo(args, hyp)

    video_capture = cv2.VideoCapture(args.video)

    # Define the codec and create VideoWriter object
    if args.write_video:
        w = int(video_capture.get(3))
        h = int(video_capture.get(4))
        fourcc = cv2.VideoWriter_fourcc(*'MJPG')
        out = cv2.VideoWriter('./output/output.avi', fourcc, 15, (w, h))
        list_file = open('detection_rslt.txt', 'w')
        frame_index = -1

    fps = 0.0
    convert_class_name = load_class_name(args.data_root, args.class_file)

    while True:
        ret, frame = video_capture.read()  # frame shape 640*480*3
        if not ret:
            break
        t1 = time.time()

        image = np.squeeze(frame)
        img = cv2.cvtColor(image.copy(), cv2.COLOR_BGR2RGB) / 255.0
        h, w, _ = img.shape
        if h != args.img_size or w != args.img_size:
            img = cv2.resize(img, (args.img_size, args.img_size))

        inf_time = time.time()
        boxes, confidence, class_names, valid_detections = model_detection(img, YOLO, args,
                                                                           input_details, output_details)
        print("inf time", time.time() - inf_time)

        tran_time = time.time()
        y_min, x_min, y_max, x_max = convert_to_origin_shape(boxes, None, None, h, w)
        w, h = x_max - x_min, y_max - y_min
        boxes = np.concatenate([x_min, y_min, w, h], -1)
        boxes = tf.squeeze(boxes, 0)  # 100, 4
        class_names = tf.squeeze(class_names, 0)
        print("tran time", time.time() - tran_time)

        st_t = time.time()
        features = encoder(frame, boxes[:valid_detections[0]])
        detections = [Detection(bbox, 1.0, feature) for bbox, feature in zip(boxes, features)]

        # Run non-maxima suppression.
        boxes = np.array([d.tlwh for d in detections])
        scores = np.array([d.confidence for d in detections])
        indices = preprocessing.non_max_suppression(boxes, nms_max_overlap, scores)
        detections = [detections[i] for i in indices]

        # Call the tracker
        tracker.predict()
        tracker.update(detections)
        print("tracker time", time.time() - st_t)

        i = int(0)
        indexIDs = []
        draw_st = time.time()

        for det in detections:
            bbox = det.to_tlbr()
            cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), (255, 255, 255), 2)

        for track in tracker.tracks:
            if not track.is_confirmed() or track.time_since_update > 1:
                continue
            indexIDs.append(int(track.track_id))
            counter.append(int(track.track_id))
            bbox = track.to_tlbr()
            color = [int(c) for c in COLORS[indexIDs[i] % len(COLORS)]]

            cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), (color), 3)
            cv2.putText(frame, str(track.track_id), (int(bbox[0]), int(bbox[1] - 50)), 0, 5e-3 * 150, (color), 2)
            if len(class_names) > 0:
                cls = np.int8(class_names[0])
                cv2.putText(frame, str(convert_class_name[cls]), (int(bbox[0]), int(bbox[1] - 20)),
                            0, 5e-3 * 150, (color), 2)
            i += 1

            # center point and motion path (pts is assumed to be a per-track deque list defined at module level)
            center = (int(((bbox[0]) + (bbox[2])) / 2), int(((bbox[1]) + (bbox[3])) / 2))
            pts[track.track_id].append(center)
            thickness = 5
            cv2.circle(frame, (center), 1, color, thickness)

            # draw motion path
            for j in range(1, len(pts[track.track_id])):
                if pts[track.track_id][j - 1] is None or pts[track.track_id][j] is None:
                    continue
                thickness = int(np.sqrt(64 / float(j + 1)) * 2)
                cv2.line(frame, (pts[track.track_id][j - 1]), (pts[track.track_id][j]), (color), thickness)

            if args.write_video:
                list_file.write(str(frame_index) + ',')
                list_file.write(str(track.track_id) + ',')
                b0 = str(bbox[0])
                b1 = str(bbox[1])
                b2 = str(bbox[2] - bbox[0])
                b3 = str(bbox[3] - bbox[1])
                list_file.write(str(b0) + ',' + str(b1) + ',' + str(b2) + ',' + str(b3) + '\n')

        cv2.putText(frame, "Current Box Counter: " + str(i), (int(20), int(80)), 0, 5e-3 * 200, (0, 255, 0), 2)
        cv2.putText(frame, "FPS: %f" % (fps), (int(20), int(40)), 0, 5e-3 * 200, (0, 255, 0), 3)
        cv2.namedWindow("YOLO4_Deep_SORT", 0)
        cv2.resizeWindow('YOLO4_Deep_SORT', 1024, 768)
        cv2.imshow('YOLO4_Deep_SORT', frame)

        if args.write_video:
            # save a frame
            out.write(frame)
            frame_index = frame_index + 1

        fps = (fps + (1. / (time.time() - t1))) / 2
        print("draw_time", time.time() - draw_st)

        # Press Q to stop!
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

    video_capture.release()
    cv2.destroyAllWindows()
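# `pts` is used above but defined elsewhere in this project. A minimal sketch of the
# usual setup: one bounded deque of recent centroids per track id, so old path points
# age out automatically as new ones arrive.
from collections import deque
pts = [deque(maxlen=30) for _ in range(9999)]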
def release(stack):  # fixed typo: the original was named `realse`
    # Work around GPU memory over-allocation
    import tensorflow as tf
    os.environ["CUDA_VISIBLE_DEVICES"] = '0'
    config = tf.ConfigProto()
    #config.gpu_options.per_process_gpu_memory_fraction = 0.3
    config.gpu_options.allow_growth = True
    session = tf.Session(config=config)

    print('Begin to get frame......')
    max_cosine_distance = 0.3
    nn_budget = None
    nms_max_overlap = 1.0

    # deep_sort
    model_filename = 'mars-small128.pb'
    encoder = gdet.create_box_encoder(model_filename, batch_size=1)
    metric = nn_matching.NearestNeighborDistanceMetric("cosine", max_cosine_distance, nn_budget)
    tracker = Tracker(metric)

    # mtcnn
    detectors = mtcnn('onet')
    mtcnnDetector = MtcnnDetector(detectors=detectors, min_face_size=24, threshold=[0.9, 0.6, 0.7])

    while True:
        if len(stack) >= 20:
            frame = stack.pop()
            #frame = cv2.resize(frame, (int(frame.shape[1]/3), int(frame.shape[0]/3)))
            frame = np.array(frame)

            # `tmp` holds [x, y, w, h]: x, y are the top-left corner of the detection box,
            # w and h its width and height. The raw MTCNN output is a pair of corner points
            # plus a face probability, i.e. [x1, y1, x2, y2, confidence].
            tmp, _ = mtcnnDetector.detect_follow(frame)

            features = encoder(frame, tmp)
            detections = [Detection(bbox, 1.0, feature) for bbox, feature in zip(tmp, features)]

            # Run non-maxima suppression.
            boxes = np.array([d.tlwh for d in detections])
            scores = np.array([d.confidence for d in detections])
            indices = preprocessing.non_max_suppression(boxes, nms_max_overlap, scores)
            detections = [detections[i] for i in indices]

            # Call the tracker
            tracker.predict()
            tracker.update(detections)

            for track in tracker.tracks:
                if not track.is_confirmed() or track.time_since_update > 1:
                    continue
                bbox = track.to_tlbr()
                cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), (0, 255, 0), 1)
                cv2.putText(frame, str(track.track_id), (int(bbox[0]), int(bbox[1])), 0, 0.5, (0, 255, 0), 2)

            for det in detections:
                bbox = det.to_tlbr()
                cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), (0, 0, 255), 1)

            cv2.imshow("Detected", frame)
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break

    cv2.destroyAllWindows()
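# release(stack) above pops frames from a shared container filled elsewhere. A minimal
# producer sketch, assuming `stack` is a plain list shared with this consumer; the
# video source and buffer sizes are illustrative.
def capture(stack, source="video.mp4", max_len=100):
    cap = cv2.VideoCapture(source)
    while True:
        ret, frame = cap.read()
        if not ret:
            break
        stack.append(frame)
        if len(stack) > max_len:
            del stack[:-20]  # keep only the newest frames so the consumer never lags far behind
    cap.release()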
def main(yolo):
    # Definition of the parameters
    max_cosine_distance = 0.3
    nn_budget = None
    nms_max_overlap = 1.0

    # deep_sort
    model_filename = 'model_data/mars-small128.pb'
    encoder = gdet.create_box_encoder(model_filename, batch_size=1)
    metric = nn_matching.NearestNeighborDistanceMetric("cosine", max_cosine_distance, nn_budget)
    tracker = Tracker(metric)

    writeVideo_flag = True
    video_capture = cv2.VideoCapture("video.mp4")

    if writeVideo_flag:
        # Define the codec and create VideoWriter object
        w = int(video_capture.get(3))
        h = int(video_capture.get(4))
        fourcc = cv2.VideoWriter_fourcc(*'XVID')
        out = cv2.VideoWriter('output.avi', fourcc, 15, (w, h))
        list_file = open('detection.txt', 'w')
        frame_index = -1

    fps = 0.0
    #polys = np.array([[700, 40], [400, 950], [800, 1080], [1880, 1080], [1300, 0]], dtype=np.int32)
    count = 0

    while True:
        ret, f = video_capture.read()  # frame shape 640*480*3
        if ret != True:  # check before touching the frame (the original read f.shape first and crashed at EOF)
            break

        rows, cols = f.shape[:2]
        M = cv2.getRotationMatrix2D((cols / 2, rows / 2), 0, 1.2)
        #f = f * mask
        frame = cv2.warpAffine(f.astype(np.uint8), M, (cols, rows))
        frame = cv2.resize(frame, (700, 500))
        cv2.line(frame, (0, 250), (700, 250), (255, 255, 0), 3)
        #frame = frame[120:300+300, 60:300+500]

        t1 = time.time()

        image = Image.fromarray(frame)
        boxs = yolo.detect_image(image)
        #print("box_num", len(boxs))
        features = encoder(frame, boxs)

        # score to 1.0 here).
        detections = [Detection(bbox, 1.0, feature) for bbox, feature in zip(boxs, features)]

        # Run non-maxima suppression.
        boxes = np.array([d.tlwh for d in detections])
        scores = np.array([d.confidence for d in detections])
        indices = preprocessing.non_max_suppression(boxes, nms_max_overlap, scores)
        detections = [detections[i] for i in indices]

        # Call the tracker
        tracker.predict()
        tracker.update(detections)

        for track in tracker.tracks:
            if not track.is_confirmed() or track.time_since_update > 1:
                continue
            bbox = track.to_tlbr()
            cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), (255, 255, 255), 2)
            cv2.putText(frame, str(track.track_id), (int(bbox[0]), int(bbox[1])), 0, 5e-3 * 200, (0, 255, 0), 2)

            # find the object's centroid
            CoordXCentroid = int(bbox[0] + bbox[2]) / int(2)
            CoordYCentroid = int(bbox[1] + bbox[3]) / int(2)
            ObjectCentroid = (int(CoordXCentroid), int(CoordYCentroid))
            cv2.circle(frame, ObjectCentroid, 1, (255, 255, 255), 5)

            # count an entrance when the centroid is within 5 px of the counting line (y = 250)
            distance = abs(250 - CoordYCentroid)
            if (distance <= 5):
                count = count + 1

        cv2.putText(frame, "Entrance: {}".format(count), (400, 50), 0, 5e-3 * 200, (255, 255, 255), 2)

        for det in detections:
            bbox = det.to_tlbr()
            cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), (255, 0, 0), 2)

        cv2.imshow('', frame)

        if writeVideo_flag:
            # save a frame
            out.write(frame)
            frame_index = frame_index + 1
            list_file.write(str(frame_index) + ' ')
            if len(boxs) != 0:
                for i in range(0, len(boxs)):
                    list_file.write(str(boxs[i][0]) + ' ' + str(boxs[i][1]) + ' ' +
                                    str(boxs[i][2]) + ' ' + str(boxs[i][3]) + ' ')
            list_file.write('\n')

        fps = (fps + (1. / (time.time() - t1))) / 2
        print("fps= %f" % (fps))

        # Press Q to stop!
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

    video_capture.release()
    if writeVideo_flag:
        out.release()
        list_file.close()
    cv2.destroyAllWindows()
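# The 5 px band above can count the same person several times while they linger on the
# counting line. A minimal de-duplication sketch: remember each track's last side of
# the line and count only on an actual side change. `counted_side` is a new helper dict,
# not part of the original script.
counted_side = {}

def crossed_line(track_id, centroid_y, line_y=250):
    side = 'above' if centroid_y < line_y else 'below'
    crossed = counted_side.get(track_id) not in (None, side)
    counted_side[track_id] = side
    return crossed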
def get_detect(id_video):
    from collections import deque
    dq = deque(maxlen=1)
    t_reader = threading.Thread(target=read_video, args=(dq,))  # renamed: `t1` was reused below as a timer
    t_reader.start()
    # myout = save_video(video_reader, "./video.mp4", sz)

    max_cosine_distance = 0.3
    nn_budget = None
    nms_max_overlap = 1.0
    counter = []
    my_track_dict = {}       # save the info of each track_id
    track_smooth_dict = {}   # smooth the displayed boxes
    pts = [deque(maxlen=30) for _ in range(9999)]

    # deep_sort
    model_filename = 'model_data/market1501.pb'
    encoder = gdet.create_box_encoder(model_filename, batch_size=1)
    metric = nn_matching.NearestNeighborDistanceMetric("cosine", max_cosine_distance, nn_budget)
    tracker = Tracker(metric)

    list_file = open('detection_rslt.txt', 'w')
    my_num = 0
    num = 0
    t1 = time.time()
    fps = 0
    fps1 = time.time()

    while True:
        if dq:
            img = dq.pop()
        else:
            key = cv2.waitKey(20)
            continue

        # TEST FPS
        fps2 = time.time()
        fps += 1
        if fps2 - fps1 > 1:
            print(fps)
            fps = 0
            fps1 = time.time()

        start_time = time.time()
        num += 1
        #frame = cv2.imread(filename)
        # 1. read a file path that may contain non-ASCII (e.g. Chinese) characters
        #img = cv2.imdecode(np.fromfile(filename, dtype=np.uint8), cv2.IMREAD_COLOR)
        img_h, img_w, img_ch = img.shape
        print(img.shape)

        # 2. keep clean copies so the drawn boxes are not cropped into later crops or inference inputs
        show_image = img.copy()
        frame = img.copy()

        # person detection
        boxs, confidence, class_names = [], [], []
        out = preson_detect(img)  # note: `preson_detect` is the detector's (misspelled) name elsewhere in this project

        # transform the object detection output into tracker input
        for i in range(len(out)):
            # ======== my_setting ==============
            if out[i, 2] > 0.7:
                # print(out[i])
                left = int(out[i, 3] * img_w)
                top = int(out[i, 4] * img_h)
                p_w = int(out[i, 5] * img_w - out[i, 3] * img_w)
                p_h = int(out[i, 6] * img_h - out[i, 4] * img_h)
                right = left + p_w
                bottom = top + p_h
                # only keep persons whose foot point lies inside the configured area
                point1 = [int((left + right) / 2), bottom]
                my_index = inner_point(point1)
                if my_index:
                    boxs.append([left, top, p_w, p_h])
                    class_names.append("person")
                    confidence.append(out[i, 2])

        # start the tracker
        features = encoder(frame, boxs)
        # score to 1.0 here).
        detections = [Detection(bbox, 1.0, feature) for bbox, feature in zip(boxs, features)]

        # Run non-maxima suppression.
        boxes = np.array([d.tlwh for d in detections])
        scores = np.array([d.confidence for d in detections])
        indices = preprocessing.non_max_suppression(boxes, nms_max_overlap, scores)
        detections = [detections[i] for i in indices]

        # Call the tracker
        tracker.predict()
        tracker.update(detections)

        i = int(0)
        indexIDs = []

        # attribute re-detection timer
        t2 = time.time()
        detect_time = t2 - t1
        # ======== my_setting ==============
        control = 1  # re-detect attributes at most once per `control` seconds
        if detect_time > control:
            t1 = time.time()

        for det, track in zip(detections, tracker.tracks):
            if not track.is_confirmed() or track.time_since_update > 1:
                continue
            #print(track.track_id)
            pbox = det.to_tlbr()
            #cv2.rectangle(frame, (int(pbox[0]), int(pbox[1])), (int(pbox[2]), int(pbox[3])), (255, 255, 255), 2)
            my_key = str(int(track.track_id))

            # ======== my_setting ==============
            # re-run attribute detection for new track ids, or when the timer has expired
            if my_key not in my_track_dict.keys() or detect_time > control:
                print(my_key)
                print(my_track_dict.keys())
                # process the person box
                label_dict = person_detect(img, pbox)
                if not label_dict:
                    continue
                my_track_dict[my_key] = label_dict

            frame = draw_person_attr(frame, my_track_dict[my_key], pbox)

            indexIDs.append(int(track.track_id))
            counter.append(int(track.track_id))
            bbox = track.to_tlbr()
            color = [int(c) for c in COLORS[indexIDs[i] % len(COLORS)]]
            #center_loc = [int((bbox[0]+bbox[2])/2), int((bbox[1]+bbox[3])/2)]

            # smooth the drawn box by averaging it with the previous frame's box
            if my_key not in track_smooth_dict.keys():
                cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), (color), 3)
                track_smooth_dict[my_key] = bbox
            else:
                fbox = track_smooth_dict[my_key]
                a = int((bbox[0] + fbox[0]) / 2)
                b = int((bbox[1] + fbox[1]) / 2)
                c = int((bbox[2] + fbox[2]) / 2)
                d = int((bbox[3] + fbox[3]) / 2)
                cv2.rectangle(frame, (a, b), (c, d), (color), 3)
                track_smooth_dict[my_key] = bbox
            #cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), (color), 3)

            cv2.putText(frame, str(track.track_id), (int(bbox[0]), int(bbox[1] - 50)), 0, 5e-3 * 150, (color), 2)
            if len(class_names) > 0:
                class_name = class_names[0]
                cv2.putText(frame, str(class_names[0]), (int(bbox[0]), int(bbox[1] - 20)), 0, 5e-3 * 150, (color), 2)
            i += 1

        count = len(set(counter))

        # draw the guide lines
        draw_muti(frame)
        cv2.putText(frame, "Total Pedestrian Counter: " + str(count), (int(20), int(120)), 0, 5e-3 * 200, (0, 255, 0), 2)
        cv2.putText(frame, "Current Pedestrian Counter: " + str(i), (int(20), int(80)), 0, 5e-3 * 200, (0, 255, 0), 2)
        #cv2.putText(frame, "FPS: %f" % (fps), (int(20), int(40)), 0, 5e-3 * 200, (0, 255, 0), 3)
        #cv2.namedWindow("YOLO4_Deep_SORT", 0)
        #cv2.resizeWindow('YOLO4_Deep_SORT', 640, 480)
        #cv2.imshow('YOLO4_Deep_SORT', frame)
        #myout.write(frame)

        frame = cv2.resize(frame, (640, 360))
        ret2, jpeg = cv2.imencode('.jpg', frame)
        # multipart MJPEG stream part (the original part header `application/octet-stream: image/jpeg` was malformed)
        yield (b'--frame\r\n'
               b'Content-Type: image/jpeg\r\n\r\n' + jpeg.tobytes() + b'\r\n\r\n')

        end_time = time.time()
        my_one_time = (end_time - start_time) * 1000
        print("====={}=====".format(num), my_one_time)
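# inner_point() is called above but not defined in this excerpt. A minimal sketch,
# assuming it tests whether a foot point lies inside a configured polygon; the polygon
# coordinates here are placeholders.
DETECT_AREA = np.array([[100, 100], [540, 100], [540, 350], [100, 350]], dtype=np.int32).reshape((-1, 1, 2))

def inner_point(point):
    # pointPolygonTest returns +1 inside, 0 on the edge, -1 outside
    return cv2.pointPolygonTest(DETECT_AREA, (float(point[0]), float(point[1])), False) >= 0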
def eval(data_dir, output_dir, e_type='train', show=False, save_video=False):
    # Definition of the parameters
    max_cosine_distance = 0.5
    nn_budget = None

    # deep_sort
    model_filename = 'model_data/mars-small128.pb'
    encoder = gdet.create_box_encoder(model_filename, batch_size=1)
    metric = nn_matching.NearestNeighborDistanceMetric("cosine", max_cosine_distance, nn_budget)

    mot = Mot(data_dir, e_type)
    for index, det_dir in enumerate(mot.map_dir):
        seqs = os.listdir(det_dir + '/img1')
        dets = mot.load_detections(det_dir + '/det/det.txt')
        seqinfo = det_dir + '/seqinfo.ini'
        with open(seqinfo, 'r') as info:
            lines = info.readlines()
            lines = [line.split('=') for line in lines]
            lines = {line[0]: line[1] for line in lines if len(line) > 1}
        w, h, ext, frame_rate = (int(lines['imWidth']), int(lines['imHeight']),
                                 lines['imExt'], int(lines['frameRate']))

        fourcc = cv2.VideoWriter_fourcc(*'MJPG')
        out = cv2.VideoWriter(mot.sqm[index] + '.avi', fourcc, 7, (w, h))
        tracker = Tracker(metric, img_shape=(w, h), max_eu_dis=0.1 * np.sqrt((w**2 + h**2)))

        with open(output_dir + mot.sqm[index] + '.txt', 'w') as f:
            for seq in seqs:
                print('track {}'.format(seq))
                frame_id = int(seq.strip(ext))
                img = np.asarray(Image.open(det_dir + '/img1/' + seq))
                bboxes, scores = mot.detection_from_frame_id(dets, frame_id)
                features = encoder(img, bboxes)
                detections = [Detection(bbox_and_feature[0], scores[idx], bbox_and_feature[1])
                              for idx, bbox_and_feature in enumerate(zip(bboxes, features))]

                tracker.predict()
                tracks_dets = tracker.update(detections, np.asarray(img))

                frame = cv2.imread(det_dir + '/img1/' + seq)
                for td in tracks_dets:
                    t = td[0]
                    d = detections[td[1]].tlwh
                    f.write('{},{},{},{},{},{},{},{},{},{}\n'.format(
                        frame_id, t, d[0], d[1], d[2], d[3], -1, -1, -1, -1))
                    if show:
                        bbox = detections[td[1]].to_tlbr()
                        cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])),
                                      (int(bbox[2]), int(bbox[3])), (255, 255, 255), 2)
                        cv2.putText(frame, "{}".format(t), (int(bbox[0]), int(bbox[1])),
                                    0, 5e-3 * 200, (0, 255, 0), 2)

                if show:
                    cv2.imshow('', frame)
                    if cv2.waitKey(1) & 0xFF == ord('q'):
                        break
                if save_video:
                    out.write(frame)

        if save_video:
            out.release()
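# Example invocation for the evaluator above, assuming a MOTChallenge-style layout
# (data_dir/<sequence>/img1, det/det.txt, seqinfo.ini); the paths are illustrative.
if __name__ == '__main__':
    eval(data_dir='./MOT16/train/', output_dir='./results/', e_type='train',
         show=False, save_video=True)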
class ObjectTracking():
    def __init__(self, name, metric, yolo_interface, model_filename,
                 nms_max_overlap, image_sub, image_pub, detected_pub):
        # Set the shutdown function (stop the robot)
        # rospy.on_shutdown(self.shutdown)
        self.name = name
        self._cv_bridge = CvBridge()
        self._max_overlap = nms_max_overlap
        # deep_sort
        self._encoder = gdet.create_box_encoder(model_filename, batch_size=1)
        self._tracker = Tracker(metric)
        self._yolo_interface = yolo_interface
        self._fps = 0.0
        self._image_sub = rospy.Subscriber(image_sub, ROSImage, self.image_callback, queue_size=1)
        # self.thermal_sub = rospy.Subscriber(THERMAL_TOPIC, ROSImage, self.thermal_callback, queue_size=1)
        self._image_pub = rospy.Publisher(image_pub, ROSImage, queue_size=1)
        self._detected_pub = rospy.Publisher(detected_pub, DetectedFull, queue_size=1)
        # self._cv_image = None
        self._image_msg = None
        self._image_updated = False
        self.thread = threading.Thread(target=self.process, name=name)
        # self.lock = lock
        rospy.loginfo('##################### ' + name + ' Initialization Finished! #####################')

    def image_callback(self, image_msg):
        # self._cv_image = self._cv_bridge.imgmsg_to_cv2(image_msg, "bgr8")
        self._image_msg = image_msg
        self._image_updated = True
        # rospy.loginfo('call back in ' + self.name)

    def process(self):
        while not rospy.is_shutdown():
            now_time = time.time()
            if not self._image_updated:
                rospy.loginfo('No image in ' + self.name +
                              ' yet! Please check the existence of the subscribed topic.')
                rospy.sleep(2.13)
                continue
            self._image_updated = False
            image_msg = self._image_msg
            cv_image = self._cv_bridge.imgmsg_to_cv2(image_msg, "bgr8")
            image = Image.fromarray(cv_image)
            # self.lock.acquire()
            boxs = self._yolo_interface.detect_image(image)
            # self.lock.release()
            # print("box_num", len(boxs))
            features = self._encoder(cv_image, boxs)

            # score to 1.0 here
            detections = [Detection(bbox, 1.0, feature) for bbox, feature in zip(boxs, features)]

            # Run non-maxima suppression.
            boxes = np.array([d.tlwh for d in detections])
            scores = np.array([d.confidence for d in detections])
            indices = preprocessing.non_max_suppression(boxes, self._max_overlap, scores)
            detections = [detections[i] for i in indices]

            # Call the _tracker
            self._tracker.predict()
            self._tracker.update(detections)

            detected_array = DetectedArray()
            detected_full = DetectedFull()
            detected_full.image = image_msg
            detected_array.size = 0

            def range_check(x, min_v, max_v):
                if x < min_v:
                    x = min_v
                if x > max_v:
                    x = max_v
                return x

            for track in self._tracker.tracks:
                # keep only confirmed, recently updated tracks
                # (the original `if track.is_confirmed() and ...` dropped exactly the wrong set)
                if not track.is_confirmed() or track.time_since_update > 1:
                    continue
                bbox = track.to_tlbr()
                detected = Detected()
                detected.object_class = 'Person'
                detected.num = np.uint32(track.track_id)
                detected.p = 1.0
                detected.x = np.uint16(range_check(int(bbox[0]), 0, cv_image.shape[1]))
                detected.y = np.uint16(range_check(int(bbox[1]), 0, cv_image.shape[0]))
                detected.width = np.uint16(range_check(int(bbox[2]) - int(bbox[0]), 0, cv_image.shape[1]))
                detected.height = np.uint16(range_check(int(bbox[3]) - int(bbox[1]), 0, cv_image.shape[0]))

                cv2.rectangle(cv_image, (int(detected.x), int(detected.y)),
                              (int(detected.x + detected.width), int(detected.y + detected.height)),
                              (255, 0, 0), 2)
                cv2.putText(cv_image, 'Person:' + str(track.track_id),
                            (int(detected.x), int(detected.y)), 0, 5e-3 * 100, (0, 255, 0), 2)
                detected_array.size = detected_array.size + 1
                detected_array.data.append(detected)

            # for det in detections:
            #     bbox = det.to_tlbr()
            #     cv2.rectangle(cv_image, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), (255, 255, 0), 2)
            # cv2.imshow('', cv_image)

            detected_full.header.stamp = image_msg.header.stamp
            detected_full.detections = detected_array
            if self._detected_pub.get_num_connections() > 0:
                self._detected_pub.publish(detected_full)

            ros_image = self._cv_bridge.cv2_to_imgmsg(cv_image)
            if self._image_pub.get_num_connections() > 0:
                self._image_pub.publish(ros_image)

            self._fps = (self._fps + (1. / (time.time() - now_time))) / 2
            rospy.loginfo(self.name + " processing fps = %f" % (self._fps))

        rospy.loginfo("Exiting " + self.name + " object tracking...")
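# Minimal usage sketch for the class above; the topic names, detector object and
# metric setup mirror the constructor arguments but are illustrative.
if __name__ == '__main__':
    rospy.init_node('deep_sort_tracking')
    metric = nn_matching.NearestNeighborDistanceMetric("cosine", 0.2, None)
    node = ObjectTracking('tracker', metric, YOLO(), 'model_data/mars-small128.pb',
                          1.0, 'image_raw', 'image_tracked', 'objects_detected')
    node.thread.start()  # the processing loop runs until rospy shuts down
    rospy.spin()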
def yolo_frames(unique_name):
    device = unique_name[1]

    # Definition of the parameters
    max_cosine_distance = 0.7
    nn_budget = None
    nms_max_overlap = 3

    # Deep SORT
    model_filename = 'model_data/mars-small128.pb'
    encoder = gdet.create_box_encoder(model_filename, batch_size=1)
    metric = nn_matching.NearestNeighborDistanceMetric("cosine", max_cosine_distance, nn_budget)
    tracker = Tracker(metric)

    yolo = YOLO()

    show_detections = True    # show detection boxes in blue
    writeVideo_flag = False   # record video output
    defaultSkipFrames = 2     # skipped frames between detections

    # set up the door region
    H1 = 210
    W1 = 370
    H2 = 235
    W2 = 470
    H = None
    W = None
    R = 80  # min R is 56

    def solve_quadratic_equation(a, b, c):
        """ax2 + bx + c = 0"""
        delta = b**2 - 4 * a * c
        if delta < 0:
            print("The equation has no real roots!")
        elif delta == 0:
            return -b / (2 * a)
        else:
            print("The equation has two distinct roots!")
            if float((-b - sqrt(delta)) / (2 * a)) > float((-b + sqrt(delta)) / (2 * a)):
                return float((-b - sqrt(delta)) / (2 * a))
            else:
                return float((-b + sqrt(delta)) / (2 * a))

    def setup_door(H1, W1, H2, W2, R):
        # step 1: find the midpoint I of (W1, H1) and (W2, H2)
        I1 = (W1 + W2) / 2
        I2 = (H1 + H2) / 2
        # find the vector AB
        u1 = W2 - W1
        u2 = H2 - H1
        # AB is the normal vector of the bisector d, so the perpendicular
        # bisector of AB is y = -(u1 / u2) * x - c / u2
        c = -u1 * I1 - u2 * I2  # solve for c
        # step 2: find the circle center O
        al = c / u2 + I2
        # compute D: the distance between I and O
        fi = acos(sqrt((I1 - W1)**2 + (I2 - H1)**2) / R)
        D = sqrt((I1 - W1)**2 + (I2 - H1)**2) * tan(fi)
        O1 = solve_quadratic_equation((1 + u1**2 / u2**2),
                                      2 * (-I1 + u1 / u2 * al),
                                      al**2 - D**2 + I1**2)
        O2 = -u1 / u2 * O1 - c / u2
        # of the two roots, keep only the upper one
        # step 3: enumerate the points on the circle arc spanning the door
        door_dict = dict()
        for w in range(W1, W2):
            h = O2 + sqrt(R**2 - (w - O1)**2)
            door_dict[w] = round(h)
        return door_dict

    door_dict = setup_door(H1, W1, H2, W2, R)

    totalFrames = 0
    totalIn = 0

    # create an empty list of centroids to count traffic
    pts = [deque(maxlen=30) for _ in range(9999)]

    fps_imutils = fps_callback().start()
    get_feed_from = ('camera', device)

    while True:
        cam_id, frame = BaseCamera.get_frame(get_feed_from)
        # Resize frame
        # frame = cv2.resize(frame, (736, 480))
        image = Image.fromarray(frame[..., ::-1])  # bgr to rgb

        # if the frame dimensions are empty, set them
        if W is None or H is None:
            (H, W) = frame.shape[:2]

        # Draw the door boundary
        for w in range(W1, W2):
            cv2.circle(frame, (w, door_dict[w]), 1, (0, 255, 255), -1)
        cv2.circle(frame, (W1, H1), 4, (0, 0, 255), -1)
        cv2.circle(frame, (W2, H2), 4, (0, 0, 255), -1)
        cv2.circle(frame, (204, 201), 4, (0, 0, 255), -1)

        if True:  # totalFrames % defaultSkipFrames == 0:
            # t2 = time.time()
            boxes, confidence, classes = yolo.detect_image(image)  # average time: 1.2s
            # print(time.time() - t2)
            features = encoder(frame, boxes)
            detections = [Detection(bbox, confidence, cls, feature)
                          for bbox, confidence, cls, feature in zip(boxes, confidence, classes, features)]

            # Run non-maxima suppression.
            boxes = np.array([d.tlwh for d in detections])
            scores = np.array([d.confidence for d in detections])
            classes = np.array([d.cls for d in detections])
            indices = preprocessing.non_max_suppression(boxes, nms_max_overlap, scores)
            detections = [detections[i] for i in indices]

            # Call the tracker
            tracker.predict()
            tracker.update(detections)
        # else:
        #     # Call the tracker
        #     tracker.predict()
        #     tracker.update(detections)

        for det in detections:
            bbox = det.to_tlbr()
            if show_detections and len(classes) > 0:
                det_cls = det.cls
                score = "%.2f" % (det.confidence * 100) + "%"
                cv2.putText(frame, str(det_cls) + " " + score, (int(bbox[0]), int(bbox[3]) - 10),
                            0, 1e-3 * frame.shape[0], (0, 255, 0), 1)
                cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), (255, 0, 0), 1)

        for track in tracker.tracks:
            if not track.is_confirmed() or track.time_since_update > 40:
                continue
            bbox = track.to_tlbr()
            # if not_count_staff(frame, int(bbox[0]), int(bbox[1]), int(bbox[2]), int(bbox[3])):
            #     # adc = "%.2f" % (track.adc * 100) + "%"  # Average detection confidence
            #     cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), (0, 255, 255), 1)
            #     cv2.putText(frame, "STAFF", (int(bbox[0]), int(bbox[1]) - 10), 0,
            #                 1e-3 * frame.shape[0], (0, 0, 255), 1)
            #     continue
            # adc = "%.2f" % (track.adc * 100) + "%"  # Average detection confidence
            cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), (255, 255, 255), 1)
            cv2.putText(frame, "ID: " + str(track.track_id), (int(bbox[0]), int(bbox[1])),
                        0, 1e-3 * frame.shape[0], (0, 255, 0), 1)

            x = [c[0] for c in pts[track.track_id]]
            y = [c[1] for c in pts[track.track_id]]
            under_door = True
            centroid_x = int(((bbox[0]) + (bbox[2])) / 2)
            centroid_y = int(((bbox[1]) + (bbox[3])) / 2)

            # checker: count a person entering the store
            if not track.Counted and centroid_x in range(W1, W2):
                # if every recent centroid sits inside the door span but above the arc,
                # the person is already inside the store, so skip the through-door check
                if all(u[0] in range(W1, W2) for u in pts[track.track_id]):
                    if all(u[1] < door_dict[u[0]] for u in pts[track.track_id]):
                        under_door = False
                '''
                check conditions:
                1. the person must move up: centroid_y < np.mean(y)
                2. the person must pass the door circle: door_dict[centroid_x] > centroid_y
                3. the person must move at least 20 px horizontally: np.max(x) - np.min(x) > 20
                4. the person must have at least one centroid under the door
                '''
                if centroid_y < np.mean(y) and door_dict[centroid_x] > centroid_y and np.max(x) - np.min(x) > 20 \
                        and under_door:
                    totalIn += 1
                    track.Counted = True
                    print(track.track_id, track.Counted)

            cv2.circle(frame, (centroid_x, centroid_y), 4, (0, 255, 0), -1)
            pts[track.track_id].append((centroid_x, centroid_y))

        info = [("Time", "{:.4f}".format(fps_imutils.update_time())), ("In", totalIn)]

        # loop over the info tuples and draw them on our frame
        for (i, (k, v)) in enumerate(info):
            text = "{}: {}".format(k, v)
            cv2.putText(frame, text, (W - 150, ((i * 20) + 20)), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 0, 255), 2)

        yield cam_id, frame
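# A quick check of the quadratic helper above: x^2 - 3x + 2 = 0 has roots 1 and 2, and
# the helper returns the larger one.
assert solve_quadratic_equation(1, -3, 2) == 2.0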
def main(yolo): # Determining the FPS of a video having variable frame rate # cv2.CAP_PROP_FPS is not used since it returns 'infinity' for variable frame rate videos filename = "clip1.mp4" # Determining the total duration of the video clip = VideoFileClip(filename) cap2 = cv2.VideoCapture(filename) co = 0 ret2 = True while ret2: ret2, frame2 = cap2.read() # Determining the total number of frames co += 1 cap2.release() # Computing the average FPS of the video Input_FPS = co / clip.duration # Definition of the parameters max_cosine_distance = 0.3 nn_budget = None nms_max_overlap = 1.0 frame_count = 0 # Implementing Deep Sort algorithm model_filename = 'model_data/mars-small128.pb' encoder = gdet.create_box_encoder(model_filename,batch_size=1) # Cosine distance is used as the metric metric = nn_matching.NearestNeighborDistanceMetric("cosine", max_cosine_distance, nn_budget) tracker = Tracker(metric) video_capture = cv2.VideoCapture(filename) # Define the codec and create a VideoWriter object to save the output video out = cv2.VideoWriter('output.mp4', cv2.VideoWriter_fourcc(*'MP4V'), Input_FPS, (int(video_capture.get(3)), int(video_capture.get(4)))) # To calculate the frames processed by the deep sort algorithm per second fps = 0.0 # Initializing empty variables for counting and tracking purpose queue_track_dict = {} # Count time in queue alley_track_dict = {} # Count time in alley store_track_dict = {} # Count total time in store latest_frame = {} # Track the last frame in which a person was identified reidentified = {} # Yes or No : whether the person has been re-identified at a later point in time plot_head_count_store = [] # y-axis for Footfall Analysis plot_head_count_queue = [] # y-axis for Footfall Analysis plot_time = [] # x-axis for Footfall Analysis # Loop to process each frame and track people while True: ret, frame = video_capture.read() if ret != True: break maxIntensity = 255.0 phi = 1 theta = 1 newImage1 = (maxIntensity/phi)*(frame/(maxIntensity/theta))**1.3 frame = array(newImage1,dtype=uint8) cv2.imwrite('testing.jpg', frame) if frame_count == 5000: break head_count_store = 0 head_count_queue = 0 t1 = time.time() image = Image.fromarray(frame[...,::-1]) # BGR to RGB conversion boxs = yolo.detect_image(image) features = encoder(frame,boxs) # Getting the detections having score of 0.0 to 1.0 detections = [Detection(bbox, 1.0, feature) for bbox, feature in zip(boxs, features)] # Run non-maxima suppression on the bounding boxes boxes = np.array([d.tlwh for d in detections]) scores = np.array([d.confidence for d in detections]) indices = preprocessing.non_max_suppression(boxes, nms_max_overlap, scores) detections = [detections[i] for i in indices] # Call the tracker to associate tracking boxes to detection boxes tracker.predict() tracker.update(detections) # Defining the co-ordinates of the area of interest pts2 = np.array([[380, 250], [380, 360], [170, 480], [0, 480], [0, 380]], np.int32) pts2 = pts2.reshape((-1,1,2)) # Queue Area pts = np.array([[0, 380], [0, 0], [640, 0], [640, 480], [170, 480], [380, 360], [380, 250]], np.int32) pts = pts.reshape((-1,1,2)) # Alley Region cv2.polylines(frame, [pts2], True, (0,255,255), thickness=2) cv2.polylines(frame, [pts], True, (255,0,255), thickness=1) # Drawing tracker boxes and frame count for people inside the areas of interest for track in tracker.tracks: if not track.is_confirmed() or track.time_since_update > 1: continue bbox = track.to_tlbr() # Checking if the person is within an area of interest queue_point_test = 
center_point_inside_polygon(bbox, pts2) alley_point_test = center_point_inside_polygon(bbox, pts) # Checking if a person has been reidentified in a later frame if queue_point_test == 'inside' or alley_point_test == 'inside': if track.track_id in latest_frame.keys(): if latest_frame[track.track_id] != frame_count - 1: reidentified[track.track_id] = 1 # Initializing variables incase a new person has been seen by the model if queue_point_test == 'inside' or alley_point_test == 'inside': head_count_store += 1 if track.track_id not in store_track_dict.keys(): store_track_dict[track.track_id] = 0 queue_track_dict[track.track_id] = 0 alley_track_dict[track.track_id] = 0 reidentified[track.track_id] = 0 # Processing for people inside the Queue Area if queue_point_test == 'inside': head_count_queue += 1 queue_track_dict[track.track_id] += 1 latest_frame[track.track_id] = frame_count cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])),(255,255,255), 2) wait_time = round((queue_track_dict[track.track_id] / Input_FPS), 2) cv2.putText(frame, str(track.track_id) + ": " + str(wait_time) + "s", (int(bbox[0]), int(bbox[1])), 0, 0.8, (0, 0, 0), 4) cv2.putText(frame, str(track.track_id) + ": " + str(wait_time) + "s", (int(bbox[0]), int(bbox[1])), 0, 0.8, (0, 255, 77), 2) # Processing for people inside the Alley Region if alley_point_test == 'inside': alley_track_dict[track.track_id] += 1 latest_frame[track.track_id] = frame_count cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])),(255,255,255), 2) cv2.putText(frame, str(track.track_id), (int(bbox[0]), int(bbox[1])), 0, 0.8, (0, 0, 0), 4) cv2.putText(frame, str(track.track_id), (int(bbox[0]), int(bbox[1])), 0, 0.8, (0, 255, 77), 2) # Getting the total Store time for a person if track.track_id in store_track_dict.keys(): store_track_dict[track.track_id] = queue_track_dict[track.track_id] + alley_track_dict[track.track_id] # Drawing bounding box detections for people inside the store for det in detections: bbox = det.to_tlbr() # Checking if the person is within an area of interest queue_point_test = center_point_inside_polygon(bbox, pts) alley_point_test = center_point_inside_polygon(bbox, pts2) # if queue_point_test == 'inside' or alley_point_test == 'inside': # cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), (255,0,0), 2) # Video Overlay - Head Count Data at that instant cv2.putText(frame, "Count: " + str(head_count_store), ( 30, 610 ), cv2.FONT_HERSHEY_COMPLEX_SMALL, 1.5, (0, 0, 0), 3, cv2.LINE_AA, False) cv2.putText(frame, "Count: " + str(head_count_store), ( 30, 610 ), cv2.FONT_HERSHEY_COMPLEX_SMALL, 1.5, (0, 255, 77), 2, cv2.LINE_AA, False) # Calculating the average wait time in queue total_people = len([v for v in queue_track_dict.values() if v > 0]) total_queue_frames = sum(v for v in queue_track_dict.values() if v > 0) avg_queue_frames = 0 if total_people != 0: avg_queue_frames = total_queue_frames / total_people avg_queue_time = round((avg_queue_frames / Input_FPS), 2) # Video Overlay - Average Wait Time in Queue cv2.putText(frame, "Avg Queue Time: " + str(avg_queue_time) + 's', ( 30, 690 ), cv2.FONT_HERSHEY_COMPLEX_SMALL, 1.5, (0, 0, 0), 3, cv2.LINE_AA, False) cv2.putText(frame, "Avg Queue Time: " + str(avg_queue_time) + 's', ( 30, 690 ), cv2.FONT_HERSHEY_COMPLEX_SMALL, 1.5, (0, 255, 77), 2, cv2.LINE_AA, False) # Calculating the average wait time in the store total_people = len(store_track_dict) total_store_frames = sum(store_track_dict.values()) 
avg_store_frames = 0 if total_people != 0: avg_store_frames = total_store_frames / total_people avg_store_time = round((avg_store_frames / Input_FPS), 2) # Video Overlay - Average Store time cv2.putText(frame, "Avg Store Time: " + str(avg_store_time) + 's', ( 30, 650 ), cv2.FONT_HERSHEY_COMPLEX_SMALL, 1.5, (0, 0, 0), 3, cv2.LINE_AA, False) cv2.putText(frame, "Avg Store Time: " + str(avg_store_time) + 's', ( 30, 650 ), cv2.FONT_HERSHEY_COMPLEX_SMALL, 1.5, (0, 255, 77), 2, cv2.LINE_AA, False) # Write the frame onto the VideoWriter object out.write(frame) # Calculating the frames processed per second by the model fps = ( fps + (1./(time.time()-t1)) ) / 2 frame_count += 1 # Printing processing status to track completion op = "FPS_" + str(frame_count) + "/" + str(co) + ": " + str(round(fps, 2)) print("\r" + op , end = "") # Adding plot values for Footfall Analysis every 2 seconds (hard coded for now) if frame_count % 50 == 0: plot_time.append(round((frame_count / Input_FPS), 2)) plot_head_count_store.append(head_count_store) plot_head_count_queue.append(head_count_queue) # Press Q to stop the video if cv2.waitKey(1) & 0xFF == ord('q'): break # Data Processed as per the video provided print("\n-----------------------------------------------------------------------") print("QUEUE WAIT TIME ( Unique Person ID -> Time spent )\n") for k, v in queue_track_dict.items(): print(k, "->", str(round((v/Input_FPS), 2)) + " seconds") print("\n-----------------------------------------------------------------------") print("ALLEY TIME ( Unique Person ID -> Time spent )\n") for k, v in alley_track_dict.items(): print(k, "->", str(round((v/Input_FPS), 2)) + " seconds") print("\n-----------------------------------------------------------------------") print("STORE TIME ( Unique Person ID -> Time spent )\n") for k, v in store_track_dict.items(): print(k, "->", str(round((v/Input_FPS), 2)) + " seconds") # Defining data to be written into the csv file - Detailed Report csv_columns = ['Unique Person ID', 'Queue Time in AOI', 'Total Store Time', 'Re-Identified'] csv_data = [] csv_row = {} detailed_csv_file = 'Detailed_Store_Report.csv' for k, v in store_track_dict.items(): csv_row = {} if reidentified[k] == 1: reid = 'Yes' else: reid = 'No' csv_row = {csv_columns[0]: k, csv_columns[1]: round((queue_track_dict[k] / Input_FPS), 2), csv_columns[2]: round((v / Input_FPS), 2), csv_columns[3]: reid} csv_data.append(csv_row) # Writing the data into the csv file - Detailed Report with open(detailed_csv_file, 'w') as csvfile: writer = csv.DictWriter(csvfile, fieldnames=csv_columns) writer.writeheader() for data in csv_data: writer.writerow(data) # Defining data to be written into the csv file - Brief Report csv_columns_brief = ['Total Head Count', 'Total Queue Time', 'Average Queue Time', 'Total Store Time', 'Average Store Time'] brief_csv_file = 'Brief_Store_Report.csv' csv_data_brief = {csv_columns_brief[0]: len(store_track_dict), csv_columns_brief[1]: round((sum(queue_track_dict.values()) / Input_FPS), 2), csv_columns_brief[2]: avg_queue_time, csv_columns_brief[3]: round((sum(store_track_dict.values()) / Input_FPS), 2), csv_columns_brief[4]: avg_store_time} # Writing the data into the csv file - Brief Report with open(brief_csv_file, 'w') as csvfile: writer = csv.DictWriter(csvfile, fieldnames=csv_columns_brief) writer.writeheader() writer.writerow(csv_data_brief) # Plotting a time-series line graph for store and queue head count data and saving it as a .png file plt.plot(plot_time, plot_head_count_queue) 
plt.plot(plot_time, plot_head_count_store) plt.legend(['Queue Head Count', 'Store Head Count'], loc='upper left') plt.xlabel('Time Stamp (in seconds)') plt.ylabel('Head Count') plt.xlim(0, round(frame_count / Input_FPS) + 1) plt.ylim(0, max(plot_head_count_store) + 2) plt.title('Footfall Analysis') plt.savefig('Footfall_Analysis.png', bbox_inches='tight') # Printing plot data for i in range(len(plot_time)): print(plot_time[i], plot_head_count_queue[i], plot_head_count_store[i]) # Releasing objects created video_capture.release() out.release() cv2.destroyAllWindows()
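The store-analytics routine above calls a center_point_inside_polygon(bbox, pts) helper that is not shown in this section. A minimal sketch of what such a helper could look like, assuming pts/pts2 hold the polygon vertices as (x, y) pairs and that a point on the edge counts as inside; the 'inside'/'outside' string returns match how the result is compared above:

import cv2
import numpy as np

def center_point_inside_polygon(bbox, polygon_pts):
    """Return 'inside' if the bbox center falls within the polygon, else 'outside'."""
    # bbox comes from det.to_tlbr()/track.to_tlbr(): (min x, min y, max x, max y)
    cx = float((bbox[0] + bbox[2]) / 2)
    cy = float((bbox[1] + bbox[3]) / 2)
    contour = np.array(polygon_pts, dtype=np.int32).reshape((-1, 1, 2))
    # pointPolygonTest returns +1 inside, 0 on the edge, -1 outside
    return 'inside' if cv2.pointPolygonTest(contour, (cx, cy), False) >= 0 else 'outside'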
def main(queue,camID,initial_id,r_id,cam_id,unique_id): ''' Detect, Track and save information of persons Objectives: 1. Use YOLO to detect persons and store coordinates 2. Use DeepSORT to track detected persons throughout video frames 3. Save detected persons for re-identification 4. If re-identified, replace camID with global camID across camera views ''' # Init YOLO model and load it into memory yolo = YOLO() start = time.time() counter = [] #deep_sort metric = nn_matching.NearestNeighborDistanceMetric("cosine", max_cosine_distance, nn_budget) tracker = Tracker(metric) model_filename = 'model_data/market1501.pb' encoder = gdet.create_box_encoder(model_filename,batch_size=1) w = int(650) h = int(576) writeVideo_flag = True # Set to False to not save videos and detections if writeVideo_flag: # Define the codec and create VideoWriter object fourcc = cv2.VideoWriter_fourcc(*'MJPG') out = cv2.VideoWriter('./output/'+str(camID)+'_output.avi', fourcc, 10, (w, h)) list_file = open('logs/detection_camera'+str(camID)+'.txt', 'w') frame_index = -1 fps = 0.0 frame_counter = 0 #Directory Creation if not os.path.isdir('./images/frames/'+str(camID)): os.mkdir('./images/frames/'+str(camID)) if not os.path.isdir('./images/detections/'+str(camID)): os.mkdir('./images/detections/'+str(camID)) # Empty folders for file in (sorted(os.listdir('./images/detections/'+str(camID)))): os.remove('./images/detections/'+str(camID)+'/'+file) for file in (sorted(os.listdir('./images/frames/'+str(camID)))): os.remove('./images/frames/'+str(camID)+'/'+file) while not queue.empty(): # Retrieve a frame from the queue frame = queue.get() cv2.imwrite('./images/frames/'+str(camID)+'/'+str(frame_counter)+'.jpg',frame) frame_counter+=1 t1 = time.time() # frame_copy --> to be cropped according to detected person and saved # frame_save --> Frame to be saved with video only showing unique camID frame_copy = frame.copy() frame_save = frame.copy() image = Image.fromarray(frame[...,::-1]) #bgr to rgb # Perform YOLO detection (Objective 1) boxs,class_names = yolo.detect_image(image) backend.clear_session() features = encoder(frame,boxs) # Detection confidence score is set to 1.0 here. detections = [Detection(bbox, 1.0, feature) for bbox, feature in zip(boxs, features)] # Run non-maxima suppression.
boxes = np.array([d.tlwh for d in detections]) scores = np.array([d.confidence for d in detections]) indices = preprocessing.non_max_suppression(boxes, nms_max_overlap, scores) detections = [detections[i] for i in indices] # Call the tracker and update with current detections tracker.predict() tracker.update(detections) i = int(0) indexIDs = [] boxes = [] for det in detections: bbox = det.to_tlbr() #cv2.rectangle(frame,(int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])),(255,255,255), 2) for track in tracker.tracks: if not track.is_confirmed() or track.time_since_update > 1: continue #boxes.append([track[0], track[1], track[2], track[3]]) # Store tracking_id as a separate variable for replacement tracking_id = track.track_id indexIDs.append(int(tracking_id)) counter.append(int(track.track_id)) bbox = track.to_tlbr() if int(bbox[0])<0 or int(bbox[1])<0 or int(bbox[2])<0 or int(bbox[3])<0: continue # Change camIDs of re-identified targets # Currently using camID 1 as the base of reference to compare the other cameras against if not camID == 1: for a in range(len(initial_id)): if int(track.track_id) == initial_id[a]: tracking_id = int(r_id[a]) # r_id is for only CAM 1 #Prevent a camID donated from CAM 1 from conflicting with a camID issued by CAM 2 (which may coincidentally be 1 as well) elif int(track.track_id) == r_id[a]: #Prevent identical camIDs on one source tracking_id = int(initial_id[a]) else: tracking_id = track.track_id else: tracking_id = track.track_id #DeepSORT id color = [int(c) for c in COLORS[tracking_id]] cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])),(color), 3) #bbox[0] and [1] are the start point, [2] and [3] the end point # Select which camID to be displayed (Local or Global if re-identified) display_id = tracking_id for b in range(len(unique_id)): if tracking_id == r_id[b]: display_id = unique_id[b] cv2.putText(frame,str(display_id),(int(bbox[0]), int(bbox[1] -10)),0, 5e-3 * 150, (color),2) if len(class_names) > 0: class_name = class_names[0] #cv2.putText(frame, str(class_names[0]),(int(bbox[0]), int(bbox[1] -20)),0, 5e-3 * 150, (color),2) # Save bounding box data for re-identification (Objective 3) frame1 = frame_copy[int(bbox[1]):int(bbox[3]),int(bbox[0]):int(bbox[2])] #crop the current frame according to the bounding box coordinates query_path = image_path+'/query' gallery_path = image_path+'/gallery' #Perform resizing to ensure equal size of features extracted from images frame2 = cv2.resize(frame1,(46,133),interpolation = cv2.INTER_AREA) #resize cropped image cv2.imwrite('./images/detections/'+str(camID)+'/frame'+str(frame_counter)+'_'+str(tracking_id)+'.jpg',frame2) if not camID == 1: dst_path = gallery_path #if file does not exist --> save file_path = dst_path+'/'+str(tracking_id)+'_'+str(camID)+'.png' if frame_counter % 10 == 0 or not os.path.isfile(file_path): cv2.imwrite(file_path,frame2) #save cropped frame if camID == 1: dst_path = query_path #if file does not exist --> save file_path = dst_path+'/'+str(tracking_id)+'.png' if frame_counter % 10 == 0 or not os.path.isfile(file_path): cv2.imwrite(file_path,frame2) #save cropped frame # Draw bounding boxes and Unique IDs for video to be saved if tracking_id in initial_id or tracking_id in r_id: if tracking_id in initial_id and not camID == 1: index = initial_id.index(tracking_id) color = [int(c) for c in COLORS[int(unique_id[index].split('P')[1])]] #Determine color of bounding box # Draw box and label with unique ID cv2.rectangle(frame_save, (int(bbox[0]), int(bbox[1])), (int(bbox[2]),
int(bbox[3])),(color), 3) #bbox[0] and [1] are the start point, [2] and [3] the end point cv2.putText(frame_save,str(unique_id[index]),(int(bbox[0]), int(bbox[1] -10)),0, 5e-3 * 150, (color),2) elif tracking_id in r_id and camID == 1: index = r_id.index(tracking_id) color = [int(c) for c in COLORS[int(unique_id[index].split('P')[1])]] cv2.rectangle(frame_save, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])),(color), 3) #bbox[0] and [1] are the start point, [2] and [3] the end point cv2.putText(frame_save,str(unique_id[index]),(int(bbox[0]), int(bbox[1] -10)),0, 5e-3 * 150, (color),2) i += 1 #bbox_center_point(x,y) center = (int(((bbox[0])+(bbox[2]))/2),int(((bbox[1])+(bbox[3]))/2)) #track_id[center] pts[track.track_id].append(center) #center point #cv2.circle(frame, (center), 1, color, 5) ''' #draw motion path for j in range(1, len(pts[track.track_id])): if pts[track.track_id][j - 1] is None or pts[track.track_id][j] is None: continue thickness = int(np.sqrt(64 / float(j + 1)) * 2) cv2.line(frame,(pts[track.track_id][j-1]), (pts[track.track_id][j]),(color),thickness) #cv2.putText(frame, str(class_names[j]),(int(bbox[0]), int(bbox[1] -20)),0, 5e-3 * 150, (255,255,255),2) ''' count = len(set(counter)) # Visualize result cv2.putText(frame, "FPS: %f"%(fps),(int(20), int(40)),0, 5e-3 * 200, (0,255,0),3) cv2.namedWindow('Camera '+str(camID), 0) cv2.resizeWindow('Camera '+str(camID), w ,h) cv2.imshow('Camera '+str(camID), frame) if writeVideo_flag: #save a frame frame_save = cv2.resize(frame_save,(w,h)) # Resize frame to fit video out.write(frame_save) frame_index = frame_index + 1 # Write detections onto file list_file.write('./images/frames/'+str(camID)+'/'+str(frame_counter)+'.jpg'+' | ') if len(boxs) != 0: for i in range(0,len(boxs)): list_file.write(str(boxs[i][0]) + ' '+str(boxs[i][1]) + ' '+str(boxs[i][2]) + ' '+str(boxs[i][3]) + ' ' + str(class_names[i][0])+', ') list_file.write('\n') fps = ( fps + (1./(time.time()-t1)) ) / 2 # Press Q to stop! if cv2.waitKey(1) & 0xFF == ord('q'): break print(" ") print("[Finish]") end = time.time() if len(pts[track.track_id]) != 0: print(source_names[camID-1]+": "+ str(count) + " " + str(class_name) +' Found') else: print("[None Found]") if writeVideo_flag: out.release() list_file.close() print('Time taken: '+str(round(end-start))+' seconds') cv2.destroyAllWindows()
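The function above expects each camera to run in its own process, draining frames from a pre-filled queue while sharing the re-identification lists across processes. A minimal launcher sketch under those assumptions; the camera sources are hypothetical, and how initial_id/r_id/unique_id get populated by the re-ID stage is not shown here:

import multiprocessing as mp
import cv2

def fill_queue(source, queue):
    # Pre-load every frame, since main() exits once the queue is empty
    cap = cv2.VideoCapture(source)
    while True:
        ret, frame = cap.read()
        if not ret:
            break
        queue.put(frame)
    cap.release()

if __name__ == '__main__':
    manager = mp.Manager()
    # Shared across camera processes; filled in by the re-identification stage
    initial_id, r_id, unique_id = manager.list(), manager.list(), manager.list()
    workers = []
    for cam_id, source in enumerate(['cam1.avi', 'cam2.avi'], start=1):  # hypothetical sources
        q = mp.Queue()
        fill_queue(source, q)
        p = mp.Process(target=main, args=(q, cam_id, initial_id, r_id, cam_id, unique_id))
        p.start()
        workers.append(p)
    for p in workers:
        p.join()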
def main(yolo): # Definition of the parameters max_cosine_distance = 0.3 nn_budget = None nms_max_overlap = 1.0 # deep_sort model_filename = 'model_data/mars-small128.pb' encoder = gdet.create_box_encoder(model_filename, batch_size=1) metric = nn_matching.NearestNeighborDistanceMetric("cosine", max_cosine_distance, nn_budget) tracker = Tracker(metric) writeVideo_flag = True video_capture = cv2.VideoCapture(0) if writeVideo_flag: # Define the codec and create VideoWriter object w = int(video_capture.get(3)) h = int(video_capture.get(4)) fourcc = cv2.VideoWriter_fourcc(*'MJPG') out = cv2.VideoWriter('output.avi', fourcc, 15, (w, h)) list_file = open('detection.txt', 'w') frame_index = -1 fps = 0.0 while True: ret, frame = video_capture.read() # frame shape 640*480*3 if ret != True: break t1 = time.time() image = Image.fromarray(frame) boxs = yolo.detect_image(image) # print("box_num",len(boxs)) features = encoder(frame, boxs) # Detection confidence score is set to 1.0 here. detections = [ Detection(bbox, 1.0, feature) for bbox, feature in zip(boxs, features) ] # Run non-maxima suppression. boxes = np.array([d.tlwh for d in detections]) scores = np.array([d.confidence for d in detections]) indices = preprocessing.non_max_suppression(boxes, nms_max_overlap, scores) detections = [detections[i] for i in indices] # Call the tracker tracker.predict() tracker.update(detections) for track in tracker.tracks: if not track.is_confirmed() or track.time_since_update > 1: continue bbox = track.to_tlbr() cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), (255, 255, 255), 2) cv2.putText(frame, str(track.track_id), (int(bbox[0]), int(bbox[1])), 0, 5e-3 * 200, (0, 255, 0), 2) for det in detections: bbox = det.to_tlbr() cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), (255, 0, 0), 2) cv2.imshow('', frame) if writeVideo_flag: # save a frame out.write(frame) frame_index = frame_index + 1 list_file.write(str(frame_index) + ' ') if len(boxs) != 0: for i in range(0, len(boxs)): list_file.write( str(boxs[i][0]) + ' ' + str(boxs[i][1]) + ' ' + str(boxs[i][2]) + ' ' + str(boxs[i][3]) + ' ') list_file.write('\n') fps = (fps + (1. / (time.time() - t1))) / 2 print("fps= %f" % (fps)) # Press Q to stop! if cv2.waitKey(1) & 0xFF == ord('q'): break video_capture.release() if writeVideo_flag: out.release() list_file.close() cv2.destroyAllWindows()
def Object_tracking(Yolo, video_path, output_path, input_size=416, show=False, CLASSES=YOLO_COCO_CLASSES, score_threshold=0.3, iou_threshold=0.45, rectangle_colors='', Track_only=[]): # Definition of the parameters max_cosine_distance = 0.7 nn_budget = None #initialize deep sort object model_filename = 'model_data/mars-small128.pb' encoder = gdet.create_box_encoder(model_filename, batch_size=1) metric = nn_matching.NearestNeighborDistanceMetric("cosine", max_cosine_distance, nn_budget) tracker = Tracker(metric) times, times_2 = [], [] if video_path: vid = cv2.VideoCapture(video_path) # detect on video else: vid = cv2.VideoCapture(0) # detect from webcam # by default VideoCapture returns float instead of int width = int(vid.get(cv2.CAP_PROP_FRAME_WIDTH)) height = int(vid.get(cv2.CAP_PROP_FRAME_HEIGHT)) fps = int(vid.get(cv2.CAP_PROP_FPS)) codec = cv2.VideoWriter_fourcc(*'XVID') out = cv2.VideoWriter(output_path, codec, fps, (width, height)) # output_path must be .mp4 NUM_CLASS = read_class_names(CLASSES) key_list = list(NUM_CLASS.keys()) val_list = list(NUM_CLASS.values()) counter = 1 f = open("trackinfo.txt", 'w') while True: _, frame = vid.read() print(counter) counter = counter + 1 try: original_frame = frame.copy() # keep a drawable copy; the frame stays in BGR, which the cv2 drawing calls and VideoWriter below expect except: break image_data = image_preprocess(np.copy(original_frame), [input_size, input_size]) #image_data = tf.expand_dims(image_data, 0) image_data = image_data[np.newaxis, ...].astype(np.float32) t1 = time.time() if YOLO_FRAMEWORK == "tf": pred_bbox = Yolo.predict(image_data) elif YOLO_FRAMEWORK == "trt": batched_input = tf.constant(image_data) result = Yolo(batched_input) pred_bbox = [] for key, value in result.items(): value = value.numpy() pred_bbox.append(value) #t1 = time.time() #pred_bbox = Yolo.predict(image_data) t2 = time.time() pred_bbox = [tf.reshape(x, (-1, tf.shape(x)[-1])) for x in pred_bbox] pred_bbox = tf.concat(pred_bbox, axis=0) bboxes = postprocess_boxes(pred_bbox, original_frame, input_size, score_threshold) bboxes = nms(bboxes, iou_threshold, method='nms') # extract bboxes to boxes (x, y, width, height), scores and names boxes, scores, names = [], [], [] for bbox in bboxes: if len(Track_only) != 0 and NUM_CLASS[int(bbox[5])] in Track_only or len(Track_only) == 0: boxes.append([ bbox[0].astype(int), bbox[1].astype(int), bbox[2].astype(int) - bbox[0].astype(int), bbox[3].astype(int) - bbox[1].astype(int) ]) scores.append(bbox[4]) names.append(NUM_CLASS[int(bbox[5])]) # Obtain all the detections for the given frame. boxes = np.array(boxes) names = np.array(names) scores = np.array(scores) features = np.array(encoder(original_frame, boxes)) detections = [ Detection(bbox, score, class_name, feature) for bbox, score, class_name, feature in zip(boxes, scores, names, features) ] # Pass detections to the deepsort object and obtain the track information.
tracker.predict() tracker.update(detections) # Obtain info from the tracks tracked_bboxes = [] for track in tracker.tracks: if not track.is_confirmed() or track.time_since_update > 5: continue bbox = track.to_tlbr() # Get the corrected/predicted bounding box class_name = track.get_class() # Get the class name of the particular object tracking_id = track.track_id # Get the ID for the particular track index = key_list[val_list.index(class_name)] # Get the predicted object index by object name tracked_bboxes.append(bbox.tolist() + [tracking_id, index]) # Structure the data so that it can be used with our draw_bbox function json.dump(tracked_bboxes, f) f.write("\n") # draw detection on frame image = draw_bbox(original_frame, tracked_bboxes, CLASSES=CLASSES, tracking=True) t3 = time.time() times.append(t2 - t1) times_2.append(t3 - t1) times = times[-20:] times_2 = times_2[-20:] ms = sum(times) / len(times) * 1000 fps = 1000 / ms fps2 = 1000 / (sum(times_2) / len(times_2) * 1000) image = cv2.putText(image, "FPS: {:.1f}".format(fps), (0, 30), cv2.FONT_HERSHEY_COMPLEX_SMALL, 1, (0, 0, 255), 2) # draw original yolo detection #image = draw_bbox(image, bboxes, CLASSES=CLASSES, show_label=False, rectangle_colors=rectangle_colors, tracking=True) #print("Time: {:.2f}ms, Detection FPS: {:.1f}, total FPS: {:.1f}".format(ms, fps, fps2)) if output_path != '': out.write(image) if show: cv2.imshow('output', image) if cv2.waitKey(25) & 0xFF == ord("q"): cv2.destroyAllWindows() break out.release() cv2.destroyAllWindows() f.close()
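Object_tracking dumps one JSON list per frame into trackinfo.txt, where each entry is [x1, y1, x2, y2, track_id, class_index]. A small sketch for reading that log back, assuming the file was produced by the loop above:

import json

def load_track_log(path='trackinfo.txt'):
    """Yield, per frame, a list of [x1, y1, x2, y2, track_id, class_index] entries."""
    with open(path) as f:
        for line in f:
            line = line.strip()
            if line:
                yield json.loads(line)

for frame_idx, tracked in enumerate(load_track_log()):
    for x1, y1, x2, y2, track_id, class_idx in tracked:
        print(frame_idx, track_id, (x1, y1, x2, y2))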
class Fence(): def __init__(self): self.yolo = YOLO() self.max_cosine_distance = 0.3 self.nn_budget = None self.nms_max_overlap = 1.0 self.encoder = gdet.create_box_encoder('model_data/mars-small128.pb', batch_size=1) self.tracker = Tracker( nn_matching.NearestNeighborDistanceMetric("cosine", self.max_cosine_distance, self.nn_budget)) self.dqs = [deque() for _ in range(9999)] self.poly = [] self.id_cnt_dict = {} self.queue_dict = {} def initArea(self, image): ''' Initialize the sensitive area: click points on the image and their coordinates are saved :param image: shape w*h*3 :return: the polygon vertices ''' def on_EVENT_LBUTTONDOWN(event, x, y, flags, param): if event == cv2.EVENT_LBUTTONDOWN: xy = "%d,%d" % (x, y) print(xy) cv2.circle(image, (x, y), 1, (255, 0, 0), thickness=-1) cv2.putText(image, xy, (x, y), cv2.FONT_HERSHEY_PLAIN, 1.0, (0, 0, 0), thickness=1) self.poly.append([float(x), float(y)]) cv2.imshow("image", image) cv2.namedWindow("image") cv2.setMouseCallback("image", on_EVENT_LBUTTONDOWN) cv2.imshow("image", image) while len(self.poly) < 4: try: cv2.waitKey(100) except Exception: cv2.destroyWindow("image") break cv2.destroyWindow("image") return self.poly def initPoly(self, poly): ''' Initialize the sensitive area :param poly: vertex coordinates of the area [[float(x), float(y)]...] :return: ''' self.poly = poly def detect(self, image): ''' Use only the detection model to obtain bboxes; the output does not contain target ids :param image: shape w*h*3 :return: bboxes [[min x, min y, max x, max y]...] ''' img = Image.fromarray(image[..., ::-1]) # bgr to rgb boxs, ret_clss = self.yolo.detect_image(img) features = self.encoder(image, boxs) # The image of each frame is encoded to match the boxes # Detection score is set to 1.0 here. Each detection box and feature is encapsulated as an object detections = [ Detection(bbox, 1.0, feature, ret_cls) for bbox, feature, ret_cls in zip(boxs, features, ret_clss) ] boxes = np.array([d.tlwh for d in detections]) scores = np.array([d.confidence for d in detections]) indices = preprocessing.non_max_suppression(boxes, self.nms_max_overlap, scores) detections = [detections[i] for i in indices] bboxes = [] for det in detections: bbox = det.to_tlbr() bboxes.append(bbox) return bboxes, ret_clss, detections def trackByDetect(self, image): ''' Detect and track to obtain bboxes :param image: shape w*h*3 :return: b [[min x, min y, max x, max y]...] t [id1,id2,id3...] ''' bbox, ret_clss, detections = self.detect(image) # Call the tracker self.tracker.predict() self.tracker.update(detections) b = [] c = [] t = [] for track in self.tracker.tracks: if not track.is_confirmed() or track.time_since_update > 1: continue b.append(track.to_tlbr()) c.append(track.track_cls) t.append(track.track_id) return b, c, t def entanceAlert(self, bboxes): ''' Determine whether each bbox center lies inside the sensitive area :param bboxes: [[min x, min y, max x, max y]...] :return: isIn: [True,False,True....] ''' assert len(self.poly) == 4, 'The quadrilateral sensitive area has not been initialized' isIn = [ self.winding_number([int((bbox[0] + bbox[2]) / 2), int((bbox[1] + bbox[3]) / 2)]) == 'in' for bbox in bboxes ] return isIn def winding_number(self, point): ''' Determine the geometric relationship between the area polygon and a point :param point: [x,y] :return: "on" on the boundary, "in" inside the area, "out" outside the area ''' poly = self.poly + [self.poly[0]] # close the polygon locally instead of mutating self.poly px = point[0] py = point[1] sum_of_p = 0 length = len(poly) - 1 for index in range(0, length): sx = poly[index][0] sy = poly[index][1] tx = poly[index + 1][0] ty = poly[index + 1][1] # The point coincides with a vertex of the polygon or lies on an edge of the polygon if ((sx - px) * (px - tx) >= 0 and (sy - py) * (py - ty) >= 0 and (px - sx) * (ty - sy) == (py - sy) * (tx - sx)): return "on" # The angle between the point and two adjacent vertices angle = math.atan2(sy - py, sx - px) - math.atan2(ty - py, tx - px) # Make sure the angle is within the range (-π ~ π) if angle >= math.pi: angle = angle - math.pi * 2 elif angle <= -math.pi: angle = angle + math.pi * 2 sum_of_p += angle # Use the number of turns to judge the geometric relationship between the point and the polygon result = 'out' if int(sum_of_p / math.pi) == 0 else 'in' return result def get_poly(self): pts = np.array(self.poly, np.int32) # With 4 vertices the array becomes 4*1*2; the first reshape argument of -1 means that dimension is inferred from the others pts = pts.reshape((-1, 1, 2)) return pts def person_in_area(self, q, flg=False): ''' Detect an enter-the-area action :param q: queue of a person's position coordinates :param flg: starting state, defaults to inside the area :return: ''' while True: if not q: return "non" box1 = q.popleft() if self.winding_number(box1) == "out": flg = True continue elif self.winding_number(box1) == "in" and flg: return "yes"
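A minimal driver sketch for the Fence class above, assuming its YOLO/Deep SORT dependencies are in place; the polygon coordinates and video path are hypothetical:

import cv2

fence = Fence()
# Four corners of the sensitive area (or call fence.initArea(first_frame) to click them)
fence.initPoly([[100.0, 100.0], [500.0, 100.0], [500.0, 400.0], [100.0, 400.0]])

cap = cv2.VideoCapture('video.avi')  # hypothetical input
while True:
    ret, frame = cap.read()
    if not ret:
        break
    boxes, classes, ids = fence.trackByDetect(frame)
    for inside, track_id in zip(fence.entanceAlert(boxes), ids):
        if inside:
            print('track %d entered the sensitive area' % track_id)
    cv2.polylines(frame, [fence.get_poly()], True, (0, 0, 255), 2)
    cv2.imshow('fence', frame)
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break
cap.release()
cv2.destroyAllWindows()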
def main(_argv): # Definition of the parameters max_cosine_distance = 0.4 nn_budget = None nms_max_overlap = 1.0 time_stamp = [] time_stamp_vid2 = [] time_ev_vid1 = datetime.strptime('00:00:00:00', "%H:%M:%S:%f") time_ev_vid2 = datetime.strptime('00:00:00:00', "%H:%M:%S:%f") flag = 0 time_dict = collections.defaultdict(list) time_ev1 = 0 time_ev2 = 0 time_diff = 0 reader = easyocr.Reader(['en']) print("easy ocr vocab loaded") # initialize deep sort model_filename = 'model_data/mars-small128.pb' encoder = gdet.create_box_encoder(model_filename, batch_size=1) # calculate cosine distance metric metric = nn_matching.NearestNeighborDistanceMetric("cosine", max_cosine_distance, nn_budget) # initialize tracker tracker = Tracker(metric) # load configuration for object detector config = ConfigProto() config.gpu_options.allow_growth = True session = InteractiveSession(config=config) STRIDES, ANCHORS, NUM_CLASS, XYSCALE = utils.load_config(FLAGS) input_size = FLAGS.size video_path = FLAGS.video video_path1 = FLAGS.video1 # load tflite model if flag is set if FLAGS.framework == 'tflite': interpreter = tf.lite.Interpreter(model_path=FLAGS.weights) interpreter.allocate_tensors() input_details = interpreter.get_input_details() output_details = interpreter.get_output_details() print(input_details) print(output_details) # otherwise load standard tensorflow saved model else: saved_model_loaded = tf.saved_model.load(FLAGS.weights, tags=[tag_constants.SERVING]) infer = saved_model_loaded.signatures['serving_default'] # begin video capture try: vid = cv2.VideoCapture(int(video_path)) vid1 = cv2.VideoCapture(int(video_path1)) except: vid = cv2.VideoCapture(video_path) vid1 = cv2.VideoCapture(video_path1) out = None # get video ready to save locally if flag is set if FLAGS.output: # by default VideoCapture returns float instead of int width = int(vid.get(cv2.CAP_PROP_FRAME_WIDTH)) height = int(vid.get(cv2.CAP_PROP_FRAME_HEIGHT)) fps = int(vid.get(cv2.CAP_PROP_FPS)) codec = cv2.VideoWriter_fourcc(*FLAGS.output_format) out = cv2.VideoWriter(FLAGS.output, codec, fps, (width, height)) frame_num = 0 cost = 0 _, frame_prev = vid1.read() frame_prev = cv2.cvtColor(frame_prev, cv2.COLOR_BGR2GRAY) # while video is running while True: return_value, frame = vid.read() return_value1, frame1 = vid1.read() result1 = frame1 if return_value: frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB) frame_gray = cv2.cvtColor(frame, cv2.COLOR_RGB2GRAY) image = Image.fromarray(frame) else: print('Video has ended or failed, try a different video format!') break if (return_value1 and flag == 0): frame1 = cv2.cvtColor(frame1, cv2.COLOR_BGR2GRAY) cost = MSE(frame_prev, frame1) print("cost between two frames = ", cost) if (cost > 2000): text_in_frame2 = reader.readtext(frame1) time_in_frame2 = text_in_frame2[0][1] print("time in frame 1 ", time_in_frame2) time_frame2 = datetime.strptime(time_in_frame2, "%H:%M:%S:%f") time_stamp_vid2.append(time_frame2) time_ev_vid1 = time_frame2 time_ev1 = time_in_frame2 #time_dict['Event 1'].extend(str(time_in_frame2)) flag = 1 frame_prev = frame1 frame_num += 1 print('Frame #: ', frame_num) frame_size = frame.shape[:2] image_data = cv2.resize(frame, (input_size, input_size)) image_data = image_data / 255. 
image_data = image_data[np.newaxis, ...].astype(np.float32) start_time = time.time() # run detections on tflite if flag is set if FLAGS.framework == 'tflite': interpreter.set_tensor(input_details[0]['index'], image_data) interpreter.invoke() pred = [ interpreter.get_tensor(output_details[i]['index']) for i in range(len(output_details)) ] # run detections using yolov3 if flag is set if FLAGS.model == 'yolov3' and FLAGS.tiny == True: boxes, pred_conf = filter_boxes(pred[1], pred[0], score_threshold=0.25, input_shape=tf.constant([input_size, input_size])) else: boxes, pred_conf = filter_boxes(pred[0], pred[1], score_threshold=0.25, input_shape=tf.constant([input_size, input_size])) else: batch_data = tf.constant(image_data) pred_bbox = infer(batch_data) for key, value in pred_bbox.items(): boxes = value[:, :, 0:4] pred_conf = value[:, :, 4:] boxes, scores, classes, valid_detections = tf.image.combined_non_max_suppression( boxes=tf.reshape(boxes, (tf.shape(boxes)[0], -1, 1, 4)), scores=tf.reshape(pred_conf, (tf.shape(pred_conf)[0], -1, tf.shape(pred_conf)[-1])), max_output_size_per_class=50, max_total_size=50, iou_threshold=FLAGS.iou, score_threshold=FLAGS.score) # convert data to numpy arrays and slice out unused elements num_objects = valid_detections.numpy()[0] bboxes = boxes.numpy()[0] bboxes = bboxes[0:int(num_objects)] scores = scores.numpy()[0] scores = scores[0:int(num_objects)] classes = classes.numpy()[0] classes = classes[0:int(num_objects)] # format bounding boxes from normalized ymin, xmin, ymax, xmax ---> xmin, ymin, width, height original_h, original_w, _ = frame.shape bboxes = utils.format_boxes(bboxes, original_h, original_w) # store all predictions in one parameter for simplicity when calling functions pred_bbox = [bboxes, scores, classes, num_objects] # read in all class names from config class_names = utils.read_class_names(cfg.YOLO.CLASSES) # by default allow all classes in .names file allowed_classes = list(class_names.values()) # custom allowed classes (uncomment line below to customize tracker for only people) #allowed_classes = ['person'] # loop through objects and use class index to get class name, allow only classes in allowed_classes list names = [] deleted_indx = [] for i in range(num_objects): class_indx = int(classes[i]) class_name = class_names[class_indx] if class_name not in allowed_classes: deleted_indx.append(i) else: names.append(class_name) names = np.array(names) count = len(names) if FLAGS.count: cv2.putText(frame, "Objects being tracked: {}".format(count), (5, 35), cv2.FONT_HERSHEY_COMPLEX_SMALL, 2, (0, 255, 0), 2) print("Objects being tracked: {}".format(count)) # delete detections that are not in allowed_classes bboxes = np.delete(bboxes, deleted_indx, axis=0) scores = np.delete(scores, deleted_indx, axis=0) # encode yolo detections and feed to tracker features = encoder(frame, bboxes) detections = [ Detection(bbox, score, class_name, feature) for bbox, score, class_name, feature in zip(bboxes, scores, names, features) ] #initialize color map cmap = plt.get_cmap('tab20b') colors = [cmap(i)[:3] for i in np.linspace(0, 1, 20)] # run non-maxima suppression boxs = np.array([d.tlwh for d in detections]) scores = np.array([d.confidence for d in detections]) classes = np.array([d.class_name for d in detections]) indices = preprocessing.non_max_suppression(boxs, classes, nms_max_overlap, scores) detections = [detections[i] for i in indices] # Call the tracker tracker.predict() tracker.update(detections) # update tracks for track in tracker.tracks: if not track.is_confirmed() or track.time_since_update > 1: continue bbox = track.to_tlbr() class_name = track.get_class() # draw bbox on screen color = colors[int(track.track_id) % len(colors)] color = [i * 255 for i in color] cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), color, 2) cv2.rectangle(frame, (int(bbox[0]), int(bbox[1] - 30)), (int(bbox[0]) + (len(class_name) + len(str(track.track_id))) * 17, int(bbox[1])), color, -1) cv2.putText(frame, class_name + "-" + str(track.track_id), (int(bbox[0]), int(bbox[1] - 10)), 0, 0.75, (255, 255, 255), 2) center_coordinates = (int((bbox[0] + bbox[2]) / 2), int((bbox[1] + bbox[3]) / 2)) print(center_coordinates) if (class_name == 'person' and center_coordinates[0] in range(120, 476) and center_coordinates[1] in range(191, 979)): # adding the ocr text_in_frame = reader.readtext(frame_gray) # if(len(text_in_frame) > 0): print("text in frame", text_in_frame) time_in_frame1 = text_in_frame[0][1] time_ev2 = time_in_frame1 print("time in frame", time_in_frame1) time_frame1 = datetime.strptime(time_in_frame1, "%H:%M:%S:%f") #time_stamp.append(time_frame1) if (flag == 1): time_ev_vid2 = time_frame1 #time_dict['Event two'].extend(str(time_in_frame1)) time_stamp.append(time_frame1) print("the time difference is ", (time_ev_vid2 - time_ev_vid1).seconds) flag = 2 break #print(vid.get(cv2.CAP_PROP_POS_MSEC)/1000) #time_stamp.append(vid.get(cv2.CAP_PROP_POS_MSEC)/1000) # if the info flag is enabled, print details about each track if FLAGS.info: print("Tracker ID: {}, Class: {}, BBox Coords (xmin, ymin, xmax, ymax): {}".format(str(track.track_id), class_name, (int(bbox[0]), int(bbox[1]), int(bbox[2]), int(bbox[3])))) # calculate frames per second of running detections fps = 1.0 / (time.time() - start_time) print("FPS: %.2f" % fps) result = np.asarray(frame) #print(time_stamp) result = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR) #result1 = cv2.cvtColor(frame1, cv2.COLOR_GRAY2BGR) # with open("video.txt", "w") as f: # f.write(str(time_stamp)) # with open("video2.txt", "w") as k: # k.write(str(time_stamp_vid2)) result1 = cv2.resize(result1, (400, 400)) result = cv2.resize(result, (400, 400)) if not FLAGS.dont_show: cv2.imshow("Output Video", np.concatenate((result1, result), axis=1)) if (flag == 2): print("the time difference is ", (time_ev_vid2 - time_ev_vid1).seconds) time_diff = (time_ev_vid2 - time_ev_vid1).seconds #time_dict['Time difference in seconds'].extend(str((time_ev_vid2 - time_ev_vid1).seconds)) break if FLAGS.output: out.write(result) if cv2.waitKey(1) & 0xFF == ord('q'): break time_diction = { "Event 1 Occurred at ": time_ev1, "Event 2 occurred at ": time_ev2, "time difference in seconds ": time_diff } # write the event timing summary to a json file with open('output.json', "w") as json_file: json.dump(time_diction, json_file) if out: out.release() cv2.destroyAllWindows()
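The frame-change test in the function above relies on an MSE helper that never appears in this section. A plausible minimal implementation, assuming both arguments are equally sized grayscale frames (the > 2000 threshold above would then be in squared pixel-intensity units):

import numpy as np

def MSE(frame_a, frame_b):
    """Mean squared error between two equally sized grayscale frames."""
    diff = frame_a.astype(np.float32) - frame_b.astype(np.float32)
    return float(np.mean(diff ** 2))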
def main( root, new_root, config_file ): """ Parameters ---------- root : str The root of the whole file hierarchy. The root will be parsed by glob in "parse_hierachy". new_root : str In which the new hierarchy will be constructed. config_file : str Configurations in ./config. """ cfg.merge_from_file(config_file) cfg.freeze() file_nodes = parse_hierachy(root, '**/*.mp4') # META definitions encoder_factory = EncoderFactory() detector_factory = DetectorFactory() ratio_thres_min = cfg.RATIO_THRESHOLD_MIN ratio_thres_max = cfg.RATIO_THRESHOLD_MAX height_thres = cfg.HEIGHT_THRESHOLD width_thres = cfg.WIDTH_THRESHOLD output_dir = new_root nms_max_overlap = cfg.NMS_MAX_OVERLAP distance_metric = cfg.NND.METRIC # cosine max_cosine_distance = cfg.NND.MAX_COS_DISTANCE # 0.3 nn_budget = cfg.NND.BUGET # None # writeVideo_flag = cfg.OUTPUT_DIR is not False max_iou_distance = cfg.TRACK.MAX_IOU_DISTANCE # 0.7 # deep_sort encoder = encoder_factory.make_encoder(cfg, cfg.ENCODER.NAME) detector = detector_factory.make_detector(cfg, cfg.DETECTOR.NAME) metric = nn_matching.NearestNeighborDistanceMetric(distance_metric, max_cosine_distance, nn_budget) for file_node in tqdm(file_nodes, desc='Files', leave=False): video_capture = cv2.VideoCapture(file_node.path) vfps = float(video_capture.get(CV_CAP_PROP_FPS)) num_frame = video_capture.get(CV_CAP_PROP_FRAME_COUNT) # w = int(video_capture.get(CV_CAP_PROP_FRAME_WIDTH)) # h = int(video_capture.get(CV_CAP_PROP_FRAME_HEIGHT)) max_age = int(cfg.TRACK.MAX_AGE_TIME * vfps) # 30 n_init = int(cfg.TRACK.N_INIT_TIME * vfps) # 3 # A Kalman filter tracker # Predict the next position based on the Kalman filter. # Update with the detections to calibrate the predictions. tracker = Tracker(metric, max_iou_distance, max_age, n_init) # A dictionary to collect tracks # Dict[track_id -> List[(frame_count, image)]] track_collection = {} frame_count = 0 eof_flag = False while not eof_flag: # Get a batch of images frames = [] batch_count = 0 while batch_count < cfg.BATCH: skip_count = 0 while skip_count < cfg.SKIP: ret, frame = video_capture.read() # frame shape 640*480*3 skip_count += 1 ret, frame = video_capture.read() # frame shape 640*480*3 if not ret: eof_flag = True break else: batch_count += 1 frames.append(frame) if eof_flag: break # the last batch cannot satisfy the shape constraint t1 = time.time() # Batch process images = [cv2.cvtColor(frame, cv2.COLOR_BGR2RGB) for frame in frames] images = np.array(images) img_bboxes, img_scores = detector(images) img_features = encoder(images, img_bboxes) # Batch update for i in range(cfg.BATCH): bboxes, scores, features, image = img_bboxes[i], img_scores[i], img_features[i], images[i] detections = [Detection(bbox, score, feature) for bbox, score, feature in zip(bboxes, scores, features)] detections = preprocessing.standardize_detections( detections, ratio_thres_min, ratio_thres_max, height_thres, width_thres) # Run non-maxima suppression.
boxes = np.array([d.tlwh for d in detections]) scores = np.array([d.confidence for d in detections]) indices = preprocessing.non_max_suppression(boxes, nms_max_overlap, scores) detections = [detections[i] for i in indices] # Call the tracker tracker.predict() tracker.update(detections) active_tracks = [track for track in tracker.tracks if track.time_since_update == 0] crops = crop_tracks(image, active_tracks) crops = {track_id: (frame_count, img) for track_id, img in crops.items()} for track_id, img in crops.items(): track_collection.setdefault(track_id, []).append(img) frame_count += 1 # fps = (cfg.BATCH / (time.time() - t1)) # print("Processing fps={:3f}. Video time {:.3f} passed.".format( # fps, frame_count / vfps)) # Close files video_capture.release() # Write the crops out; each track gets its own directory. dir = file_node.echo(output_dir, make_dir=True) for track_id, packs in track_collection.items(): track_dir = os.path.join(dir, str(track_id)) os.mkdir(track_dir) for pack in packs: f_count, img = pack if img.size != 0: cv2.imwrite(os.path.join(track_dir, file_node.name + '_' + str(f_count) + '_' + str(track_id) + '.png'), cv2.cvtColor(img, cv2.COLOR_RGB2BGR))
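crop_tracks(image, active_tracks) is referenced above but not defined in this section. A minimal sketch under the assumption that it crops each active track's box out of the RGB frame and returns a dict keyed by track id (the caller tolerates empty crops via the img.size != 0 check):

def crop_tracks(image, tracks):
    """Crop each track's bounding box from the frame.

    Returns Dict[track_id -> cropped image (possibly empty at the frame border)].
    """
    crops = {}
    h, w = image.shape[:2]
    for track in tracks:
        x1, y1, x2, y2 = (int(v) for v in track.to_tlbr())
        # clamp the box to the frame so negative Kalman predictions don't wrap around
        x1, y1 = max(0, x1), max(0, y1)
        x2, y2 = min(w, x2), min(h, y2)
        crops[track.track_id] = image[y1:y2, x1:x2].copy()
    return crops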
def main(yolo): # Definition of the parameters max_cosine_distance = 0.3 nn_budget = None nms_max_overlap = 1.0 # deep_sort model_filename = 'model_data/mars-small128.pb' encoder = gdet.create_box_encoder(model_filename,batch_size=1) metric = nn_matching.NearestNeighborDistanceMetric("cosine", max_cosine_distance, nn_budget) tracker = Tracker(metric) writeVideo_flag = True video_capture = cv2.VideoCapture(0) if writeVideo_flag: # Define the codec and create VideoWriter object w = int(video_capture.get(3)) h = int(video_capture.get(4)) fourcc = cv2.VideoWriter_fourcc(*'MJPG') out = cv2.VideoWriter('output.avi', fourcc, 15, (w, h)) list_file = open('detection.txt', 'w') frame_index = -1 fps = 0.0 while True: ret, frame = video_capture.read() # frame shape 640*480*3 if ret != True: break t1 = time.time() image = Image.fromarray(frame) boxs = yolo.detect_image(image) # print("box_num",len(boxs)) features = encoder(frame,boxs) # Detection confidence score is set to 1.0 here. detections = [Detection(bbox, 1.0, feature) for bbox, feature in zip(boxs, features)] # Run non-maxima suppression. boxes = np.array([d.tlwh for d in detections]) scores = np.array([d.confidence for d in detections]) indices = preprocessing.non_max_suppression(boxes, nms_max_overlap, scores) detections = [detections[i] for i in indices] # Call the tracker tracker.predict() tracker.update(detections) for track in tracker.tracks: if not track.is_confirmed() or track.time_since_update > 1: continue bbox = track.to_tlbr() cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])),(255,255,255), 2) cv2.putText(frame, str(track.track_id),(int(bbox[0]), int(bbox[1])),0, 5e-3 * 200, (0,255,0),2) for det in detections: bbox = det.to_tlbr() cv2.rectangle(frame,(int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])),(255,0,0), 2) cv2.imshow('', frame) if writeVideo_flag: # save a frame out.write(frame) frame_index = frame_index + 1 list_file.write(str(frame_index)+' ') if len(boxs) != 0: for i in range(0,len(boxs)): list_file.write(str(boxs[i][0]) + ' '+str(boxs[i][1]) + ' '+str(boxs[i][2]) + ' '+str(boxs[i][3]) + ' ') list_file.write('\n') fps = ( fps + (1./(time.time()-t1)) ) / 2 print("fps= %f"%(fps)) # Press Q to stop! if cv2.waitKey(1) & 0xFF == ord('q'): break video_capture.release() if writeVideo_flag: out.release() list_file.close() cv2.destroyAllWindows()
def main(yolo): # Definition of the parameters max_cosine_distance = 0.25 nn_budget = None nms_max_overlap = 0.1 # deep_sort model_filename = 'model_data/veri.pb' encoder = gdet.create_box_encoder(model_filename, batch_size=1) metric = nn_matching.NearestNeighborDistanceMetric("cosine", max_cosine_distance, nn_budget) tracker = Tracker(metric) writeVideo_flag = True video_capture = cv2.VideoCapture('test_scene5.mp4') # feed testing video #video_capture = cv2.VideoCapture('round.mp4') video_fps = video_capture.get(cv2.CAP_PROP_FPS) #print("Frames per second: {}".format(video_fps)) if writeVideo_flag: # Define the codec and create VideoWriter object w = int(video_capture.get(3)) h = int(video_capture.get(4)) fourcc = cv2.VideoWriter_fourcc(*'MJPG') # https://docs.opencv.org/2.4/modules/highgui/doc/reading_and_writing_images_and_video.html#cv2.VideoWriter out = cv2.VideoWriter('test_scene5_tar.mp4', fourcc, video_fps, (w, h)) #out = cv2.VideoWriter('output.avi', fourcc, 15, (w, h)) list_file = open('tracking_DJI_0006.txt', 'w') frame_index = -1 fps = 0.0 while True: ret, frame = video_capture.read() # frame shape 640*480*3 if ret != True: break t1 = time.time() # image = Image.fromarray(frame) image = Image.fromarray(frame[..., ::-1]) #bgr to rgb boxs = yolo.detect_image(image) #boxs = darknet.detect_image(image) # print("box_num",len(boxs)) features = encoder(frame, boxs) # Detection confidence score is set to 1.0 here. detections = [ Detection(bbox, 1.0, feature) for bbox, feature in zip(boxs, features) ] # Run non-maxima suppression. boxes = np.array([d.tlwh for d in detections]) scores = np.array([d.confidence for d in detections]) indices = preprocessing.non_max_suppression(boxes, nms_max_overlap, scores) detections = [detections[i] for i in indices] # Call the tracker tracker.predict() tracker.update(detections) bbox_center = [] for track in tracker.tracks: if not track.is_confirmed() or track.time_since_update > 1: continue bbox = track.to_tlbr() # see generate_detections.py; the encoder input is a BGR color image cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), (0, 0, 255), 2) # red color cv2.putText(frame, str(track.track_id), (int(bbox[0]), int(bbox[1])), 0, 5e-3 * 200, (0, 255, 0), 2) # green color bbox_center = (str(track.track_id), [ int((bbox[0] + bbox[2]) / 2), int((bbox[1] + bbox[3]) / 2) ]) print(bbox_center) if writeVideo_flag: # save a frame out.write(frame) frame_index = frame_index + 1 for track in tracker.tracks: bbox = track.to_tlbr() bbox_center_o = ([ int((bbox[0] + bbox[2]) / 2), int((bbox[1] + bbox[3]) / 2) ]) list_file.write( str(track.track_id) + ',' + str(bbox_center_o) + '; ') list_file.write('\n') """ # don't need detections for det in detections: bbox = det.to_tlbr() cv2.rectangle(frame,(int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])),(255,0,0), 2) # blue color cv2.imshow('', frame) """ fps = (fps + (1. / (time.time() - t1))) / 2 print("fps= %f" % (fps)) # Press Q to stop! if cv2.waitKey(1) & 0xFF == ord('q'): break video_capture.release() if writeVideo_flag: out.release() list_file.close() cv2.destroyAllWindows()
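Each line of the tracking_DJI_0006.txt log written above looks like `7,[325, 118]; 9,[40, 77]; `, one `id,[cx, cy];` entry per tracked box. A small parsing sketch, assuming that exact format:

import re

# one "id,[cx, cy];" group per tracked box; tolerates int or float coordinates
ENTRY = re.compile(r'(\d+),\[([\d.+-]+), ([\d.+-]+)\];')

def load_centers(path='tracking_DJI_0006.txt'):
    """Return per-frame lists of (track_id, (cx, cy)) tuples."""
    frames = []
    with open(path) as f:
        for line in f:
            frames.append([(int(i), (float(x), float(y)))
                           for i, x, y in ENTRY.findall(line)])
    return frames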