def main(yolo):
    # Definition of the parameters
    max_cosine_distance = 0.3
    nn_budget = None
    nms_max_overlap = 1.0

    # deep_sort
    model_filename = 'model_data/mars-small128.pb'
    encoder = gdet.create_box_encoder(model_filename, batch_size=1)
    metric = nn_matching.NearestNeighborDistanceMetric("cosine", max_cosine_distance, nn_budget)
    tracker = Tracker(metric)

    writeVideo_flag = True

    video_capture = cv2.VideoCapture(0)

    if writeVideo_flag:
        # Define the codec and create VideoWriter object
        w = int(video_capture.get(3))
        h = int(video_capture.get(4))
        fourcc = cv2.VideoWriter_fourcc(*'MJPG')
        out = cv2.VideoWriter('output.avi', fourcc, 15, (w, h))
        list_file = open('detection.txt', 'w')
        frame_index = -1

    fps = 0.0
    while True:
        ret, frame = video_capture.read()  # frame shape 640*480*3
        if not ret:
            break
        t1 = time.time()

        image = Image.fromarray(frame[..., ::-1])  # bgr to rgb
        boxs = yolo.detect_image(image)
        print("box_num", len(boxs))
        features = encoder(frame, boxs)

        # The detector gives no confidence, so the score defaults to 1.0 here.
        detections = [
            Detection(bbox, 1.0, feature)
            for bbox, feature in zip(boxs, features)
        ]

        # Run non-maxima suppression.
        boxes = np.array([d.tlwh for d in detections])
        scores = np.array([d.confidence for d in detections])
        indices = preprocessing.non_max_suppression(boxes, nms_max_overlap, scores)
        detections = [detections[i] for i in indices]

        # Call the tracker
        tracker.predict()
        tracker.update(detections)

        # Note: zip() pairs tracks and detections positionally; the two lists
        # are not guaranteed to be aligned, which is a limitation of this loop.
        for track, det in zip(tracker.tracks, detections):
            # skip tracks that are not confirmed or have not been updated this frame
            if not track.is_confirmed() or track.time_since_update > 1:
                continue
            trk_bbox = track.to_tlbr()
            det_bbox = det.to_tlbr()
            croppedImage = imcrop(frame, trk_bbox)
            cv2.imwrite("trk" + str(fps) + '.jpg', croppedImage)
            cv2.rectangle(frame, (int(trk_bbox[0]), int(trk_bbox[1])),
                          (int(trk_bbox[2]), int(trk_bbox[3])), (255, 255, 255), 2)
            cv2.rectangle(frame, (int(det_bbox[0]), int(det_bbox[1])),
                          (int(det_bbox[2]), int(det_bbox[3])), (255, 0, 0), 2)
            cv2.putText(frame, str(track.track_id),
                        (int(trk_bbox[0]), int(trk_bbox[1])), 0, 5e-3 * 200,
                        (0, 255, 0), 2)

        cv2.imshow('', frame)

        if writeVideo_flag:
            # save a frame
            out.write(frame)
            frame_index = frame_index + 1
            list_file.write(str(frame_index) + ' ')
            if len(boxs) != 0:
                for i in range(0, len(boxs)):
                    list_file.write(str(boxs[i][0]) + ' ' + str(boxs[i][1]) + ' ' +
                                    str(boxs[i][2]) + ' ' + str(boxs[i][3]) + ' ')
            list_file.write('\n')

        fps = (fps + (1. / (time.time() - t1))) / 2
        print("fps= %f" % fps)

        # Press Q to stop!
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

    video_capture.release()
    if writeVideo_flag:
        out.release()
        list_file.close()
    cv2.destroyAllWindows()
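
# `imcrop` is used above but not defined in this snippet. A minimal sketch,
# assuming it crops an (x1, y1, x2, y2) box out of the frame with the
# coordinates clamped to the image bounds (name and behavior are assumptions):
def imcrop(frame, bbox):
    h, w = frame.shape[:2]
    x1, y1, x2, y2 = [int(v) for v in bbox]
    x1, y1 = max(0, x1), max(0, y1)
    x2, y2 = min(w, x2), min(h, y2)
    return frame[y1:y2, x1:x2]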
detections = [
    Detection(bbox, score, class_name, feature)
    for bbox, score, class_name, feature in zip(
        converted_boxes, scores[0], names, features)
]

boxs = np.array([d.tlwh for d in detections])
scores = np.array([d.confidence for d in detections])
classes = np.array([d.class_name for d in detections])
indices = preprocessing.non_max_suppression(boxs, classes, nms_max_overlap, scores)
detections = [detections[i] for i in indices]

tracker.predict()
tracker.update(detections)

cmap = plt.get_cmap('tab20b')
colors = [cmap(i)[:3] for i in np.linspace(0, 1, 20)]

current_count = int(0)
for track in tracker.tracks:
    if not track.is_confirmed() or track.time_since_update > 1:
        continue
    bbox = track.to_tlbr()
    class_name = track.get_class()
    color = colors[int(track.track_id) % len(colors)]
    color = [i * 255 for i in color]
    cv2.rectangle(img, (int(bbox[0]), int(bbox[1])),
                  (int(bbox[2]), int(bbox[3])), color, 2)
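
# The fragment above calls non_max_suppression with an extra `classes` argument,
# unlike the two-argument variant used elsewhere in this section. A minimal
# sketch of such a class-aware wrapper, assuming suppression is applied
# independently per class (the stock deep_sort preprocessing takes no classes):
def class_aware_nms(boxes, classes, max_overlap, scores):
    keep = []
    for cls in np.unique(classes):
        idx = np.where(classes == cls)[0]
        # run the standard deep_sort NMS within each class subset
        picked = preprocessing.non_max_suppression(boxes[idx], max_overlap, scores[idx])
        keep.extend(int(idx[p]) for p in picked)
    return keep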
def main():
    # max cosine distance ("khoang cach cosine" in the original comment)
    max_cosine_distance = 0.9
    nn_budget = None
    model_filename = 'model_data/mars-small128.pb'
    encoder = gdet.create_box_encoder(model_filename, batch_size=1)
    metric_left = nn_matching.NearestNeighborDistanceMetric(
        "cosine", max_cosine_distance, nn_budget)
    tracker_left = Tracker(metric_left)

    video_capture = cv2.VideoCapture(sys.argv[1])
    frame_count = -1
    tic = time.time()
    fps = 0.0
    total_time = 0  # accumulated processing time (named `sum` in the original, which shadows the builtin)
    detections = None

    # load YOLO model
    global metaMain, netMain, altNames
    darknet_image, metaMain, netMain, altNames = load_model(
        metaMain, netMain, altNames, version)

    if write_flag:
        folder_name = sys.argv[1][:len(sys.argv[1]) - 4] + "_tracked"
        if not os.path.exists(folder_name):
            os.mkdir(folder_name)
        folder_name = folder_name + "/"
        print(folder_name)
        frame_width = int(video_capture.get(3))
        frame_height = int(video_capture.get(4))
        # Define the codec and create VideoWriter object. The output is stored in an .avi file.
        out = cv2.VideoWriter(folder_name + 'output_tracked.avi',
                              cv2.VideoWriter_fourcc('M', 'J', 'P', 'G'), 10,
                              (frame_width, frame_height))
        f = open(folder_name + "person_count_tracked.txt", "w+")
        f2 = open(folder_name + "fps_tracked.txt", "w+")
    else:
        f = None

    frame_counter = -1
    while True:
        ret, frame = video_capture.read()  # frame shape 640*480*3
        if not ret:
            if write_flag:
                fps = frame_counter / total_time  # average fps over the whole run
                f2.write(str(fps) + "\n")
            break

        visual_frame = frame.copy()
        t1 = time.time()
        frame_count = frame_count + 1
        frame_counter = frame_counter + 1
        tic = time.time()
        frame = cv2.resize(
            frame,
            (darknet.network_width(netMain), darknet.network_height(netMain)),
            interpolation=cv2.INTER_LINEAR)

        if frame_count % step_frame == 0:
            frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            frame_resized = cv2.resize(frame_rgb,
                                       (darknet.network_width(netMain),
                                        darknet.network_height(netMain)),
                                       interpolation=cv2.INTER_LINEAR)
            darknet.copy_image_from_bytes(darknet_image, frame_resized.tobytes())
            detections = darknet.detect_image(netMain, metaMain, darknet_image,
                                              thresh=thresh)
            boxes = []
            for detection_left in detections:
                if "person" in str(detection_left[0]):
                    x, y, w, h = detection_left[2][0], \
                                 detection_left[2][1], \
                                 detection_left[2][2], \
                                 detection_left[2][3]
                    xmin, ymin, xmax, ymax = convertBack(
                        int(x), int(y), int(w), int(h))
                    boxes.append((xmin, ymin, xmax - xmin, ymax - ymin))
            detections = get_detection(frame, boxes, encoder)
            frame_count = 0

        # Call the tracker
        tracker_left.predict()
        tracker_left.update(detections)
        toc = time.time()
        total_time = total_time + toc - tic

        # if write_flag and ((video3 and frame_counter % 30 == 0) or (video3 is False and frame_counter % 25 == 0)):
        if write_flag:
            visual_one_tracker(tracker_left, visual_frame, (255, 0, 0), f,
                               frame_counter, darknet.network_width(netMain),
                               darknet.network_height(netMain), step_frame)
        else:
            visual_one_tracker(tracker_left, visual_frame, (255, 0, 0), None,
                               frame_counter, darknet.network_width(netMain),
                               darknet.network_height(netMain), step_frame)

        fps_text = 'FPS: {:.2f}'.format(fps)
        cv2.putText(visual_frame, fps_text, (0, 30),
                    cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 0), 3)
        cv2.imshow("demo", visual_frame)
        if write_flag:
            out.write(visual_frame)

        key = cv2.waitKey(1)
        if key == ord('q'):
            break
        elif key == 32:  # space pauses; pressing 's' then saves the current frame
            key = cv2.waitKey(0)
            if key == ord('s'):
                cv2.imwrite("frame_read.jpg", visual_frame)

    video_capture.release()
    cv2.destroyAllWindows()
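
# `convertBack` and `get_detection` are called above but not defined in this
# snippet. Two minimal sketches, assuming the usual darknet center-format boxes
# and a unit detection score (both implementations are assumptions):
def convertBack(x, y, w, h):
    # darknet returns center (x, y) plus (w, h); convert to corner coordinates
    xmin = int(round(x - (w / 2)))
    xmax = int(round(x + (w / 2)))
    ymin = int(round(y - (h / 2)))
    ymax = int(round(y + (h / 2)))
    return xmin, ymin, xmax, ymax

def get_detection(frame, boxes, encoder):
    # embed each (x, y, w, h) box and wrap it as a deep_sort Detection
    features = encoder(frame, boxes)
    return [Detection(bbox, 1.0, feature) for bbox, feature in zip(boxes, features)]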
def YOLO(videopath):

    global metaMain, netMain, altNames
    configPath = "./configs/yolov4-helmet-detection.cfg"
    weightPath = "./configs/yolov4-helmet-detection.weights"
    metaPath = "./configs/yolov4-helmet-detection.data"
    if not os.path.exists(configPath):
        raise ValueError("Invalid config path `" +
                         os.path.abspath(configPath) + "`")
    if not os.path.exists(weightPath):
        raise ValueError("Invalid weight path `" +
                         os.path.abspath(weightPath) + "`")
    if not os.path.exists(metaPath):
        raise ValueError("Invalid data file path `" +
                         os.path.abspath(metaPath) + "`")
    if netMain is None:
        netMain = darknet.load_net_custom(configPath.encode("ascii"),
                                          weightPath.encode("ascii"), 0, 1)  # batch size = 1
    if metaMain is None:
        metaMain = darknet.load_meta(metaPath.encode("ascii"))
    if altNames is None:
        try:
            with open(metaPath) as metaFH:
                metaContents = metaFH.read()
                import re
                match = re.search("names *= *(.*)$", metaContents,
                                  re.IGNORECASE | re.MULTILINE)
                if match:
                    result = match.group(1)
                else:
                    result = None
                try:
                    if os.path.exists(result):
                        with open(result) as namesFH:
                            namesList = namesFH.read().strip().split("\n")
                            altNames = [x.strip() for x in namesList]
                except TypeError:
                    pass
        except Exception:
            pass

    if not os.path.exists("outputs"):
        os.mkdir("outputs")

    """ DeepSORT Parameters """
    max_cosine_distance = 0.5
    nn_budget = None

    # load DeepSORT model
    sort_model_file = "model_data/mars-small128.pb"
    encoder = gdet.create_box_encoder(sort_model_file, batch_size=1)
    metric = nn_matching.NearestNeighborDistanceMetric("cosine", max_cosine_distance, nn_budget)
    tracker = Tracker(metric)

    # load video file / streams
    cap = cv2.VideoCapture(videopath)
    original_fps = cap.get(cv2.CAP_PROP_FPS)
    original_dimension = (int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)),
                          int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT)))
    print(original_fps, original_dimension)

    # create head detection result saving directory
    filename = videopath.split(".")[0].split("/")[1]
    directory = os.path.join(os.getcwd(), "outputs", filename)
    if not os.path.exists(directory):
        os.mkdir(directory)

    # create video output directory
    out_directory = os.path.join(os.getcwd(), "outputs", "video")
    if not os.path.exists(out_directory):
        os.mkdir(out_directory)

    # create VideoWriter for output video
    out_write = cv2.VideoWriter(
        os.path.join(out_directory, filename + "_processed.mp4"),
        cv2.VideoWriter_fourcc(*'MP4V'),
        original_fps,
        original_dimension)

    print("Starting the YOLO loop...")

    # Create an image we reuse for each detect
    darknet_image = darknet.make_image(darknet.network_width(netMain),
                                       darknet.network_height(netMain), 3)

    # network image size (416*416, ...)
    network_image_size = (darknet.network_width(netMain),
                          darknet.network_height(netMain))

    fps = 0.0
    # head detection id set
    head_set = set()

    while True:
        ret, frame_read = cap.read()
        if ret:
            t1 = time.time()
            frame_rgb = cv2.cvtColor(frame_read, cv2.COLOR_BGR2RGB)
            frame_resized = cv2.resize(frame_rgb, network_image_size,
                                       interpolation=cv2.INTER_LINEAR)
            darknet.copy_image_from_bytes(darknet_image, frame_resized.tobytes())

            # get inference information from Yolov4 model (class, probability, (x, y, width, height))
            detections = darknet.detect_image(netMain, metaMain, darknet_image, thresh=0.25)

            # deep sort inference
            bboxes = np.array([x[2] for x in detections])
            scores = np.array([x[1] for x in detections])
            classes = np.array([x[0].decode() for x in detections])
            features = encoder(frame_resized, bboxes)
            detections = [Detection(bbox, score, class_name, feature)
                          for bbox, score, class_name, feature
                          in zip(bboxes, scores, classes, features)]

            boxs = np.array([d.tlwh for d in detections])
            scores = np.array([d.confidence for d in detections])
            classes = np.array([d.class_name for d in detections])

            tracker.predict()
            tracker.update(detections)

            # color map used to draw a distinct color for each track
            cmap = plt.get_cmap('tab20b')
            colors = [cmap(i)[:3] for i in np.linspace(0, 1, 20)]

            # Deep SORT results
            for track in tracker.tracks:
                class_name = track.get_class()
                if not track.is_confirmed() or track.time_since_update > 1:
                    continue
                # if class is 'helmet', ignore
                if not FLAGS['HELMET_DRAW_ENABLED'] and class_name == "helmet":
                    continue

                # deep sort results
                bbox = track.to_tlbr()
                color = colors[int(track.track_id) % len(colors)]
                color = [i * 255 for i in color]

                # resize bounding box to fit the original image
                xmin, ymin, xmax, ymax = resizeCoord(frame_read.shape, network_image_size,
                                                     (bbox[0], bbox[1], bbox[2], bbox[3]))
                xmin = (xmin * 2 - xmax)
                ymin = (ymin * 2 - ymax)

                # draw class and id on the image with opacity
                mask_frame = frame_rgb.copy()
                ALPHA = 0.4
                cv2.rectangle(mask_frame, (xmin, ymin - 10),
                              (xmin + (len(class_name) + len(str(track.track_id))) * 9, ymin),
                              color, -1)
                text_color = (255, 255, 255) if class_name == "helmet" else (0, 0, 0)
                cv2.putText(mask_frame, f"{class_name} - {track.track_id}",
                            (xmin, ymin - 4), cv2.FONT_HERSHEY_SIMPLEX, 0.25, text_color)
                frame_rgb = cv2.addWeighted(mask_frame, ALPHA, frame_rgb, 1 - ALPHA, 0)

                # draw bounding box
                cv2.rectangle(frame_rgb, (xmin, ymin), (xmax, ymax), color, 2)

                # if a new head appears in the image, save the image
                if FLAGS['SAVE_ON_NEW_HEAD'] and class_name == 'head' and track.track_id not in head_set:
                    head_set.add(track.track_id)
                    print("new head detected")
                    savePath = os.path.join(
                        os.getcwd(), "outputs", filename,
                        f"{track.track_id}_{datetime.now().strftime('%Y_%m_%d %H_%M_%S')}.jpg")
                    print(savePath)
                    cv2.imwrite(savePath,
                                cv2.hconcat([frame_read,
                                             cv2.cvtColor(frame_rgb, cv2.COLOR_BGR2RGB)]))

            # draw fps
            if FLAGS['SHOW_FPS']:
                fps = (fps + (1. / (time.time() - t1))) / 2
                cv2.putText(frame_rgb, "FPS: {:.2f}".format(fps), (0, 30),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.75, (255, 0, 0), 2)

            # show in windows
            if FLAGS['SHOW_ORIGINAL_IMAGE']:
                cv2.imshow('Original', frame_read)
            result_frame = cv2.cvtColor(frame_rgb, cv2.COLOR_BGR2RGB)

            # show result video
            cv2.imshow('Video', result_frame)
            # save result video
            out_write.write(result_frame)

            # press 'q' to quit
            if cv2.waitKey(1) == ord('q'):
                break
        else:
            break

    cap.release()
    out_write.release()
    cv2.destroyAllWindows()
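
# `resizeCoord` is not defined in this snippet. A minimal sketch, assuming it
# scales (xmin, ymin, xmax, ymax) from network input coordinates back to the
# original frame; the signature is inferred from the call site and the details
# are assumptions:
def resizeCoord(frame_shape, network_size, bbox):
    frame_h, frame_w = frame_shape[0], frame_shape[1]
    net_w, net_h = network_size
    sx, sy = frame_w / net_w, frame_h / net_h
    return (int(bbox[0] * sx), int(bbox[1] * sy),
            int(bbox[2] * sx), int(bbox[3] * sy))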
def main(_argv):
    # set present path
    home = os.getcwd()

    # Definition of the parameters
    max_cosine_distance = 0.5
    nn_budget = None
    nms_max_overlap = 1.0

    # initialize deep sort
    # model_filename = 'weights/mars-small128.pb'
    model_filename = os.path.join(home, "weights", "arcface_weights.h5")
    encoder = gdet.create_box_encoder(model_filename, batch_size=128)
    metric = nn_matching.NearestNeighborDistanceMetric("cosine", max_cosine_distance, nn_budget)
    tracker = Tracker(metric)

    physical_devices = tf.config.experimental.list_physical_devices('GPU')
    for physical_device in physical_devices:
        tf.config.experimental.set_memory_growth(physical_device, True)

    if FLAGS.tiny:
        yolo = YoloV3Tiny(classes=FLAGS.num_classes)
    else:
        yolo = YoloV3(classes=FLAGS.num_classes)

    yolo.load_weights(FLAGS.weights)
    logging.info('weights loaded')

    class_names = [c.strip() for c in open(FLAGS.classes).readlines()]
    logging.info('classes loaded')

    times = []

    # build the face database ("Database 생성" in the original comment)
    face_db = dict()
    db_path = FLAGS.database
    for name in os.listdir(db_path):
        name_path = os.path.join(db_path, name)
        name_db = []
        for i in os.listdir(name_path):
            if i.split(".")[1] != "jpg":
                continue
            id_path = os.path.join(name_path, i)
            img = cv2.imread(id_path)
            # img_in = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
            # img_in = tf.expand_dims(img_in, 0)
            # img_in = transform_images(img_in, FLAGS.size)
            # boxes, scores, classes, nums = yolo.predict(img_in)
            boxes = np.asarray([[0, 0, img.shape[0], img.shape[1]]])
            scores = np.asarray([[1]])
            converted_boxes = convert_boxes(img, boxes, scores)
            features = encoder(img, converted_boxes)
            if features.shape[0] == 0:
                continue
            for f in range(features.shape[0]):
                name_db.append(features[f, :])
        name_db = np.asarray(name_db)
        face_db[name] = dict({"used": False, "db": name_db})

    try:
        vid = cv2.VideoCapture(int(FLAGS.video))
    except:
        vid = cv2.VideoCapture(FLAGS.video)

    out = None

    if FLAGS.output:
        # by default VideoCapture returns float instead of int
        width = int(vid.get(cv2.CAP_PROP_FRAME_WIDTH))
        height = int(vid.get(cv2.CAP_PROP_FRAME_HEIGHT))
        fps = int(vid.get(cv2.CAP_PROP_FPS))
        codec = cv2.VideoWriter_fourcc(*FLAGS.output_format)
        out = cv2.VideoWriter(FLAGS.output, codec, fps, (width, height))

    list_file = open('detection.txt', 'w')
    frame_index = -1

    fps = 0.0
    count = 0
    detection_list = []
    while True:
        _, img = vid.read()

        if img is None:
            logging.warning("Empty Frame")
            time.sleep(0.1)
            count += 1
            if count < 3:
                continue
            else:
                break

        img_in = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        img_in = tf.expand_dims(img_in, 0)
        img_in = transform_images(img_in, FLAGS.size)

        t1 = time.time()
        boxes, scores, classes, nums = yolo.predict(img_in)
        # print(boxes, scores, classes, nums)
        t2 = time.time()
        times.append(t2 - t1)
        print(f'yolo predict time : {t2-t1}')
        times = times[-20:]

        t3 = time.time()
        classes = classes[0]
        names = []
        for i in range(len(classes)):
            names.append(class_names[int(classes[i])])
        names = np.array(names)
        converted_boxes = convert_boxes(img, boxes[0], scores[0])
        features = encoder(img, converted_boxes)
        detections = [
            Detection(bbox, score, class_name, feature)
            for bbox, score, class_name, feature in zip(
                converted_boxes, scores[0], names, features)
        ]
        t4 = time.time()
        print(f'feature generation time : {t4-t3}')

        # initialize color map
        cmap = plt.get_cmap('tab20b')
        colors = [cmap(i)[:3] for i in np.linspace(0, 1, 20)]

        # run non-maxima suppression
        boxs = np.array([d.tlwh for d in detections])
        scores = np.array([d.confidence for d in detections])
        classes = np.array([d.class_name for d in detections])
        indices = preprocessing.non_max_suppression(boxs, classes, nms_max_overlap, scores)
        detections = [detections[i] for i in indices]

        t5 = time.time()
        # Call the tracker
        tracker.predict()
        # tracker.update(detections)
        tracker.update(detections, face_db, FLAGS.max_face_threshold)
        t6 = time.time()
        print(f'tracking time : {t6-t5}')

        frame_index = frame_index + 1

        for track in tracker.tracks:
            if not track.is_confirmed() or track.time_since_update > 1:
                continue
            bbox = track.to_tlbr()
            class_name = track.get_class()
            face_name = track.get_face_name()
            color = colors[int(track.track_id) % len(colors)]
            color = [i * 255 for i in color]
            cv2.rectangle(img, (int(bbox[0]), int(bbox[1])),
                          (int(bbox[2]), int(bbox[3])), color, 2)
            cv2.rectangle(img, (int(bbox[0]), int(bbox[1] - 30)),
                          (int(bbox[0]) + (len(class_name) + len(str(track.track_id)) +
                                           len(str(face_name))) * 23, int(bbox[1])),
                          color, -1)
            cv2.putText(img, class_name + "-" + str(track.track_id) + "-" + face_name,
                        (int(bbox[0]), int(bbox[1] - 10)), 0, 0.75, (255, 255, 255), 2)

            if face_name != "":
                detection_list.append(
                    dict({
                        "frame_no": str(frame_index),
                        "id": str(face_name),
                        "x": str(int(bbox[0])),
                        "y": str(int(bbox[1])),
                        "width": str(int(bbox[2]) - int(bbox[0])),
                        "height": str(int(bbox[3]) - int(bbox[1]))
                    }))

        fps = (fps + (1. / (time.time() - t1))) / 2
        img = cv2.putText(img, "FPS: {:.2f}".format(fps), (0, 30),
                          cv2.FONT_HERSHEY_COMPLEX_SMALL, 1, (20, 20, 255), 2)
        if FLAGS.output:
            out.write(img)

        cv2.imshow('output', img)
        if cv2.waitKey(1) == ord('q'):
            break

    cv2.destroyAllWindows()

    frame_list = sorted(detection_list,
                        key=lambda x: (int(x["frame_no"]), int(x["id"])))
    # pprint.pprint(frame_list)
    f = open(FLAGS.eval, "w")
    for a in frame_list:
        f.write(a["frame_no"] + " " + a["id"] + " " + a["x"] + " " + a["y"] +
                " " + a["width"] + " " + a["height"] + "\n")
    # close the file ("파일 닫기" in the original comment)
    f.close()
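
# `convert_boxes` is not defined in this snippet. A minimal sketch, assuming it
# turns normalized (x1, y1, x2, y2) detector output into pixel (x, y, w, h)
# lists and uses the scores only to skip zero-score padding rows (both the
# normalization and the score handling are assumptions):
def convert_boxes(image, boxes, scores):
    converted = []
    for box, score in zip(boxes, scores):
        if score == 0:
            continue
        x1 = int(box[0] * image.shape[1])
        y1 = int(box[1] * image.shape[0])
        x2 = int(box[2] * image.shape[1])
        y2 = int(box[3] * image.shape[0])
        converted.append([x1, y1, x2 - x1, y2 - y1])
    return converted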
def Object_tracking(YoloV3, video_path, output_path, input_size=416, show=False,
                    CLASSES=YOLO_COCO_CLASSES, score_threshold=0.3,
                    iou_threshold=0.45, rectangle_colors='', Track_only=[]):
    # Definition of the parameters
    max_cosine_distance = 0.7
    nn_budget = None

    # initialize deep sort object
    model_filename = 'model_data/mars-small128.pb'
    encoder = gdet.create_box_encoder(model_filename, batch_size=1)
    metric = nn_matching.NearestNeighborDistanceMetric("cosine", max_cosine_distance, nn_budget)
    tracker = Tracker(metric)

    times = []

    if video_path:
        vid = cv2.VideoCapture(video_path)  # detect on video
    else:
        vid = cv2.VideoCapture(0)  # detect from webcam

    # by default VideoCapture returns float instead of int
    width = int(vid.get(cv2.CAP_PROP_FRAME_WIDTH))
    height = int(vid.get(cv2.CAP_PROP_FRAME_HEIGHT))
    fps = int(vid.get(cv2.CAP_PROP_FPS))
    codec = cv2.VideoWriter_fourcc(*'XVID')
    out = cv2.VideoWriter(output_path, codec, fps, (width, height))  # output_path must be .mp4

    NUM_CLASS = read_class_names(CLASSES)
    key_list = list(NUM_CLASS.keys())
    val_list = list(NUM_CLASS.values())

    while True:
        _, img = vid.read()

        try:
            original_image = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
            original_image = cv2.cvtColor(original_image, cv2.COLOR_BGR2RGB)
        except:
            break

        image_data = image_preprocess(np.copy(original_image), [input_size, input_size])
        image_data = tf.expand_dims(image_data, 0)

        t1 = time.time()
        pred_bbox = YoloV3.predict(image_data)
        t2 = time.time()

        times.append(t2 - t1)
        times = times[-20:]

        pred_bbox = [tf.reshape(x, (-1, tf.shape(x)[-1])) for x in pred_bbox]
        pred_bbox = tf.concat(pred_bbox, axis=0)

        bboxes = postprocess_boxes(pred_bbox, original_image, input_size, score_threshold)
        bboxes = nms(bboxes, iou_threshold, method='nms')

        # extract bboxes to boxes (x, y, width, height), scores and names
        boxes, scores, names = [], [], []
        for bbox in bboxes:
            if len(Track_only) != 0 and NUM_CLASS[int(bbox[5])] in Track_only or len(Track_only) == 0:
                boxes.append([bbox[0].astype(int),
                              bbox[1].astype(int),
                              bbox[2].astype(int) - bbox[0].astype(int),
                              bbox[3].astype(int) - bbox[1].astype(int)])
                scores.append(bbox[4])
                names.append(NUM_CLASS[int(bbox[5])])

        # Obtain all the detections for the given frame.
        boxes = np.array(boxes)
        names = np.array(names)
        scores = np.array(scores)
        features = np.array(encoder(original_image, boxes))
        detections = [
            Detection(bbox, score, class_name, feature)
            for bbox, score, class_name, feature in zip(boxes, scores, names, features)
        ]

        # Pass detections to the deepsort object and obtain the track information.
        tracker.predict()
        tracker.update(detections)

        # Obtain info from the tracks
        tracked_bboxes = []
        for track in tracker.tracks:
            if not track.is_confirmed() or track.time_since_update > 5:
                continue
            bbox = track.to_tlbr()  # Get the corrected/predicted bounding box
            class_name = track.get_class()  # Get the class name of the tracked object
            tracking_id = track.track_id  # Get the ID for the particular track
            index = key_list[val_list.index(class_name)]  # Get predicted object index by object name
            # Structure the data so it can be used with the draw_bbox function
            tracked_bboxes.append(bbox.tolist() + [tracking_id, index])

        ms = sum(times) / len(times) * 1000
        fps = 1000 / ms

        # draw detection on frame
        image = draw_bbox(original_image, tracked_bboxes, CLASSES=CLASSES, tracking=True)
        image = cv2.putText(image, "Time: {:.1f} FPS".format(fps), (0, 30),
                            cv2.FONT_HERSHEY_COMPLEX_SMALL, 1, (0, 0, 255), 2)

        # draw original yolo detection
        # image = draw_bbox(image, bboxes, CLASSES=CLASSES, show_label=False, rectangle_colors=rectangle_colors, tracking=True)
        # print("Time: {:.2f}ms, {:.1f} FPS".format(ms, fps))

        if output_path != '':
            out.write(image)
        if show:
            cv2.imshow('output', image)
            if cv2.waitKey(25) & 0xFF == ord("q"):
                cv2.destroyAllWindows()
                break

    cv2.destroyAllWindows()
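
# A hedged usage sketch for the function above. The Create_Yolov3 factory, the
# weight-loading call, and the file names are assumptions borrowed from typical
# TensorFlow YOLOv3 training repos, not taken from this snippet:
if __name__ == '__main__':
    yolo = Create_Yolov3(input_size=416)            # assumed model factory
    yolo.load_weights("model_data/yolov3.weights")  # assumed checkpoint path
    Object_tracking(yolo, "test.mp4", "tracked.mp4",
                    input_size=416, show=True, Track_only=["person"])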
def main(yolo):
    # Definition of the parameters
    max_cosine_distance = 0.3
    nn_budget = None
    nms_max_overlap = 1.0

    # Deep SORT
    model_filename = 'model_data/mars-small128.pb'
    encoder = gdet.create_box_encoder(model_filename, batch_size=1)
    metric = nn_matching.NearestNeighborDistanceMetric("cosine", max_cosine_distance, nn_budget)
    tracker = Tracker(metric)

    writeVideo_flag = True
    asyncVideo_flag = False

    file_path = 'video.webm'
    if asyncVideo_flag:
        video_capture = VideoCaptureAsync(file_path)
    else:
        video_capture = cv2.VideoCapture(file_path)

    if asyncVideo_flag:
        video_capture.start()

    if writeVideo_flag:
        if asyncVideo_flag:
            w = int(video_capture.cap.get(3))
            h = int(video_capture.cap.get(4))
        else:
            w = int(video_capture.get(3))
            h = int(video_capture.get(4))
        fourcc = cv2.VideoWriter_fourcc(*'XVID')
        out = cv2.VideoWriter('output_yolov4.avi', fourcc, 30, (w, h))
        frame_index = -1

    fps = 0.0
    fps_imutils = imutils.video.FPS().start()

    while True:
        ret, frame = video_capture.read()  # frame shape 640*480*3
        if not ret:
            break

        t1 = time.time()

        image = Image.fromarray(frame[..., ::-1])  # bgr to rgb
        boxs, confidence = yolo.detect_image(image)

        features = encoder(frame, boxs)
        detections = [
            Detection(bbox, confidence, feature)
            for bbox, confidence, feature in zip(boxs, confidence, features)
        ]

        # Run non-maxima suppression.
        boxes = np.array([d.tlwh for d in detections])
        scores = np.array([d.confidence for d in detections])
        indices = preprocessing.non_max_suppression(boxes, nms_max_overlap, scores)
        detections = [detections[i] for i in indices]

        # Call the tracker
        tracker.predict()
        tracker.update(detections)

        for track in tracker.tracks:
            if not track.is_confirmed() or track.time_since_update > 1:
                continue
            bbox = track.to_tlbr()
            cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])),
                          (int(bbox[2]), int(bbox[3])), (255, 255, 255), 2)
            cv2.putText(frame, str(track.track_id),
                        (int(bbox[0]), int(bbox[1])), 0, 5e-3 * 200, (0, 255, 0), 2)

        for det in detections:
            bbox = det.to_tlbr()
            score = "%.2f" % round(det.confidence * 100, 2)
            cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])),
                          (int(bbox[2]), int(bbox[3])), (255, 0, 0), 2)
            cv2.putText(frame, score + '%', (int(bbox[0]), int(bbox[3])), 0,
                        5e-3 * 130, (0, 255, 0), 2)

        cv2.imshow('', frame)

        if writeVideo_flag:  # and not asyncVideo_flag:
            # save a frame
            out.write(frame)
            frame_index = frame_index + 1

        fps_imutils.update()

        if not asyncVideo_flag:
            fps = (fps + (1. / (time.time() - t1))) / 2
            print("FPS = %f" % fps)

        # Press Q to stop!
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

    fps_imutils.stop()
    print('imutils FPS: {}'.format(fps_imutils.fps()))

    if asyncVideo_flag:
        video_capture.stop()
    else:
        video_capture.release()

    if writeVideo_flag:
        out.release()

    cv2.destroyAllWindows()
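
# `VideoCaptureAsync` is referenced above but not defined in this section. A
# minimal sketch, assuming a background thread that keeps the latest frame; the
# class name and the `.cap` attribute come from the call sites, the rest is an
# assumption:
import threading

class VideoCaptureAsync:
    def __init__(self, src=0):
        self.cap = cv2.VideoCapture(src)
        self.grabbed, self.frame = self.cap.read()
        self.started = False
        self.read_lock = threading.Lock()

    def start(self):
        # spawn a daemon thread that continuously grabs frames
        self.started = True
        self.thread = threading.Thread(target=self._update, daemon=True)
        self.thread.start()
        return self

    def _update(self):
        while self.started:
            grabbed, frame = self.cap.read()
            with self.read_lock:
                self.grabbed, self.frame = grabbed, frame

    def read(self):
        # return a copy so callers can draw on the frame safely
        with self.read_lock:
            frame = self.frame.copy() if self.frame is not None else None
            return self.grabbed, frame

    def stop(self):
        self.started = False
        self.thread.join()
        self.cap.release()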
def main(yolo):
    start = time.time()

    # Definition of the parameters
    max_cosine_distance = 0.5  # cosine distance threshold
    nn_budget = None
    nms_max_overlap = 0.3  # non-maximum suppression threshold

    counter = []

    # deep_sort
    model_filename = './re_id/mars-small128.pb'
    encoder = create_box_encoder(model_filename, batch_size=32)
    metric = nn_matching.NearestNeighborDistanceMetric("cosine", max_cosine_distance, nn_budget)
    tracker = Tracker(metric)

    writeVideo_flag = True
    # video_path = "./output/output.avi"
    video_capture = cv2.VideoCapture(args["input"])

    if writeVideo_flag:
        # Define the codec and create VideoWriter object
        w = int(video_capture.get(3))
        h = int(video_capture.get(4))
        fourcc = cv2.VideoWriter_fourcc(*'MJPG')
        out = cv2.VideoWriter(
            './output/' + args["input"][43:57] + "_" + args["class"] + '_output.avi',
            fourcc, 15, (w, h))
        list_file = open('detection.txt', 'w')
        frame_index = -1

    fps = 0.0
    while True:
        ret, frame = video_capture.read()  # frame shape 640*480*3
        if not ret:
            break
        t1 = time.time()

        image = Image.fromarray(frame[..., ::-1])  # bgr to rgb

        t_detect_image = time.time()
        boxs, class_names = yolo.detect_image(image)
        t_detect_image_ = time.time()
        print("t_detect_image " + str(t_detect_image_ - t_detect_image))

        t_detect_image = time.time()
        features = encoder(frame, boxs)
        t_detect_image_ = time.time()
        print("t_encoder " + str(t_detect_image_ - t_detect_image))

        # The detector gives no confidence, so the score defaults to 1.0 here.
        detections = [
            Detection(bbox, 1.0, feature)
            for bbox, feature in zip(boxs, features)
        ]

        # Run non-maxima suppression.
        boxes = np.array([d.tlwh for d in detections])
        scores = np.array([d.confidence for d in detections])
        indices = non_max_suppression(boxes, nms_max_overlap, scores)
        detections = [detections[i] for i in indices]

        # Call the tracker
        tracker.predict()
        tracker.update(detections)

        i = int(0)
        indexIDs = []
        boxes = []
        for det in detections:
            bbox = det.to_tlbr()
            cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])),
                          (int(bbox[2]), int(bbox[3])), (255, 255, 255), 2)

        for track in tracker.tracks:
            if not track.is_confirmed() or track.time_since_update > 1:
                continue
            # boxes.append([track[0], track[1], track[2], track[3]])
            indexIDs.append(int(track.track_id))
            counter.append(int(track.track_id))
            bbox = track.to_tlbr()
            color = [int(c) for c in COLORS[indexIDs[i] % len(COLORS)]]

            cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])),
                          (int(bbox[2]), int(bbox[3])), color, 3)
            cv2.putText(frame, str(track.track_id),
                        (int(bbox[0]), int(bbox[1] - 50)), 0, 5e-3 * 150, color, 2)
            if len(class_names) > 0:
                class_name = class_names[0]
                cv2.putText(frame, str(class_names[0]),
                            (int(bbox[0]), int(bbox[1] - 20)), 0, 5e-3 * 150, color, 2)

            i += 1

            # bbox center point (x, y)
            center = (int((bbox[0] + bbox[2]) / 2), int((bbox[1] + bbox[3]) / 2))
            # record the center in the trail for this track id
            pts[track.track_id].append(center)

            thickness = 5
            # center point
            cv2.circle(frame, center, 1, color, thickness)

            # draw motion path
            for j in range(1, len(pts[track.track_id])):
                if pts[track.track_id][j - 1] is None or pts[track.track_id][j] is None:
                    continue
                thickness = int(np.sqrt(64 / float(j + 1)) * 2)
                cv2.line(frame, pts[track.track_id][j - 1],
                         pts[track.track_id][j], color, thickness)

        count = len(set(counter))
        cv2.putText(frame, "Total Object Counter: " + str(count),
                    (int(20), int(120)), 0, 5e-3 * 200, (0, 255, 0), 2)
        cv2.putText(frame, "Current Object Counter: " + str(i),
                    (int(20), int(80)), 0, 5e-3 * 200, (0, 255, 0), 2)
        cv2.putText(frame, "FPS: %f" % fps,
                    (int(20), int(40)), 0, 5e-3 * 200, (0, 255, 0), 3)
        cv2.namedWindow("YOLO3_Deep_SORT", 0)
        cv2.resizeWindow('YOLO3_Deep_SORT', 1024, 768)
        cv2.imshow('YOLO3_Deep_SORT', frame)

        if writeVideo_flag:
            # save a frame
            out.write(frame)
            frame_index = frame_index + 1
            list_file.write(str(frame_index) + ' ')
            if len(boxs) != 0:
                for i in range(0, len(boxs)):
                    list_file.write(str(boxs[i][0]) + ' ' + str(boxs[i][1]) + ' ' +
                                    str(boxs[i][2]) + ' ' + str(boxs[i][3]) + ' ')
            list_file.write('\n')

        fps = (fps + (1. / (time.time() - t1))) / 2
        # print(set(counter))

        # Press Q to stop!
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

    print(" ")
    print("[Finish]")
    end = time.time()

    # report the result if anything was counted
    if count != 0:
        print(args["input"][43:57] + ": " + str(count) + " " + str(class_name) + ' Found')
    else:
        print("[No Found]")

    video_capture.release()
    if writeVideo_flag:
        out.release()
        list_file.close()
    cv2.destroyAllWindows()
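
# `pts` and `COLORS` are module-level state used above but not defined in this
# snippet. A minimal sketch, assuming a fixed-length trail per track id and a
# random color table (the sizes and seed are assumptions):
from collections import deque
pts = [deque(maxlen=30) for _ in range(9999)]
np.random.seed(100)
COLORS = np.random.randint(0, 255, size=(200, 3), dtype="uint8")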
def main(yolo):
    # Definition of the parameters
    with open("cfg/detection_tracker_cfg.json") as detection_config:
        detect_config = json.load(detection_config)
    with open("cfg/doors_info.json") as doors_config:
        doors_config = json.load(doors_config)
    with open("cfg/around_doors_info.json") as around_doors_config:
        around_doors_config = json.load(around_doors_config)

    model_filename = detect_config["tracking_model"]
    input_folder, output_folder = detect_config["input_folder"], detect_config["output_folder"]
    meta_folder = detect_config["meta_folder"]
    output_format = detect_config["output_format"]

    # Deep SORT
    max_cosine_distance = 0.3
    nn_budget = None
    nms_max_overlap = 1.0
    encoder = gdet.create_box_encoder(model_filename, batch_size=1)
    metric = nn_matching.NearestNeighborDistanceMetric("cosine", max_cosine_distance, nn_budget)

    show_detections = True
    asyncVideo_flag = False

    check_gpu()

    # from here should start loop to process videos from folder
    # for video_name in os.listdir(input_folder):

    HOST = "localhost"
    PORT = 8075
    with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as sock:
        sock.bind((HOST, PORT))
        sock.listen()
        conn, addr = sock.accept()
        with conn:
            print('Connected by', addr)

            # loop over all videos
            while True:
                data = conn.recv(1000)
                video_motion_list = data.decode("utf-8").split(';')
                videos_que = deque()
                for video_motion in video_motion_list:
                    videos_que.append(video_motion)
                video_name = videos_que.popleft()

                if not video_name.endswith(output_format):
                    continue

                print('elements in que', len(videos_que))
                print("opening video: {}".format(video_name))
                full_video_path = join(input_folder, video_name)
                # full_video_path = "rtsp://*****:*****@192.168.1.52:554/1/h264major"

                meta_name = meta_folder + video_name[:-4] + ".json"
                with open(meta_name) as meta_config_json:
                    meta_config = json.load(meta_config_json)
                camera_id = meta_config["camera_id"]
                if not os.path.exists(output_folder + str(camera_id)):
                    os.mkdir(output_folder + str(camera_id))
                output_name = output_folder + camera_id + '/out_' + video_name

                counter = Counter(counter_in=0, counter_out=0, track_id=0)
                tracker = Tracker(metric)

                if asyncVideo_flag:
                    video_capture = VideoCaptureAsync(full_video_path)
                    video_capture.start()
                    w = int(video_capture.cap.get(3))
                    h = int(video_capture.cap.get(4))
                else:
                    video_capture = cv2.VideoCapture(full_video_path)
                    w = int(video_capture.get(3))
                    h = int(video_capture.get(4))

                fourcc = cv2.VideoWriter_fourcc(*'XVID')
                out = cv2.VideoWriter(output_name, fourcc, 25, (w, h))

                door_array = doors_config["{}".format(camera_id)]
                around_door_array = tuple(around_doors_config["{}".format(camera_id)])
                rect_door = Rectangle(door_array[0], door_array[1],
                                      door_array[2], door_array[3])
                border_door = door_array[3]

                # loop over video
                save_video_flag = False
                while True:
                    fps_imutils = imutils.video.FPS().start()
                    ret, frame = video_capture.read()
                    if not ret:
                        with open('videos_saved/log_results.txt', 'a') as log:
                            log.write('processed (ret). Time: {}, camera id: {}\n'.format(
                                video_name, camera_id))
                        break

                    t1 = time.time()
                    # lost_ids = counter.return_lost_ids()

                    image = Image.fromarray(frame[..., ::-1])  # bgr to rgb
                    # image = image.crop(around_door_array)
                    boxes, confidence, classes = yolo.detect_image(image)

                    features = encoder(frame, boxes)
                    detections = [
                        Detection(bbox, confidence, cls, feature)
                        for bbox, confidence, cls, feature in zip(
                            boxes, confidence, classes, features)
                    ]

                    # Run non-maxima suppression.
                    boxes = np.array([d.tlwh for d in detections])
                    scores = np.array([d.confidence for d in detections])
                    classes = np.array([d.cls for d in detections])
                    indices = preprocessing.non_max_suppression(
                        boxes, nms_max_overlap, scores)
                    detections = [detections[i] for i in indices]

                    # Call the tracker
                    tracker.predict()
                    tracker.update(detections)

                    cv2.rectangle(frame, (int(door_array[0]), int(door_array[1])),
                                  (int(door_array[2]), int(door_array[3])),
                                  (23, 158, 21), 3)

                    if len(detections) != 0:
                        counter.someone_inframe()
                        for det in detections:
                            bbox = det.to_tlbr()
                            if show_detections and len(classes) > 0:
                                score = "%.2f" % (det.confidence * 100) + "%"
                                cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])),
                                              (int(bbox[2]), int(bbox[3])),
                                              (255, 0, 0), 3)
                    else:
                        if counter.need_to_clear():
                            counter.clear_all()

                    # identities = [track.track_id for track in tracker.tracks]
                    # counter.update_identities(identities)

                    for track in tracker.tracks:
                        if not track.is_confirmed() or track.time_since_update > 1:
                            continue
                        bbox = track.to_tlbr()

                        if track.track_id not in counter.people_init or \
                                counter.people_init[track.track_id] == 0:
                            # counter.obj_initialized(track.track_id)
                            ratio_init = find_ratio_ofbboxes(bbox=bbox,
                                                             rect_compare=rect_door)
                            if ratio_init > 0:
                                if ratio_init >= 0.5:  # and bbox[3] < door_array[3]:
                                    counter.people_init[track.track_id] = 2  # man in the door
                                elif ratio_init < 0.5:  # and bbox[3] > door_array[3]:
                                    # initialized on the outside
                                    counter.people_init[track.track_id] = 1
                            else:
                                counter.people_init[track.track_id] = 1
                            counter.people_bbox[track.track_id] = bbox

                        counter.cur_bbox[track.track_id] = bbox

                        adc = "%.2f" % (track.adc * 100) + "%"  # Average detection confidence
                        cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])),
                                      (int(bbox[2]), int(bbox[3])), (255, 255, 255), 2)
                        cv2.putText(frame, "ID: " + str(track.track_id),
                                    (int(bbox[0]), int(bbox[1]) + 50), 0,
                                    1e-3 * frame.shape[0], (0, 255, 0), 3)

                        if not show_detections:
                            track_cls = track.cls
                            cv2.putText(frame, str(track_cls),
                                        (int(bbox[0]), int(bbox[3])), 0,
                                        1e-3 * frame.shape[0], (0, 255, 0), 3)
                            cv2.putText(frame, 'ADC: ' + adc,
                                        (int(bbox[0]),
                                         int(bbox[3] + 2e-2 * frame.shape[1])),
                                        0, 1e-3 * frame.shape[0], (0, 255, 0), 3)

                    # if track.time_since_update >= 15:
                    #     id_get_lost.append(track.track_id)
                    id_get_lost = [track.track_id for track in tracker.tracks
                                   if track.time_since_update >= 15]

                    for val in counter.people_init.keys():
                        ratio = 0
                        cur_c = find_centroid(counter.cur_bbox[val])
                        init_c = find_centroid(counter.people_bbox[val])

                        if val in id_get_lost and counter.people_init[val] != -1:
                            ratio = find_ratio_ofbboxes(bbox=counter.cur_bbox[val],
                                                        rect_compare=rect_door)

                            if counter.people_init[val] == 2 and ratio < 0.6:
                                # and counter.people_bbox[val][3] > border_door
                                counter.get_out()
                                save_video_flag = True
                                print(counter.people_init[val], ratio)
                            elif counter.people_init[val] == 1 and ratio >= 0.6:
                                counter.get_in()
                                save_video_flag = True
                                print(counter.people_init[val], ratio)

                            counter.people_init[val] = -1

                    ins, outs = counter.return_counter()
                    cv2.rectangle(frame, (frame.shape[1] - 150, 0),
                                  (frame.shape[1], 50), (0, 0, 0), -1, 8)
                    cv2.putText(frame, "in: {}, out: {} ".format(ins, outs),
                                (frame.shape[1] - 140, 20), 0,
                                1e-3 * frame.shape[0], (255, 255, 255), 3)
                    out.write(frame)
                    fps_imutils.update()

                    if not asyncVideo_flag:
                        pass
                        # fps = (1. / (time.time() - t1))
                        # print("FPS = %f" % fps)
                        # if len(fpeses) < 15:
                        #     fpeses.append(round(fps, 2))
                        # elif len(fpeses) == 15:
                        #     # fps = round(np.median(np.array(fpeses)))
                        #     median_fps = float(np.median(np.array(fpeses)))
                        #     fps = round(median_fps, 1)
                        #     print('max fps: ', fps)
                        #     # fps = 20
                        #     counter.fps = fps
                        #     fpeses.append(fps)

                    if cv2.waitKey(1) & 0xFF == ord('q'):
                        break

                if asyncVideo_flag:
                    video_capture.stop()
                    del video_capture
                else:
                    video_capture.release()

                if save_video_flag:
                    with open('videos_saved/log_results.txt', 'a') as log:
                        log.write(
                            'detected!!! time: {}, camera id: {}, detected move in: {}, out: {}\n'
                            .format(video_name, camera_id, ins, outs))
                        log.write('video written {}\n\n'.format(output_name))
                    out.release()
                else:
                    if out.isOpened():
                        out.release()
                    if os.path.isfile(output_name):
                        os.remove(output_name)

                if os.path.isfile(full_video_path):
                    os.remove(full_video_path)
                if os.path.isfile(meta_name):
                    os.remove(meta_name)

                save_video_flag = False

    cv2.destroyAllWindows()
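
# `Rectangle`, `find_ratio_ofbboxes` and `find_centroid` are used above but not
# defined in this snippet. Minimal sketches, assuming the ratio is the fraction
# of the box area that overlaps the door rectangle (all three are assumptions):
from collections import namedtuple

Rectangle = namedtuple('Rectangle', ['xmin', 'ymin', 'xmax', 'ymax'])

def find_ratio_ofbboxes(bbox, rect_compare):
    # intersection area of (x1, y1, x2, y2) bbox with the door, over the bbox area
    ix1, iy1 = max(bbox[0], rect_compare.xmin), max(bbox[1], rect_compare.ymin)
    ix2, iy2 = min(bbox[2], rect_compare.xmax), min(bbox[3], rect_compare.ymax)
    iw, ih = max(0, ix2 - ix1), max(0, iy2 - iy1)
    box_area = max(1e-6, (bbox[2] - bbox[0]) * (bbox[3] - bbox[1]))
    return (iw * ih) / box_area

def find_centroid(bbox):
    return ((bbox[0] + bbox[2]) / 2, (bbox[1] + bbox[3]) / 2)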
def deepsort(yolo, args):
    # nms_max_overlap = 0.3  # nms threshold

    images_input = True if os.path.isdir(args.input) else False
    if images_input:
        # get images list
        jpeg_files = glob.glob(os.path.join(args.input, '*.jpeg'))
        jpg_files = glob.glob(os.path.join(args.input, '*.jpg'))
        frame_capture = jpeg_files + jpg_files
        frame_capture.sort()
    else:
        # create video capture stream
        frame_capture = cv2.VideoCapture(0 if args.input == '0' else args.input)
        if not frame_capture.isOpened():
            raise IOError("Couldn't open webcam or video")

    # create video save stream if needed
    save_output = True if args.output != "" else False
    if save_output:
        if images_input:
            raise ValueError("image folder input could not be saved to video file")

        # here we encode the video to MPEG-4 for better compatibility; you can use ffmpeg later
        # to convert it to x264 to reduce the file size:
        # ffmpeg -i test.mp4 -vcodec libx264 -f mp4 test_264.mp4
        #
        # video_FourCC = cv2.VideoWriter_fourcc(*'XVID') if args.input == '0' else int(frame_capture.get(cv2.CAP_PROP_FOURCC))
        video_FourCC = cv2.VideoWriter_fourcc(*'XVID') if args.input == '0' \
            else cv2.VideoWriter_fourcc(*"mp4v")
        video_fps = frame_capture.get(cv2.CAP_PROP_FPS)
        video_size = (int(frame_capture.get(cv2.CAP_PROP_FRAME_WIDTH)),
                      int(frame_capture.get(cv2.CAP_PROP_FRAME_HEIGHT)))
        out = cv2.VideoWriter(args.output, video_FourCC,
                              (5. if args.input == '0' else video_fps), video_size)

    if args.tracking_classes_path:
        # load the object classes used in tracking if given; other classes
        # from the detector will be ignored
        tracking_class_names = get_classes(args.tracking_classes_path)
    else:
        tracking_class_names = None

    # create deep_sort box encoder
    encoder = create_box_encoder(args.deepsort_model_path, batch_size=1)

    # create deep_sort tracker
    max_cosine_distance = 0.5  # threshold for cosine distance
    nn_budget = None
    metric = nn_matching.NearestNeighborDistanceMetric("cosine", max_cosine_distance, nn_budget)
    tracker = Tracker(metric)

    # alloc a set of queues to record the motion trace for each track id
    motion_traces = [deque(maxlen=30) for _ in range(9999)]
    total_obj_counter = []

    # initialize a list of colors to represent each possible class label
    np.random.seed(100)
    COLORS = np.random.randint(0, 255, size=(200, 3), dtype="uint8")

    i = 0
    fps = 0.0
    while True:
        ret, frame = get_frame(frame_capture, i, images_input)
        if not ret:
            break
        # time.sleep(0.2)
        i += 1

        start_time = time.time()
        image = Image.fromarray(frame[..., ::-1])  # bgr to rgb

        # detect objects in the image
        _, out_boxes, out_classnames, out_scores = yolo.detect_image(image)
        # get tracking objects and convert bbox from (xmin,ymin,xmax,ymax) to (x,y,w,h)
        boxes, class_names, scores = get_tracking_object(out_boxes, out_classnames,
                                                         out_scores, tracking_class_names)

        # get encoded features of the bbox area image
        features = encoder(frame, boxes)

        # form up detection records
        detections = [
            Detection(bbox, score, feature, class_name)
            for bbox, score, class_name, feature in zip(
                boxes, scores, class_names, features)
        ]

        # Run non-maximum suppression.
        # nms_boxes = np.array([d.tlwh for d in detections])
        # nms_scores = np.array([d.confidence for d in detections])
        # indices = preprocessing.non_max_suppression(nms_boxes, nms_max_overlap, nms_scores)
        # detections = [detections[i] for i in indices]

        # Call the tracker
        tracker.predict()
        tracker.update(detections)

        # show all detection results as white boxes
        for det in detections:
            bbox = det.to_tlbr()
            cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])),
                          (int(bbox[2]), int(bbox[3])), (255, 255, 255), 2)
            cv2.putText(frame, str(det.class_name),
                        (int(bbox[0]), int(bbox[1] - 20)), 0, 5e-3 * 150,
                        (255, 255, 255), 2)

        track_indexes = []
        track_count = 0
        for track in tracker.tracks:
            if not track.is_confirmed() or track.time_since_update > 1:
                continue

            # record tracking info and get bbox
            track_indexes.append(int(track.track_id))
            total_obj_counter.append(int(track.track_id))
            bbox = track.to_tlbr()

            # show all tracking results as colored boxes
            color = [int(c) for c in COLORS[track_indexes[track_count] % len(COLORS)]]
            cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])),
                          (int(bbox[2]), int(bbox[3])), color, 3)
            cv2.putText(frame, str(track.track_id),
                        (int(bbox[0]), int(bbox[1] - 20)), 0, 5e-3 * 150, color, 2)
            if track.class_name:
                cv2.putText(frame, str(track.class_name),
                            (int(bbox[0] + 30), int(bbox[1] - 20)), 0,
                            5e-3 * 150, color, 2)

            track_count += 1

            # get the center point (x, y) of the current track bbox and record it in the queue
            center = (int((bbox[0] + bbox[2]) / 2), int((bbox[1] + bbox[3]) / 2))
            motion_traces[track.track_id].append(center)

            # draw the current center point
            thickness = 5
            cv2.circle(frame, center, 1, color, thickness)

            # draw the motion trace
            motion_trace = motion_traces[track.track_id]
            for j in range(1, len(motion_trace)):
                if motion_trace[j - 1] is None or motion_trace[j] is None:
                    continue
                thickness = int(np.sqrt(64 / float(j + 1)) * 2)
                cv2.line(frame, motion_trace[j - 1], motion_trace[j], color, thickness)

        # show tracking statistics
        total_obj_num = len(set(total_obj_counter))
        cv2.putText(frame, "Total Object Counter: " + str(total_obj_num),
                    (int(20), int(120)), 0, 5e-3 * 200, (0, 255, 0), 2)
        cv2.putText(frame, "Current Object Counter: " + str(track_count),
                    (int(20), int(80)), 0, 5e-3 * 200, (0, 255, 0), 2)
        cv2.putText(frame, "FPS: %f" % fps,
                    (int(20), int(40)), 0, 5e-3 * 200, (0, 255, 0), 3)

        # refresh window
        cv2.namedWindow("DeepSORT", 0)
        cv2.resizeWindow('DeepSORT', 1024, 768)
        cv2.imshow('DeepSORT', frame)

        if save_output:
            # save a frame
            out.write(frame)

        end_time = time.time()
        fps = (fps + (1. / (end_time - start_time))) / 2

        # Press q to stop video
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

    # Release everything when the job is finished
    if not images_input:
        frame_capture.release()
    if save_output:
        out.release()
    cv2.destroyAllWindows()
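
# `get_frame` is used above but not defined in this snippet. A minimal sketch,
# assuming it unifies reading from an image list and a cv2.VideoCapture (the
# signature is inferred from the call site; details are assumptions):
def get_frame(frame_capture, i, images_input):
    if images_input:
        # frame_capture is a sorted list of image paths
        if i >= len(frame_capture):
            return False, None
        frame = cv2.imread(frame_capture[i])
        return frame is not None, frame
    # frame_capture is a cv2.VideoCapture
    return frame_capture.read()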
def main():
    ##########################################################################
    # preparation part
    args = arg_parse()
    confidence = float(args.confidence)
    nms_thesh = float(args.nms_thresh)
    start = 0
    CUDA = torch.cuda.is_available()
    num_classes = 80

    model = Darknet(cfgfile)
    model.load_weights(weightsfile)
    model.net_info["height"] = args.reso
    inp_dim = int(model.net_info["height"])

    assert inp_dim % 32 == 0  # assert aborts the program when the condition is false
    assert inp_dim > 32

    if CUDA:
        model.cuda()
    model.eval()

    global confirm
    global person

    fps = 0.0
    count = 0
    frame = 0
    person = []
    confirm = False
    reconfirm = False
    count_yolo = 0

    model_filename = 'model_data/mars-small128.pb'
    encoder = gdet.create_box_encoder(model_filename, batch_size=1)
    # max_cosine_distance, nn_budget and nms_max_overlap are module-level
    # parameters in the original script; they are not defined in this snippet
    metric = nn_matching.NearestNeighborDistanceMetric("cosine", max_cosine_distance, nn_budget)
    tracker = Tracker(metric)

    # record the video
    fourcc = cv2.VideoWriter_fourcc(*'XVID')
    # out = cv2.VideoWriter('output/testwrite_normal.avi', fourcc, 15.0, (640, 480), True)

    cap = cv2.VideoCapture(0)

    detect_time = []
    recogn_time = []
    kalman_time = []
    aux_time = []
    while True:
        start = time.time()
        ret, color_image = cap.read()
        '''
        frames = pipeline.wait_for_frames()
        color_frame = frames.get_color_frame()
        color_image = np.asanyarray(color_frame.get_data())
        '''
        if color_image is None:
            break

        img, orig_im, dim = prep_image(color_image, inp_dim)
        im_dim = torch.FloatTensor(dim).repeat(1, 2)

        ######################################################################
        # people detection part
        if CUDA:
            im_dim = im_dim.cuda()
            img = img.cuda()

        time_a = time.time()
        if count_yolo % 3 == 0:  # detect people every 3 frames
            # run the preprocessed image through the YOLO network to get detections
            output = model(Variable(img), CUDA)
            output = write_results(output, confidence, num_classes,
                                   nms=True, nms_conf=nms_thesh)

            if type(output) == int:
                fps = (fps + (1. / (time.time() - start))) / 2
                print("fps= %f" % fps)
                cv2.imshow("frame", orig_im)
                key = cv2.waitKey(1)
                if key & 0xFF == ord('q'):
                    break
                continue

            # clamp the tensor values to the valid coordinate range
            output[:, 1:5] = torch.clamp(output[:, 1:5], 0.0, float(inp_dim)) / inp_dim
            # im_dim = im_dim.repeat(output.size(0), 1)
            output[:, [1, 3]] *= color_image.shape[1]
            output[:, [2, 4]] *= color_image.shape[0]
            output = output.cpu().numpy()
            output = sellect_person(output)  # drop non-person detections to save computation
            output = np.array(output)
            output_update = output
        elif count_yolo % 3 != 0:
            output = output_update
        count_yolo += 1

        # draw the detection results on the original image;
        # output[0, 1:5] holds the top-left and bottom-right corners of the box
        list(map(lambda x: write(x, orig_im), output))
        detect_time.append(time.time() - time_a)

        ######################################################################
        # Kalman filter (tracking) part
        time_a = time.time()

        outputs_tlwh = to_tlwh(output)  # convert output to the tlwh format the Kalman update expects
        features = encoder(orig_im, outputs_tlwh)
        detections = [
            Detection(output_tlwh, 1.0, feature)
            for output_tlwh, feature in zip(outputs_tlwh, features)
        ]

        # Run non-maxima suppression.
        boxes = np.array([d.tlwh for d in detections])
        scores = np.array([d.confidence for d in detections])
        indices = preprocessing.non_max_suppression(boxes, nms_max_overlap, scores)
        detections = [detections[i] for i in indices]

        # Call the tracker
        tracker.predict()
        tracker.update(detections)

        for track in tracker.tracks:
            if not track.is_confirmed() or track.time_since_update > 1:
                continue
            box = track.to_tlbr()
            cv2.rectangle(orig_im, (int(box[0]), int(box[1])),
                          (int(box[2]), int(box[3])), (255, 255, 255), 2)
            cv2.putText(orig_im, str(track.track_id),
                        (int(box[0]), int(box[1])), 0, 5e-3 * 200, (0, 255, 0), 2)

        kalman_time.append(time.time() - time_a)

        ######################################################################
        # face recognition part
        time_a = time.time()
        if confirm == False:
            saved_model = './ArcFace/model/068.pth'
            name_list = os.listdir('./users')
            path_list = [os.path.join('./users', i, '%s.txt' % (i)) for i in name_list]
            total_features = np.empty((128,), np.float32)
            for i in path_list:
                temp = np.loadtxt(i)
                total_features = np.vstack((total_features, temp))
            total_features = total_features[1:]

            # threshold = 0.30896  # this threshold is not a good fit, probably
            # because of the gap between the training and test sets
            threshold = 0.5
            model_facenet = mobileFaceNet()
            model_facenet.load_state_dict(torch.load(saved_model)['backbone_net_list'])
            model_facenet.eval()
            # use_cuda = torch.cuda.is_available() and True
            # device = torch.device("cuda" if use_cuda else "cpu")
            device = torch.device("cuda")  # is_cuda_available

            trans = transforms.Compose([
                transforms.Resize((112, 112)),
                transforms.ToTensor(),
                transforms.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5])
            ])
            model_facenet.to(device)

            img = Image.fromarray(color_image)
            bboxes, landmark = detect_faces(img)  # detect faces first

            if len(bboxes) == 0:
                print('detect no people')
            else:
                for bbox in bboxes:
                    loc_x_y = [bbox[2], bbox[1]]
                    # crop the face box out of the image
                    person_img = color_image[int(bbox[1]):int(bbox[3]),
                                             int(bbox[0]):int(bbox[2])].copy()
                    # compute the feature of the cropped face
                    feature = np.squeeze(get_feature(person_img, model_facenet, trans, device))
                    cos_distance = cosin_metric(total_features, feature)
                    index = np.argmax(cos_distance)
                    if cos_distance[index] <= threshold:
                        continue
                    person = name_list[index]

                    # draw the name and the face box here
                    orig_im = draw_ch_zn(orig_im, person, font, loc_x_y)  # add the name
                    cv2.rectangle(orig_im, (int(bbox[0]), int(bbox[1])),
                                  (int(bbox[2]), int(bbox[3])), (0, 0, 255))  # add the box

            # cv2.imshow("frame", orig_im)

            ##################################################################
            # confirmation part
            print('confirmation rate: {} %'.format(count * 10))
            cv2.putText(orig_im, 'confirmation rate: {} %'.format(count * 10),
                        (10, 30), cv2.FONT_HERSHEY_PLAIN, 2, [0, 255, 0], 2)
            if len(bboxes) != 0 and len(output) != 0:
                if bboxes[0, 0] > output[0, 1] and bboxes[0, 1] > output[0, 2] \
                        and bboxes[0, 2] < output[0, 3] and bboxes[0, 3] < output[0, 4] \
                        and person:
                    count += 1
                frame += 1
            if count >= 10 and frame <= 30:
                confirm = True
                print('confirmed: the face belongs to that person')
            elif frame >= 30:
                print('failed to confirm, starting again')
                reconfirm = True
                count = 0
                frame = 0
            if reconfirm == True:
                cv2.putText(orig_im, 'failed to confirm, starting again',
                            (10, 60), cv2.FONT_HERSHEY_PLAIN, 2, [0, 255, 0], 2)

        recogn_time.append(time.time() - time_a)
        time_a = time.time()

        ######################################################################
        # show the final output result
        if not confirm:
            cv2.putText(orig_im, 'still not confirmed',
                        (output[0, 1].astype(np.int32) + 100,
                         output[0, 2].astype(np.int32) + 20),
                        cv2.FONT_HERSHEY_PLAIN, 2, [0, 0, 255], 2)
        if confirm:
            # overlay the recognized name on track 1
            for track in tracker.tracks:
                bbox = track.to_tlbr()
                if track.track_id == 1:
                    cv2.putText(orig_im, person,
                                (int(bbox[0]) + 100, int(bbox[1]) + 20),
                                cv2.FONT_HERSHEY_PLAIN, 2, [0, 255, 0], 2)

        # rate.sleep()
        cv2.imshow("frame", orig_im)
        # out.write(orig_im)
        key = cv2.waitKey(1)
        if key & 0xFF == ord('q'):
            break

        aux_time.append(time.time() - time_a)
        fps = (fps + (1. / (time.time() - start))) / 2
        print("fps= %f" % fps)

    # calculate how long each part takes
    avg_detect_time = np.mean(detect_time)
    avg_recogn_time = np.mean(recogn_time)
    avg_kalman_time = np.mean(kalman_time)
    avg_aux_time = np.mean(aux_time)
    print("avg detect: {}".format(avg_detect_time))
    print("avg recogn: {}".format(avg_recogn_time))
    print("avg kalman: {}".format(avg_kalman_time))
    print("avg aux: {}".format(avg_aux_time))
    print("avg fps: {}".format(1 / (avg_detect_time + avg_recogn_time +
                                    avg_kalman_time + avg_aux_time)))
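
# `sellect_person` and `to_tlwh` are used above but not defined in this
# snippet. Minimal sketches, assuming each output row is laid out as
# (batch_idx, x1, y1, x2, y2, objectness, class_score, class_idx) with COCO
# class 0 = person (the row layout is an assumption):
def sellect_person(output):
    # keep only detections whose class index is 0 ('person' in COCO)
    return [det for det in output if int(det[-1]) == 0]

def to_tlwh(output):
    # convert corner boxes in output[:, 1:5] to (top-left x, y, width, height)
    boxes = []
    for det in output:
        x1, y1, x2, y2 = det[1], det[2], det[3], det[4]
        boxes.append([x1, y1, x2 - x1, y2 - y1])
    return np.array(boxes)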
def main():
    signal.signal(signal.SIGINT, signal_handler)
    print('Running. Press Ctrl + C to exit.')
    print(DEFAULT_LABEL_MAP_PATH)

    # parse arguments
    parser = argparse.ArgumentParser(description='--- Raspbery Pi Urban Mobility Tracker ---')
    parser.add_argument('-modelpath', dest='model_path', type=str, required=False,
                        help='specify path of a custom detection model')
    parser.add_argument('-labelmap', dest='label_map_path', default=DEFAULT_LABEL_MAP_PATH,
                        type=str, required=False, help='specify the label map text file')
    parser.add_argument('-imageseq', dest='image_path', type=str, required=False,
                        help='specify an image sequence')
    parser.add_argument('-video', dest='video_path', type=str, required=False,
                        help='specify video file')
    parser.add_argument('-camera', dest='camera', default=True, action='store_true',
                        help='specify this when using the rpi camera as the input')
    parser.add_argument('-threshold', dest='threshold', type=float, default=0.5,
                        required=False, help='specify a custom inference threshold')
    parser.add_argument('-tpu', dest='tpu', required=False, default=True,
                        action='store_true', help='add this when using a coral usb accelerator')
    parser.add_argument('-nframes', dest='nframes', type=int, required=False,
                        default=10, help='specify number of frames to process')
    parser.add_argument('-display', dest='live_view', required=False, default=True,
                        action='store_true',
                        help='add this flag to view a live display. note, that this will greatly slow down the fps rate.')
    parser.add_argument('-save', dest='save_frames', required=False, default=False,
                        action='store_true',
                        help='add this flag if you want to persist the image output. note, that this will greatly slow down the fps rate.')
    args = parser.parse_args()

    # basic checks
    if args.model_path:
        assert args.label_map_path, \
            "when specifying a custom model, you must also specify a label map path using: '-labelmap <path to labelmap.txt>'"
        assert os.path.exists(args.model_path) == True, "can't find the specified model..."
    if args.label_map_path:
        assert os.path.exists(args.label_map_path) == True, "can't find the specified label map..."
    if args.video_path:
        assert os.path.exists(args.video_path) == True, "can't find the specified video file..."

    print('> INITIALIZING UMT...')
    print('   > THRESHOLD:', args.threshold)

    # parse label map
    labels = parse_label_map(args, DEFAULT_LABEL_MAP_PATH)

    # initialize detector
    interpreter = initialize_detector(args)

    # create output directory
    if not os.path.exists('output') and args.save_frames:
        os.makedirs('output')

    # initialize deep sort tracker
    metric = nn_matching.NearestNeighborDistanceMetric("cosine", MAX_COSINE_DIST, NN_BUDGET)
    tracker = Tracker(metric)

    # initialize image source
    img_generator = initialize_img_source(args)

    # initialize plot colors (if necessary)
    if args.live_view or args.save_frames:
        COLORS = (np.random.rand(32, 3) * 255).astype(int)

    # accumulates one row per tracked object per frame
    # (assumed initialization; not shown in the original snippet)
    tracked_list = []

    # main tracking loop
    print('\n> TRACKING...')
    # with open(TRACKER_OUTPUT_TEXT_FILE, 'w') as out_file:
    for i, pil_img in enumerate(img_generator(args)):

        f_time = int(time.time())
        print('> FRAME:', i)

        # add header to trajectory file
        '''
        if i == 0:
            header = (f'frame_num, rpi_time, obj_class, obj_id, obj_age,'
                      'obj_t_since_last_update, obj_hits,'
                      'xmin, ymin, xmax, ymax')
            tracked_list.append(header)
        '''

        # get detections
        detections = generate_detections(pil_img, interpreter, args.threshold)

        # proceed to updating state
        if len(detections) == 0:
            print('> no detections...')
        else:
            # update tracker
            tracker.predict()
            tracker.update(detections)

            # save object locations
            if len(tracker.tracks) > 0:
                for track in tracker.tracks:
                    bbox = track.to_tlbr()
                    class_name = labels[track.get_class()]
                    row = (f'{i},{f_time},{class_name},'
                           f'{track.track_id},{int(track.age)},'
                           f'{int(track.time_since_update)},{str(track.hits)},'
                           f'{int(bbox[0])},{int(bbox[1])},'
                           f'{int(bbox[2])},{int(bbox[3])}')
                    tracked_list.append(row)

        # only for live display
        if args.live_view or args.save_frames:

            # convert pil image to cv2
            cv2_img = cv2.cvtColor(np.array(pil_img), cv2.COLOR_RGB2BGR)

            # cycle through actively tracked objects
            for track in tracker.tracks:
                if not track.is_confirmed() or track.time_since_update > 1:
                    continue

                # draw detections and label
                bbox = track.to_tlbr()
                class_name = labels[track.get_class()]
                color = COLORS[int(track.track_id) % len(COLORS)].tolist()
                cv2.rectangle(cv2_img, (int(bbox[0]), int(bbox[1])),
                              (int(bbox[2]), int(bbox[3])), color, 2)
                cv2.rectangle(cv2_img, (int(bbox[0]), int(bbox[1] - 30)),
                              (int(bbox[0]) + (len(str(class_name)) + len(str(track.track_id))) * 17,
                               int(bbox[1])), color, -1)
                cv2.putText(cv2_img, str(class_name) + "-" + str(track.track_id),
                            (int(bbox[0]), int(bbox[1] - 10)),
                            cv2.FONT_HERSHEY_PLAIN, 1, (255, 255, 255), 1)

            # live view
            if args.live_view:
                cv2.imshow("tracker output", cv2_img)
                cv2.waitKey(1)

            # persist frames
            if args.save_frames:
                cv2.imwrite(f'output/frame_{i}.jpg', cv2_img)

    cv2.destroyAllWindows()
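
# Module-level constants referenced above but not defined in this snippet.
# Illustrative values only; the real project defaults may differ:
MAX_COSINE_DIST = 0.4                               # assumed value
NN_BUDGET = None                                    # assumed value
DEFAULT_LABEL_MAP_PATH = 'models/coco_labels.txt'   # assumed path
TRACKER_OUTPUT_TEXT_FILE = 'object_paths.txt'       # assumed path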
def main(yolo):
    # Definition of the parameters
    max_cosine_distance = 0.3
    nn_budget = None
    nms_max_overlap = 1.0

    # deep_sort
    model_filename = 'model_data/mars-small128.pb'
    encoder = gdet.create_box_encoder(model_filename, batch_size=1)
    metric = nn_matching.NearestNeighborDistanceMetric("cosine", max_cosine_distance, nn_budget)
    tracker = Tracker(metric)

    writeVideo_flag = True

    # video_capture = cv2.VideoCapture(0)
    # video_capture = cv2.VideoCapture('videos/soccer_01.mp4')
    video_capture = cv2.VideoCapture('videos/M0902.avi')
    # video_capture = cv2.VideoCapture('videos/uav123_car6.avi')
    # video_capture = cv2.VideoCapture('videos/car/car_11.mp4')

    if writeVideo_flag:
        # Define the codec and create VideoWriter object
        w = int(video_capture.get(3))
        h = int(video_capture.get(4))
        fourcc = cv2.VideoWriter_fourcc(*'MJPG')
        out = cv2.VideoWriter('output.avi', fourcc, 15, (w, h))
        list_file = open('detection.txt', 'w')
        frame_index = -1

    fps = 0.0
    i = 0
    while True:
        ret, frame = video_capture.read()  # frame shape 640*480*3
        if ret != True:
            break
        t1 = time.time()

        # i += 1
        # if i % 2 != 1:
        #     continue

        image = Image.fromarray(frame)
        boxs, out_classes = yolo.detect_image(image)
        # print("box_num", len(boxs))
        features = encoder(frame, boxs)

        # score to 1.0 here).
        detections = [Detection(bbox, 1.0, feature) for bbox, feature in zip(boxs, features)]

        # Run non-maxima suppression.
        boxes = np.array([d.tlwh for d in detections])
        scores = np.array([d.confidence for d in detections])
        indices = preprocessing.non_max_suppression(boxes, nms_max_overlap, scores)
        detections = [detections[i] for i in indices]
        print("indices:", indices)
        if len(detections) > 0:  # guard the debug print; the original indexed the filtered list with a pre-NMS index
            print("detection: ", detections[0])

        # Call the tracker
        tracker.predict()
        tracker.update(detections)

        fps = (fps + (1. / (time.time() - t1))) / 2
        # print("fps= %f" % (fps))

        for track in tracker.tracks:
            if not track.is_confirmed() or track.time_since_update > 1:
                continue
            bbox = track.to_tlbr()
            cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), (255, 255, 255), 2)
            cv2.putText(frame, str(track.track_id), (int(bbox[0]), int(bbox[1])), 0, 5e-3 * 200, (0, 255, 0), 2)

        # note: out_classes is indexed by the pre-NMS detection order
        for j, det in enumerate(detections):
            bbox = det.to_tlbr()
            cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), (255, 0, 0), 2)
            cv2.putText(frame, str(out_classes[j]), (int(bbox[0]), int(bbox[1]) - 35), 0, 5e-3 * 200, (143, 17, 86), 2)

        cv2.namedWindow("track result", 0)
        cv2.resizeWindow("track result", 1280, 720)
        cv2.imshow('track result', frame)

        if writeVideo_flag:
            # save a frame
            out.write(frame)
            frame_index = frame_index + 1
            list_file.write(str(frame_index) + ' ')
            if len(boxs) != 0:
                for i in range(0, len(boxs)):
                    list_file.write(str(boxs[i][0]) + ' ' + str(boxs[i][1]) + ' ' +
                                    str(boxs[i][2]) + ' ' + str(boxs[i][3]) + ' ')
            list_file.write('\n')

        # Press Q to stop!
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

    video_capture.release()
    if writeVideo_flag:
        out.release()
        list_file.close()
    cv2.destroyAllWindows()
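# The preprocessing.non_max_suppression used throughout these scripts is
# deep_sort's greedy NMS over tlwh boxes (some forks also pass a classes
# argument). A minimal reference reimplementation for clarity only -- not
# the library's exact code:
import numpy as np

def greedy_nms(boxes_tlwh, max_overlap, scores):
    """Return indices of boxes to keep, best score first."""
    if len(boxes_tlwh) == 0:
        return []
    boxes = np.asarray(boxes_tlwh, dtype=float)
    x1, y1 = boxes[:, 0], boxes[:, 1]
    x2, y2 = x1 + boxes[:, 2], y1 + boxes[:, 3]
    area = boxes[:, 2] * boxes[:, 3]
    order = np.argsort(scores)  # ascending; best candidate is last
    keep = []
    while len(order) > 0:
        i = order[-1]
        keep.append(int(i))
        # overlap of every remaining box with the kept box, measured
        # against the remaining box's own area
        xx1 = np.maximum(x1[i], x1[order[:-1]])
        yy1 = np.maximum(y1[i], y1[order[:-1]])
        xx2 = np.minimum(x2[i], x2[order[:-1]])
        yy2 = np.minimum(y2[i], y2[order[:-1]])
        w = np.maximum(0.0, xx2 - xx1)
        h = np.maximum(0.0, yy2 - yy1)
        overlap = (w * h) / area[order[:-1]]
        order = order[:-1][overlap <= max_overlap]
    return keep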
def detect_yolo(self, input):
    try:
        this_dir = os.path.dirname(__file__)
        yolo = YOLO()
        max_cosine_distance = 0.3
        nn_budget = None
        nms_max_overlap = 1.0

        # deep_sort
        model_filename = os.path.join(this_dir, 'models/mars-small128.pb')
        encoder = gdet.create_box_encoder(model_filename, batch_size=1)
        metric = nn_matching.NearestNeighborDistanceMetric("cosine", max_cosine_distance, nn_budget)
        tracker = Tracker(metric)

        writeVideo_flag = True
        video_capture = cv2.VideoCapture(input)

        if writeVideo_flag:
            # Define the codec and create VideoWriter object
            w = int(video_capture.get(3))
            h = int(video_capture.get(4))
            fourcc = cv2.VideoWriter_fourcc(*'MJPG')
            out = cv2.VideoWriter(os.path.join(this_dir, 'data/output.avi'), fourcc, 15, (w, h))
            list_file = open(os.path.join(this_dir, 'data/detection.txt'), 'w')
            frame_index = -1

        fps = 0.0
        n = 0
        skip_frame = 5
        while True:
            ret, frame = video_capture.read()  # frame shape 640*480*3
            if ret != True:
                break
            if int(skip_frame) != n:  # process one frame in every skip_frame + 1
                n += 1
                continue
            n = 0
            t1 = time.time()

            image = Image.fromarray(frame)
            boxs, classes = yolo.detect_image(image)
            for idb, box in enumerate(boxs):
                # cv2.rectangle(frame, (int(box[0]), int(box[1])),
                #               (int(box[0]) + int(box[2]), int(box[1]) + int(box[3])),
                #               (255, 255, 255), 2)
                cv2.putText(frame, str(classes[idb]), (int(box[0]), int(box[1])), 0, 5e-3 * 100, (0, 255, 0), 2)
            features = encoder(frame, boxs)

            # score to 1.0 here).
            detections = [Detection(bbox, 1.0, feature) for bbox, feature in zip(boxs, features)]

            # Run non-maxima suppression.
            boxes = np.array([d.tlwh for d in detections])
            scores = np.array([d.confidence for d in detections])
            indices = preprocessing.non_max_suppression(boxes, nms_max_overlap, scores)
            detections = [detections[i] for i in indices]

            # Call the tracker
            tracker.predict()
            tracker.update(detections)

            for track in tracker.tracks:
                # skip unconfirmed or stale tracks (the original condition was inverted:
                # it read `is_confirmed() and time_since_update > 1`)
                if not track.is_confirmed() or track.time_since_update > 1:
                    continue
                bbox = track.to_tlbr()
                cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), (255, 255, 255), 2)
                cv2.putText(frame, str(track.track_id), (int(bbox[0]), int(bbox[1])), 0, 5e-3 * 200, (0, 255, 0), 2)

            for idb, det in enumerate(detections):
                bbox = det.to_tlbr()
                cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), (255, 0, 0), 2)
            # cv2.imshow('gallery', frame)

            if writeVideo_flag:
                # save a frame
                out.write(frame)
                frame_index = frame_index + 1
                list_file.write(str(frame_index) + ' ')
                if len(boxs) != 0:
                    for i in range(0, len(boxs)):
                        list_file.write(str(boxs[i][0]) + ' ' + str(boxs[i][1]) + ' ' +
                                        str(boxs[i][2]) + ' ' + str(boxs[i][3]) + ';')
                list_file.write('\n')

            fps = (fps + (1. / (time.time() - t1))) / 2
            print("fps= %f" % (fps))

            # Press Q to stop!
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break

        video_capture.release()
        if writeVideo_flag:
            out.release()
            list_file.close()
        cv2.destroyAllWindows()
        msg = "process finished!!!"
    except Exception as e:
        print(e)
        msg = "process error!!!"
    return msg
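# The n/skip_frame bookkeeping above processes one frame in every
# (skip_frame + 1). A modulo counter expresses the same policy directly:
def should_process(frame_idx, skip_frame=5):
    return frame_idx % (skip_frame + 1) == 0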
class DeepSort(object):

    def __init__(self, max_age=30, nms_max_overlap=1.0, max_cosine_distance=0.2,
                 nn_budget=None, override_track_class=None, clock=None, half=True):
        '''
        Input Params:
        - nms_max_overlap: Non-maxima suppression threshold: Maximum detection overlap
        - max_cosine_distance: Gating threshold for cosine distance
        - nn_budget: Maximum size of the appearance descriptors; if None, no budget is enforced
        '''
        print('Initialising DeepSort..')
        # self.video_info = video_info
        # assert clock is not None
        self.nms_max_overlap = nms_max_overlap
        metric = nn_matching.NearestNeighborDistanceMetric(
            "cosine", max_cosine_distance, nn_budget)
        self.tracker = Tracker(metric, max_age=max_age,
                               override_track_class=override_track_class, clock=clock)
        self.embedder = Embedder(half=half)
        print('DeepSort Tracker initialised!')

    def update_tracks(self, frame, raw_detections):
        """Run the multi-target tracker on the next frame.

        Parameters
        ----------
        frame : ndarray
            The current video frame (the original docstring wrongly described
            this as a MOTChallenge sequence directory path).
        raw_detections : list
            List of triples ([left, top, w, h], confidence, detection_class).

        Returns
        -------
        list of track objects (look into track.py for more info, or see the
        "main" section below in this script for a simple example)
        """
        results = []  # unused in this snippet
        # discard detections with non-positive width or height
        raw_detections = [d for d in raw_detections if d[0][2] > 0 and d[0][3] > 0]
        embeds = self.generate_embeds(frame, raw_detections)

        # Proper deep sort detection objects that consist of bbox, confidence and embedding.
        detections = self.create_detections(frame, raw_detections, embeds)

        # Run non-maxima suppression.
        boxes = np.array([d.tlwh for d in detections])
        scores = np.array([d.confidence for d in detections])
        indices = preprocessing.non_max_suppression(boxes, self.nms_max_overlap, scores)
        detections = [detections[i] for i in indices]

        # Update tracker.
        self.tracker.predict()
        self.tracker.update(detections)

        return self.tracker.tracks

    def generate_embeds(self, frame, raw_dets):
        # crop each detection out of the frame, clipped to the image bounds
        crops = []
        im_height, im_width = frame.shape[:2]
        for detection in raw_dets:
            if detection is None:
                continue
            l, t, w, h = [int(x) for x in detection[0]]
            r = l + w
            b = t + h
            crop_l = max(0, l)
            crop_r = min(im_width, r)
            crop_t = max(0, t)
            crop_b = min(im_height, b)
            crops.append(frame[crop_t:crop_b, crop_l:crop_r])
        return self.embedder.predict(crops)

    def create_detections(self, frame, raw_dets, embeds):
        detection_list = []
        for i in range(len(embeds)):
            detection_list.append(Detection(raw_dets[i][0], raw_dets[i][1], embeds[i]))
        return detection_list

    def refresh_track_ids(self):
        # note: this bare attribute access has no effect; it was presumably
        # meant to reset the ID counter, e.g. self.tracker._next_id = 1
        self.tracker._next_id
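# A minimal usage sketch for the DeepSort wrapper above, assuming the class
# and its deep_sort/Embedder dependencies are importable. The frame and the
# raw detection are placeholders, not real detector output; the triple
# format comes from the update_tracks docstring:
import numpy as np

def demo_update(deepsort):
    frame = np.zeros((480, 640, 3), dtype=np.uint8)  # stand-in BGR frame
    raw_detections = [([100, 120, 40, 80], 0.9, 'person')]
    for track in deepsort.update_tracks(frame, raw_detections):
        if not track.is_confirmed() or track.time_since_update > 1:
            continue
        print(track.track_id, track.to_tlbr())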
def main(_argv):
    region = load_ROI()

    # Definition of the parameters
    max_cosine_distance = 0.3  # default = 0.5
    nn_budget = None
    nms_max_overlap = 0.8  # default = 0.5

    # initialize deep sort
    model_filename = 'model_data/mars-small128.pb'
    encoder = gdet.create_box_encoder(model_filename, batch_size=1)
    metric = nn_matching.NearestNeighborDistanceMetric("cosine", max_cosine_distance, nn_budget)
    tracker = Tracker(metric)

    physical_devices = tf.config.experimental.list_physical_devices('GPU')
    if len(physical_devices) > 0:
        tf.config.experimental.set_memory_growth(physical_devices[0], True)

    if FLAGS.tiny:
        yolo = YoloV3Tiny(classes=FLAGS.num_classes)
    else:
        yolo = YoloV3(classes=FLAGS.num_classes)

    video_name = os.path.splitext(FLAGS.video)[-2]
    weights = 'weights/yolov3_sang.tf'
    yolo.load_weights(weights)
    logging.info('weights loaded')

    class_names = [c.strip() for c in open(FLAGS.classes).readlines()]
    logging.info('classes loaded')

    # write result
    result = "tracking_result/{}_track.txt".format(video_name)
    file_out = open(result, 'w')

    path = os.getcwd()
    path = str(os.path.split(os.path.split(path)[0])[0])
    # vid_path = os.path.join(path, "Data/{}/{}.mp4".format(video_name, video_name))
    vid_path = os.path.join(path, "data/test_data/{}.mp4".format(video_name))
    vid = cv2.VideoCapture(vid_path)

    out = None
    if FLAGS.output:
        # by default VideoCapture returns float instead of int
        width = int(vid.get(cv2.CAP_PROP_FRAME_WIDTH))
        height = int(vid.get(cv2.CAP_PROP_FRAME_HEIGHT))
        fps = int(vid.get(cv2.CAP_PROP_FPS))
        codec = cv2.VideoWriter_fourcc(*FLAGS.output_format)
        out = cv2.VideoWriter(FLAGS.output, codec, fps, (width, height))

    frame_index = -1
    fps = 0.0
    count = 0
    while True:
        _, img = vid.read()
        if img is None:
            break

        img_in = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        img_in = tf.expand_dims(img_in, 0)
        img_in = transform_images(img_in, FLAGS.size)

        t1 = time.time()
        boxes, scores, classes, nums = yolo.predict(img_in)
        classes = classes[0]
        names = []
        for i in range(len(classes)):
            names.append(class_names[int(classes[i])])
        names = np.array(names)
        converted_boxes = convert_boxes(img, boxes[0])
        features = encoder(img, converted_boxes)
        detections = [Detection(bbox, score, class_name, feature)
                      for bbox, score, class_name, feature
                      in zip(converted_boxes, scores[0], names, features)]

        # initialize color map
        cmap = plt.get_cmap('tab20b')
        colors = [cmap(i)[:3] for i in np.linspace(0, 1, 20)]

        # run non-maxima suppression
        boxs = np.array([d.tlwh for d in detections])
        scores = np.array([d.confidence for d in detections])
        classes = np.array([d.class_name for d in detections])
        indices = preprocessing.non_max_suppression(boxs, classes, nms_max_overlap, scores)
        detections = [detections[i] for i in indices]

        # Call the tracker
        tracker.predict()
        tracker.update(detections)

        frame_index = frame_index + 1
        if frame_index % 100 == 0:
            print('FRAME: ', frame_index)

        for track in tracker.tracks:
            if not track.is_confirmed() or track.time_since_update > 1:
                continue
            bbox = track.to_tlbr()
            class_name = track.get_class()
            color = colors[int(track.track_id) % len(colors)]
            color = [i * 255 for i in color]

            cv2.rectangle(img, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), color, 1)
            # cv2.rectangle(img, (int(bbox[0]), int(bbox[1] - 30)),
            #               (int(bbox[0]) + (len(class_name) + len(str(track.track_id))) * 17, int(bbox[1])), color, -1)
            # cv2.putText(img, class_name + "-" + str(track.track_id),
            #             (int(bbox[0]), int(bbox[1] - 10)), 0, 0.75, (255, 255, 255), 2)

            x_cen = int((int(bbox[2]) + int(bbox[0])) / 2)
            y_cen = int((int(bbox[3]) + int(bbox[1])) / 2)
            if is_in_region((int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), region) == False:
                # outside the ROI, so delete the track
                track.delete_track()

            cv2.putText(img, "FRAME: " + str(frame_index), (0, 45),
                        cv2.FONT_HERSHEY_COMPLEX_SMALL, 1, (0, 255, 0), 2)

            # write the tracking-result file in the CountMovement standard format
            bb_width = int(bbox[2]) - int(bbox[0])
            bb_height = int(bbox[3]) - int(bbox[1])
            diagonal = math.sqrt(bb_height**2 + bb_width**2)
            file_out.write("{},{},{},{},{},{},{},{},{}\n".format(
                frame_index, track.track_id, x_cen, y_cen, diagonal, -1.0,
                class_to_classNumber(str(class_name)), bb_width, bb_height))

        ### UNCOMMENT BELOW IF YOU WANT CONSTANTLY CHANGING YOLO DETECTIONS TO BE SHOWN ON SCREEN
        for det in detections:
            bbox = det.to_tlbr()
            cv2.rectangle(img, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), (0, 255, 0), 1)

        # print fps on screen
        fps = (fps + (1. / (time.time() - t1))) / 2
        cv2.putText(img, "FPS: {:.2f}".format(fps), (0, 30), cv2.FONT_HERSHEY_COMPLEX_SMALL, 1, (0, 0, 255), 2)
        cv2.imshow('output', img)
        if FLAGS.output:
            out.write(img)

        # press q to quit
        if cv2.waitKey(1) == ord('q'):
            break

    vid.release()
    file_out.close()  # close the tracking-result file (missing in the original)
    if FLAGS.output:
        out.release()
    cv2.destroyAllWindows()
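# is_in_region() is not defined in this snippet. One plausible
# implementation, assuming `region` is a polygon given as an Nx2 array of
# points, tests the box centre with cv2.pointPolygonTest (>= 0 means inside
# or on the edge):
import cv2
import numpy as np

def is_in_region_sketch(top_left, bottom_right, region):
    contour = np.asarray(region, dtype=np.float32).reshape(-1, 1, 2)
    cx = (top_left[0] + bottom_right[0]) / 2.0
    cy = (top_left[1] + bottom_right[1]) / 2.0
    return cv2.pointPolygonTest(contour, (cx, cy), False) >= 0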
def main(_argv): # Definition of the parameters max_cosine_distance = 0.5 nn_budget = None nms_max_overlap = 1.0 #initialize deep sort model_filename = 'model_data/mars-small128.pb' encoder = gdet.create_box_encoder(model_filename, batch_size=1) metric = nn_matching.NearestNeighborDistanceMetric("cosine", max_cosine_distance, nn_budget) tracker = Tracker(metric) physical_devices = tf.config.experimental.list_physical_devices('GPU') if len(physical_devices) > 0: tf.config.experimental.set_memory_growth(physical_devices[0], True) if FLAGS.tiny: yolo = YoloV3Tiny(classes=FLAGS.num_classes) else: yolo = YoloV3(classes=FLAGS.num_classes) yolo.load_weights(FLAGS.weights) logging.info('weights loaded') class_names = [c.strip() for c in open(FLAGS.classes).readlines()] logging.info('classes loaded') try: vid = cv2.VideoCapture(int(FLAGS.video)) except: vid = cv2.VideoCapture(FLAGS.video) out = None if FLAGS.output: # by default VideoCapture returns float instead of int width = int(vid.get(cv2.CAP_PROP_FRAME_WIDTH)) height = int(vid.get(cv2.CAP_PROP_FRAME_HEIGHT)) fps = int(vid.get(cv2.CAP_PROP_FPS)) codec = cv2.VideoWriter_fourcc(*FLAGS.output_format) out = cv2.VideoWriter(FLAGS.output, codec, fps, (width, height)) list_file = open('detection.txt', 'w') frame_index = -1 fps = 0.0 count = 0 while True: _, img = vid.read() if img is None: logging.warning("Empty Frame") time.sleep(0.1) count += 1 if count < 3: continue else: break img_in = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) img_in = tf.expand_dims(img_in, 0) img_in = transform_images(img_in, FLAGS.size) t1 = time.time() boxes, scores, classes, nums = yolo.predict(img_in) classes = classes[0] names = [] for i in range(len(classes)): names.append(class_names[int(classes[i])]) names = np.array(names) converted_boxes = convert_boxes(img, boxes[0]) features = encoder(img, converted_boxes) detections = [ Detection(bbox, score, class_name, feature) for bbox, score, class_name, feature in zip( converted_boxes, scores[0], names, features) ] #initialize color map cmap = plt.get_cmap('tab20b') colors = [cmap(i)[:3] for i in np.linspace(0, 1, 20)] # run non-maxima suppresion boxs = np.array([d.tlwh for d in detections]) scores = np.array([d.confidence for d in detections]) classes = np.array([d.class_name for d in detections]) indices = preprocessing.non_max_suppression(boxs, classes, nms_max_overlap, scores) detections = [detections[i] for i in indices] # Call the tracker tracker.predict() tracker.update(detections) for track in tracker.tracks: if not track.is_confirmed() or track.time_since_update > 1: continue bbox = track.to_tlbr() class_name = track.get_class() color = colors[int(track.track_id) % len(colors)] color = [i * 255 for i in color] cv2.rectangle(img, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), color, 2) cv2.rectangle(img, (int(bbox[0]), int(bbox[1] - 30)), (int(bbox[0]) + (len(class_name) + len(str(track.track_id))) * 17, int(bbox[1])), color, -1) cv2.putText(img, class_name + "-" + str(track.track_id), (int(bbox[0]), int(bbox[1] - 10)), 0, 0.75, (255, 255, 255), 2) ### UNCOMMENT BELOW IF YOU WANT CONSTANTLY CHANGING YOLO DETECTIONS TO BE SHOWN ON SCREEN #for det in detections: # bbox = det.to_tlbr() # cv2.rectangle(img,(int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])),(255,0,0), 2) # print fps on screen fps = (fps + (1. 
/ (time.time() - t1))) / 2 cv2.putText(img, "FPS: {:.2f}".format(fps), (0, 30), cv2.FONT_HERSHEY_COMPLEX_SMALL, 1, (0, 0, 255), 2) cv2.imshow('output', img) if FLAGS.output: out.write(img) frame_index = frame_index + 1 list_file.write(str(frame_index) + ' ') if len(converted_boxes) != 0: for i in range(0, len(converted_boxes)): list_file.write( str(converted_boxes[i][0]) + ' ' + str(converted_boxes[i][1]) + ' ' + str(converted_boxes[i][2]) + ' ' + str(converted_boxes[i][3]) + ' ') list_file.write('\n') # press q to quit if cv2.waitKey(1) == ord('q'): break vid.release() if FLAGS.output: out.release() list_file.close() cv2.destroyAllWindows()
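# The running average fps = (fps + 1/dt) / 2 used in these loops is an
# exponential moving average with smoothing factor 0.5. A generalised form:
def smooth_fps(prev_fps, dt, alpha=0.5):
    inst = 1.0 / dt if dt > 0 else 0.0
    return alpha * prev_fps + (1.0 - alpha) * inst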
def detect_video_with_deepsort(yolo, video_path, rot_number, output_path="", deepsort_model_filename=None):
    # Definition of the parameters
    max_cosine_distance = 0.3
    nn_budget = None
    nms_max_overlap = 1.0

    vid = cv2.VideoCapture(video_path)
    if not vid.isOpened():
        raise IOError("Couldn't open webcam or video")
    video_FourCC = int(vid.get(cv2.CAP_PROP_FOURCC))
    video_fps = vid.get(cv2.CAP_PROP_FPS)
    video_size = (int(vid.get(cv2.CAP_PROP_FRAME_WIDTH)), int(vid.get(cv2.CAP_PROP_FRAME_HEIGHT)))
    isOutput = True if output_path != "" else False
    if isOutput:
        print("!!! TYPE:", type(output_path), type(video_FourCC), type(video_fps), type(video_size))
        out = cv2.VideoWriter(output_path, video_FourCC, video_fps, video_size)
    accum_time = 0
    curr_fps = 0
    fps = "FPS: ??"
    prev_time = timer()

    # load deep_sort
    encoder = gdet.create_box_encoder(deepsort_model_filename, batch_size=1)
    metric = nn_matching.NearestNeighborDistanceMetric("cosine", max_cosine_distance, nn_budget)
    tracker = Tracker(metric)

    while True:
        return_value, frame_bgr = vid.read()
        if not return_value:  # guard against end-of-stream (missing in the original)
            break

        # detection uses the RGB frame because the model was trained on RGB images;
        # deep sort uses the BGR frame because that is what the original code expects
        # 1. opencv reads frames as BGR, so convert to RGB before detection
        frame_rbg = cv2.cvtColor(frame_bgr.copy(), cv2.COLOR_BGR2RGB)
        # 2. rotate the image
        frame_rbg = np.rot90(frame_rbg, rot_number)
        # 3. yolo detection; the output boxes are tlbr
        frame_rbg_Image = Image.fromarray(frame_rbg)
        out_boxes_tlbr, out_scores, out_classes = yolo.get_detect_boxes(frame_rbg_Image)

        # 4. convert the detector's tlbr boxes to tlwh
        out_boxes_tlwh = []
        out_boxes_tlbr_1 = copy.deepcopy(out_boxes_tlbr)  # a list of lists must be deep-copied
        if len(out_boxes_tlbr_1) != 0:
            for bbox in out_boxes_tlbr_1:
                bbox[2:] -= bbox[:2]
                out_boxes_tlwh.append(bbox)
                # print('out_boxes:', out_boxes[i])
        features = encoder(frame_bgr, out_boxes_tlwh)

        # score to 1.0 here).
        detections = [Detection(bbox, 1.0, feature) for bbox, feature in zip(out_boxes_tlwh, features)]

        # 5. Run non-maxima suppression.
        boxes = np.array([d.tlwh for d in detections])
        scores = np.array([d.confidence for d in detections])
        indices = preprocessing.non_max_suppression(boxes, nms_max_overlap, scores)
        detections = [detections[i] for i in indices]

        # 6. deep sort tracking
        tracker.predict()
        tracker.update(detections)

        # 7. draw the deep sort track boxes
        for track in tracker.tracks:
            if not track.is_confirmed() or track.time_since_update > 1:
                continue
            bbox = track.to_tlbr()  # convert tlwh back to tlbr
            cv2.rectangle(frame_bgr, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), (255, 255, 255), 2)
            cv2.putText(frame_bgr, str(track.track_id), (int(bbox[0]), int(bbox[1])), 0, 5e-3 * 200, (0, 255, 0), 2)

        # 8. draw the detection boxes
        detections = out_boxes_tlbr
        for bbox in detections:
            cv2.rectangle(frame_bgr, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), (255, 0, 0), 2)

        curr_time = timer()
        exec_time = curr_time - prev_time
        prev_time = curr_time
        accum_time = accum_time + exec_time
        curr_fps = curr_fps + 1
        if accum_time > 1:
            accum_time = accum_time - 1
            fps = "FPS: " + str(curr_fps)
            curr_fps = 0
        cv2.putText(frame_bgr, text=fps, org=(3, 15), fontFace=cv2.FONT_HERSHEY_SIMPLEX,
                    fontScale=0.50, color=(255, 0, 0), thickness=2)
        # cv2.namedWindow("result", cv2.WINDOW_NORMAL)
        # if isOutput:
        #     out.write(result)
        cv2.imshow('', frame_bgr)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

    yolo.close_session()
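# The in-place `bbox[2:] -= bbox[:2]` above turns a tlbr box [x1, y1, x2, y2]
# into a tlwh box [x, y, w, h] (hence the deep copy, so the original tlbr
# list survives for drawing). Equivalent pure helpers, for reference:
import numpy as np

def tlbr_to_tlwh(box):
    x1, y1, x2, y2 = box
    return np.array([x1, y1, x2 - x1, y2 - y1])

def tlwh_to_tlbr(box):
    x, y, w, h = box
    return np.array([x, y, x + w, y + h])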
def main(yolo, url, CreateBoxEncoder, q):
    producer = None
    if KAFKA_ON:
        ip_port = '{}:{}'.format(KAFKA_IP, KAFKA_PORT)
        producer = KafkaProducer(bootstrap_servers=ip_port)
        logger.debug('open kafka')

    # Definition of the parameters
    max_cosine_distance = 0.3
    nn_budget = None
    nms_max_overlap = 1.0
    metric = nn_matching.NearestNeighborDistanceMetric("cosine", max_cosine_distance, nn_budget)
    tracker = Tracker(metric)
    door = get_door(url)

    # init var
    center_mass = {}
    miss_ids = []
    disappear_box = {}
    person_list = []
    in_house = {}
    in_out_door = {"out_door_per": 0, "into_door_per": 0}
    only_id = str(uuid.uuid4())
    logger.debug('rtmp: {} load finish'.format(url))
    last_person_num = 0
    last_monitor_people = 0

    while True:
        t1 = time.time()
        if q.empty():
            continue
        frame = q.get()
        image = Image.fromarray(frame[..., ::-1])  # bgr to rgb
        boxs, scores_ = yolo.detect_image(image)
        t2 = time.time()
        # print('5====={}======{}'.format(os.getpid(), round(t2 - t1, 4)))
        now = time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(time.time()))
        logger.debug("box_num: {}".format(len(boxs)))
        features = CreateBoxEncoder.encoder(frame, boxs)

        # score to 1.0 here).
        # detections = [Detection(bbox, 1.0, feature) for bbox, feature in zip(boxs, features)]
        detections = [Detection(bbox, scores_, feature)
                      for bbox, scores_, feature in zip(boxs, scores_, features)]

        # Run non-maxima suppression.
        boxes = np.array([d.tlwh for d in detections])
        scores = np.array([d.confidence for d in detections])
        indices = preprocessing.non_max_suppression(boxes, nms_max_overlap, scores)
        detections = [detections[i] for i in indices]

        # Call the tracker
        tracker.predict()
        tracker.update(detections)

        # ids of people tracked in the current frame
        track_id_list = []
        cv2.rectangle(frame, (door[0], door[1]), (door[2], door[3]), (0, 0, 255), 2)
        door_half_h = int(int((door[1] + door[3]) / 2) * DOOR_HIGH)
        cv2.line(frame, (0, door_half_h), (111111, door_half_h), (0, 255, 0), 1, 1)
        high_score_ids = {}
        for track in tracker.tracks:
            # if a tracked target has been missing for MAX_AGE frames, treat it
            # as lost and stash its id in the disappeared-id staging area
            if track.time_since_update == MAX_AGE:
                miss_id = str(track.track_id)
                miss_ids.append(miss_id)
            if not track.is_confirmed() or track.time_since_update > 1:
                continue
            # if the person id already exists, append its box coords to
            # center_mass; otherwise create a new key (person id) holding them
            track_id = str(track.track_id)
            bbox = track.to_tlbr()
            near_door = is_near_door({track_id: bbox}, door)
            if track.score >= 0.92 and not near_door:
                high_score_ids[track_id] = [[int(bbox[0]), int(bbox[1]), int(bbox[2]), int(bbox[3])]]
            track_id_list.append(track_id)
            if track_id in center_mass:
                center_ = center_mass.get(track_id)
                if len(center_) > 49:
                    center_.pop(0)
                center_.append([int(bbox[0]), int(bbox[1]), int(bbox[2]), int(bbox[3])])
            else:
                center_mass[track_id] = [[int(bbox[0]), int(bbox[1]), int(bbox[2]), int(bbox[3])]]
            # --------------------------------------------
            # logger.debug('box1:{}'.format([int(bbox[0]), int(bbox[1]), int(bbox[2]), int(bbox[3])]))
            cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), (255, 0, 0), 2)
            cv2.putText(frame, str(track.track_id), (int(bbox[0]), int(bbox[1])), 0, 5e-3 * 200, (0, 255, 0), 2)
            x0, y0 = int((bbox[0] + bbox[2]) / 2), int((bbox[1] + bbox[3]) / 2)
            cv2.putText(frame, str(round(track.score, 3)), (x0, y0), 0, 0.6, (0, 255, 0), 2)
            # cv2.circle(frame, (x0, y0), 2, (0, 255, 255), thickness=2, lineType=1, shift=0)
            # --------------------------------------------
            # x0, y0 = int((bbox[0] + bbox[2]) / 2), int((bbox[1] + bbox[3]) / 2)
            # w = abs(int(bbox[3]) - int(bbox[1]))
            # h = abs(int(bbox[2]) - int(bbox[0]))
            logger.info('id:{}, score:{}'.format(track_id, track.score))

        for id in miss_ids:
            if id in center_mass.keys():
                disappear_box[id] = center_mass[id]
                del center_mass[id]
        miss_ids.clear()

        # decide who moved in or out of the door
        out_or_in(center_mass, door, in_house, disappear_box, in_out_door)
        # near_door = is_near_door(center_mass, door, disappear_id)

        # relatively reliable person detections, used to report the live head count
        box_score_person = [scores for scores in scores_ if scores > 0.72]
        person_sum = in_out_door['into_door_per'] - in_out_door['out_door_per']
        # if person_sum <= len(high_score_ids) and not near_door:
        if person_sum <= len(high_score_ids):
            # when the reliable count exceeds the in/out difference, correct the
            # entering count and zero the leaving count
            if person_sum == len(high_score_ids) == 1:
                pass
                # print('person_sum == len(high_score_ids) == 1')
            else:
                logger.warning('reset in_out_door person')
                in_out_door['out_door_per'] = 0
                in_out_door['into_door_per'] = len(high_score_ids)
                in_house.update(high_score_ids)
                # print('high score:{}'.format(high_score_ids))
                logger.warning('22222222-id: {} after into of door: {}'.format(
                    in_house.keys(), in_out_door['into_door_per']))
                person_sum = len(high_score_ids)
        if in_out_door['into_door_per'] == in_out_door['out_door_per'] > 0:
            in_out_door['into_door_per'] = in_out_door['out_door_per'] = 0
        if len(person_list) > 100:
            person_list.pop(0)
        person_list.append(person_sum)

        # extract the camera id from the url
        pattern = str(url)[7:].split(r"/")
        logger.debug('pattern {}'.format(pattern[VIDEO_CONDE]))
        video_id = pattern[VIDEO_CONDE]
        logger.info('object tracking cost {}'.format(time.time() - t1))

        # when the recent history is all zeros, reset the in/out counts and all dict state
        if person_list.count(0) == len(person_list) == 101:
            logger.debug('long time person is 0')
            in_out_door['into_door_per'] = 0
            in_out_door['out_door_per'] = 0
            in_house.clear()
            logger.warning('All Clear')

        cv2.putText(frame, "person: " + str(person_sum), (40, 40), 0, 5e-3 * 200, (0, 255, 0), 2)
        cv2.putText(frame, "now_per: " + str(len(box_score_person)), (280, 40), 0, 5e-3 * 200, (0, 255, 0), 2)

        # when the condition is met, send people info to the front-end module
        if (last_person_num != person_sum or last_monitor_people != len(box_score_person)) and producer:
            monitor_people_num = len(box_score_person)
            logger.debug("person-sum:{} monitor-people_num:{}".format(person_sum, monitor_people_num))
            # if int(time.time()) - last_time >= 1:
            cv2.imwrite("/opt/code/deep_sort_yolov3/image/{}.jpg".format(str(uuid.uuid4())), frame)
            # print('save img success')
            save_to_kafka(TOPIC_SHOW, now, person_sum, url, producer, video_id, monitor_people_num, only_id)
            if last_person_num > 0 and person_sum == 0:
                only_id = str(uuid.uuid4())
            if last_person_num == 0 and person_sum > 0:
                # when the condition is met, send info to the NVR module
                save_to_kafka(TOPIC_NVR, now, person_sum, url, producer, video_id, len(box_score_person), only_id)
            # last_time = int(time.time())
            last_person_num = person_sum
            last_monitor_people = len(box_score_person)

        logger.info('url:{} into_door_per: {}'.format(url, in_out_door['into_door_per']))
        logger.info('url:{} out_door_per: {}'.format(url, in_out_door['out_door_per']))
        logger.info('url:{} in_house: {}'.format(url, in_house))
        logger.info('url:{} monitor_people_num: {}'.format(url, len(box_score_person)))
        logger.info('url:{} person_sum: {}'.format(url, person_sum))
        logger.info('GPU image load cost {}'.format(time.time() - t1))
        t3 = time.time()
        fps = round(1 / (round(t3 - t1, 4)), 3)
        # print('pid:{}===fps:{}===time:{}'.format(os.getpid(), fps, round(t3 - t1, 4)))
        # print('*' * 30)
        fps = ((1 / (time.time() - t1)))
        logger.debug("fps= %f" % (fps))
        cv2.imshow('', frame)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
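# save_to_kafka() is not shown in this snippet. Given the kafka-python
# KafkaProducer created above, a plausible shape is one JSON-encoded message
# per event (the field names are assumptions based on the call sites):
import json

def save_to_kafka_sketch(topic, now, person_sum, url, producer, video_id,
                         monitor_people_num, only_id):
    payload = {'time': now, 'person_sum': person_sum, 'url': url,
               'video_id': video_id,
               'monitor_people_num': monitor_people_num, 'only_id': only_id}
    producer.send(topic, json.dumps(payload).encode('utf-8'))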
class Pipeline: """Object detection and tracking pipeline""" def __init__(self, args, input=None): self.args = args # Initialise camera & camera viewport self.init_camera(input) # Initialise output self.init_output(self.args.output) # Initialise object detector (for some reason it has to happen # here & not within detect_objects(), or else the inference engine # gets upset and starts throwing NaNs at me. Thanks, Python.) self.object_detector = SSD_MOBILENET(wanted_label='person', model_file=self.args.model, label_file=self.args.labels, num_threads=self.args.num_threads) # Initialise feature encoder if self.args.encoder_model is None: model_filename = '{}/mars-64x32x3.pb'.format( self.args.deepsorthome) else: model_filename = self.args.encoder_model self.encoder = gdet.create_box_encoder( model_filename, batch_size=self.args.encoder_batch_size) # Initialise tracker nn_budget = None metric = nn_matching.NearestNeighborDistanceMetric( "cosine", self.args.max_cosine_distance, nn_budget) self.tracker = Tracker(metric, max_iou_distance=self.args.max_iou_distance, max_age=self.args.max_age) # Initialise database self.db = {} self.delcount = 0 self.intcount = 0 self.poscount = 0 self.negcount = 0 self.loop = asyncio.get_event_loop() def init_camera(self, input): if input is None: self.input = self.args.input else: self.input = input self.cap = cv2.VideoCapture(self.input) # Configure the 'counting line' in the camera viewport if self.args.line is None: w, h = self.args.camera_width, self.args.camera_height self.countline = np.array([[w / 2, 0], [w / 2, h]], dtype=int) else: self.countline = np.array(list( map(int, self.args.line.strip().split(','))), dtype=int).reshape(2, 2) self.cameracountline = self.countline.astype(float) def init_output(self, output): self.color_mode = None # fixme fourcc = cv2.VideoWriter_fourcc(*'MP4V') fps = self.cap.get(cv2.CAP_PROP_FPS) (w, h) = (self.args.camera_width, self.args.camera_height) self.backbuf = Image.new("RGBA", (w, h), (0, 0, 0, 0)) self.draw = ImageDraw.Draw(self.backbuf) self.output = cv2.VideoWriter(self.args.output, fourcc, fps, (w, h)) def read_frame(self): ret, frame = self.cap.read() return (frame, time.time()) async def capture(self, q): try: with concurrent.futures.ThreadPoolExecutor() as pool: while self.running: (frame, t_frame) = await self.loop.run_in_executor( pool, self.read_frame) #print(frame) if frame is None: print('Frame is None') break await q.put((frame, t_frame)) await asyncio.sleep(1.0 / 30.0) finally: self.cap.release() def run_object_detector(self, image): t1 = time.time() boxes = self.object_detector.detect_image(image) t2 = time.time() return (boxes, t2 - t1) async def detect_objects(self, q_in, q_out): # Initialise background subtractor backSub = cv2.createBackgroundSubtractorMOG2() frameCount = 0 with concurrent.futures.ThreadPoolExecutor() as pool: while self.running: frameCount += 1 # Obtain next video frame (frame, t_frame) = await q_in.get() if self.args.camera_flip: # If we need to flip the image vertically frame = cv2.flip(frame, 0) # Apply background subtraction to find image-mask of areas of motion fgMask = backSub.apply(frame) # Convert to PIL Image image = Image.fromarray( cv2.cvtColor(frame, cv2.COLOR_BGRA2RGBA)) # Run object detection engine within a Thread Pool (boxes0, delta_t) = await self.loop.run_in_executor( pool, self.run_object_detector, image) # Filter object detection boxes, including only those with areas of motion boxes = [] for (x, y, w, h) in boxes0: x, y, w, h = int(x), int(y), int(w), int(h) # 
Check if the box includes any detected motion if np.any(fgMask[x:x + w, y:y + h]): boxes.append((x, y, w, h)) # Send results to next step in pipeline elements = [ FrameInfo(t_frame, frameCount), CameraImage(image), CameraCountLine(self.cameracountline), TimingInfo('Object detection latency', 'objd', delta_t) ] await q_out.put((frame, boxes, elements)) async def encode_features(self, q_in, q_out): with concurrent.futures.ThreadPoolExecutor() as pool: while self.running: # Obtain next video frame and object detection boxes (frame, boxes, elements) = await q_in.get() # Run feature encoder within a Thread Pool features = await self.loop.run_in_executor( pool, self.encoder, frame, boxes) # Build list of 'Detection' objects and send them to next step in pipeline detections = [ Detection(bbox, 1.0, feature) for bbox, feature in zip(boxes, features) ] await q_out.put((detections, elements)) async def track_objects(self, q_in, q_out): while self.running: (detections, elements) = await q_in.get() boxes = np.array([d.tlwh for d in detections]) scores = np.array([d.confidence for d in detections]) indices = preprocessing.non_max_suppression( boxes, self.args.nms_max_overlap, scores) detections = [detections[i] for i in indices] self.tracker.predict() self.tracker.update(detections) await q_out.put((detections, elements)) async def process_results(self, q_in, q_out): while self.running: (detections, elements) = await (q_in.get()) for track in self.tracker.deleted_tracks: i = track.track_id if track.is_deleted(): self.check_deleted_track(track.track_id) for track in self.tracker.tracks: i = track.track_id if not track.is_confirmed() or track.time_since_update > 1: continue if i not in self.db: self.db[i] = [] bbox = track.to_tlbr() # Find the bottom-centre of the bounding box & add it to the tracking database bottomCentre = np.array([(bbox[0] + bbox[2]) / 2.0, bbox[3]]) self.db[i].append(bottomCentre) if len(self.db[i]) > 1: # If we have more than one datapoint for this tracked object pts = (np.array(self.db[i]).reshape( (-1, 1, 2))).reshape(-1) elements.append(TrackedPath(pts)) p1 = self.cameracountline[0] q1 = self.cameracountline[1] p2 = np.array(self.db[i][-1]) q2 = np.array(self.db[i][-2]) cp = np.cross(q1 - p1, q2 - p2) if intersection(p1, q1, p2, q2): self.intcount += 1 print( "track_id={} just intersected camera countline; cross-prod={}; intcount={}" .format(i, cp, self.intcount)) elements.append(TrackedPathIntersection(pts[-4:])) if cp >= 0: self.poscount += 1 else: self.negcount += 1 # send_mqtt_msg(frameCapTime) elements.append(TrackedObject(bbox, str(track.track_id))) for det in detections: bbox = det.to_tlbr() elements.append(DetectedObject(bbox)) elements.append(CountingStats(self.negcount, self.poscount)) await q_out.put(elements) def graphical_output(self, render: RenderInfo, elements, output_wh: (int, int)): (output_w, output_h) = output_wh # Clear screen self.draw.rectangle([0, 0, output_w, output_h], fill=0, outline=0) # Sort elements by display priority elements.sort(key=lambda e: e.priority) # Draw elements for e in elements: if hasattr(e, 'do_render'): e.do_render(render) # Copy backbuf to output backarray = np.array(self.backbuf) if self.color_mode is not None: outputrgba = cv2.cvtColor(backarray, self.color_mode) else: outputrgba = backarray outputrgb = cv2.cvtColor(outputrgba, cv2.COLOR_RGBA2RGB) self.output.write(outputrgb) #cv2.imshow('main', outputrgb) def text_output(self, handle, elements): # Sort elements by priority elements.sort(key=lambda e: e.priority) for e in 
elements: if hasattr(e, 'do_text'): e.do_text(handle, elements) async def render_output(self, q_in): (output_w, output_h) = (self.args.camera_width, self.args.camera_height) ratio = 1 #fixme render = RenderInfo(ratio, FontLib(output_w), self.draw, self.backbuf) try: while self.running: elements = await q_in.get() self.graphical_output(render, elements, (output_w, output_h)) for e in elements: if isinstance(e, FrameInfo): t_frame = e.t_frame break elements.append( TimingInfo('Overall latency', 'overall', time.time() - t_frame)) self.text_output(sys.stdout, elements) await asyncio.sleep(1.0 / 30.0) # FIXME finally: self.output.release() def check_deleted_track(self, i): if i in self.db and len(self.db[i]) > 1: if any_intersection(self.cameracountline[0], self.cameracountline[1], np.array(self.db[i])): self.delcount += 1 print("delcount={}".format(self.delcount)) self.db[i] = [] async def start(self): self.running = True cameraQueue = FreshQueue() objectQueue = asyncio.Queue(maxsize=1) detectionQueue = asyncio.Queue(maxsize=1) resultQueue = asyncio.Queue(maxsize=1) drawQueue = asyncio.Queue(maxsize=1) asyncio.ensure_future(self.render_output(drawQueue)) asyncio.ensure_future(self.process_results(resultQueue, drawQueue)) asyncio.ensure_future(self.track_objects(detectionQueue, resultQueue)) asyncio.ensure_future(self.encode_features(objectQueue, detectionQueue)) asyncio.ensure_future(self.detect_objects(cameraQueue, objectQueue)) await self.capture(cameraQueue) self.running = False
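# FreshQueue is not defined in this snippet. From its use as the camera
# queue in start(), it plausibly keeps only the newest frame so the detector
# never falls behind the camera. A minimal asyncio version under that
# assumption:
import asyncio

class FreshQueueSketch(asyncio.Queue):
    def __init__(self):
        super().__init__(maxsize=1)

    async def put(self, item):
        # drop the stale item, if any, so consumers always see the latest frame
        if self.full():
            try:
                self.get_nowait()
            except asyncio.QueueEmpty:
                pass
        await super().put(item)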
def main(argv): # print("location recieved in main as: ", e) ################################### global VIOLATION_PERCENTAGE, PROCESSING_STATUS, VIOLATION_FRAME violator_count_list = list() ################################### # Definition of the parameters max_cosine_distance = 0.5 nn_budget = None nms_max_overlap = 1.0 #initialize deep sort model_filename = 'model_data/mars-small128.pb' encoder = gdet.create_box_encoder(model_filename, batch_size=1) metric = nn_matching.NearestNeighborDistanceMetric("cosine", max_cosine_distance, nn_budget) tracker = Tracker(metric) physical_devices = tf.config.experimental.list_physical_devices('GPU') if len(physical_devices) > 0: tf.config.experimental.set_memory_growth(physical_devices[0], True) yolo = YoloV3(classes=80) yolo.load_weights('./weights/yolov3.tf') logging.info('weights loaded') class_names = [c.strip() for c in open('./coco.names').readlines()] logging.info('classes loaded') video_path = 'test.mkv' try: vid = cv2.VideoCapture(int(FILE_URL)) except: vid = cv2.VideoCapture(FILE_URL) time.sleep(1.0) out = None width = int(vid.get(cv2.CAP_PROP_FRAME_WIDTH)) height = int(vid.get(cv2.CAP_PROP_FRAME_HEIGHT)) print("height: ", height) print("width: ", width) fps = int(vid.get(cv2.CAP_PROP_FPS)) codec = cv2.VideoWriter_fourcc(*'XVID') out = cv2.VideoWriter('./result.avi', codec, fps, (width, height)) frame_index = -1 fps = 0.0 count = 0 PROCESSING_STATUS = True while True: _, img = vid.read() if img is None: logging.warning("Empty Frame") time.sleep(0.1) count += 1 if count < 3: continue else: break img_in = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) img_in = tf.expand_dims(img_in, 0) img_in = transform_images(img_in, 416) temp_violators = set() temp_total_people = set() t1 = time.time() boxes, scores, classes, nums = yolo.predict(img_in) classes = classes[0] names = [] for i in range(len(classes)): names.append(class_names[int(classes[i])]) names = np.array(names) converted_boxes = convert_boxes(img, boxes[0]) features = encoder(img, converted_boxes) detections = [ Detection(bbox, score, class_name, feature) for bbox, score, class_name, feature in zip( converted_boxes, scores[0], names, features) ] #initialize color map cmap = plt.get_cmap('tab20b') colors = [cmap(i)[:3] for i in np.linspace(0, 1, 20)] # run non-maxima suppresion boxs = np.array([d.tlwh for d in detections]) scores = np.array([d.confidence for d in detections]) classes = np.array([d.class_name for d in detections]) indices = preprocessing.non_max_suppression(boxs, classes, nms_max_overlap, scores) detections = [detections[i] for i in indices] # Call the tracker tracker.predict() tracker.update(detections) for track in tracker.tracks: if not track.is_confirmed() or track.time_since_update > 1: continue class_name1 = track.get_class() if class_name1 == "person": temp_total_people.add(track.track_id) bbox1 = track.to_tlbr() x1_c = int(bbox1[0] + (bbox1[2] - bbox1[0]) / 2) y1_c = int(bbox1[1] + (bbox1[3] - bbox1[1]) / 2) r1 = int(abs(bbox1[3] - bbox1[1])) color = (255, 0, 0) cv2.line(img, (x1_c, y1_c), (x1_c, y1_c + r1 // 2), (0, 255, 0), 2) cv2.circle(img, (x1_c, y1_c), 5, (255, 20, 200), -1) scale = (r1) / 100 transparentOverlay(img, dst_circle, (x1_c, y1_c - 5), alphaVal=110, color=(0, 200, 20), scale=scale) for other in tracker.tracks: if not other.is_confirmed() or other.time_since_update > 1: continue if track.track_id == other.track_id: continue class_name2 = other.get_class() if class_name2 == "person": temp_total_people.add(other.track_id) bbox2 = other.to_tlbr() x2_c = 
int(bbox2[0] + (bbox2[2] - bbox2[0]) / 2) y2_c = int(bbox2[1] + (bbox2[3] - bbox2[1]) / 2) r2 = int(abs(bbox2[3] - bbox2[1])) if int_circle(x1_c, y1_c, x2_c, y2_c, r1 // 2, r2 // 2) >= 0 and abs(y1_c - y2_c) < r1 // 4: temp_violators.add(track.track_id) temp_violators.add(other.track_id) cv2.line(img, (x1_c, y1_c), (x2_c, y2_c), (0, 0, 255), 2) scale1 = (r1) / 100 transparentOverlay(img, dst_circle, (x1_c, y1_c - 5), alphaVal=110, color=(0, 0, 255), scale=scale1) scale2 = (r2) / 100 transparentOverlay(img, dst_circle, (x2_c, y2_c - 5), alphaVal=110, color=(0, 0, 255), scale=scale2) # print fps on screen ### Comment below 3 lines to not see live output screen fps = (fps + (1. / (time.time() - t1))) / 2 cv2.putText(img, "FPS: {:.2f}".format(fps), (0, 30), cv2.FONT_HERSHEY_COMPLEX_SMALL, 1, (0, 0, 255), 2) cv2.imshow('output', img) ### Violators calculation violators_for_frame = len(temp_violators) VIOLATION_PERCENTAGE = violators_for_frame print("Violation percentage: ", violators_for_frame) violator_count_list.append(int(violators_for_frame)) ### ### Call to firebase upload function # if violators_for_frame > 20: # social_dist_violation_frame_handler(img) # cv2.imwrite("temp.png",img) # firebase_upload("temp.png") # os.remove("temp.png") frame_index = frame_index + 1 # press q to quit if cv2.waitKey(1) == ord('q'): break vid.release() if len(violator_count_list) == 0: mean_violation = 0 else: mean_violation = sum(violator_count_list) / len(violator_count_list) PROCESSING_STATUS = False out.release() cv2.destroyAllWindows()
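# int_circle() is not defined in this snippet. From its use above (a return
# value >= 0 is treated as a proximity violation), it plausibly compares the
# distance between the circle centres against the sum of the radii:
import math

def int_circle_sketch(x1, y1, x2, y2, r1, r2):
    # positive when the circles overlap, zero when they just touch
    return (r1 + r2) - math.hypot(x2 - x1, y2 - y1)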
def run(self, catch):

    def intersect(A, B, C, D):
        return ccw(A, C, D) != ccw(B, C, D) and ccw(A, B, C) != ccw(A, B, D)

    def ccw(A, B, C):
        return (C[1] - A[1]) * (B[0] - A[0]) > (B[1] - A[1]) * (C[0] - A[0])

    def vector_angle(midpoint, previous_midpoint):
        x = midpoint[0] - previous_midpoint[0]
        y = midpoint[1] - previous_midpoint[1]
        return math.degrees(math.atan2(y, x))

    global truck
    global car
    titik1 = (100, 511)  # counting-line endpoints
    titik2 = (551, 511)

    # Definition of the parameters
    max_cosine_distance = 0.4
    nn_budget = None
    nms_max_overlap = 1.0

    # initialize deep sort
    model_filename = 'model_data/mars-small128.pb'
    encoder = gdet.create_box_encoder(model_filename, batch_size=1)
    # calculate cosine distance metric
    metric = nn_matching.NearestNeighborDistanceMetric("cosine", max_cosine_distance, nn_budget)
    # initialize tracker
    tracker = Tracker(metric)

    # initialize counting variables
    count_dict = {}  # initiate dict for storing counts
    total_counter = 0
    up_count = 0
    down_count = 0
    from collections import Counter
    class_counter = Counter()  # store counts of each detected class
    from collections import deque
    already_counted = deque(maxlen=50)  # temporary memory for storing counted IDs
    intersect_info = []  # initialise intersection list
    memory = {}

    # load configuration for object detector
    config = ConfigProto()
    config.gpu_options.allow_growth = True
    session = InteractiveSession(config=config)
    input_size = 416
    video_path = 'C:/Users/MSI Laptop/Pictures/overpass.mp4'  # here it is

    saved_model_loaded = tf.saved_model.load('./checkpoints/yolov4-416', tags=[tag_constants.SERVING])
    infer = saved_model_loaded.signatures['serving_default']

    # begin video capture
    try:
        vid = cv2.VideoCapture(int(catch))
    except:
        vid = cv2.VideoCapture(catch)

    frame_num = 0
    # while video is running
    while True:
        return_value, frame = vid.read()
        if return_value:
            frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            image = Image.fromarray(frame)
        else:
            print('Video has ended or failed to load, try another video!')
            break
        frame_num += 1
        # print('Frame #: ', frame_num)
        frame_size = frame.shape[:2]
        image_data = cv2.resize(frame, (input_size, input_size))
        image_data = image_data / 255.
        image_data = image_data[np.newaxis, ...].astype(np.float32)
        start_time = time.time()

        batch_data = tf.constant(image_data)
        pred_bbox = infer(batch_data)
        for key, value in pred_bbox.items():
            boxes = value[:, :, 0:4]
            pred_conf = value[:, :, 4:]

        boxes, scores, classes, valid_detections = tf.image.combined_non_max_suppression(
            boxes=tf.reshape(boxes, (tf.shape(boxes)[0], -1, 1, 4)),
            scores=tf.reshape(pred_conf, (tf.shape(pred_conf)[0], -1, tf.shape(pred_conf)[-1])),
            max_output_size_per_class=50,
            max_total_size=50,
            iou_threshold=0.45,
            score_threshold=0.50)

        # convert data to numpy arrays and slice out unused elements
        num_objects = valid_detections.numpy()[0]
        bboxes = boxes.numpy()[0]
        bboxes = bboxes[0:int(num_objects)]
        scores = scores.numpy()[0]
        scores = scores[0:int(num_objects)]
        classes = classes.numpy()[0]
        classes = classes[0:int(num_objects)]

        # format bounding boxes from normalized ymin, xmin, ymax, xmax ---> xmin, ymin, width, height
        original_h, original_w, _ = frame.shape
        bboxes = utils.format_boxes(bboxes, original_h, original_w)

        # store all predictions in one parameter for simplicity when calling functions
        pred_bbox = [bboxes, scores, classes, num_objects]

        # read in all class names from config
        class_names = utils.read_class_names(cfg.YOLO.CLASSES)

        # by default allow all classes in .names file
        allowed_classes = list(class_names.values())

        # custom allowed classes (uncomment line below to customize tracker for only people)
        # allowed_classes = ['person']

        # loop through objects and use class index to get class name,
        # allowing only classes in the allowed_classes list
        names = []
        deleted_indx = []
        for i in range(num_objects):
            class_indx = int(classes[i])
            class_name = class_names[class_indx]
            if class_name not in allowed_classes:
                deleted_indx.append(i)
            else:
                names.append(class_name)
        names = np.array(names)
        count = len(names)
        bboxes = np.delete(bboxes, deleted_indx, axis=0)
        scores = np.delete(scores, deleted_indx, axis=0)

        # encode yolo detections and feed to tracker
        features = encoder(frame, bboxes)
        detections = [Detection(bbox, score, class_name, feature)
                      for bbox, score, class_name, feature
                      in zip(bboxes, scores, names, features)]

        # initialize color map
        cmap = plt.get_cmap('tab20b')
        colors = [cmap(i)[:3] for i in np.linspace(0, 1, 20)]

        # run non-maxima suppression
        boxs = np.array([d.tlwh for d in detections])
        scores = np.array([d.confidence for d in detections])
        classes = np.array([d.class_name for d in detections])
        indices = preprocessing.non_max_suppression(boxs, classes, nms_max_overlap, scores)
        detections = [detections[i] for i in indices]

        # Call the tracker
        tracker.predict()
        tracker.update(detections)

        # draw the counting line
        cv2.line(frame, titik1, titik2, (0, 255, 255), 2)

        # update tracks
        for track in tracker.tracks:
            if not track.is_confirmed() or track.time_since_update > 1:
                continue
            bbox = track.to_tlbr()  # current position in bounding box format (min x, min y, max x, max y)
            # track_cls = track.cls  # most common detection class for track
            class_name = track.get_class()

            # Object counting
            midpoint = track.tlbr_midpoint(bbox)  # finds the midpoint of a box in tlbr format
            origin_midpoint = (midpoint[0], frame.shape[0] - midpoint[1])  # midpoint relative to the bottom-left corner

            if track.track_id not in memory:
                memory[track.track_id] = deque(maxlen=2)
            memory[track.track_id].append(midpoint)
            previous_midpoint = memory[track.track_id][0]
            origin_previous_midpoint = (previous_midpoint[0], frame.shape[0] - previous_midpoint[1])

            if intersect(midpoint, previous_midpoint, titik1, titik2) and track.track_id not in already_counted:
                class_counter[class_name] += 1
                total_counter += 1
                cv2.line(frame, titik1, titik2, (255, 0, 0), 2)  # flash the line red
                already_counted.append(track.track_id)  # set already counted for this ID
                angle = vector_angle(origin_midpoint, origin_previous_midpoint)
                if angle > 0:
                    up_count += 1
                if angle < 0:
                    down_count += 1

            # draw bbox on screen
            color = colors[int(track.track_id) % len(colors)]
            color = [i * 255 for i in color]
            cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), color, 2)
            cv2.rectangle(frame, (int(bbox[0]), int(bbox[1] - 30)),
                          (int(bbox[0]) + (len(class_name) + len(str(track.track_id))) * 17, int(bbox[1])),
                          color, -1)
            cv2.putText(frame, class_name + "-" + str(track.track_id),
                        (int(bbox[0]), int(bbox[1] - 10)), 0, 0.75, (255, 255, 255), 2)

        if len(memory) > 50:
            del memory[list(memory)[0]]

        fps = 1.0 / (time.time() - start_time)

        # Draw total count.
        text = ("FPS: %.2f" % fps)
        frame = ps.putBText(frame, text, text_offset_x=int(frame.shape[1] - 185),
                            text_offset_y=int(0.05 * frame.shape[0]), vspace=10, hspace=10,
                            font_scale=1.0, background_RGB=(228, 20, 222), text_RGB=(255, 255, 255))
        text = "Total: {}".format(str(total_counter))
        frame = ps.putBText(frame, text, text_offset_x=int(10),
                            text_offset_y=int(0.05 * frame.shape[0]), vspace=10, hspace=10,
                            font_scale=1.0, background_RGB=(10, 20, 222), text_RGB=(255, 255, 255))

        # display counts for each class as they appear
        y = 0.12 * frame.shape[0]
        for cls in class_counter:
            class_count = class_counter[cls]
            text = str(cls) + " " + str(class_count)
            if str(cls) == 'car':
                car = str(class_count)
            elif str(cls) == 'truck':
                truck = str(class_count)
            frame = ps.putBText(frame, text, text_offset_x=int(10), text_offset_y=int(y),
                                vspace=5, hspace=10, font_scale=1.0,
                                background_RGB=(20, 210, 4), text_RGB=(255, 255, 255))
            y += 0.05 * frame.shape[0]
            # self.ui.label_2.setText(text)

        # calculate frames per second of running detections
        # fps = 1.0 / (time.time() - start_time)
        # print("FPS: %.2f" % fps)
        result = np.asarray(frame)
        result = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)
        # cv2.imshow("Output Video", result)
        # self.ui.label.setPixmap(QPixmap.fromImage(result))
        self.display_frame(result)

        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

    cv2.destroyAllWindows()
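# The ccw/intersect pair defined at the top of run() is the classic
# orientation test for segment crossing. Standalone, with a worked example:
# a track path from (0, 0) to (2, 2) crosses a counting line from (0, 2) to
# (2, 0), while a path that stays below the line does not.
def ccw(A, B, C):
    return (C[1] - A[1]) * (B[0] - A[0]) > (B[1] - A[1]) * (C[0] - A[0])

def intersect(A, B, C, D):
    return ccw(A, C, D) != ccw(B, C, D) and ccw(A, B, C) != ccw(A, B, D)

assert intersect((0, 0), (2, 2), (0, 2), (2, 0))
assert not intersect((0, 0), (1, 1), (0, 3), (3, 3))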
def detect(yolo, videoChoice, site, ip):
    # Definition of the parameters
    max_cosine_distance = 0.3
    nn_budget = None
    nms_max_overlap = 1.0

    # deep_sort
    model_filename = 'model_data/mars-small128.pb'
    encoder = gdet.create_box_encoder(model_filename, batch_size=1)
    metric = nn_matching.NearestNeighborDistanceMetric("cosine", max_cosine_distance, nn_budget)
    tracker = Tracker(metric)

    writeVideo_flag = True

    video_capture = cv2.VideoCapture(videoChoice)  # was videoPath, which is undefined here
    starttime = time.time()

    if writeVideo_flag:
        # Define the codec and create VideoWriter object
        w = int(video_capture.get(3))
        h = int(video_capture.get(4))
        fourcc = cv2.VideoWriter_fourcc(*'MJPG')
        out = cv2.VideoWriter('output%d.avi' % (site), fourcc, 30, (w, h))
        list_file = open('detection.txt', 'w')
        frame_index = -1

    fps = 0.0
    while True:
        ret, frame = video_capture.read()  # frame shape 640*480*3
        if ret != True:
            break
        t1 = time.time()

        # image = Image.fromarray(frame)
        image = Image.fromarray(frame[..., ::-1])  # bgr to rgb
        boxs = yolo.detect_image(image)
        print("box_num", len(boxs))

        # report the head count over HTTP
        import requests
        # exception handling is needed here
        try:
            url = ip + '/peoplecount/insert?num=%d&site=%d' % (len(boxs), site)
            print(url)
            req = requests.get(url)
            print(req.text)
        except requests.exceptions.RequestException as e:
            print('error')

        features = encoder(frame, boxs)

        # score to 1.0 here).
        detections = [Detection(bbox, 1.0, feature) for bbox, feature in zip(boxs, features)]

        # Run non-maxima suppression.
        boxes = np.array([d.tlwh for d in detections])
        scores = np.array([d.confidence for d in detections])
        indices = preprocessing.non_max_suppression(boxes, nms_max_overlap, scores)
        detections = [detections[i] for i in indices]

        # Call the tracker
        tracker.predict()
        tracker.update(detections)

        for track in tracker.tracks:
            if not track.is_confirmed() or track.time_since_update > 1:
                continue
            bbox = track.to_tlbr()
            cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), (255, 255, 255), 2)
            cv2.putText(frame, str(track.track_id), (int(bbox[0]), int(bbox[1])), 0, 5e-3 * 200, (0, 255, 0), 2)

        for det in detections:
            bbox = det.to_tlbr()
            cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), (255, 0, 0), 2)

        cv2.imshow('', frame)

        if writeVideo_flag:
            # save a frame
            out.write(frame)
            frame_index = frame_index + 1
            list_file.write(str(frame_index) + ' ')
            if len(boxs) != 0:
                for i in range(0, len(boxs)):
                    list_file.write(str(boxs[i][0]) + ' ' + str(boxs[i][1]) + ' ' +
                                    str(boxs[i][2]) + ' ' + str(boxs[i][3]) + ' ')
            list_file.write('\n')

        fps = (fps + (1. / (time.time() - t1))) / 2
        print("fps= %f" % (fps))

        # Press Q to stop!
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

    video_capture.release()
    if writeVideo_flag:
        out.release()
        list_file.close()
    cv2.destroyAllWindows()
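# A slightly more defensive variant of the HTTP report above, against the
# same hypothetical endpoint: let requests encode the query string and bound
# the call with a timeout so a slow server cannot stall the tracking loop.
import requests

def report_count(ip, num, site):
    try:
        resp = requests.get(ip + '/peoplecount/insert',
                            params={'num': num, 'site': site}, timeout=2)
        return resp.text
    except requests.exceptions.RequestException:
        return None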
def main(_argv):
    avg = []
    # Definition of the parameters
    max_cosine_distance = 0.4
    nn_budget = None
    nms_max_overlap = 1.0

    # load the regression model weights
    weight_path = './2_input_model_2-3.5%/'
    loaded_model = tf.keras.models.load_model(weight_path)

    # initialize deep sort
    model_filename = 'model_data/mars-small128.pb'
    encoder = gdet.create_box_encoder(model_filename, batch_size=1)
    # calculate cosine distance metric
    metric = nn_matching.NearestNeighborDistanceMetric("cosine", max_cosine_distance, nn_budget)
    # initialize tracker
    tracker = Tracker(metric)

    # load configuration for object detector
    config = ConfigProto()
    config.gpu_options.allow_growth = True
    session = InteractiveSession(config=config)
    STRIDES, ANCHORS, NUM_CLASS, XYSCALE = utils.load_config(FLAGS)
    input_size = FLAGS.size
    video_path = FLAGS.video

    # load tflite model if flag is set
    if FLAGS.framework == 'tflite':
        interpreter = tf.lite.Interpreter(model_path=FLAGS.weights)
        interpreter.allocate_tensors()
        input_details = interpreter.get_input_details()
        output_details = interpreter.get_output_details()
        print(input_details)
        print(output_details)
    # otherwise load standard tensorflow saved model
    else:
        saved_model_loaded = tf.saved_model.load(FLAGS.weights, tags=[tag_constants.SERVING])
        infer = saved_model_loaded.signatures['serving_default']

    # begin video capture
    try:
        vid = cv2.VideoCapture(int(video_path))
    except:
        vid = cv2.VideoCapture(video_path)

    out = None
    # get video ready to save locally if flag is set
    if FLAGS.output:
        # by default VideoCapture returns float instead of int
        width = int(vid.get(cv2.CAP_PROP_FRAME_WIDTH))
        height = int(vid.get(cv2.CAP_PROP_FRAME_HEIGHT))
        fps = int(vid.get(cv2.CAP_PROP_FPS))
        codec = cv2.VideoWriter_fourcc(*FLAGS.output_format)
        out = cv2.VideoWriter(FLAGS.output, codec, fps, (width, height))

    frame_num = 0
    # while video is running
    while True:
        return_value, frame = vid.read()
        if return_value:
            frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            image = Image.fromarray(frame)
        else:
            print('Video has ended or failed, try a different video format!')
            break
        frame_num += 1
        print('Frame #: ', frame_num)
        frame_size = frame.shape[:2]
        image_data = cv2.resize(frame, (input_size, input_size))
        image_data = image_data / 255.
        image_data = image_data[np.newaxis, ...].astype(np.float32)
        start_time = time.time()

        # run detections on tflite if flag is set
        if FLAGS.framework == 'tflite':
            interpreter.set_tensor(input_details[0]['index'], image_data)
            interpreter.invoke()
            pred = [interpreter.get_tensor(output_details[i]['index'])
                    for i in range(len(output_details))]
            # run detections using yolov3-tiny if flag is set
            if FLAGS.model == 'yolov3' and FLAGS.tiny == True:
                boxes, pred_conf = filter_boxes(pred[1], pred[0], score_threshold=0.25,
                                                input_shape=tf.constant([input_size, input_size]))
            else:
                boxes, pred_conf = filter_boxes(pred[0], pred[1], score_threshold=0.25,
                                                input_shape=tf.constant([input_size, input_size]))
        else:
            batch_data = tf.constant(image_data)
            pred_bbox = infer(batch_data)
            for key, value in pred_bbox.items():
                boxes = value[:, :, 0:4]
                pred_conf = value[:, :, 4:]

        boxes, scores, classes, valid_detections = tf.image.combined_non_max_suppression(
            boxes=tf.reshape(boxes, (tf.shape(boxes)[0], -1, 1, 4)),
            scores=tf.reshape(pred_conf, (tf.shape(pred_conf)[0], -1, tf.shape(pred_conf)[-1])),
            max_output_size_per_class=50,
            max_total_size=50,
            iou_threshold=FLAGS.iou,
            score_threshold=FLAGS.score)

        # convert data to numpy arrays and slice out unused elements
        num_objects = valid_detections.numpy()[0]
        bboxes = boxes.numpy()[0]
        bboxes = bboxes[0:int(num_objects)]
        scores = scores.numpy()[0]
        scores = scores[0:int(num_objects)]
        classes = classes.numpy()[0]
        classes = classes[0:int(num_objects)]

        # format bounding boxes from normalized ymin, xmin, ymax, xmax ---> xmin, ymin, width, height
        original_h, original_w, _ = frame.shape
        bboxes = utils.format_boxes(bboxes, original_h, original_w)

        # store all predictions in one parameter for simplicity when calling functions
        pred_bbox = [bboxes, scores, classes, num_objects]

        # read in all class names from config
        class_names = utils.read_class_names(cfg.YOLO.CLASSES)

        # by default allow all classes in .names file
        allowed_classes = list(class_names.values())
        # custom allowed classes (uncomment the line below to track only people)
        # allowed_classes = ['person']

        # loop through objects and use class index to get class name,
        # keeping only classes in the allowed_classes list
        names = []
        deleted_indx = []
        for i in range(num_objects):
            class_indx = int(classes[i])
            class_name = class_names[class_indx]
            if class_name not in allowed_classes:
                deleted_indx.append(i)
            else:
                names.append(class_name)
        names = np.array(names)
        count = len(names)
        if FLAGS.count:
            cv2.putText(frame, "Objects being tracked: {}".format(count), (5, 35),
                        cv2.FONT_HERSHEY_COMPLEX_SMALL, 2, (0, 255, 0), 2)
            print("Objects being tracked: {}".format(count))
        # delete detections that are not in allowed_classes
        bboxes = np.delete(bboxes, deleted_indx, axis=0)
        scores = np.delete(scores, deleted_indx, axis=0)

        # encode yolo detections and feed to tracker
        features = encoder(frame, bboxes)
        detections = [Detection(bbox, score, class_name, feature)
                      for bbox, score, class_name, feature in zip(bboxes, scores, names, features)]

        # initialize color map
        cmap = plt.get_cmap('tab20b')
        colors = [cmap(i)[:3] for i in np.linspace(0, 1, 20)]

        # run non-maxima suppression
        boxs = np.array([d.tlwh for d in detections])
        scores = np.array([d.confidence for d in detections])
        classes = np.array([d.class_name for d in detections])
        indices = preprocessing.non_max_suppression(boxs, classes, nms_max_overlap, scores)
        detections = [detections[i] for i in indices]

        # Call the tracker
        tracker.predict()
        tracker.update(detections)

        cv2.putText(frame, "using regress", (5, 35),
                    cv2.FONT_HERSHEY_COMPLEX_SMALL, 2, (255, 0, 255), 2)
        # cv2.putText(frame, "Objects being detected: {}".format(count), (5, 350),
        #             cv2.FONT_HERSHEY_COMPLEX_SMALL, 2, (0, 0, 255), 2)
        cv2.putText(frame, "frame# {}".format(frame_num), (750, 35),
                    cv2.FONT_HERSHEY_COMPLEX_SMALL, 2, (255, 0, 255), 2)

        # update tracks
        for track in tracker.tracks:
            if not track.is_confirmed() or track.time_since_update > 1:
                continue
            bbox = track.to_tlbr()
            class_name = track.get_class()

            # when exactly two distinct non-entrance classes are present, feed both
            # boxes to the regression model to predict a joint region
            if 'entrance' not in classes:
                if len(classes) > 1:
                    if contains_duplicates(classes) == False:
                        color = (50, 89, 170)
                        width = int(vid.get(cv2.CAP_PROP_FRAME_WIDTH))
                        height = int(vid.get(cv2.CAP_PROP_FRAME_HEIGHT))
                        # xywh to xmin ymin xmax ymax
                        x1, y1, x2, y2 = convert2(width, height,
                                                  int(boxs[0][0]), int(boxs[0][1]),
                                                  int(boxs[0][0] + boxs[0][2]),
                                                  int(boxs[0][1] + boxs[0][3]))
                        # xywh to xmin ymin xmax ymax
                        x3, y3, x4, y4 = convert2(width, height,
                                                  int(bboxes[1][0]), int(bboxes[1][1]),
                                                  int(bboxes[1][0] + bboxes[1][2]),
                                                  int(bboxes[1][1] + bboxes[1][3]))
                        reg_input = np.array([[class_index(classes[0]), x1, y1, x2, y2,
                                               class_index(classes[1]), x3, y3, x4, y4]])
                        predictions = loaded_model.predict(reg_input)
                        a1_pred = predictions[0]
                        b1_pred = predictions[1]
                        c1_pred = predictions[2]
                        d1_pred = predictions[3]
                        xmin, xmax, ymin, ymax = convert(width, height,
                                                         a1_pred, b1_pred, c1_pred, d1_pred)
                        start_point = (xmin, ymin)
                        end_point = (xmax, ymax)
                        rect1 = xmax - xmin
                        rect2 = ymax - ymin
                        check_rect = rect2 / rect1
                        print("check_rect:{}".format(check_rect))
                        # only draw predicted regions that are taller than they are wide
                        if check_rect > 1:
                            blk = np.zeros(frame.shape, np.uint8)
                            cv2.rectangle(blk, start_point, end_point, color, cv2.FILLED)
                            frame = cv2.addWeighted(frame, 1.0, blk, 0.5, 1)
                            print("predict_BBox Coords (xmin, ymin, xmax, ymax): {}".format(
                                (xmin, ymin, xmax, ymax)))

            ########
            # select one entrance (unfinished draft, kept for reference)
            ########
            # if classes.count('entrance') > 1:
            #     entrance_num = []
            #     iou_list = []
            #     iou_check = []
            #     for i in range(len(classes)):
            #         if classes[i] == 'entrance':
            #             entrance_num.append(i)
            #     if len(classes) > 1:
            #         if contains_duplicates(classes) == False:
            #             color = (50, 89, 170)
            #             width = int(vid.get(cv2.CAP_PROP_FRAME_WIDTH))
            #             height = int(vid.get(cv2.CAP_PROP_FRAME_HEIGHT))
            #             # xywh to xmin ymin xmax ymax
            #             x1, y1, x2, y2 = convert2(width, height, int(boxs[0][0]), int(boxs[0][1]),
            #                                       int(boxs[0][0] + boxs[0][2]), int(boxs[0][1] + boxs[0][3]))
            #             x3, y3, x4, y4 = convert2(width, height, int(bboxes[1][0]), int(bboxes[1][1]),
            #                                       int(bboxes[1][0] + bboxes[1][2]), int(bboxes[1][1] + bboxes[1][3]))
            #             reg_input = np.array([[class_index(classes[0]), x1, y1, x2, y2,
            #                                    class_index(classes[1]), x3, y3, x4, y4]])
            #             predictions = loaded_model.predict(reg_input)
            #             a1_pred, b1_pred, c1_pred, d1_pred = predictions[0], predictions[1], predictions[2], predictions[3]
            #             xmin, xmax, ymin, ymax = convert(width, height, a1_pred, b1_pred, c1_pred, d1_pred)
            #             ### IOU of the predicted box against each entrance ###
            #             GT_bbox_area = (xmax - xmin + 1) * (ymax - ymin + 1)
            #             Pred_bbox_area = (x_bottomright_p - x_topleft_p + 1) * (y_bottomright_p - y_topleft_p + 1)
            #             x_top_left = np.max([x_topleft_gt, x_topleft_p])
            #             y_top_left = np.max([y_topleft_gt, y_topleft_p])
            #             x_bottom_right = np.min([x_bottomright_gt, x_bottomright_p])
            #             y_bottom_right = np.min([y_bottomright_gt, y_bottomright_p])
            #             intersection_area = (x_bottom_right - x_top_left + 1) * (y_bottom_right - y_top_left + 1)
            #             union_area = (GT_bbox_area + Pred_bbox_area - intersection_area)
            #             iou_check.append(intersection_area / union_area)
            #     # keep only the entrance with the highest IOU
            #     for j in range(len(iou_check)):
            #         if entrance_num[j] < max(iou_check):
            #             track.delete

            # draw bbox on screen
            color = colors[int(track.track_id) % len(colors)]
            color = [i * 255 for i in color]
            if class_name == 'entrance':
                print("RED Tracker ID: {}, Class: {}".format(str(track.track_id), class_name))
                blk = np.zeros(frame.shape, np.uint8)
                cv2.rectangle(blk, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])),
                              (255, 0, 0), cv2.FILLED)
                frame = cv2.addWeighted(frame, 1.0, blk, 0.5, 1)
            cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), color, 2)
            cv2.rectangle(frame, (int(bbox[0]), int(bbox[1] - 30)),
                          (int(bbox[0]) + (len(class_name) + len(str(track.track_id))) * 17,
                           int(bbox[1])), color, -1)
            cv2.putText(frame, class_name + "-" + str(track.track_id),
                        (int(bbox[0]), int(bbox[1] - 10)), 0, 0.75, (255, 255, 255), 2)

            # if enable info flag then print details about each track
            if FLAGS.info:
                print("Tracker ID: {}, Class: {}, BBox Coords (xmin, ymin, xmax, ymax): {}".format(
                    str(track.track_id), class_name,
                    (int(bbox[0]), int(bbox[1]), int(bbox[2]), int(bbox[3]))))

        # calculate frames per second of running detections
        fps = 1.0 / (time.time() - start_time)
        print("FPS: %.2f" % fps)
        avg.append(fps)
        print("avg fps {}".format(statistics.mean(avg)))
        cv2.putText(frame, "FPS: %.2f" % fps, (50, 500),
                    cv2.FONT_HERSHEY_COMPLEX_SMALL, 2, (66, 245, 141), 2)
        result = np.asarray(frame)
        result = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)

        if not FLAGS.dont_show:
            cv2.imshow("Output Video", result)

        # if output flag is set, save video file
        if FLAGS.output:
            out.write(result)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
    cv2.destroyAllWindows()
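# The regression variant above calls four helpers that are not defined in this file:
# contains_duplicates(), class_index(), convert2() and convert(). Minimal sketches of
# what they are assumed to do, inferred only from their call sites (the names are kept,
# but the exact behavior is an assumption, not the project's actual implementation):

def contains_duplicates(values):
    # True if any class name appears more than once among the detections
    return len(values) != len(set(values))

def class_index(class_name, class_names=('person', 'entrance')):
    # map a class name to the integer index the regression model was trained with;
    # the class list here is a placeholder
    return class_names.index(class_name)

def convert2(width, height, xmin, ymin, xmax, ymax):
    # pixel-space corners -> corners normalized to [0, 1], as packed into reg_input
    return xmin / width, ymin / height, xmax / width, ymax / height

def convert(width, height, a, b, c, d):
    # normalized model outputs -> integer pixel corners, returned in the
    # (xmin, xmax, ymin, ymax) order used when unpacking above
    return int(a * width), int(b * width), int(c * height), int(d * height)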
def main(yolo):
    # Definition of the parameters
    max_cosine_distance = 0.3  # maximum cosine distance
    nn_budget = None           # appearance budget per track (None = unlimited)
    nms_max_overlap = 1.0      # non-maximum suppression overlap threshold

    # deep_sort
    model_filename = 'model_data/mars-small128.pb'
    # model_filename = 'model_data/darknet_yolov3_model.pb'
    encoder = gdet.create_box_encoder(model_filename, batch_size=1)  # appearance encoder
    metric = nn_matching.NearestNeighborDistanceMetric("cosine", max_cosine_distance, nn_budget)  # cosine metric
    tracker = Tracker(metric)  # tracker

    writeVideo_flag = True

    # video_capture = cv2.VideoCapture(0)          # read from the camera
    video_capture = cv2.VideoCapture('test5.mp4')  # read from a video file

    if writeVideo_flag:
        # Define the codec and create the VideoWriter objects
        w = int(video_capture.get(3))
        h = int(video_capture.get(4))
        print("video width:", w, " video height:", h)
        fourcc = cv2.VideoWriter_fourcc(*'MJPG')  # MJPG codec for the output videos
        out = cv2.VideoWriter('out.avi', fourcc, 15, (w, h))
        out_m = cv2.VideoWriter('out_m.avi', fourcc, 15, (w, h))
        list_file = open('detection.txt', 'w')
        frame_index = -1

    fps = 0.0
    while True:
        print("current frame:", int(video_capture.get(cv2.CAP_PROP_POS_FRAMES)))
        ret, frame = video_capture.read()  # frame is a 3-D array
        # print("frame:", frame)
        if ret != True:
            break
        t1 = time.time()

        # image = Image.fromarray(frame)
        image = Image.fromarray(frame[..., ::-1])  # bgr to rgb
        # a frame read from the video is a PIL.Image.Image
        img = np.asarray(image)          # convert the PIL image to a numpy.ndarray
        boxs = yolo.detect_image(image)  # run YOLO detection on the converted image
        # print("type of boxs:", type(boxs))  # <class 'list'>
        features = encoder(frame, boxs)  # extract appearance features with the encoder

        # the detection score is fixed to 1.0 here
        detections = [Detection(bbox, 1.0, feature) for bbox, feature in zip(boxs, features)]
        # print("detections", detections)  # deep_sort.detection.Detection objects

        # Run non-maximum suppression.
        # tlwh is each box's top-left (x, y) plus width and height, as an ndarray
        boxes = np.array([d.tlwh for d in detections])
        a = boxes.shape[0]
        if len(boxes.shape) == 1:
            b = 0
        else:
            b = boxes.shape[1]
        # (a, b) is the shape of boxes
        i = 1
        if b != 0:
            while i <= a:
                box_temp = boxes[i - 1, :]  # one detection per row
                x_center, y_center = travel(box_temp)  # travel() returns the box center point
                anchor_width = box_temp[2]   # anchor box width
                anchor_height = box_temp[3]  # anchor box height
                i += 1
        print("one frame detection end\n")

        scores = np.array([d.confidence for d in detections])  # detection confidences
        # run NMS to reduce the effect of overlapping detections, keeping the surviving indices
        indices = preprocessing.non_max_suppression(boxes, nms_max_overlap, scores)
        detections = [detections[i] for i in indices]  # keep only the detections selected by NMS

        # inspect each track's state before the update
        for track in tracker.tracks:
            print("current track id:", track.track_id)
            print("current track age:", track.age)
            print("current track covariance:", track.covariance)
            print("current track features:", track.features)
            print("current track hits:", track.hits)
            print("current track mean:", track.mean)
            print("current track state:", track.state)
            print("current track time_since_update:", track.time_since_update)

        # Call the tracker
        tracker.predict()
        tracker.update(detections)

        follow_id = []
        follow = []
        # draw the threshold lines
        cv2.rectangle(img, (0, int(h / 2)), (int(w), int(h / 2)), (255, 0, 255), 1)
        cv2.rectangle(img, (int(w / 2), 0), (int(w / 2), int(h)), (255, 0, 255), 1)
        # number of discarded ids (`count` is assumed to be a global counter)
        cv2.putText(img, "waste id is" + str(count), (660, 100), 0, 1, (125, 255, 125), 2)

        for track in tracker.tracks:
            if not track.is_confirmed() or track.time_since_update > 1:
                continue
            bbox = track.to_tlbr()
            print("bbox:", bbox)
            print("follow_id before appending:", follow_id)
            print("follow before appending:", follow)
            follow_id.append(track.track_id)
            follow.append((bbox[0], bbox[1], bbox[2], bbox[3]))
            # the arrays after appending are printed inside compare()
            follow, follow_id = compare(follow, follow_id, bbox, track, w, h)  # compare position information
            length2 = len(follow_id)
            print("length2", length2)
            print("follow", follow)
            print("follow_id", follow_id)

            # white box (tracking box), colors are BGR
            cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), (255, 255, 255), 2)
            # cv2.rectangle(img, (int(follow[follow_id.index(follow_id[length2 - 1])][0]),
            #                     int(follow[follow_id.index(follow_id[length2 - 1])][1])),
            #               (int(bbox[2]), int(bbox[3])), (255, 255, 255), 2)
            # green text (track id): putText(image, text, position, font, scale, color, thickness, line type)
            # cv2.putText(frame, str(track.track_id), (int(bbox[0]), int(bbox[1])), 0, 5e-3 * 200, (255, 255, 0), 2)
            cv2.putText(frame, str(track.track_id), (int(bbox[0]) + 8, int(bbox[1]) + 8), 0, 1, (0, 255, 0), 2)
            # subtract the wasted ids so the displayed id reflects the actual tracked count
            a1 = int(follow_id[length2 - 1]) - 1
            # a1 = int(follow_id[length2 - 1])
            cv2.putText(img, str(a1), (int(bbox[0]) + 8, int(bbox[1]) + 8), 0, 1, (0, 255, 0), 2)
            print("track id:", track.track_id)
            # show the current id above the detection box
            cv2.putText(frame, "current id is " + str(track.track_id), (int(bbox[0]), 40), 0, 1, (125, 155, 125), 2)
            cv2.putText(img, "current id is" + str(a1), (int(bbox[0]), 80), 0, 1, (125, 255, 0), 2)
        print("one frame track end")

        for fol in follow:
            print("fol:", fol)
            cv2.rectangle(img, (int(fol[0]), int(fol[1])), (int(fol[2]), int(fol[3])), (255, 255, 255), 2)

        for det in detections:
            bbox = det.to_tlbr()
            # blue box (detection box); colors are BGR, not RGB
            cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), (255, 0, 0), 2)
            cv2.rectangle(img, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), (255, 0, 0), 2)

        # h, w, l = np.shape(frame)
        # print("hwl:", h, w, l)

        # display each frame
        cv2.imshow('origin Frame', frame)
        # cv2.rectangle(img, (400, 200), (500, 600), (255, 255, 0), 2)  # debug box; remember to delete
        cv2.imshow('modify frame', img)  # shows the current frame, advancing with the video

        if writeVideo_flag:
            # save each frame
            out.write(frame)   # write to out.avi
            out_m.write(img)   # write the second window to out_m.avi
            frame_index = frame_index + 1
            list_file.write(str(frame_index) + ' ')  # append to detection.txt
            if len(boxs) != 0:
                for i in range(0, len(boxs)):
                    list_file.write(str(boxs[i][0]) + ' ' + str(boxs[i][1]) + ' ' +
                                    str(boxs[i][2]) + ' ' + str(boxs[i][3]) + ' ')
            list_file.write('\n')

        # print fps to the console
        fps = (fps + (1. / (time.time() - t1))) / 2
        print("fps= %f" % (fps))
        print("end of frame\n\n")

        # Press Q to stop!
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

    # release the video stream
    video_capture.release()
    if writeVideo_flag:
        out.release()
        out_m.release()
        list_file.close()
    cv2.destroyAllWindows()
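# travel() and compare() above come from elsewhere in the project. compare() carries
# project-specific position logic that cannot be reconstructed from this file, but
# travel() is pinned down by its call site: one tlwh row in, a center point out.
# A plausible stand-in, assumed rather than taken from the source:
def travel(box_tlwh):
    x, y, box_w, box_h = box_tlwh
    return x + box_w / 2.0, y + box_h / 2.0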
def main(_argv):
    physical_devices = tf.config.experimental.list_physical_devices('GPU')
    for physical_device in physical_devices:
        tf.config.experimental.set_memory_growth(physical_device, True)

    ppe_input_size = FLAGS.ppe_size
    helmet_input_size = FLAGS.helmet_size

    try:
        vid = cv2.VideoCapture(int(FLAGS.video))
    except:
        vid = cv2.VideoCapture(FLAGS.video)

    times = []

    if FLAGS.output:
        width = int(vid.get(cv2.CAP_PROP_FRAME_WIDTH))
        height = int(vid.get(cv2.CAP_PROP_FRAME_HEIGHT))
        fps = int(vid.get(cv2.CAP_PROP_FPS))
        codec = cv2.VideoWriter_fourcc(*FLAGS.output_format)
        out = cv2.VideoWriter(FLAGS.output, codec, fps, (width, height))

    ppe_detector = create_ppe_detector(ppe_input_size)
    helmet_detector = create_helmet_detector(helmet_input_size)

    # encode the known reference faces once, up front
    nacho_image1 = face_recognition.load_image_file("./data/faces/nacho1.jpg")
    nacho_image2 = face_recognition.load_image_file("./data/faces/nacho2.jpg")
    nacho_image3 = face_recognition.load_image_file("./data/faces/nacho3.jpg")
    nacho_face_encoding1 = face_recognition.face_encodings(nacho_image1)[0]
    nacho_face_encoding2 = face_recognition.face_encodings(nacho_image2)[0]
    nacho_face_encoding3 = face_recognition.face_encodings(nacho_image3)[0]
    known_face_encodings = [nacho_face_encoding1, nacho_face_encoding2, nacho_face_encoding3]
    known_face_names = ["Nacho", "Nacho", "Nacho"]

    face_locations = []
    face_encodings = []
    face_names = []

    max_cosine_distance = 0.7  # 0.5 / 0.7
    nn_budget = None
    model_filename = './weights/tracker/mars-small128.pb'
    encoder = gdet.create_box_encoder(model_filename, batch_size=1)
    metric = nn_matching.NearestNeighborDistanceMetric("cosine", max_cosine_distance, nn_budget)
    tracker = Tracker(metric)
    Track_only = []
    logging.info("Models loaded!")

    while True:
        return_value, frame = vid.read()
        if not return_value:
            logging.warning("Empty Frame")
            break
        frame_size = frame.shape[:2]
        frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        # small_frame = cv2.resize(frame, (0, 0), fx=0.25, fy=0.25)

        img_in = tf.expand_dims(frame, 0)
        img_in = transform_images(img_in, helmet_input_size)
        image_data = utils.image_preprocess(np.copy(frame), [ppe_input_size, ppe_input_size])
        image_data = image_data[np.newaxis, ...].astype(np.float32)
        t1 = time.time()

        if FLAGS.framework == 'tf':
            ppe_pred_bbox = ppe_detector.predict(image_data)
        elif FLAGS.framework == 'trt':
            batched_input = tf.constant(image_data)
            ppe_pred_bbox = []
            result = ppe_detector(batched_input)
            for _, value in result.items():
                value = value.numpy()
                ppe_pred_bbox.append(value)

        helmet_pred_bbox = helmet_detector.predict(img_in)

        # face_locations = face_recognition.face_locations(small_frame)
        face_locations = face_recognition.face_locations(frame)
        face_encodings = face_recognition.face_encodings(frame, face_locations)
        face_names = []
        for face_encoding in face_encodings:
            matches = face_recognition.compare_faces(known_face_encodings, face_encoding)
            name = "Unknown"
            # if True in matches:
            #     first_match_index = matches.index(True)
            #     name = known_face_names[first_match_index]
            # use the closest known face instead of the first boolean match
            face_distances = face_recognition.face_distance(known_face_encodings, face_encoding)
            best_match_index = np.argmin(face_distances)
            if matches[best_match_index]:
                name = known_face_names[best_match_index]
            face_names.append(name)

        t2 = time.time()
        times.append(t2 - t1)
        times = times[-20:]  # moving average over the last 20 frames
        ms = sum(times) / len(times) * 1000
        fps = 1000 / ms

        ppe_bboxes = post_process_boxes(ppe_pred_bbox, 'yolov4', frame_size, ppe_input_size)
        helmet_bboxes = post_process_boxes(helmet_pred_bbox, 'yolov3', frame_size, helmet_input_size)

        face_bboxes = []
        for (top, right, bottom, left), name in zip(face_locations, face_names):
            # top *= 4; left *= 4; right *= 4; bottom *= 4  # rescale if small_frame is used
            face_bboxes.append([left, top, right, bottom, name])

        bboxes = utils.calculate_status(ppe_bboxes, helmet_bboxes, [])
        boxes, safety_scores, site_roles, face_names = [], [], [], []
        for bbox in bboxes:
            boxes.append([bbox[0].astype(int), bbox[1].astype(int),
                          bbox[2].astype(int) - bbox[0].astype(int),
                          bbox[3].astype(int) - bbox[1].astype(int)])
            safety_scores.append(bbox[4])
            site_roles.append(bbox[5])
            face_names.append("None")
        for bbox in face_bboxes:
            boxes.append([bbox[0], bbox[1], bbox[2] - bbox[0], bbox[3] - bbox[1]])
            safety_scores.append(0)
            site_roles.append(-1)
            face_names.append(bbox[4])

        boxes = np.array(boxes)
        safety_scores = np.array(safety_scores)
        site_roles = np.array(site_roles)
        face_names = np.array(face_names)
        features = np.array(encoder(frame, boxes))
        detections = [Detection(bbox, 0.9, 0, feature, safety_score, site_role, face_name)
                      for bbox, feature, safety_score, site_role, face_name in zip(
                          boxes, features, safety_scores, site_roles, face_names)]

        tracker.predict()
        tracker.update(detections)

        tracked_bboxes = []
        for track in tracker.tracks:
            if not track.is_confirmed() or track.time_since_update > 1:  # 1 / 5
                continue
            bbox = track.to_tlbr()
            tracking_id = track.track_id
            safety_score = track.get_safety_score()
            site_role = track.get_site_role()
            face_name = track.get_face_name()
            if site_role == -1:
                to_add = [face_name, site_role, tracking_id]
            else:
                to_add = [safety_score, site_role, tracking_id]
            tracked_bboxes.append(bbox.tolist() + to_add)

        image = utils.draw_demo(frame, tracked_bboxes)
        image = cv2.putText(image, "Time: {:.2f} FPS".format(fps), (0, 24),
                            cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2)
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        cv2.namedWindow("Detections", cv2.WINDOW_AUTOSIZE)
        cv2.imshow("Detections", image)
        if FLAGS.output:
            out.write(image)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

    vid.release()
    if FLAGS.output:
        out.release()
    cv2.destroyAllWindows()
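# This variant constructs Detection with seven arguments and later reads
# track.get_safety_score(), track.get_site_role() and track.get_face_name(), so the
# project evidently extends the stock deep_sort Detection/Track classes. A sketch of
# the detection side (an assumption, not the project's actual code):
class PPEDetection(Detection):
    def __init__(self, tlwh, confidence, class_id, feature,
                 safety_score=0, site_role=-1, face_name="None"):
        super().__init__(tlwh, confidence, feature)
        self.class_id = class_id
        self.safety_score = safety_score   # from utils.calculate_status
        self.site_role = site_role         # -1 marks a face-only detection
        self.face_name = face_name         # "None" for body detections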
def main(_argv):
    # Definition of the parameters
    max_cosine_distance = 0.4
    nn_budget = None
    nms_max_overlap = 1.0

    # initialize deep sort
    model_filename = 'model_data/mars-small128.pb'
    encoder = gdet.create_box_encoder(model_filename, batch_size=1)
    # calculate cosine distance metric
    metric = nn_matching.NearestNeighborDistanceMetric("cosine", max_cosine_distance, nn_budget)
    # initialize tracker
    tracker = Tracker(metric)

    # load configuration for object detector
    config = ConfigProto()
    config.gpu_options.allow_growth = True
    session = InteractiveSession(config=config)
    STRIDES, ANCHORS, NUM_CLASS, XYSCALE = utils.load_config(FLAGS)
    input_size = FLAGS.size
    video_path = FLAGS.video

    # load tflite model if flag is set
    if FLAGS.framework == 'tflite':
        interpreter = tf.lite.Interpreter(model_path=FLAGS.weights)
        interpreter.allocate_tensors()
        input_details = interpreter.get_input_details()
        output_details = interpreter.get_output_details()
        print(input_details)
        print(output_details)
    # otherwise load standard tensorflow saved model
    else:
        saved_model_loaded = tf.saved_model.load(FLAGS.weights, tags=[tag_constants.SERVING])
        infer = saved_model_loaded.signatures['serving_default']

    # begin video capture
    try:
        vid = cv2.VideoCapture(int(video_path))
    except:
        vid = cv2.VideoCapture(video_path)

    out = None
    # get video ready to save locally if flag is set
    if FLAGS.output:
        # by default VideoCapture returns float instead of int
        width = int(vid.get(cv2.CAP_PROP_FRAME_WIDTH))
        height = int(vid.get(cv2.CAP_PROP_FRAME_HEIGHT))
        fps = int(vid.get(cv2.CAP_PROP_FPS))
        codec = cv2.VideoWriter_fourcc(*FLAGS.output_format)
        out = cv2.VideoWriter(FLAGS.output, codec, fps, (width, height))

    frame_num = 0
    # frames seen per track id, used to estimate waiting time
    trackedPersons = {}

    # while video is running
    while True:
        return_value, frame = vid.read()
        if return_value:
            frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            image = Image.fromarray(frame)
        else:
            print('Video has ended or failed, try a different video format!')
            break
        frame_num += 1
        print('Frame #: ', frame_num)
        frame_size = frame.shape[:2]
        image_data = cv2.resize(frame, (input_size, input_size))
        image_data = image_data / 255.
        image_data = image_data[np.newaxis, ...].astype(np.float32)
        start_time = time.time()

        # run detections on tflite if flag is set
        if FLAGS.framework == 'tflite':
            interpreter.set_tensor(input_details[0]['index'], image_data)
            interpreter.invoke()
            pred = [interpreter.get_tensor(output_details[i]['index'])
                    for i in range(len(output_details))]
            # run detections using yolov3-tiny if flag is set
            if FLAGS.model == 'yolov3' and FLAGS.tiny == True:
                boxes, pred_conf = filter_boxes(pred[1], pred[0], score_threshold=0.25,
                                                input_shape=tf.constant([input_size, input_size]))
            else:
                boxes, pred_conf = filter_boxes(pred[0], pred[1], score_threshold=0.25,
                                                input_shape=tf.constant([input_size, input_size]))
        else:
            batch_data = tf.constant(image_data)
            pred_bbox = infer(batch_data)
            for key, value in pred_bbox.items():
                boxes = value[:, :, 0:4]
                pred_conf = value[:, :, 4:]

        boxes, scores, classes, valid_detections = tf.image.combined_non_max_suppression(
            boxes=tf.reshape(boxes, (tf.shape(boxes)[0], -1, 1, 4)),
            scores=tf.reshape(pred_conf, (tf.shape(pred_conf)[0], -1, tf.shape(pred_conf)[-1])),
            max_output_size_per_class=50,
            max_total_size=50,
            iou_threshold=FLAGS.iou,
            score_threshold=FLAGS.score)

        # convert data to numpy arrays and slice out unused elements
        num_objects = valid_detections.numpy()[0]
        bboxes = boxes.numpy()[0]
        bboxes = bboxes[0:int(num_objects)]
        scores = scores.numpy()[0]
        scores = scores[0:int(num_objects)]
        classes = classes.numpy()[0]
        classes = classes[0:int(num_objects)]

        # format bounding boxes from normalized ymin, xmin, ymax, xmax ---> xmin, ymin, width, height
        original_h, original_w, _ = frame.shape
        bboxes = utils.format_boxes(bboxes, original_h, original_w)

        # store all predictions in one parameter for simplicity when calling functions
        pred_bbox = [bboxes, scores, classes, num_objects]

        # read in all class names from config
        class_names = utils.read_class_names(cfg.YOLO.CLASSES)

        # by default allow all classes in .names file
        allowed_classes = list(class_names.values())
        # narrow the tracker to people (and bags) when the corresponding flags are set
        if FLAGS.person_only:
            allowed_classes = ['person']
        if FLAGS.person_bags:
            allowed_classes = ['person', 'backpack', 'handbag', 'suitcase']

        # loop through objects and use class index to get class name,
        # keeping only classes in the allowed_classes list
        names = []
        deleted_indx = []
        for i in range(num_objects):
            class_indx = int(classes[i])
            class_name = class_names[class_indx]
            if class_name not in allowed_classes:
                deleted_indx.append(i)
            else:
                names.append(class_name)
        names = np.array(names)
        count = len(names)
        if FLAGS.count:
            cv2.putText(frame, "Objects being tracked: {}".format(count), (5, 35),
                        cv2.FONT_HERSHEY_COMPLEX_SMALL, 2, (0, 255, 0), 2)
            print("Objects being tracked: {}".format(count))
        # delete detections that are not in allowed_classes
        bboxes = np.delete(bboxes, deleted_indx, axis=0)
        scores = np.delete(scores, deleted_indx, axis=0)

        # encode yolo detections and feed to tracker
        features = encoder(frame, bboxes)
        detections = [Detection(bbox, score, class_name, feature)
                      for bbox, score, class_name, feature in zip(bboxes, scores, names, features)]

        # initialize color map
        cmap = plt.get_cmap('tab20b')
        colors = [cmap(i)[:3] for i in np.linspace(0, 1, 20)]

        # run non-maxima suppression
        boxs = np.array([d.tlwh for d in detections])
        scores = np.array([d.confidence for d in detections])
        classes = np.array([d.class_name for d in detections])
        indices = preprocessing.non_max_suppression(boxs, classes, nms_max_overlap, scores)
        detections = [detections[i] for i in indices]

        # Call the tracker
        tracker.predict()
        tracker.update(detections)

        # draw a white panel for the waiting-time readout
        if FLAGS.waiting_time and len(tracker.tracks) > 0:
            cv2.rectangle(frame, (0, 0), (200, 200), (255, 255, 255), -1)
            cv2.putText(frame, "time lapsed", (5, 35),
                        cv2.FONT_HERSHEY_COMPLEX_SMALL, 1.3, (0, 0, 0), 2)

        # update tracks
        for (t, track) in enumerate(tracker.tracks):
            if not track.is_confirmed() or track.time_since_update > 1:
                continue
            bbox = track.to_tlbr()
            class_name = track.get_class()

            # draw bbox on screen
            color = colors[int(track.track_id) % len(colors)]
            color = [i * 255 for i in color]
            cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), color, 2)
            cv2.rectangle(frame, (int(bbox[0]), int(bbox[1] - 30)),
                          (int(bbox[0]) + (len(class_name) + len(str(track.track_id))) * 17,
                           int(bbox[1])), color, -1)
            cv2.putText(frame, class_name + "-" + str(track.track_id),
                        (int(bbox[0]), int(bbox[1] - 10)), 0, 0.75, (255, 255, 255), 2)

            # if enable info flag then print details about each track
            if FLAGS.info:
                print("Tracker ID: {}, Class: {}, BBox Coords (xmin, ymin, xmax, ymax): {}".format(
                    str(track.track_id), class_name,
                    (int(bbox[0]), int(bbox[1]), int(bbox[2]), int(bbox[3]))))

            # show the waiting time of each person (frame count / 30, i.e. assumes a 30 fps source)
            if FLAGS.waiting_time and len(tracker.tracks) > 0:
                if track.track_id not in trackedPersons:
                    trackedPersons[track.track_id] = 1
                trackedPersons[track.track_id] = trackedPersons[track.track_id] + 1
                waitedTime = round(trackedPersons[track.track_id] / 30, 1)
                cv2.putText(frame, "pers-" + str(track.track_id) + ": " + str(waitedTime),
                            (5, (t * 20) + 70), cv2.FONT_HERSHEY_COMPLEX_SMALL, 1, color, 2)

        # calculate frames per second of running detections
        fps = 1.0 / (time.time() - start_time)
        print("FPS: %.2f" % fps)
        result = np.asarray(frame)
        result = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)

        if not FLAGS.dont_show:
            cv2.imshow("Output Video", result)

        # if output flag is set, save video file
        if FLAGS.output:
            out.write(result)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
    cv2.destroyAllWindows()
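# The waiting-time readout above divides each track's accumulated frame count by a
# hard-coded 30, so it is only accurate for 30 fps sources. A variant that reuses the
# rate reported by the capture device (a sketch; falls back to 30 when the rate is
# unavailable, as happens with some webcams):
def waited_seconds(frames_seen, vid):
    rate = vid.get(cv2.CAP_PROP_FPS) or 30.0
    return round(frames_seen / rate, 1)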
class DeepSort(Process):
    def __init__(self, video_serial, isStop, gpuIndex, trackingQueue, resultQueue):
        Process.__init__(self)
        self.daemon = True
        self.encoder = None
        self.tracker = None
        self.isStop = isStop
        self.isDisplay = False
        self.gpuIndex = gpuIndex
        self.video_serial = video_serial
        self.trackingQueue = trackingQueue
        self.resultQueue = resultQueue

    def run(self):
        setproctitle.setproctitle("Tracker {}".format(self.video_serial))
        print('Tracker {}'.format(self.video_serial))
        max_cosine_distance = 0.45
        nn_budget = 100
        self.encoder = gdet.create_box_encoder(imgEncPath, batch_size=1, gpu_index=self.gpuIndex)
        metric = nn_matching.NearestNeighborDistanceMetric("cosine", max_cosine_distance, nn_budget)
        self.tracker = Tracker(metric, max_iou_distance=0.7, max_age=50, n_init=5)

        # `not ... value` rather than `... value is False`, so the check also works
        # when the shared flag stores an int instead of a bool
        while not self.isStop.value:
            while not self.trackingQueue.empty():
                (robotId, videoId, msg, frame, bboxes, confidences,
                 objectTypes, targetObjects) = self.trackingQueue.get()
                video_serial = robotId + "-" + videoId
                print('Tracker {} at keyframe {}'.format(video_serial, msg['keyframe']))
                features = self.encoder(frame, bboxes)
                detections = [Detection(bbox, confidence, feature, objectType)
                              for bbox, confidence, feature, objectType in zip(
                                  bboxes, confidences, features, objectTypes)]
                # keep only confident detections of the requested object types
                indices = [i for i in np.arange(len(detections))
                           if detections[i].confidence > 0.8
                           and detections[i].objectType in targetObjects]
                detections = [detections[i] for i in indices]
                msg['detectedObjects'] = [msg['detectedObjects'][i] for i in indices]
                print("detection indices: {}".format(indices))

                # Call the tracker
                self.tracker.predict()
                self.tracker.update(detections)

                if self.isDisplay:
                    displayFrame = frame.copy()
                for detection_id, detectedObject in zip(
                        np.arange(len(msg['detectedObjects'])), msg['detectedObjects']):
                    for track in self.tracker.tracks:
                        if not track.is_confirmed() or track.time_since_update > 0:
                            print("Tracker {} at keyframe {} track {} missed x {} y {}".format(
                                self.video_serial, msg['keyframe'], str(track.track_id),
                                int(track.to_tlwh()[0]), int(track.to_tlwh()[1])))
                            bbox = track.to_tlbr()
                            if self.isDisplay:
                                cv2.rectangle(displayFrame, (int(bbox[0]), int(bbox[1])),
                                              (int(bbox[2]), int(bbox[3])), (127, 127, 127), 2)
                                cv2.putText(displayFrame, "{}".format(str(track.track_id)),
                                            (int(bbox[0]), int(bbox[1]) - 20), 0, 5e-3 * 100,
                                            (0, 127, 0), 2)
                            continue
                        if track.detection_id == detection_id:
                            print("Tracker {} at keyframe {} track {} {} x {} y {}".format(
                                self.video_serial, msg['keyframe'],
                                detectedObject["objectType"], str(track.track_id),
                                int(track.to_tlwh()[0]), int(track.to_tlwh()[1])))
                            detectedObject["track_id"] = str(track.track_id)
                            tracking_bbox = track.to_tlwh()
                            detectedObject["tracking_bbox"] = {
                                "x": tracking_bbox[0],
                                "y": tracking_bbox[1],
                                "w": tracking_bbox[2],
                                "h": tracking_bbox[3],
                            }
                            if self.isDisplay:
                                bbox = track.to_tlbr()
                                cv2.rectangle(displayFrame, (int(bbox[0]), int(bbox[1])),
                                              (int(bbox[2]), int(bbox[3])), (255, 255, 255), 2)
                                cv2.putText(displayFrame, "{} {}".format(
                                    detectedObject["objectType"], str(track.track_id)),
                                    (int(bbox[0]), int(bbox[1]) - 20), 0, 5e-3 * 100,
                                    (0, 255, 0), 2)
                            break

                self.resultQueue.put([robotId, videoId, msg])
                if self.isDisplay:
                    print("Tracker {} show frame".format(self.video_serial))
                    title = "track : {}".format(self.video_serial)
                    cv2.putText(displayFrame, "keyframe {}".format(msg['keyframe']),
                                (30, 100), 0, 5e-3 * 100, (0, 0, 255), 2)
                    cv2.imshow(title, displayFrame)
                    cv2.waitKey(1)
            cv2.waitKey(1)
            sys.stdout.flush()
        cv2.waitKey(1)
        sys.stdout.flush()
        print("Tracker {} Stopped".format(self.video_serial))
def main(yolo):
    os.chdir('..')
    send_to_GUI = 0
    video_record = 1
    source = 'RPi'  # 0 for webcam, 'RPi', or a filename
    FLAGScsv = 0
    dict_prof = {}
    if FLAGScsv:
        csv_obj = save_csv()
    id_stay_old = [[], []]
    colors = {"male": (0, 0, 255), "female": (255, 0, 0), "None": (255, 255, 255)}
    device_obj = device_register()

    if send_to_GUI:
        # stream video to the notebook's GUI over TCP
        gst_out = cv2.VideoWriter(
            'appsrc ! videoconvert ! jpegenc quality=12 ! tcpserversink host=0.0.0.0 port=6007 sync=false',
            0, 15, (416, 416))

    # Definition of the parameters
    max_cosine_distance = 1.5
    nn_budget = None
    nms_max_overlap = 1.0

    # deep_sort
    model_filename = 'deep_sort/model_data/mars-small128.pb'
    encoder = gdet.create_box_encoder(model_filename, batch_size=8)
    metric = nn_matching.NearestNeighborDistanceMetric("cosine", max_cosine_distance, nn_budget)
    tracker = Tracker(metric, max_iou_distance=0.7, max_age=50, n_init=3, _next_id=1)

    if source == 'RPi':
        video_capture = connect_RPi()
    else:
        video_capture = cv2.VideoCapture(source)
        video_capture.set(cv2.CAP_PROP_BUFFERSIZE, 1)
    print('video source : ', source)

    if video_record:
        out = cv2.VideoWriter()
        out.open('output.mp4', cv2.VideoWriter_fourcc(*'H264'), 25, (1920, 1080), True)

    # ------------------------------------------------------------------- MAIN LOOP
    t_fps = [time.time()]
    while True:
        # drop buffered frames so detection always runs on a recent one
        for i in range(round(20 / 8)):
            video_capture.grab()
        ret, frame = video_capture.read()
        if not ret:
            if source == 'RPi':
                print('[ INFO ] No frame received from RPi: wait for 5 sec')
                time.sleep(5)
                video_capture = connect_RPi()
                continue
            else:
                video_capture = cv2.VideoCapture(source)
                video_capture.set(cv2.CAP_PROP_BUFFERSIZE, 1)
                continue
        frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

        # --------------------------------------------------------- DETECT WITH YOLO
        [gen_things, dev_things] = yolo.detect_image(frame, boxes_only=True)  # main detect call

        features_gen = encoder(frame, gen_things[0])
        detections_gen = [Detection(bbox, 1.0, feature_gen)
                          for bbox, feature_gen in zip(gen_things[0], features_gen)]
        features_dev = encoder(frame, dev_things[0])
        detections_dev = [Detection(bbox, 1.0, feature_dev)
                          for bbox, feature_dev in zip(dev_things[0], features_dev)]
        device_obj.startframe(detections_dev)

        # --------------------------------------------------------- DRAW DEVICES
        for i in range(0, len(detections_dev)):
            bbox = detections_dev[i].to_tlbr()
            label = dev_things[1][i]
            cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])),
                          (255, 255, 255), 2)
            cv2.putText(frame, label, (int(bbox[0]), int(bbox[1]) + 30),
                        cv2.FONT_HERSHEY_SIMPLEX, 5e-3 * 200, (255, 0, 0), 2)

        # --------------------------------------------------------- CALL THE TRACKER
        tracker.predict()
        tracker.update(detections_gen, gen_things[1])  # feed detections

        # --------------------------------------------------------- DRAW TRACK RECTANGLES
        id_stay = [[], []]
        for track in tracker.tracks:
            # dev_1p = {track.track_id: None}
            if not track.is_confirmed() or track.time_since_update > 1:
                continue
            bbox = track.to_tlbr()      # (min x, min y, max x, max y)
            bcenter = track.to_xyah()   # (center x, center y, aspect ratio, height)
            dict_prof[track.track_id] = [[str(track.gender)], []]

            # check devices near this person
            if (len(detections_dev) != 0) and (len(detections_gen) != 0):  # detected something
                euc_1p = device_obj.update_person(bcenter, track.track_id)
                for connect in euc_1p:  # each person
                    if connect is not None:
                        cv2.line(frame, (int(bcenter[0]), int(bcenter[1])),
                                 (int(connect[1]), int(connect[2])), (0, 255, 0), 3)
                        device_label = dev_things[1][int(connect[0])]
                        if device_label not in dict_prof[track.track_id]:
                            # don't record the same device twice
                            dict_prof[track.track_id][1].append(device_label)

            if track.gender == 'male':  # avoid None
                id_stay[0].append(track.track_id)
                dict_prof[track.track_id][0] = ['male']
            if track.gender == 'female':
                id_stay[1].append(track.track_id)
                dict_prof[track.track_id][0] = ['female']

            cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])),
                          colors[str(track.gender)], 2)
            cv2.putText(frame, str(track.track_id), (int(bbox[0]), int(bbox[1]) + 30),
                        cv2.FONT_HERSHEY_SIMPLEX, 5e-3 * 200, (0, 255, 0), 3)
            cv2.putText(frame, str(track.gender), (int(bbox[0]), int(bbox[1]) + 70),
                        cv2.FONT_HERSHEY_SIMPLEX, 5e-3 * 200, (0, 255, 0), 3)

        frame = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)  # back to BGR for display with OpenCV

        # --------------------------------------------------------- FRAME RATE
        t_fps.append(time.time())
        fps = 1 / (t_fps[1] - t_fps[0])
        t_fps.pop(0)
        cv2.putText(frame, 'FPS : {:.2f}'.format(fps), (5, 20),
                    cv2.FONT_HERSHEY_SIMPLEX, 5e-3 * 100, (0, 0, 255), 2)
        out.write(frame) if video_record else None  # write the frame if recording to file
        if send_to_GUI:
            frame = cv2.resize(frame, (416, 416))
            gst_out.write(frame)
            print('FPS : {:.2f}'.format(fps))
        else:
            cv2.imshow('', frame)

        if (id_stay != id_stay_old) and FLAGScsv:
            # save a csv row whenever the set of people in frame changes
            csv_obj.save_event(id_stay)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
        id_stay_old = id_stay

    out.release() if video_record else None
    gst_out.release() if send_to_GUI else None
    video_capture.release()
    cv2.destroyAllWindows()
    if FLAGScsv:
        csv_obj.save_profile(dict_prof)
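# connect_RPi() is not defined in this file. Given the retry logic above, it
# presumably opens a network stream from the Raspberry Pi; a plausible stand-in
# (the URL, transport, and camera setup are assumptions):
def connect_RPi(url='tcp://raspberrypi.local:5000'):
    cap = cv2.VideoCapture(url)
    cap.set(cv2.CAP_PROP_BUFFERSIZE, 1)  # keep latency low, matching the file/webcam path
    return cap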
def main(yolo):
    # Definition of the parameters
    max_cosine_distance = 0.3
    nn_budget = None
    nms_max_overlap = 1.0

    # deep_sort
    model_filename = 'model_data/mars-small128.pb'
    encoder = gdet.create_box_encoder(model_filename, batch_size=1)
    metric = nn_matching.NearestNeighborDistanceMetric("cosine", max_cosine_distance, nn_budget)
    tracker = Tracker(metric)

    writeVideo_flag = True

    video_capture = cv2.VideoCapture(0)

    if writeVideo_flag:
        # Define the codec and create VideoWriter object
        w = int(video_capture.get(3))
        h = int(video_capture.get(4))
        fourcc = cv2.VideoWriter_fourcc(*'MJPG')
        out = cv2.VideoWriter('output.avi', fourcc, 15, (w, h))
        list_file = open('detection.txt', 'w')
        frame_index = -1

    fps = 0.0
    while True:
        ret, frame = video_capture.read()  # frame shape 640*480*3
        if ret != True:
            break
        t1 = time.time()

        image = Image.fromarray(frame)
        boxs = yolo.detect_image(image)
        # print("box_num", len(boxs))
        features = encoder(frame, boxs)

        # score to 1.0 here.
        detections = [Detection(bbox, 1.0, feature) for bbox, feature in zip(boxs, features)]

        # Run non-maxima suppression.
        boxes = np.array([d.tlwh for d in detections])
        scores = np.array([d.confidence for d in detections])
        indices = preprocessing.non_max_suppression(boxes, nms_max_overlap, scores)
        detections = [detections[i] for i in indices]

        # Call the tracker
        tracker.predict()
        tracker.update(detections)

        for track in tracker.tracks:
            # skip tracks that are unconfirmed or have not been updated this frame
            if not track.is_confirmed() or track.time_since_update > 1:
                continue
            bbox = track.to_tlbr()
            cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])),
                          (255, 255, 255), 2)
            cv2.putText(frame, str(track.track_id), (int(bbox[0]), int(bbox[1])),
                        0, 5e-3 * 200, (0, 255, 0), 2)

        for det in detections:
            bbox = det.to_tlbr()
            cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])),
                          (255, 0, 0), 2)

        cv2.imshow('', frame)

        if writeVideo_flag:
            # save a frame
            out.write(frame)
            frame_index = frame_index + 1
            list_file.write(str(frame_index) + ' ')
            if len(boxs) != 0:
                for i in range(0, len(boxs)):
                    list_file.write(str(boxs[i][0]) + ' ' + str(boxs[i][1]) + ' ' +
                                    str(boxs[i][2]) + ' ' + str(boxs[i][3]) + ' ')
            list_file.write('\n')

        fps = (fps + (1. / (time.time() - t1))) / 2
        print("fps= %f" % (fps))

        # Press Q to stop!
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

    video_capture.release()
    if writeVideo_flag:
        out.release()
        list_file.close()
    cv2.destroyAllWindows()
def main():
    # Definition of the parameters
    max_cosine_distance = 2.0  # cosine distance never exceeds 2, so every appearance match is accepted
    nn_budget = None
    nms_max_overlap = 3.0      # overlap ratios never exceed 1, so NMS is effectively disabled

    # Deep SORT
    model_filename = 'model_data/mars-small128.pb'
    encoder = gdet.create_box_encoder(model_filename, batch_size=1)
    metric = nn_matching.NearestNeighborDistanceMetric("cosine", max_cosine_distance, nn_budget)
    tracker = Tracker(metric)

    show_detections = True    # draw blue detection boxes
    writeVideo_flag = True    # record the output video
    defaultSkipFrames = 5     # skipped frames between detections
    defaultConfidences = 0.5

    # set up the door line
    H1 = 245
    W1 = 370
    H2 = 278
    W2 = 480
    H = None
    W = None
    R = 80  # min R is 56
    door_dict = setup_door(H1, W1, H2, W2, R)

    totalFrames = 0
    totalIn = 0

    # create an empty list of centroids per track id to count traffic
    pts = [deque(maxlen=30) for _ in range(9999)]

    file_path = 'D:\\video/[Sala Outside][2020-05-28T16-01-39][2020-05-28T18-02-09].mp4'
    video_capture = cv2.VideoCapture(file_path)

    fps_imutils = imutils.video.FPS().start()

    if writeVideo_flag:
        fourcc = cv2.VideoWriter_fourcc(*'MP4V')
        out = cv2.VideoWriter('output_yolov4.mp4', fourcc, 3, (736, 480))

    detections = []  # carried over to the tracker on skipped frames
    while True:
        oke, frame = video_capture.read()
        if not oke:
            break
        frame = cv2.resize(frame, (736, 480))

        # if the frame dimensions are empty, set them
        if W is None or H is None:
            (H, W) = frame.shape[:2]

        # calculate video time
        videotime = video_capture.get(cv2.CAP_PROP_POS_MSEC) / 1000

        # draw the door line
        for w in range(W1, W2):
            cv2.circle(frame, (w, door_dict[w]), 1, (0, 255, 255), -1)
        cv2.circle(frame, (W1, H1), 4, (0, 0, 255), -1)
        cv2.circle(frame, (W2, H2), 4, (0, 0, 255), -1)

        if totalFrames % defaultSkipFrames == 0:
            boxes, confidence, classes = detect_image(frame, H, W, defaultConfidences)  # average time: 1.2s
            features = encoder(frame, boxes)
            detections = [Detection(bbox, confidence, cls, feature)
                          for bbox, confidence, cls, feature in zip(boxes, confidence, classes, features)]

            # Run non-maxima suppression.
            boxes = np.array([d.tlwh for d in detections])
            scores = np.array([d.confidence for d in detections])
            classes = np.array([d.cls for d in detections])
            indices = preprocessing.non_max_suppression(boxes, nms_max_overlap, scores)
            detections = [detections[i] for i in indices]

            # Call the tracker
            tracker.predict()
            tracker.update(detections)

            for det in detections:
                bbox = det.to_tlbr()
                if show_detections and len(classes) > 0:
                    det_cls = det.cls
                    score = "%.2f" % (det.confidence * 100) + "%"
                    cv2.putText(frame, str(det_cls) + " " + score,
                                (int(bbox[0]), int(bbox[3]) - 10), 0,
                                1e-3 * frame.shape[0], (0, 255, 0), 1)
                    cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])),
                                  (int(bbox[2]), int(bbox[3])), (255, 0, 0), 1)

            for track in tracker.tracks:
                if not track.is_confirmed():
                    continue
                bbox = track.to_tlbr()
                if not_count_staff(frame, int(bbox[0]), int(bbox[1]), int(bbox[2]), int(bbox[3])):
                    # adc = "%.2f" % (track.adc * 100) + "%"  # average detection confidence
                    cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])),
                                  (int(bbox[2]), int(bbox[3])), (0, 255, 255), 2)
                    cv2.putText(frame, "STAFF", (int(bbox[0]), int(bbox[1]) - 10), 0,
                                1e-3 * frame.shape[0], (0, 0, 255), 1)
                    continue
                else:
                    # adc = "%.2f" % (track.adc * 100) + "%"  # average detection confidence
                    cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])),
                                  (int(bbox[2]), int(bbox[3])), (255, 255, 255), 2)
                    cv2.putText(frame, "ID: " + str(track.track_id),
                                (int(bbox[0]), int(bbox[1])), 0,
                                1e-3 * frame.shape[0], (0, 255, 0), 1)

                    x = [c[0] for c in pts[track.track_id]]
                    y = [c[1] for c in pts[track.track_id]]
                    centroid_x = int(((bbox[0]) + (bbox[2])) / 2)
                    centroid_y = int(((bbox[1]) + (bbox[3])) / 2)
                    # count a person in when the centroid crosses the door line moving
                    # upward with enough horizontal travel
                    if not track.Counted and centroid_x in range(W1, W2):
                        if centroid_y < np.mean(y) and door_dict[centroid_x] > centroid_y \
                                and np.max(x) - np.min(x) > 20:
                            totalIn += 1
                            track.Counted = True
                            print(track.track_id, track.Counted)

                    cv2.circle(frame, (centroid_x, centroid_y), 4, (0, 255, 0), -1)
                    pts[track.track_id].append((centroid_x, centroid_y))

            info = [("Time", "{:.4f}".format(videotime)), ("In", totalIn)]

            # loop over the info tuples and draw them on our frame
            for (i, (k, v)) in enumerate(info):
                text = "{}: {}".format(k, v)
                cv2.putText(frame, text, (W - 150, ((i * 20) + 20)),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 0, 255), 2)

            if writeVideo_flag:
                # save a frame
                out.write(frame)

            if show_detections:
                cv2.imshow('People counter', frame)
                # Press Q to stop!
                if cv2.waitKey(1) & 0xFF == ord('q'):
                    break
        else:
            # skipped frame: advance the tracker on the previous detections
            tracker.predict()
            tracker.update(detections)

        fps_imutils.update()
        totalFrames += 1

    fps_imutils.stop()
    print('imutils FPS: {}'.format(fps_imutils.fps()))

    if writeVideo_flag:
        out.release()
    video_capture.release()
    cv2.destroyAllWindows()
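# setup_door() above fills a dict mapping every x in [W1, W2) to the y of a curved
# door line of "radius" R, which the loop then draws point by point. One simple
# geometry consistent with that usage is an arc of height R bowing upward from the
# chord (W1, H1)-(W2, H2); the exact curve used by the project is an assumption:
import math

def setup_door(H1, W1, H2, W2, R):
    door = {}
    for w in range(W1, W2):
        t = (w - W1) / float(W2 - W1)  # 0..1 position along the chord
        y_line = H1 + t * (H2 - H1)    # straight line between the endpoints
        door[w] = int(y_line - R * math.sin(math.pi * t))  # max offset R at the middle
    return door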