def read(stack):
    print('Process to read: %s' % os.getpid())
    yolo = YOLO()

    # Definition of the parameters
    max_cosine_distance = 0.3
    nn_budget = None
    nms_max_overlap = 1.0

    # deep_sort
    d = os.path.dirname(__file__)
    model_filename = d + '/model_data/mars-small128.pb'
    encoder = gdet.create_box_encoder(model_filename, batch_size=1)
    metric = nn_matching.NearestNeighborDistanceMetric("cosine", max_cosine_distance, nn_budget)
    tracker = Tracker(metric)

    max_boxs = 0
    face = ['17208019']
    # previous-frame point of each target
    history = {}
    # mapping from track id to label
    person = {}
    # ids that have already been assigned a new label
    change = []

    while True:
        if len(stack) != 0:
            frame = stack.pop()
            t1 = time.time()
            frame_count = 0
            localtime = time.asctime(time.localtime(time.time()))

            # read the warning line and draw it
            transboundaryline = line.readline()
            utils.draw(frame, transboundaryline)

            img = Image.fromarray(frame)
            # img.save('frame.jpg')
            '''
            cv2.line(frame, (837, 393), (930, 300), (0, 255, 255), 3)
            transboundaryline = t.line_detect_possible_demo(frame)
            '''
            # image = Image.fromarray(frame)
            image = Image.fromarray(frame[..., ::-1])  # bgr to rgb
            boxs = yolo.detect_image(image)
            # print("box_num", len(boxs))
            features = encoder(frame, boxs)

            # The detection confidence is fixed to 1.0 here.
            detections = [
                Detection(bbox, 1.0, feature)
                for bbox, feature in zip(boxs, features)
            ]

            # Run non-maxima suppression.
            boxes = np.array([d.tlwh for d in detections])
            scores = np.array([d.confidence for d in detections])
            indices = preprocessing.non_max_suppression(boxes, nms_max_overlap, scores)
            detections = [detections[i] for i in indices]

            if len(boxs) > max_boxs:
                max_boxs = len(boxs)

            # Call the tracker
            tracker.predict()
            tracker.update(detections)

            # info for this frame
            info = {}
            target = []
            for track in tracker.tracks:
                # one target in this frame
                per_info = {}
                if not track.is_confirmed() or track.time_since_update > 1:
                    continue
                if track.track_id not in person:
                    person[track.track_id] = str(track.track_id)
                bbox = track.to_tlbr()
                PointX = bbox[0] + ((bbox[2] - bbox[0]) / 2)
                PointY = bbox[3]
                dis = int(PointX) - 1200
                try:
                    if dis < 15:
                        if track.track_id not in change:
                            person[track.track_id] = face.pop(0)
                            change.append(track.track_id)
                except IndexError:
                    print('Illegal intrusion')

                # current target label
                if track.track_id not in change:
                    per_info['worker_id'] = 'unknown' + str(track.track_id)
                else:
                    per_info['worker_id'] = person[track.track_id]

                # current target coordinates
                yoloPoint = (int(PointX), int(PointY))
                per_info['current_point'] = yoloPoint

                # Kalman filter prediction
                if per_info['worker_id'] not in utils.KalmanNmae:
                    utils.myKalman(per_info['worker_id'])
                if per_info['worker_id'] not in utils.lmp:
                    utils.setLMP(per_info['worker_id'])
                cpx, cpy = utils.predict(yoloPoint[0], yoloPoint[1], per_info['worker_id'])
                if cpx[0] == 0.0 or cpy[0] == 0.0:
                    cpx[0] = yoloPoint[0]
                    cpy[0] = yoloPoint[1]
                if frame_count > 20:
                    per_info['next_point'] = (int(cpx), int(cpy))
                else:
                    per_info['next_point'] = yoloPoint

                # record line-crossing status
                if per_info['worker_id'] in history:
                    per_info['transboundary'] = 'no'
                    # print(transboundaryline)
                    line1 = [per_info['next_point'], history[per_info['worker_id']]]
                    a = line.IsIntersec2(transboundaryline, line1)
                    if a == '有交点':  # the two segments intersect
                        print('Line-crossing alert')
                        per_info['transboundary'] = 'yes'
                history[per_info['worker_id']] = per_info['current_point']
                frame_count = frame_count + 1
                # print(per_info)

                # draw target label
                # cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), (255, 255, 255), 2)
                cv2.putText(frame, per_info['worker_id'], (int(bbox[0]), int(bbox[1])),
                            0, 5e-3 * 200, (0, 255, 0), 2)
                target.append(per_info)

            info['time'] = localtime
            # info['frame'] = str(img.tolist()).encode('base64')
            info['frame'] = 'frame'
            info['target'] = target

            # write out JSON
            info_json = json.dumps(info)
            info_queue.put(info_json)
            getInfo(info_queue)

            cv2.imshow("img", frame)
            key = cv2.waitKey(1) & 0xFF
            if key == ord('q'):
                break
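# A minimal sketch (not from the original) of how read() could be driven:
# a capture process pushes frames onto a shared list while read() pops the
# most recent one. The write() helper, the stack-size cap, and the camera
# source are assumptions made for illustration only.
import multiprocessing


def write(stack, cam_source=0, top=100):
    """Grab frames from the camera and push them onto the shared stack."""
    print('Process to write: %s' % os.getpid())
    cap = cv2.VideoCapture(cam_source)
    while True:
        ret, frame = cap.read()
        if not ret:
            break
        stack.append(frame)
        if len(stack) >= top:
            # discard the backlog so read() always sees recent frames
            del stack[:]


if __name__ == '__main__':
    manager = multiprocessing.Manager()
    shared_stack = manager.list()
    pw = multiprocessing.Process(target=write, args=(shared_stack, 0))
    pr = multiprocessing.Process(target=read, args=(shared_stack,))
    pw.start()
    pr.start()
    pr.join()
    pw.terminate()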
    return tracker


class_names = [c.strip() for c in open('./data/labels/coco.names').readlines()]
yolo = YoloV3(classes=len(class_names))
yolo.load_weights('./weights/yolov3.tf')

max_cosine_distance = 0.5
nn_budget = None
nms_max_overlap = 0.8

model_filename = 'model_data/mars-small128.pb'
encoder = gdet.create_box_encoder(model_filename, batch_size=1)
metric = nn_matching.NearestNeighborDistanceMetric('cosine', max_cosine_distance, nn_budget)
tracker = Tracker(metric)

vid = cv2.VideoCapture('./data/video/' + name + '.mp4')

# get video dimensions
width = 0
height = 0
if vid.isOpened():
    width = vid.get(cv2.CAP_PROP_FRAME_WIDTH)    # float width
    height = vid.get(cv2.CAP_PROP_FRAME_HEIGHT)  # float height

multiTracker = cv2.MultiTracker_create()


def main():
def main(yolo, video_path=0, save_path=None):
    # Definition of the parameters
    max_cosine_distance = 0.5
    nn_budget = None

    # openpose
    w, h = model_wh('0x0')
    model = 'mobilenet_thin'
    config = tf.ConfigProto(device_count={'gpu': 0})
    config.gpu_options.per_process_gpu_memory_fraction = 0.3
    e = TfPoseEstimator(get_graph_path(model), target_size=(64, 64), tf_config=config)

    # deep_sort
    model_filename = 'model_data/mars-small128.pb'
    encoder = gdet.create_box_encoder(model_filename, batch_size=1)
    metric = nn_matching.NearestNeighborDistanceMetric("cosine", max_cosine_distance, nn_budget)

    video = Video(video_path)
    # video_capture = cv2.VideoCapture()  # 'data/person.mp4'
    w = video.w
    h = video.h
    tracker = Tracker(metric, img_shape=(w, h),
                      max_eu_dis=0.1 * np.sqrt(w**2 + h**2))

    fourcc = cv2.VideoWriter_fourcc(*'MJPG')
    out = cv2.VideoWriter(save_path, fourcc, 7, (w, h))
    list_file = open(output_dir + 'detection.txt', 'w')
    frame_index = -1

    fps = 0.0
    while True:
        ret, frame = video.video_capture.read()  # frame shape 640*480*3
        if ret is not True or frame is None:
            break
        t1 = time.time()

        image = Image.fromarray(frame)
        boxes, scores, _ = yolo.detect_image(image)
        # start = time.time()
        features = encoder(frame, boxes)  # extract a feature vector for each box
        # end = time.time()
        # print(end - start)
        detections = [
            Detection(bbox_and_feature[0], scores[idx], bbox_and_feature[1])
            for idx, bbox_and_feature in enumerate(zip(boxes, features))
        ]  # wrap each detection in a Detection object

        # Call the tracker
        tracker.predict()
        tracker.update(detections, np.asarray(image))

        humans = get_keypoints(image, e)
        frame = draw_humans(image, humans, imgcopy=False)

        for index, track in enumerate(tracker.tracks):
            if not track.is_confirmed() or track.time_since_update > 1:
                continue
            bbox = track.to_tlbr()
            cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])),
                          (int(bbox[2]), int(bbox[3])), (255, 255, 255), 2)
            cv2.putText(frame, "{}".format(track.track_id),
                        (int(bbox[0]), int(bbox[1])), 0, 5e-3 * 200, (0, 255, 0), 2)

        # for det in detections:
        #     bbox = det.to_tlbr()
        #     cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), (255, 0, 0), 2)

        cv2.imshow('', frame)

        if save_path is not None:
            # Define the codec and create VideoWriter object
            # save a frame
            out.write(frame)
            frame_index = frame_index + 1
            list_file.write(str(frame_index) + ' ')
            if len(boxes) != 0:
                for i in range(0, len(boxes)):
                    list_file.write(
                        str(boxes[i][0]) + ' ' + str(boxes[i][1]) + ' ' +
                        str(boxes[i][2]) + ' ' + str(boxes[i][3]) + ' ')
            list_file.write('\n')

        fps = (fps + (1. / (time.time() - t1))) / 2
        # print("fps= %f" % fps)

        # Press Q to stop!
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

    video.video_capture.release()
    if save_path is not None:
        out.release()
        list_file.close()
    cv2.destroyAllWindows()
os.path.dirname(__file__))) + os.sep + "logs" + os.sep
make_dir(log_file_folder)
log_file_str = log_file_folder + os.sep + "text.log"
th = handlers.TimedRotatingFileHandler(filename=log_file_str, when='H', encoding='utf-8')
th.setFormatter(format_str)
logger.addHandler(sh)
logger.addHandler(th)


if __name__ == '__main__':
    # Deep SORT tracker
    encoder = generate_detections.create_box_encoder(ARGS.model_feature, batch_size=1)
    metric = nn_matching.NearestNeighborDistanceMetric("cosine", ARGS.min_score, None)
    tracker = Tracker(metric)

    # load the model
    yolo = YOLO4(ARGS.model_yolo, ARGS.min_score)

    # read live camera frames or a video file
    try:
        video = cv2.VideoCapture(int(ARGS.video))
    except ValueError:
        video = cv2.VideoCapture(ARGS.video)

    # save the output video
    fourcc = cv2.VideoWriter_fourcc(*'XVID')
    fps = video.get(cv2.CAP_PROP_FPS)
    size = (int(video.get(cv2.CAP_PROP_FRAME_WIDTH)),
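# For context, a minimal self-contained version of the logger wiring above.
# The first line of the fragment is truncated in the source; make_dir,
# format_str, and sh come from the original file, so everything spelled out
# here (logger name, format string, folder layout) is an assumption.
import logging
import os
from logging import handlers

logger = logging.getLogger("tracker")                      # assumed name
logger.setLevel(logging.INFO)
format_str = logging.Formatter('%(asctime)s - %(levelname)s - %(message)s')

sh = logging.StreamHandler()
sh.setFormatter(format_str)

log_file_folder = os.path.join(os.path.dirname(os.path.abspath(__file__)), "logs")
os.makedirs(log_file_folder, exist_ok=True)                # stand-in for make_dir
th = handlers.TimedRotatingFileHandler(
    filename=os.path.join(log_file_folder, "text.log"),
    when='H', encoding='utf-8')                            # rotate hourly
th.setFormatter(format_str)

logger.addHandler(sh)
logger.addHandler(th)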
def main(yolo):
    # Definition of the parameters
    max_cosine_distance = 0.3
    nn_budget = None
    nms_max_overlap = 1.0

    # deep_sort
    model_filename = 'model_data/mars-small128.pb'
    encoder = gdet.create_box_encoder(model_filename, batch_size=1)
    metric = nn_matching.NearestNeighborDistanceMetric("cosine", max_cosine_distance, nn_budget)
    tracker = Tracker(metric)

    writeVideo_flag = True

    video_capture = cv2.VideoCapture(0)

    if writeVideo_flag:
        # Define the codec and create VideoWriter object
        w = int(video_capture.get(3))
        h = int(video_capture.get(4))
        fourcc = cv2.VideoWriter_fourcc(*'MJPG')
        out = cv2.VideoWriter('output.avi', fourcc, 15, (w, h))
        list_file = open('detection.txt', 'w')
        frame_index = -1

    fps = 0.0
    while True:
        ret, frame = video_capture.read()  # frame shape 640*480*3
        if not ret:
            break
        t1 = time.time()

        # image = Image.fromarray(frame)
        image = Image.fromarray(frame[..., ::-1])  # bgr to rgb
        boxs = yolo.detect_image(image)
        # print("box_num", len(boxs))
        features = encoder(frame, boxs)

        # The detection confidence is fixed to 1.0 here.
        detections = [
            Detection(bbox, 1.0, feature)
            for bbox, feature in zip(boxs, features)
        ]

        # Run non-maxima suppression.
        boxes = np.array([d.tlwh for d in detections])
        scores = np.array([d.confidence for d in detections])
        indices = preprocessing.non_max_suppression(boxes, nms_max_overlap, scores)
        detections = [detections[i] for i in indices]

        # Call the tracker
        tracker.predict()
        tracker.update(detections)

        for track in tracker.tracks:
            if not track.is_confirmed() or track.time_since_update > 1:
                continue
            bbox = track.to_tlbr()
            cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])),
                          (int(bbox[2]), int(bbox[3])), (255, 255, 255), 2)
            cv2.putText(frame, str(track.track_id), (int(bbox[0]), int(bbox[1])),
                        0, 5e-3 * 200, (0, 255, 0), 2)

        for det in detections:
            bbox = det.to_tlbr()
            cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])),
                          (int(bbox[2]), int(bbox[3])), (255, 0, 0), 2)

        cv2.imshow('', frame)

        if writeVideo_flag:
            # save a frame
            out.write(frame)
            frame_index = frame_index + 1
            list_file.write(str(frame_index) + ' ')
            if len(boxs) != 0:
                for i in range(0, len(boxs)):
                    list_file.write(
                        str(boxs[i][0]) + ' ' + str(boxs[i][1]) + ' ' +
                        str(boxs[i][2]) + ' ' + str(boxs[i][3]) + ' ')
            list_file.write('\n')

        fps = (fps + (1. / (time.time() - t1))) / 2
        print("fps= %f" % fps)

        # Press Q to stop!
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

    video_capture.release()
    if writeVideo_flag:
        out.release()
        list_file.close()
    cv2.destroyAllWindows()
def main(yolo):
    # Definition of the parameters
    max_cosine_distance = 0.3
    nn_budget = None
    nms_max_overlap = 1.0

    # deep_sort
    model_filename = 'model_data/mars-small128.pb'
    encoder = gdet.create_box_encoder(model_filename, batch_size=1)
    metric = nn_matching.NearestNeighborDistanceMetric("cosine", max_cosine_distance, nn_budget)
    tracker = Tracker(metric)

    # video_path = "C:/Users/Rashid Ali/Desktop/Person counting/V05.mp4"
    writeVideo_flag = True

    video_capture = cv2.VideoCapture(0)

    if writeVideo_flag:
        # Define the codec and create VideoWriter object
        video_width = int(video_capture.get(3))
        video_height = int(video_capture.get(4))
        video_fps = int(video_capture.get(5))
        video_size = (int(video_width), int(video_height))
        fourcc = cv2.VideoWriter_fourcc('m', 'p', '4', 'v')
        out = cv2.VideoWriter('output_V09.mp4', fourcc, video_fps, video_size)
        list_file = open('detection_v2.txt', 'w')
        frame_index = -1

    fps = 0.0
    while True:
        ret, frame = video_capture.read()  # frame shape 640*480*3
        if not ret:
            break
        t1 = time.time()

        # image = Image.fromarray(frame)
        image = Image.fromarray(frame[..., ::-1])  # bgr to rgb
        boxs = yolo.detect_image(image)
        # print("box_num", len(boxs))
        features = encoder(frame, boxs)

        # The detection confidence is fixed to 1.0 here.
        detections = [
            Detection(bbox, 1.0, feature)
            for bbox, feature in zip(boxs, features)
        ]

        # Run non-maxima suppression.
        boxes = np.array([d.tlwh for d in detections])
        scores = np.array([d.confidence for d in detections])
        indices = preprocessing.non_max_suppression(boxes, nms_max_overlap, scores)
        detections = [detections[i] for i in indices]

        # Call the tracker
        tracker.predict()
        tracker.update(detections)

        '''
        for track in tracker.tracks:
            if not track.is_confirmed() or track.time_since_update > 1:
                continue
            bbox = track.to_tlbr()
            cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), (255, 255, 255), 2)
            cv2.putText(frame, str(track.track_id), (int(bbox[0]), int(bbox[1])), 0, 5e-3 * 200, (0, 255, 0), 2)
        '''

        person_count = 0
        count1 = 0
        for track in tracker.tracks:
            if not track.is_confirmed() or track.time_since_update > 1:
                continue
            person_count = person_count + 1
            track_id = '{} {:.1f}'.format('Track_ID', track.track_id)
            count1 = '{} {:.1f}'.format('Total Persons Count', person_count)
            bbox = track.to_tlbr()
            cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])),
                          (int(bbox[2]), int(bbox[3])), (255, 255, 255), 2)
            cv2.putText(frame, str(track_id), (int(bbox[0]), int(bbox[1])),
                        0, 5e-3 * 100, (0, 255, 0), 2)
            cv2.putText(frame, str(count1), (20, 50), 0, 5e-3 * 100, (247, 7, 7), 2)

        cv2.putText(frame, '{:.2f}ms'.format((time.time() - t1) * 1000), (20, 20),
                    fontFace=cv2.FONT_HERSHEY_COMPLEX, fontScale=0.5,
                    color=(247, 7, 7), thickness=1)
        cv2.imshow('Detections Window', frame)

        '''
        # cv2.namedWindow("Detections Window", cv2.WINDOW_AUTOSIZE)
        # alternative per-detection counting, kept for reference:
        person_count = 0
        count1 = 0
        for det in detections:
            person_count = person_count + 1
            count = '{} {:.1f}'.format('Count', person_count)
            count1 = '{} {:.1f}'.format('Total Count', person_count)
            bbox = det.to_tlbr()
            cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), (247, 7, 7), 2)
            cv2.putText(frame, str(count), (int(bbox[0]), int(bbox[1])), fontFace=cv2.FONT_HERSHEY_COMPLEX,
                        fontScale=0.5, color=(247, 7, 7), thickness=2)
        '''

        if writeVideo_flag:
            # save a frame
            print("Writing detections in file")
            out.write(frame)
            frame_index = frame_index + 1
            list_file.write(str(frame_index) + ' ')
            if len(boxs) != 0:
                for i in range(0, len(boxs)):
                    list_file.write(
                        str(boxs[i][0]) + ' ' + str(boxs[i][1]) + ' ' +
                        str(boxs[i][2]) + ' ' + str(boxs[i][3]) + ' ')
            list_file.write(str(person_count) + '\n')

        fps = (fps + (1. / (time.time() - t1))) / 2
        print("fps= %f" % fps)

        # Press Q to stop!
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

    video_capture.release()
    if writeVideo_flag:
        out.release()
        list_file.close()
    cv2.destroyAllWindows()
def Object_tracking(Yolo, video_path, output_path, input_size=416, show=False,
                    CLASSES=YOLO_COCO_CLASSES, score_threshold=0.3, iou_threshold=0.45,
                    rectangle_colors='', Track_only=[], custom_yolo=None,
                    custom_classes=YOLO_CUSTOM_CLASSES, Custom_track_only=[]):
    # Definition of the parameters
    max_cosine_distance = 0.7
    nn_budget = None

    # initialize deep sort object
    model_filename = 'model_data/mars-small128.pb'
    encoder = gdet.create_box_encoder(model_filename, batch_size=1)
    metric = nn_matching.NearestNeighborDistanceMetric("cosine", max_cosine_distance, nn_budget)
    tracker = Tracker(metric)

    times, times_2 = [], []

    if video_path:
        vid = cv2.VideoCapture(video_path)  # detect on video
    else:
        vid = cv2.VideoCapture(0)  # detect from webcam

    # by default VideoCapture returns float instead of int
    width = int(vid.get(cv2.CAP_PROP_FRAME_WIDTH))
    height = int(vid.get(cv2.CAP_PROP_FRAME_HEIGHT))
    fps = int(vid.get(cv2.CAP_PROP_FPS))
    codec = cv2.VideoWriter_fourcc(*'XVID')
    out = cv2.VideoWriter(output_path, codec, fps, (width, height))  # output_path must be .mp4

    NUM_CLASS = read_class_names(CLASSES)
    key_list = list(NUM_CLASS.keys())
    val_list = list(NUM_CLASS.values())

    # set a bunch of flags and variables for made baskets and possessions
    possession = None
    possession_list = []
    combined_possession_avg = 0.5
    total_basket_count = 0
    basket_frame_list = []
    baskets_dict = {"Dark": 0, "Light": 0}
    made_basket_first_frame = 0
    made_basket_frames = 0
    basket_marked = False

    if custom_yolo:
        NUM_CUSTOM_CLASS = read_class_names(custom_classes)
        custom_key_list = list(NUM_CUSTOM_CLASS.keys())
        custom_val_list = list(NUM_CUSTOM_CLASS.values())

    frame_counter = 0
    # loop through each frame in video
    while True:
        _, frame = vid.read()

        try:
            first_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            original_frame = cv2.cvtColor(first_frame, cv2.COLOR_BGR2RGB)
            frame_counter += 1
        except cv2.error:
            break

        image_data = image_preprocess(np.copy(first_frame), [input_size, input_size])
        # image_data = tf.expand_dims(image_data, 0)
        image_data = image_data[np.newaxis, ...].astype(np.float32)

        t1 = time.time()

        # no basketball seen yet this frame (also keeps the tracking block below
        # safe when the custom model is disabled)
        basketball_center = None

        # CUSTOM BLOCK FOR BASKETBALL
        if custom_yolo:
            if YOLO_FRAMEWORK == "tf":
                # use yolo model to make prediction on the image data
                custom_pred_bbox = custom_yolo.predict(image_data)
                # reshape our data to be in correct form for processing
                custom_pred_bbox = [tf.reshape(x, (-1, tf.shape(x)[-1])) for x in custom_pred_bbox]
                custom_pred_bbox = tf.concat(custom_pred_bbox, axis=0)

            # get boxes based on threshold
            custom_bboxes = postprocess_boxes(custom_pred_bbox, original_frame, input_size, 0.3)
            # custom_bboxes = nms(custom_bboxes, iou_threshold, method='nms')

            # extract bboxes to boxes (x, y, width, height), scores and names
            custom_boxes, custom_scores, custom_names = [], [], []
            for bbox in custom_bboxes:
                if len(Custom_track_only) != 0 and NUM_CUSTOM_CLASS[int(bbox[5])] in Custom_track_only \
                        or len(Custom_track_only) == 0:
                    custom_boxes.append([bbox[0].astype(int), bbox[1].astype(int),
                                         bbox[2].astype(int) - bbox[0].astype(int),
                                         bbox[3].astype(int) - bbox[1].astype(int)])
                    custom_scores.append(bbox[4])
                    custom_names.append(NUM_CUSTOM_CLASS[int(bbox[5])])

            # Obtain all the detections for the given frame.
            custom_boxes = np.array(custom_boxes)
            custom_names = np.array(custom_names)
            custom_scores = np.array(custom_scores)

            # take note of the highest "scoring" made basket and basketball obj in each frame
            highest_scoring_basketball = 0
            basketball_box = None
            highest_scoring_made_basket = 0
            made_basket_box = None
            for i, bbox in enumerate(custom_bboxes):
                # loop through each bounding box to get the "best" one of the frame;
                # we do this because sometimes our model will detect two, and we know there can only be one
                name = custom_names[i]
                score = round(custom_scores[i], 3)
                if name == 'basketball':
                    if score > highest_scoring_basketball:
                        highest_scoring_basketball = score
                        basketball_box = bbox
                if name == 'made-basket':
                    if score > .85 and score > highest_scoring_made_basket:
                        highest_scoring_made_basket = score
                        made_basket_box = bbox

            # if it sees a basketball, put a box on it and note the center (for possession)
            if basketball_box is not None:
                cv2.rectangle(original_frame, (int(basketball_box[0]), int(basketball_box[1])),
                              (int(basketball_box[2]), int(basketball_box[3])), (0, 0, 255), 1)
                cv2.rectangle(original_frame, (int(basketball_box[0]), int(basketball_box[1] - 30)),
                              (int(basketball_box[0]) + 10 * 17, int(basketball_box[1])), (0, 0, 255), -1)
                cv2.putText(original_frame, "basketball" + "-" + str(highest_scoring_basketball),
                            (int(basketball_box[0]), int(basketball_box[1] - 10)), 0, 0.5, (255, 255, 255), 1)
                basketball_center = ((basketball_box[2] + basketball_box[0]) / 2,
                                     (basketball_box[3] + basketball_box[1]) / 2)

            if made_basket_box is not None:
                # if there's a made basket, put the box on it
                cv2.rectangle(original_frame, (int(made_basket_box[0]), int(made_basket_box[1])),
                              (int(made_basket_box[2]), int(made_basket_box[3])), (0, 255, 0), 1)
                cv2.rectangle(original_frame, (int(made_basket_box[0]), int(made_basket_box[1] - 30)),
                              (int(made_basket_box[0]) + 15 * 17, int(made_basket_box[1])), (0, 255, 0), -1)
                cv2.putText(original_frame, "made-basket" + " " + str(highest_scoring_made_basket),
                            (int(made_basket_box[0]), int(made_basket_box[1] - 10)), 0, 0.6, (0, 0, 0), 1)
                if made_basket_frames == 0:
                    # this is the first frame in the sequence
                    made_basket_first_frame = frame_counter
                # increment a counter for made basket frames
                made_basket_frames += 1
                # if there were 3 consecutive frames AND we haven't marked the basket yet, count it!
                if made_basket_frames >= 3 and not basket_marked:
                    basket_marked = True
                    basket_frame_list.append(made_basket_first_frame)
                    if possession:
                        # record which "team" scored the basket
                        baskets_dict[possession] += 1
            else:
                # no made basket: make sure the made-basket counter is at zero
                made_basket_frames = 0

            # 60 frames after a made basket we can reset the "marked basket" flag to False;
            # in essence this means we start looking for made baskets again
            if basket_marked and frame_counter > basket_frame_list[-1] + 60:
                basket_marked = False
        # END CUSTOM BLOCK

        # PERSON PREDICTION and TRACKING BLOCK
        if YOLO_FRAMEWORK == "tf":
            pred_bbox = Yolo.predict(image_data)
        elif YOLO_FRAMEWORK == "trt":
            batched_input = tf.constant(image_data)
            result = Yolo(batched_input)
            pred_bbox = []
            for key, value in result.items():
                value = value.numpy()
                pred_bbox.append(value)

        t2 = time.time()

        pred_bbox = [tf.reshape(x, (-1, tf.shape(x)[-1])) for x in pred_bbox]
        pred_bbox = tf.concat(pred_bbox, axis=0)

        bboxes = postprocess_boxes(pred_bbox, original_frame, input_size, score_threshold)
        bboxes = nms(bboxes, iou_threshold, method='nms')

        # extract bboxes to boxes (x, y, width, height), scores and names
        boxes, scores, names = [], [], []
        for bbox in bboxes:
            if len(Track_only) != 0 and NUM_CLASS[int(bbox[5])] in Track_only or len(Track_only) == 0:
                w = bbox[2].astype(int) - bbox[0].astype(int)
                h = bbox[3].astype(int) - bbox[1].astype(int)
                if h < height / 3 and w < width / 4:
                    if h > 120:
                        boxes.append([bbox[0].astype(int), bbox[1].astype(int), w, h])
                        scores.append(bbox[4])
                        names.append(NUM_CLASS[int(bbox[5])])

        # Obtain all the detections for the given frame.
        boxes = np.array(boxes)
        names = np.array(names)
        scores = np.array(scores)

        # detect jersey color using the tracked person's bounding box
        patches = [gdet.extract_image_patch(frame, box, [box[3], box[2]]) for box in boxes]
        color_ratios = [find_color(patch) for patch in patches]

        features = np.array(encoder(original_frame, boxes))
        # mark the detection
        detections = [Detection(bbox, score, class_name, feature, color_ratio)
                      for bbox, score, class_name, feature, color_ratio
                      in zip(boxes, scores, names, features, color_ratios)]

        # Pass detections to the deepsort object and obtain the track information.
        tracker.predict()
        tracker.update(detections)

        # Obtain info from the tracks
        tracked_bboxes = []
        color_ratio_list = []
        check_possession = False
        for track in tracker.tracks:
            if not track.is_confirmed() or track.time_since_update > 5:
                continue
            color_ratio = track.get_color_ratio()
            color_ratio_list.append(color_ratio)
            bbox = track.to_tlbr()          # Get the corrected/predicted bounding box
            class_name = track.get_class()  # Get the class name of particular object
            tracking_id = track.track_id    # Get the ID for the particular track
            index = key_list[val_list.index(class_name)]  # Get predicted object index by object name
            # Structure data so we can use it with our draw_bbox function
            tracked_bboxes.append(bbox.tolist() + [tracking_id, index])

            # if there is a basketball in the frame, and it's "in" a person's
            # bounding box, check which box it is in for possession
            if basketball_center:
                if bbox[0] <= basketball_center[0] <= bbox[2]:
                    if bbox[1] <= basketball_center[1] <= bbox[3]:
                        check_possession = True
                        if color_ratio <= .2:
                            # light team
                            possession_list.append(0)
                        else:
                            # dark team
                            possession_list.append(1)
            else:
                # no basketball in frame
                # possession_list.append(-1)
                # test_list.pop(0)
                pass

        # if the ball is in a bounding box, update our possession tracker
        if check_possession:
            if len(possession_list) > 60:
                # take an average of the last 60 possessions marked to determine
                # the current possession, weighting the most recent detections more.
                # This algo is a WIP.
                possession_list = possession_list[-60:]
                # full_avg = sum(possession_list) / len(possession)
                last_60_avg = sum(possession_list[-60:]) / 60
                last_30_avg = sum(possession_list[-30:]) / 30
                last_15_avg = sum(possession_list[-15:]) / 15
                last_5_avg = sum(possession_list[-5:]) / 5
                combined_possession_avg = round(
                    (last_60_avg + last_30_avg + last_15_avg + last_5_avg) / 4, 3)
                # most_common_possession = stats.mode(possession_list)[0]
            else:
                combined_possession_avg = round(sum(possession_list) / len(possession_list), 3)

            # use our possession average to determine who has the ball right now
            if combined_possession_avg < 0.5:
                possession = "Light"
            elif combined_possession_avg > 0.5:
                possession = "Dark"

        # draw detection on frame
        image = draw_bbox(original_frame, tracked_bboxes, color_ratios=color_ratio_list,
                          CLASSES=CLASSES, tracking=True)

        t3 = time.time()
        times.append(t2 - t1)
        times_2.append(t3 - t1)
        times = times[-20:]
        times_2 = times_2[-20:]
        ms = sum(times) / len(times) * 1000
        fps = 1000 / ms
        fps2 = 1000 / (sum(times_2) / len(times_2) * 1000)

        if possession == "Light":
            image = cv2.putText(image, "Possession: {}".format(possession), (width - 400, 30),
                                cv2.FONT_HERSHEY_COMPLEX_SMALL, 1, (50, 255, 255), 2)
        else:
            image = cv2.putText(image, "Possession: {}".format(possession), (width - 400, 30),
                                cv2.FONT_HERSHEY_COMPLEX_SMALL, 1, (0, 0, 255), 2)
        # image = cv2.putText(image, "Light: {} Dark: {} None: {}".format(possession_list.count(0), possession_list.count(1), possession_list.count(-1)), (400, 30), cv2.FONT_HERSHEY_COMPLEX_SMALL, 1, (0, 0, 255), 2)
        image = cv2.putText(image, "Possession Avg: {}".format(combined_possession_avg), (400, 30),
                            cv2.FONT_HERSHEY_COMPLEX_SMALL, 1, (0, 0, 255), 2)
        image = cv2.putText(image, "Time: {:.1f} FPS".format(fps), (0, 30),
                            cv2.FONT_HERSHEY_COMPLEX_SMALL, 1, (0, 0, 255), 2)

        # draw original yolo detection
        # image = draw_bbox(image, bboxes, CLASSES=CLASSES, show_label=False, rectangle_colors=rectangle_colors, tracking=True)

        print("Time: {:.2f}ms, Detection FPS: {:.1f}, total FPS: {:.1f}".format(ms, fps, fps2))

        if output_path != '':
            out.write(image)
        if show:
            cv2.imshow('output', image)
            if cv2.waitKey(25) & 0xFF == ord("q"):
                cv2.destroyAllWindows()
                break

    cv2.destroyAllWindows()
    return_data = {"baskets_dict": baskets_dict, "basket_frame_list": basket_frame_list}
    print("video saved to {}".format(output_path))
    return return_data
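# A hypothetical driver for Object_tracking (not part of the original).
# load_person_model and load_basket_model are placeholder names for however
# the two YOLO models are constructed in this project; the paths and class
# names are assumptions that only illustrate the expected call signature.
if __name__ == '__main__':
    person_yolo = load_person_model()    # placeholder: person detector
    basket_yolo = load_basket_model()    # placeholder: basketball/made-basket detector
    stats = Object_tracking(person_yolo, "data/game.mp4", "out/game_tracked.mp4",
                            input_size=416, show=True, Track_only=["person"],
                            custom_yolo=basket_yolo,
                            Custom_track_only=["basketball", "made-basket"])
    print(stats["baskets_dict"], stats["basket_frame_list"])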
# opencv 2
if cv2.__version__.split(".")[0] == "2":
    frame_count = vcap.get(cv2.cv.CV_CAP_PROP_FRAME_COUNT)
else:
    # opencv 3/4
    frame_count = vcap.get(cv2.CAP_PROP_FRAME_COUNT)

# initialize tracking module
if args.get_tracking:
    tracking_objs = args.tracking_objs.split(",")
    tracker_dict = {}
    tracking_results_dict = {}
    tmp_tracking_results_dict = {}
    for tracking_obj in tracking_objs:
        metric = nn_matching.NearestNeighborDistanceMetric(
            "cosine", args.max_cosine_distance, args.nn_budget)
        tracker_dict[tracking_obj] = Tracker(
            metric, max_iou_distance=args.max_iou_distance)
        tracking_results_dict[tracking_obj] = []
        tmp_tracking_results_dict[tracking_obj] = {}

# videoname = os.path.splitext(os.path.basename(videofile))[0]
videoname = os.path.basename(videofile)
video_obj_out_path = None
if args.obj_out_dir is not None:  # not saving box json to save time
    video_obj_out_path = os.path.join(args.obj_out_dir, videoname)
    if not os.path.exists(video_obj_out_path):
        os.makedirs(video_obj_out_path)

video_queuer = VideoEnqueuer(args, vcap,
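# A small helper (an assumption, not in the original) that hides the
# OpenCV 2 vs 3/4 constant difference used above, so the version check
# only has to live in one place:
def get_frame_count(vcap):
    """Return the total frame count across OpenCV major versions."""
    if cv2.__version__.split(".")[0] == "2":
        return int(vcap.get(cv2.cv.CV_CAP_PROP_FRAME_COUNT))
    return int(vcap.get(cv2.CAP_PROP_FRAME_COUNT))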
def main(yolo):
    global p1_flag, p2_flag, vline1, mid_line, vline2, lane_1, lane_2, lane_3, lane_4, lane_5, lane_6, mask, preset
    global cnt_lane_1, cnt_lane_2, cnt_lane_3, cnt_lane_4, cnt_lane_5, cnt_lane_6, global_point
    global speed_lane_1, speed_lane_2, speed_lane_3, speed_lane_4, speed_lane_5, speed_lane_6

    ################# parameters ######################
    track_len = 2
    detect_interval = 4
    of_track = []
    preset = 0
    alpha = 0.3
    mm1, mm2, mm3, mm4, mm5, mm6 = 0, 0, 0, 0, 0, 0
    v1, v2, v3, v4, v5, v6 = 0, 0, 0, 0, 0, 0
    ptn1, ptn2, ptn3, ptn4, ptn5, ptn6 = 0, 0, 0, 0, 0, 0
    prv1, prv2, prv3, prv4, prv5, prv6 = 0, 0, 0, 0, 0, 0
    ms2kmh = 3.6
    fps = 30
    max_cosine_distance = 0.5
    nn_budget = None
    nms_max_overlap = 0.3
    counter = []
    ###################################################

    # Deep SORT
    model_filename = 'model_data/market1501.pb'
    encoder = gdet.create_box_encoder(model_filename, batch_size=1)
    metric = nn_matching.NearestNeighborDistanceMetric("cosine", max_cosine_distance, nn_budget)
    tracker = Tracker(metric)

    writeVideo_flag = True
    # video_path = "./output/output.avi"
    video_capture = cv2.VideoCapture(args["input"])

    if writeVideo_flag:
        # Define the codec and create VideoWriter object
        w = int(video_capture.get(3))
        h = int(video_capture.get(4))
        total_frame = int(video_capture.get(cv2.CAP_PROP_FRAME_COUNT))
        fourcc = cv2.VideoWriter_fourcc(*'FMP4')
        video_fps = video_capture.get(cv2.CAP_PROP_FPS)
        out = cv2.VideoWriter('output/CH4_output.avi', fourcc, video_fps, (w, h))
        list_file = open('detection.txt', 'w')
        frame_index = -1

    fps = 0.0
    frame_idx = 0
    speed_dict = OrderedDict()

    ret, first_frame = video_capture.read()
    cal_mask1 = np.zeros_like(first_frame[:, :, 0])
    cal_mask2 = np.zeros_like(first_frame[:, :, 0])

    while True:
        ret, frame = video_capture.read()
        if not ret:
            break
        glob = frame.copy()
        cmask = frame.copy()

        # Channel and preset setting
        if ch == 8:
            if (0 <= frame_idx <= 480 or 2415 <= frame_idx <= 4203 or 6140 <= frame_idx <= 7925
                    or 9864 <= frame_idx <= 11648 or 13585 <= frame_idx <= 15370 or frame_idx >= 17306):
                preset = 1
            elif (559 <= frame_idx <= 2340 or 4275 <= frame_idx <= 6064 or 8000 <= frame_idx <= 9787
                  or 11730 <= frame_idx <= 13513 or 15450 <= frame_idx <= 17237):
                preset = 2
            else:
                preset = 0

            if preset == 1:
                vline1 = p1_vline1
                mid_line = p1_mid_line
                vline2 = p1_vline2
                lane_1 = p1_lane_1
                lane_2 = p1_lane_2
                lane_3 = p1_lane_3
                lane_4 = p1_lane_4
                lane_5 = p1_lane_5
                lane_6 = p1_lane_6
                global_point = p1_global_point
                mask = p1_mask

                # Polyline
                cv2.polylines(frame, [lane_1], True, (153, 255, 255))
                cv2.polylines(frame, [lane_2], True, (255, 204, 204))
                cv2.polylines(frame, [lane_3], True, (204, 255, 204))
                cv2.polylines(frame, [lane_4], True, (255, 204, 255))
                cv2.polylines(frame, [lane_5], True, (153, 153, 255))
                cv2.polylines(frame, [lane_6], True, (102, 255, 153))
                frame = cv2.line(frame, vline1[0], vline1[1], (0, 0, 255), 1)
                frame = cv2.line(frame, mid_line[0], mid_line[1], (0, 0, 255), 1)
                frame = cv2.line(frame, vline2[0], vline2[1], (0, 0, 255), 1)
                p1_flag = True

                view_polygon = np.array([[10, 1920], [380, 250], [680, 250], [1080, 480], [1080, 1920]])
                cal_polygon = np.array([[361, 304], [755, 293], [1076, 480], [1077, 1067], [163, 1051]])
                pg1 = np.array([[382, 347], [359, 347], [236, 833], [272, 832]])    # RT, LT, LB, RB
                pg2 = np.array([[460, 347], [434, 346], [456, 833], [505, 833]])    # LB, RT, LT, RB
                pg3 = np.array([[544, 345], [514, 345], [686, 833], [755, 832]])    # LB, LT, RT, RB
                pg4 = np.array([[630, 342], [598, 343], [924, 829], [991, 829]])    # LB, LT, LB, RB
                pg5 = np.array([[725, 343], [696, 345], [996, 650], [1056, 646]])   # RT, LB, LT, RB
                pg6 = np.array([[798, 340], [761, 340], [1037, 535], [1070, 530]])  # RT, LB, LT, RB

                cv2.fillConvexPoly(cal_mask1, cal_polygon, 1)
                frame_gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
                frame_gray = cv2.bitwise_and(frame_gray, frame_gray, mask=cal_mask1)

                if pg1.size > 0:
                    cv2.fillPoly(cmask, [pg1], (120, 0, 120), cv2.LINE_AA)
                if pg2.size > 0:
                    cv2.fillPoly(cmask, [pg2], (120, 120, 0), cv2.LINE_AA)
                if pg3.size > 0:
                    cv2.fillPoly(cmask, [pg3], (0, 120, 120), cv2.LINE_AA)
                if pg4.size > 0:
                    cv2.fillPoly(cmask, [pg4], (80, 0, 255), cv2.LINE_AA)
                if pg5.size > 0:
                    cv2.fillPoly(cmask, [pg5], (255, 0, 80), cv2.LINE_AA)
                if pg6.size > 0:
                    cv2.fillPoly(cmask, [pg6], (120, 0, 0), cv2.LINE_AA)

            elif preset == 2:
                vline1 = p2_vline1
                mid_line = p2_mid_line
                vline2 = p2_vline2
                lane_1 = p2_lane_1
                lane_2 = p2_lane_2
                lane_3 = p2_lane_3
                lane_4 = p2_lane_4
                lane_5 = p2_lane_5
                lane_6 = p2_lane_6
                global_point = p2_global_point
                mask = p2_mask

                # Polyline
                cv2.polylines(frame, [lane_1], True, (153, 255, 255))
                cv2.polylines(frame, [lane_2], True, (255, 204, 204))
                cv2.polylines(frame, [lane_3], True, (204, 255, 204))
                cv2.polylines(frame, [lane_4], True, (255, 204, 255))
                cv2.polylines(frame, [lane_5], True, (153, 153, 255))
                cv2.polylines(frame, [lane_6], True, (102, 255, 153))
                frame = cv2.line(frame, vline1[0], vline1[1], (0, 0, 255), 1)
                frame = cv2.line(frame, mid_line[0], mid_line[1], (0, 0, 255), 1)
                frame = cv2.line(frame, vline2[0], vline2[1], (0, 0, 255), 1)
                p2_flag = True

                view_polygon = np.array([[284, 649], [0, 1629], [1076, 1574], [1079, 888], [676, 634]])
                cal_polygon = np.array([[896, 778], [244, 794], [105, 1271], [1077, 1245], [1077, 879]])
                pg1 = np.array([[276, 846], [234, 847], [134, 1200], [199, 1198]])    # RT, LT, LB, RB
                pg2 = np.array([[418, 844], [375, 844], [384, 1196], [442, 1198]])    # LB, RT, LT, RB
                pg3 = np.array([[553, 843], [508, 844], [637, 1194], [706, 1194]])    # LB, LT, RT, RB
                pg4 = np.array([[686, 841], [637, 843], [886, 1190], [968, 1189]])    # LB, LT, LB, RB
                pg5 = np.array([[817, 837], [773, 841], [1005, 1051], [1060, 1047]])  # RT, LB, LT, RB
                pg6 = np.array([[966, 837], [919, 840], [1043, 929], [1087, 927]])    # RT, LT, LB, RB

                cv2.fillConvexPoly(cal_mask2, cal_polygon, 1)
                frame_gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
                frame_gray = cv2.bitwise_and(frame_gray, frame_gray, mask=cal_mask2)

                if pg1.size > 0:
                    cv2.fillPoly(cmask, [pg1], (120, 0, 120), cv2.LINE_AA)
                if pg2.size > 0:
                    cv2.fillPoly(cmask, [pg2], (120, 120, 0), cv2.LINE_AA)
                if pg3.size > 0:
                    cv2.fillPoly(cmask, [pg3], (0, 120, 120), cv2.LINE_AA)
                if pg4.size > 0:
                    cv2.fillPoly(cmask, [pg4], (80, 0, 255), cv2.LINE_AA)
                if pg5.size > 0:
                    cv2.fillPoly(cmask, [pg5], (255, 0, 80), cv2.LINE_AA)
                if pg6.size > 0:
                    cv2.fillPoly(cmask, [pg6], (120, 0, 0), cv2.LINE_AA)

        elif ch == 4:
            if 0 <= frame_idx <= 1751 or frame_idx >= 3655:
                preset = 1
            elif 1797 <= frame_idx <= 3600:
                preset = 2
            else:
                preset = 0

            if preset == 1:
                lane_1 = p1_lane_1
                lane_2 = p1_lane_2
                lane_3 = p1_lane_3
                lane_4 = p1_lane_4
                lane_5 = p1_lane_5
                lane_6 = p1_lane_6
                global_point = p1_global_point
                mask = p1_mask

                # Polyline
                # cv2.polylines(frame, [lane_1], True, (153, 255, 255))
                # cv2.polylines(frame, [lane_2], True, (255, 204, 204))
                # cv2.polylines(frame, [lane_3], True, (204, 255, 204))
                # cv2.polylines(frame, [lane_4], True, (255, 204, 255))
                # cv2.polylines(frame, [lane_5], True, (153, 153, 255))
                # cv2.polylines(frame, [lane_6], True, (102, 255, 153))
                p1_flag = True

                view_polygon = np.array([[731, 563], [385, 567], [33, 1260], [1077, 1254], [1078, 812]])
                cal_polygon = np.array([[914, 669], [286, 675], [89, 1083], [1078, 1083], [1078, 772]])
                pg6 = np.array([[346, 686], [313, 686], [163, 992], [244, 996]])    # RT, LT, LB, RB
                pg5 = np.array([[430, 684], [401, 685], [338, 998], [420, 1000]])   # LB, RT, LT, RB
                pg4 = np.array([[534, 685], [506, 685], [547, 999], [631, 999]])    # LB, LT, RT, RB
                pg3 = np.array([[654, 685], [609, 684], [760, 1000], [839, 999]])   # LB, LT, LB, RB
                pg2 = np.array([[770, 685], [723, 684], [979, 999], [1051, 998]])   # RT, LB, LT, RB
                pg1 = np.array([[858, 683], [815, 683], [1031, 860], [1077, 857]])  # RT, LB, LT, RB

                cv2.fillConvexPoly(cal_mask1, cal_polygon, 1)
                frame_gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
                frame_gray = cv2.bitwise_and(frame_gray, frame_gray, mask=cal_mask1)

                if pg1.size > 0:
                    cv2.fillPoly(cmask, [pg1], (120, 0, 120), cv2.LINE_AA)
                if pg2.size > 0:
                    cv2.fillPoly(cmask, [pg2], (120, 120, 0), cv2.LINE_AA)
                if pg3.size > 0:
                    cv2.fillPoly(cmask, [pg3], (0, 120, 120), cv2.LINE_AA)
                if pg4.size > 0:
                    cv2.fillPoly(cmask, [pg4], (80, 0, 255), cv2.LINE_AA)
                if pg5.size > 0:
                    cv2.fillPoly(cmask, [pg5], (255, 0, 80), cv2.LINE_AA)
                if pg6.size > 0:
                    cv2.fillPoly(cmask, [pg6], (120, 0, 0), cv2.LINE_AA)

            elif preset == 2:
                lane_1 = p2_lane_1
                lane_2 = p2_lane_2
                lane_3 = p2_lane_3
                lane_4 = p2_lane_4
                lane_5 = p2_lane_5
                lane_6 = p2_lane_6
                global_point = p2_global_point
                mask = p2_mask

                # Polyline
                # cv2.polylines(frame, [lane_1], True, (153, 255, 255))
                # cv2.polylines(frame, [lane_2], True, (255, 204, 204))
                # cv2.polylines(frame, [lane_3], True, (204, 255, 204))
                # cv2.polylines(frame, [lane_4], True, (255, 204, 255))
                # cv2.polylines(frame, [lane_5], True, (153, 153, 255))
                # cv2.polylines(frame, [lane_6], True, (102, 255, 153))
                p2_flag = True

                view_polygon = np.array([[547, 609], [0, 1109], [1, 1271], [1078, 1278], [1079, 594]])
                cal_polygon = np.array([[529, 611], [8, 1105], [1077, 1110], [1078, 599]])
                pg6 = np.array([[556, 609], [493, 607], [108, 1033], [190, 1030]])   # RT, LT, LB, RB
                pg5 = np.array([[693, 604], [642, 602], [356, 1020], [455, 1020]])   # LB, RT, LT, RB
                pg4 = np.array([[812, 633], [765, 633], [604, 1026], [702, 1026]])   # LB, LT, RT, RB
                pg3 = np.array([[932, 638], [882, 636], [883, 1007], [953, 1001]])   # LB, LT, LB, RB
                pg2 = np.array([[1059, 641], [978, 638], [1028, 941], [1079, 916]])  # RT, LB, LT, RB
                pg1 = np.array([])

                cv2.fillConvexPoly(cal_mask2, cal_polygon, 1)
                frame_gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
                frame_gray = cv2.bitwise_and(frame_gray, frame_gray, mask=cal_mask2)

                if pg1.size > 0:
                    cv2.fillPoly(cmask, [pg1], (120, 0, 120), cv2.LINE_AA)
                if pg2.size > 0:
                    cv2.fillPoly(cmask, [pg2], (120, 120, 0), cv2.LINE_AA)
                if pg3.size > 0:
                    cv2.fillPoly(cmask, [pg3], (0, 120, 120), cv2.LINE_AA)
                if pg4.size > 0:
                    cv2.fillPoly(cmask, [pg4], (80, 0, 255), cv2.LINE_AA)
                if pg5.size > 0:
                    cv2.fillPoly(cmask, [pg5], (255, 0, 80), cv2.LINE_AA)
                if pg6.size > 0:
                    cv2.fillPoly(cmask, [pg6], (120, 0, 0), cv2.LINE_AA)

        t1 = time.time()
        cnt_lane_1 = cnt_lane_2 = cnt_lane_3 = cnt_lane_4 = cnt_lane_5 = cnt_lane_6 = 0

        image = Image.fromarray(frame[..., ::-1])  # bgr to rgb
        boxs, class_names = yolo.detect_image(image)

        # features is a 128-dimension vector for each bounding box
        features = encoder(frame, boxs)

        # The score is fixed to 1.0 here: score 1.0 means the bbox's class
        # score after yolo is 100%.
        detections = [Detection(bbox, 1.0, feature) for bbox, feature in zip(boxs, features)]

        # Run non-maxima suppression.
        boxes = np.array([d.tlwh for d in detections])
        scores = np.array([d.confidence for d in detections])
        indices = preprocessing.non_max_suppression(boxes, nms_max_overlap, scores)
        # detections is the object detection result at the current frame;
        # it may contain many bboxes or none
        detections = [detections[i] for i in indices]

        # calculate lane-by-lane avg speed
        ## swhan method (optical flow)
        mm1, mm2, mm3, mm4, mm5, mm6 = 0, 0, 0, 0, 0, 0
        if len(of_track) > 0:
            img0, img1 = prev_gray, frame_gray
            p0 = np.float32([tr[-1] for tr in of_track]).reshape(-1, 1, 2)
            p1, _st, _err = cv2.calcOpticalFlowPyrLK(img0, img1, p0, None, **lk_params)
            p0r, _st, _err = cv2.calcOpticalFlowPyrLK(img1, img0, p1, None, **lk_params)
            d = abs(p0 - p0r).reshape(-1, 2).max(-1)
            good = d < 1
            new_of_tracks = []
            for tr, (x, y), good_flag in zip(of_track, p1.reshape(-1, 2), good):
                if not good_flag:
                    continue
                tr.append((x, y))
                if len(tr) > track_len:
                    del tr[0]
                new_of_tracks.append(tr)
                cv2.circle(frame, (x, y), 3, (0, 255, 0), -1)
            of_track = new_of_tracks

            for idx, tr in enumerate(of_track):
                # print(frame_idx, tr)
                if pg1.size > 0:
                    result_pg1 = cv2.pointPolygonTest(pg1, tr[0], True)
                else:
                    result_pg1 = -999
                if pg2.size > 0:
                    result_pg2 = cv2.pointPolygonTest(pg2, tr[0], True)
                else:
                    result_pg2 = -999
                if pg3.size > 0:
                    result_pg3 = cv2.pointPolygonTest(pg3, tr[0], True)
                else:
                    result_pg3 = -999
                if pg4.size > 0:
                    result_pg4 = cv2.pointPolygonTest(pg4, tr[0], True)
                else:
                    result_pg4 = -999
                if pg5.size > 0:
                    result_pg5 = cv2.pointPolygonTest(pg5, tr[0], True)
                else:
                    result_pg5 = -999
                if pg6.size > 0:
                    result_pg6 = cv2.pointPolygonTest(pg6, tr[0], True)
                else:
                    result_pg6 = -999

                if frame_idx % detect_interval == 0:
                    if result_pg1 > 0:
                        ptn1 += 1
                        if preset == 1:
                            mm1 += convPtoR_1(tr[0][0], tr[0][1], tr[1][0], tr[1][1])
                        elif preset == 2:
                            mm1 += convPtoR_2(tr[0][0], tr[0][1], tr[1][0], tr[1][1])
                        mmm1 = mm1 / ptn1
                        v1 = mmm1 * fps * ms2kmh * 6
                    if result_pg2 > 0:
                        ptn2 += 1
                        if preset == 1:
                            mm2 += convPtoR_1(tr[0][0], tr[0][1], tr[1][0], tr[1][1])
                        elif preset == 2:
                            mm2 += convPtoR_2(tr[0][0], tr[0][1], tr[1][0], tr[1][1])
                        mmm2 = mm2 / ptn2
                        v2 = mmm2 * fps * ms2kmh * 6
                    if result_pg3 > 0:
                        ptn3 += 1
                        if preset == 1:
                            mm3 += convPtoR_1(tr[0][0], tr[0][1], tr[1][0], tr[1][1])
                        elif preset == 2:
                            mm3 += convPtoR_2(tr[0][0], tr[0][1], tr[1][0], tr[1][1])
                        mmm3 = mm3 / ptn3
                        v3 = mmm3 * fps * ms2kmh * 6
                    if result_pg4 > 0:
                        ptn4 += 1
                        if preset == 1:
                            mm4 += convPtoR_1(tr[0][0], tr[0][1], tr[1][0], tr[1][1])
                        elif preset == 2:
                            mm4 += convPtoR_2(tr[0][0], tr[0][1], tr[1][0], tr[1][1])
                        mmm4 = mm4 / ptn4
                        v4 = mmm4 * fps * ms2kmh * 6
                    if result_pg5 > 0:
                        ptn5 += 1
                        if preset == 1:
                            mm5 += convPtoR_1(tr[0][0], tr[0][1], tr[1][0], tr[1][1])
                        elif preset == 2:
                            mm5 += convPtoR_2(tr[0][0], tr[0][1], tr[1][0], tr[1][1])
                        mmm5 = mm5 / ptn5
                        v5 = mmm5 * fps * ms2kmh * 6
                    if result_pg6 > 0:
                        ptn6 += 1
                        if preset == 1:
                            mm6 += convPtoR_1(tr[0][0], tr[0][1], tr[1][0], tr[1][1])
                        elif preset == 2:
                            mm6 += convPtoR_2(tr[0][0], tr[0][1], tr[1][0], tr[1][1])
                        mmm6 = mm6 / ptn6
                        v6 = mmm6 * fps * ms2kmh * 6

        mask = np.zeros_like(frame_gray)
        mask[:] = 255
        for x, y in [np.int32(tr[-1]) for tr in of_track]:
            cv2.circle(mask, (x, y), 3, 0, -1)
        p = cv2.goodFeaturesToTrack(frame_gray, mask=mask, **feature_params)
        if p is not None:
            for x, y in np.float32(p).reshape(-1, 2):
                of_track.append([(x, y)])
        prev_gray = frame_gray
        ## swhan method

        if frame_idx % detect_interval == 0:
            if ptn1 > 0:
                avg_speed_lane_1 = v1
                prv1 = v1
            elif ptn1 == 0:
                avg_speed_lane_1 = 0
                prv1 = 0
            if ptn2 > 0:
                avg_speed_lane_2 = v2
                prv2 = v2
            elif ptn2 == 0:
                avg_speed_lane_2 = 0
                prv2 = 0
            if ptn3 > 0:
                avg_speed_lane_3 = v3
                prv3 = v3
            elif ptn3 == 0:
                avg_speed_lane_3 = 0
                prv3 = 0
            if ptn4 > 0:
                avg_speed_lane_4 = v4
                prv4 = v4
            elif ptn4 == 0:
                avg_speed_lane_4 = 0
                prv4 = 0
            if ptn5 > 0:
                avg_speed_lane_5 = v5
                prv5 = v5
            elif ptn5 == 0:
                avg_speed_lane_5 = 0
                prv5 = 0
            if ptn6 > 0:
                avg_speed_lane_6 = v6
                prv6 = v6
            elif ptn6 == 0:
                avg_speed_lane_6 = 0
                prv6 = 0
        else:
            avg_speed_lane_1 = prv1
            avg_speed_lane_2 = prv2
            avg_speed_lane_3 = prv3
            avg_speed_lane_4 = prv4
            avg_speed_lane_5 = prv5
            avg_speed_lane_6 = prv6
        ptn1, ptn2, ptn3, ptn4, ptn5, ptn6 = 0, 0, 0, 0, 0, 0

        # Call the tracker
        tracker.predict()
        tracker.update(detections)

        i = int(0)
        indexIDs = []
        c = []
        boxes = []
        for det in detections:
            bbox = det.to_tlbr()

        for track in tracker.tracks:
            if not track.is_confirmed() or track.time_since_update > 1:
                continue
            indexIDs.append(int(track.track_id))
            counter.append(int(track.track_id))
            bbox = track.to_tlbr()
            color = [int(c) for c in COLORS[indexIDs[i] % len(COLORS)]]
            cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), color, 3)
            if len(class_names) > 0:
                class_name = class_names[0]
            i += 1
            center = (int((bbox[0] + bbox[2]) / 2), int((bbox[1] + bbox[3]) / 2))

            # global point matching
            track.matching_point[0] = center[0]
            track.matching_point[1] = center[1]
            temp_list = [track.matching_point[0], track.matching_point[1], frame_idx]
            if len(track.matching_point_list) == 4:
                track.matching_point_list.append(temp_list)
                x1 = track.matching_point_list[0][0]
                y1 = track.matching_point_list[0][1]
                x2 = track.matching_point_list[-1][0]
                y2 = track.matching_point_list[-1][1]
                # If the pixel position doesn't change, the speed should be zero.
                if x1 == x2 and y1 == y2:
                    track.matching_point_list.pop(0)
                else:
                    time1 = track.matching_point_list[0][2]
                    time2 = track.matching_point_list[-1][2]
                    if preset == 1:
                        R_dist1 = convPtoR_1(x1, y1, x2, y2)
                        t_time1 = (time2 - time1) / 30
                        speed = int(3.6 * R_dist1 // t_time1)
                        # print("time1 : ", time1, "time2 : ", time2)
                    elif preset == 2:
                        R_dist1 = convPtoR_2(x1, y1, x2, y2)
                        t_time1 = (time2 - time1) / 30
                        speed = int(3.6 * R_dist1 // t_time1)
                        # print("time1 : ", time1, "time2 : ", time2)
                    track.matching_point_list.pop(0)
                    if frame_idx % 6 == 1:
                        track.speed = speed
                    cv2.putText(frame, str(int(track.speed)) + 'km/h',
                                (int(bbox[0]), int(bbox[1] + ((bbox[3] - bbox[1]) / 2))),
                                cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 4)
            elif len(track.matching_point_list) == 0:
                track.matching_point_list.append(temp_list)
            elif (track.matching_point_list[-1][0] != track.matching_point[0]
                  and track.matching_point_list[-1][1] != track.matching_point[1]):
                track.matching_point_list.append(temp_list)

            cv2.circle(frame, (track.matching_point[0], track.matching_point[1]), 5, (0, 0, 255), -1)

            # traffic lane by lane
            if cv2.pointPolygonTest(lane_1, center, False) >= 0:
                cnt_lane_1 += 1
                track.driving_lane = 1
            elif cv2.pointPolygonTest(lane_2, center, False) >= 0:
                cnt_lane_2 += 1
                track.driving_lane = 2
            elif cv2.pointPolygonTest(lane_3, center, False) >= 0:
                cnt_lane_3 += 1
                track.driving_lane = 3
            elif cv2.pointPolygonTest(lane_4, center, False) >= 0:
                cnt_lane_4 += 1
                track.driving_lane = 4
            elif cv2.pointPolygonTest(lane_5, center, False) >= 0:
                cnt_lane_5 += 1
                track.driving_lane = 5
            elif cv2.pointPolygonTest(lane_6, center, False) >= 0:
                cnt_lane_6 += 1
                track.driving_lane = 6

            cv2.putText(frame, "ID:" + str(track.track_id) + "/" + str(track.driving_lane),
                        (int(bbox[0]), int(bbox[1]) - 20), cv2.FONT_HERSHEY_SIMPLEX, 1, color, 2)

            pts[track.track_id].append(center)
            ctime[track.time].append(time.time())
            thickness = 5
            # center point
            cv2.circle(frame, center, 1, color, thickness)

            # draw motion path
            for j in range(1, len(pts[track.track_id])):
                if pts[track.track_id][j - 1] is None or pts[track.track_id][j] is None:
                    continue
                thickness = int(np.sqrt(64 / float(j + 1)) * 2)
                cv2.line(frame, pts[track.track_id][j - 1], pts[track.track_id][j], color, thickness)
            # cv2.putText(frame, str(class_names[j]), (int(bbox[0]), int(bbox[1] - 20)), 0, 5e-3 * 150, (255, 255, 255), 2)

        cv2.putText(frame, "Current Object Counter: " + str(i), (20, 80), 0, 5e-3 * 200, (0, 255, 0), 2)
        cv2.putText(frame, "FPS: %f" % fps, (20, 40), 0, 5e-3 * 200, (0, 255, 0), 3)
        cv2.putText(frame, "Traffic, Avg_speed", (570, 40), 0, 1, (0, 255, 0), 2)
        cv2.putText(frame, "Lane_1: " + str(cnt_lane_1) + ', ' + str(int(avg_speed_lane_1)), (500, 80), 0, 1, (0, 255, 0), 2)
        cv2.putText(frame, "Lane_2: " + str(cnt_lane_2) + ', ' + str(int(avg_speed_lane_2)), (500, 120), 0, 1, (0, 255, 0), 2)
        cv2.putText(frame, "Lane_3: " + str(cnt_lane_3) + ', ' + str(int(avg_speed_lane_3)), (500, 160), 0, 1, (0, 255, 0), 2)
        cv2.putText(frame, "Lane_4: " + str(cnt_lane_4) + ', ' + str(int(avg_speed_lane_4)), (500, 200), 0, 1, (0, 255, 0), 2)
        cv2.putText(frame, "Lane_5: " + str(cnt_lane_5) + ', ' + str(int(avg_speed_lane_5)), (500, 240), 0, 1, (0, 255, 0), 2)
        cv2.putText(frame, "Lane_6: " + str(cnt_lane_6) + ', ' + str(int(avg_speed_lane_6)), (500, 280), 0, 1, (0, 255, 0), 2)

        frame_idx += 1

        if writeVideo_flag:
            # save a frame
            # cv2.fillPoly(frame, [mask], (0, 0, 0))
            # for i, v in enumerate(global_point):
            #     cv2.circle(frame, v, 1, (0, 255, 255), -1)
            cv2.addWeighted(cmask, alpha, frame, 1 - alpha, 0, frame)
            out.write(frame)
            frame_index = frame_index + 1
            list_file.write(str(frame_index) + ' ')
            if len(boxs) != 0:
                for i in range(0, len(boxs)):
                    list_file.write(str(boxs[i][0]) + ' ' + str(boxs[i][1]) + ' ' +
                                    str(boxs[i][2]) + ' ' + str(boxs[i][3]) + ' ')
            list_file.write('\n')

        fps = (fps + (1. / (time.time() - t1))) / 2

    print(" ")
    print("[Finish]")
    end = time.time()

    video_capture.release()
    if writeVideo_flag:
        out.release()
        list_file.close()
    cv2.destroyAllWindows()
def main(yolo):
    # Definition of the parameters
    max_cosine_distance = 0.3
    nn_budget = None
    nms_max_overlap = 1.0

    # Deep SORT
    model_filename = 'model_data/mars-small128.pb'
    encoder = gdet.create_box_encoder(model_filename, batch_size=1)
    metric = nn_matching.NearestNeighborDistanceMetric("cosine", max_cosine_distance, nn_budget)
    tracker = Tracker(metric)

    np.random.seed(42)
    COLORS = np.random.randint(0, 255, size=(200, 3), dtype="uint8")

    writeVideo_flag = True
    asyncVideo_flag = False

    file_path = 'testvideo2.avi'

    # load the COCO class labels our YOLO model was trained on
    labelsPath = os.path.sep.join(["model_data", "coco_classes.txt"])
    LABELS = open(labelsPath).read().strip().split("\n")
    # print(str(len(LABELS)) + " labels loaded successfully")
    # print(LABELS)
    class_nums = np.zeros(80)
    counter = np.zeros(80)
    track_id_max = -1

    if asyncVideo_flag:
        video_capture = VideoCaptureAsync(file_path)
    else:
        video_capture = cv2.VideoCapture(file_path)

    if asyncVideo_flag:
        video_capture.start()

    if writeVideo_flag:
        if asyncVideo_flag:
            w = int(video_capture.cap.get(3))
            h = int(video_capture.cap.get(4))
        else:
            w = int(video_capture.get(3))
            h = int(video_capture.get(4))
        fourcc = cv2.VideoWriter_fourcc(*'XVID')
        out = cv2.VideoWriter('testvideo2_out.avi', fourcc, 30, (w, h))
        frame_index = -1

    fps = 0.0
    fps_imutils = imutils.video.FPS().start()

    img_num = 0
    frame_cnt = 0
    while True:
        ret, frame = video_capture.read()  # frame shape 640*480*3
        if not ret or frame_cnt > 30:
            break
        frame_copy = frame.copy()
        t1 = time.time()

        image = Image.fromarray(frame[..., ::-1])  # bgr to rgb
        boxs, confidence, class_names = yolo.detect_image(image)
        features = encoder(frame, boxs)
        detections = [Detection(bbox, confidence, feature)
                      for bbox, confidence, feature in zip(boxs, confidence, features)]

        # Run non-maxima suppression.
        boxes = np.array([d.tlwh for d in detections])
        scores = np.array([d.confidence for d in detections])
        indices = preprocessing.non_max_suppression(boxes, nms_max_overlap, scores)
        # print(indices)
        # for i in indices:
        #     print(str(i) + class_names[i][0])
        detections = [detections[i] for i in indices]
        class_names = [class_names[i] for i in indices]
        print("class_name:" + str(class_names))

        class_IDs = []
        current_nums = np.zeros(80)
        for class_name in class_names:
            for i, LABEL in enumerate(LABELS):
                if class_name[0] == LABEL:
                    current_nums[i] += 1
                    class_IDs.append(i)
        # print("person:" + str(current_nums[0]))

        cv2.putText(frame, 'Current', (20, 70), cv2.FONT_HERSHEY_DUPLEX, 0.75, (255, 255, 255), 2)
        cv2.putText(frame, 'Total', (180, 70), cv2.FONT_HERSHEY_DUPLEX, 0.75, (0, 255, 255), 2)

        x1 = 20
        y1 = 100
        for i, cl in enumerate(current_nums):
            if cl > 0:
                # per-class counts for the current frame
                cv2.putText(frame, LABELS[i] + "=" + str(cl), (x1, y1),
                            cv2.FONT_HERSHEY_DUPLEX, 0.6, (255, 255, 255), 1)
                y1 = y1 + 20

        for i, det in enumerate(detections):
            bbox = det.to_tlbr()
            score = "%.2f" % round(det.confidence * 100, 2)
            cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), (255, 255, 255), 1)
            cv2.putText(frame, score + '%', (int(bbox[0]), int(bbox[1]) + 10),
                        cv2.FONT_HERSHEY_DUPLEX, 0.3, (255, 255, 255), 1)
            # cv2.putText(frame, class_names[i], (int(bbox[0]), int(bbox[1]) - 5), 0, 5e-3 * 130, (0, 255, 0), 2)
            # cv2.putText(frame, class_names[i], (int(bbox[0]), int(bbox[1])), 0, 5e-3 * 130, (0, 255, 255), 2)

        print("Total of detections:" + str(len(detections)))

        # Call the tracker
        tracker.predict()
        tracker.update(detections, class_IDs)

        # for i, cl in enumerate(class_nums):
        #     if cl > 0:
        #         print("add: " + LABELS[i] + str(cl - class_last_nums[i]))

        for track in tracker.tracks:
            if not track.is_confirmed() or track.time_since_update > 1:
                print("not track.is_confirmed() or track.time_since_update > 1: " + str(track.track_id))
                continue
            bbox = track.to_tlbr()
            color = [int(c) for c in COLORS[track.class_id % len(COLORS)]]
            cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), color, 2)
            cv2.putText(frame, str(track.track_id) + ' ' + LABELS[track.class_id],
                        (int(bbox[0]), int(bbox[1]) - 5), cv2.FONT_HERSHEY_DUPLEX, 0.4, color, 1)
            if track.track_id > track_id_max:
                counter[track.class_id] = counter[track.class_id] + 1
                track_id_max = track.track_id
                dest = frame_copy[int(bbox[1]):int(bbox[3]), int(bbox[0]):int(bbox[2])]
                pdest = "./output_image/" + str(LABELS[track.class_id]) + str(int(counter[track.class_id])) + ".png"
                cv2.imwrite(pdest, dest)
                img_num += 1
            # print("track.id: " + str(track.track_id))
            # print("track.class_name: " + str(LABELS[track.class_id]))

        print(str(counter))
        print("-------------------------- finished output for this frame --------------------------")

        cv2.putText(frame, "FPS: %f" % fps, (20, 40), 0, 5e-3 * 200, (0, 255, 0), 3)  # draw FPS

        x2 = 180
        y2 = 100
        for i, cl in enumerate(counter):
            if cl > 0:
                cv2.putText(frame, LABELS[i] + "=" + str(cl), (x2, y2),
                            cv2.FONT_HERSHEY_DUPLEX, 0.6, (0, 255, 255), 1)
                y2 = y2 + 20

        # cv2.imshow('', frame)

        if writeVideo_flag:  # and not asyncVideo_flag:
            # save a frame
            out.write(frame)
            frame_index = frame_index + 1

        fps_imutils.update()
        fps = (fps + (1. / (time.time() - t1))) / 2
        # print("FPS = %f" % fps)
        frame_cnt += 1

        # Press Q to stop!
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

    fps_imutils.stop()
    print('imutils FPS: {}'.format(fps_imutils.fps()))

    if asyncVideo_flag:
        video_capture.stop()
    else:
        video_capture.release()

    if writeVideo_flag:
        out.release()

    cv2.destroyAllWindows()
def main(yolo):
    # Definition of the parameters
    max_cosine_distance = 0.3
    nn_budget = None
    nms_max_overlap = 1.0

    # Deep SORT
    model_filename = 'model_data/mars-small128.pb'
    encoder = gdet.create_box_encoder(model_filename, batch_size=1)
    metric = nn_matching.NearestNeighborDistanceMetric("cosine", max_cosine_distance, nn_budget)
    tracker = Tracker(metric)

    writeVideo_flag = True
    asyncVideo_flag = False

    file_path = 'input_video.avi'
    if asyncVideo_flag:
        video_capture = VideoCaptureAsync(file_path)
    else:
        video_capture = cv2.VideoCapture(file_path)

    if asyncVideo_flag:
        video_capture.start()

    if writeVideo_flag:
        if asyncVideo_flag:
            w = int(video_capture.cap.get(3))
            h = int(video_capture.cap.get(4))
        else:
            w = int(video_capture.get(3))
            h = int(video_capture.get(4))
        fourcc = cv2.VideoWriter_fourcc(*'XVID')
        out = cv2.VideoWriter('output_yolov3.mp4', fourcc, 25, (w, h))
        frame_index = -1

    fps = 0.0
    fps_imutils = imutils.video.FPS().start()

    while True:
        ret, frame = video_capture.read()  # frame shape 640*480*3
        if not ret:
            break
        t1 = time.time()

        image = Image.fromarray(frame[..., ::-1])  # bgr to rgb
        boxes, confidences = yolo.detect_image(image)
        boxes = np.array(boxes)
        confidences = np.array(confidences)

        # Run non-maxima suppression.
        indices = preprocessing.non_max_suppression(boxes, nms_max_overlap, confidences)
        boxes = [boxes[i] for i in indices]
        confidences = [confidences[i] for i in indices]

        time1 = time.time()
        features = encoder(frame, boxes)
        time2 = time.time()

        detections = [
            Detection(bbox, confidence, feature)
            for bbox, confidence, feature in zip(boxes, confidences, features)
        ]

        # Call the tracker
        tracker.predict()
        tracker.update(detections)
        time3 = time.time()

        for track in tracker.tracks:
            if not track.is_confirmed() or track.time_since_update > 1:
                continue
            bbox = track.to_tlbr()
            cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])),
                          (int(bbox[2]), int(bbox[3])), (255, 255, 255), 2)
            cv2.putText(frame, str(track.track_id), (int(bbox[0]), int(bbox[1])),
                        0, 5e-3 * 200, (0, 255, 0), 2)

        for det in detections:
            bbox = det.to_tlbr()
            score = "%.2f" % round(det.confidence * 100, 2)
            cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])),
                          (int(bbox[2]), int(bbox[3])), (255, 0, 0), 2)
            cv2.putText(frame, score + '%', (int(bbox[0]), int(bbox[3])),
                        0, 5e-3 * 130, (0, 255, 0), 2)

        cv2.imshow('', frame)

        if writeVideo_flag:  # and not asyncVideo_flag:
            # save a frame
            out.write(frame)
            frame_index = frame_index + 1

        fps_imutils.update()
        fps = (fps + (1. / (time.time() - t1))) / 2
        print("FPS = %f" % fps)

        time4 = time.time()
        time_sum = time4 - t1
        print("time:", (time1 - t1) / time_sum, (time2 - time1) / time_sum,
              (time3 - time2) / time_sum, (time4 - time3) / time_sum)

        # Press Q to stop!
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

    fps_imutils.stop()
    print('imutils FPS: {}'.format(fps_imutils.fps()))

    if asyncVideo_flag:
        video_capture.stop()
    else:
        video_capture.release()

    if writeVideo_flag:
        out.release()

    cv2.destroyAllWindows()
def main(_argv): with open("./config_birdview.yml", "r") as ymlfile: bird_view_cfg = yaml.load(ymlfile) width_og, height_og = 0, 0 corner_points = [] for section in bird_view_cfg: corner_points.append(bird_view_cfg["image_parameters"]["p1"]) corner_points.append(bird_view_cfg["image_parameters"]["p2"]) corner_points.append(bird_view_cfg["image_parameters"]["p3"]) corner_points.append(bird_view_cfg["image_parameters"]["p4"]) width_og = int(bird_view_cfg["image_parameters"]["width_og"]) height_og = int(bird_view_cfg["image_parameters"]["height_og"]) img_path = bird_view_cfg["image_parameters"]["img_path"] size_height = bird_view_cfg["image_parameters"]["size_height"] size_width = bird_view_cfg["image_parameters"]["size_width"] tr = np.array([ bird_view_cfg["image_parameters"]["p4"][0], bird_view_cfg["image_parameters"]["p4"][1], ]) tl = np.array([ bird_view_cfg["image_parameters"]["p2"][0], bird_view_cfg["image_parameters"]["p2"][1], ]) br = np.array([ bird_view_cfg["image_parameters"]["p3"][0], bird_view_cfg["image_parameters"]["p3"][1], ]) bl = np.array([ bird_view_cfg["image_parameters"]["p1"][0], bird_view_cfg["image_parameters"]["p1"][1], ]) widthA = np.sqrt(((br[0] - bl[0])**2) + ((br[1] - bl[1])**2)) widthB = np.sqrt(((tr[0] - tl[0])**2) + ((tr[1] - tl[1])**2)) maxWidth = max(int(widthA), int(widthB)) heightA = np.sqrt(((tr[0] - br[0])**2) + ((tr[1] - br[1])**2)) heightB = np.sqrt(((tl[0] - bl[0])**2) + ((tl[1] - bl[1])**2)) maxHeight = max(int(heightA), int(heightB)) matrix, imgOutput = compute_perspective_transform(corner_points, maxWidth, maxHeight, cv2.imread(img_path)) height, width, _ = imgOutput.shape dim = (width, height) # Definition of the parameters max_cosine_distance = 0.4 nn_budget = None nms_max_overlap = 1.0 # initialize deep sort model_filename = "model_data/mars-small128.pb" encoder = gdet.create_box_encoder(model_filename, batch_size=1) # calculate cosine distance metric metric = nn_matching.NearestNeighborDistanceMetric("cosine", max_cosine_distance, nn_budget) # initialize tracker tracker = Tracker(metric) # load configuration for object detector config = ConfigProto() config.gpu_options.allow_growth = True session = InteractiveSession(config=config) STRIDES, ANCHORS, NUM_CLASS, XYSCALE = utils.load_config(FLAGS) input_size = FLAGS.size video_path = FLAGS.video # load tflite model if flag is set if FLAGS.framework == "tflite": interpreter = tf.lite.Interpreter(model_path=FLAGS.weights) interpreter.allocate_tensors() input_details = interpreter.get_input_details() output_details = interpreter.get_output_details() print(input_details) print(output_details) # otherwise load standard tensorflow saved model else: saved_model_loaded = tf.saved_model.load(FLAGS.weights, tags=[tag_constants.SERVING]) infer = saved_model_loaded.signatures["serving_default"] # begin video capture try: vid = cv2.VideoCapture(int(video_path)) except: vid = cv2.VideoCapture(video_path) output_video_1, output_video_2 = None, None # get video ready to save locally if flag is set if FLAGS.output: # by default VideoCapture returns float instead of int """ width = int(vid.get(cv2.CAP_PROP_FRAME_WIDTH)) height = int(vid.get(cv2.CAP_PROP_FRAME_HEIGHT)) """ fps = int(vid.get(cv2.CAP_PROP_FPS)) codec = cv2.VideoWriter_fourcc(*FLAGS.output_format) out = cv2.VideoWriter(FLAGS.output, codec, fps, (width, height)) frame_num = 0 # while video is running while True: black_img = cv2.imread("./black_bg.png") black_img = cv2.resize(black_img, dim, interpolation=cv2.INTER_AREA) return_value, frame = vid.read() 
if return_value: frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB) image = Image.fromarray(frame) else: print("Video has ended or failed, try a different video format!") break frame_num += 1 print("Frame #: ", frame_num) frame_size = frame.shape[:2] image_data = cv2.resize(frame, (input_size, input_size)) image_data = image_data / 255.0 image_data = image_data[np.newaxis, ...].astype(np.float32) start_time = time.time() # run detections on tflite if flag is set if FLAGS.framework == "tflite": interpreter.set_tensor(input_details[0]["index"], image_data) interpreter.invoke() pred = [ interpreter.get_tensor(output_details[i]["index"]) for i in range(len(output_details)) ] # run detections using yolov3 if flag is set if FLAGS.model == "yolov3" and FLAGS.tiny == True: boxes, pred_conf = filter_boxes( pred[1], pred[0], score_threshold=0.25, input_shape=tf.constant([input_size, input_size]), ) else: boxes, pred_conf = filter_boxes( pred[0], pred[1], score_threshold=0.25, input_shape=tf.constant([input_size, input_size]), ) else: batch_data = tf.constant(image_data) pred_bbox = infer(batch_data) for key, value in pred_bbox.items(): boxes = value[:, :, 0:4] pred_conf = value[:, :, 4:] ( boxes, scores, classes, valid_detections, ) = tf.image.combined_non_max_suppression( boxes=tf.reshape(boxes, (tf.shape(boxes)[0], -1, 1, 4)), scores=tf.reshape( pred_conf, (tf.shape(pred_conf)[0], -1, tf.shape(pred_conf)[-1])), max_output_size_per_class=50, max_total_size=50, iou_threshold=FLAGS.iou, score_threshold=FLAGS.score, ) # convert data to numpy arrays and slice out unused elements num_objects = valid_detections.numpy()[0] bboxes = boxes.numpy()[0] bboxes = bboxes[0:int(num_objects)] scores = scores.numpy()[0] scores = scores[0:int(num_objects)] classes = classes.numpy()[0] classes = classes[0:int(num_objects)] # format bounding boxes from normalized ymin, xmin, ymax, xmax ---> xmin, ymin, width, height original_h, original_w, _ = frame.shape bboxes = utils.format_boxes(bboxes, original_h, original_w) # store all predictions in one parameter for simplicity when calling functions pred_bbox = [bboxes, scores, classes, num_objects] # read in all class names from config class_names = utils.read_class_names(cfg.YOLO.CLASSES) # by default allow all classes in .names file # allowed_classes = list(class_names.values()) # custom allowed classes (uncomment line below to customize tracker for only people) allowed_classes = ["person"] # loop through objects and use class index to get class name, allow only classes in allowed_classes list names = [] deleted_indx = [] for i in range(num_objects): class_indx = int(classes[i]) class_name = class_names[class_indx] if class_name not in allowed_classes: deleted_indx.append(i) else: names.append(class_name) names = np.array(names) count = len(names) if FLAGS.count: cv2.putText( frame, "Objects being tracked: {}".format(count), (5, 35), cv2.FONT_HERSHEY_COMPLEX_SMALL, 2, (0, 255, 0), 2, ) print("Objects being tracked: {}".format(count)) # delete detections that are not in allowed_classes bboxes = np.delete(bboxes, deleted_indx, axis=0) scores = np.delete(scores, deleted_indx, axis=0) # encode yolo detections and feed to tracker features = encoder(frame, bboxes) detections = [ Detection(bbox, score, class_name, feature) for bbox, score, class_name, feature in zip( bboxes, scores, names, features) ] # initialize color map cmap = plt.get_cmap("tab20b") colors = [cmap(i)[:3] for i in np.linspace(0, 1, 20)] # run non-maxima supression boxs = np.array([d.tlwh for d in detections]) 
scores = np.array([d.confidence for d in detections]) classes = np.array([d.class_name for d in detections]) indices = preprocessing.non_max_suppression(boxs, classes, nms_max_overlap, scores) detections = [detections[i] for i in indices] # Call the tracker tracker.predict() tracker.update(detections) bbox_array = [] # update tracks for track in tracker.tracks: if not track.is_confirmed() or track.time_since_update > 1: continue bbox = track.to_tlbr() bbox_array.append( (int(bbox[0]), int(bbox[1]), int(bbox[2]), int(bbox[3]))) class_name = track.get_class() # draw bbox on screen color = colors[int(track.track_id) % len(colors)] color = [i * 255 for i in color] cv2.rectangle( frame, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), color, 2, ) cv2.rectangle( frame, (int(bbox[0]), int(bbox[1] - 30)), ( int(bbox[0]) + (len(class_name) + len(str(track.track_id))) * 17, int(bbox[1]), ), color, -1, ) cv2.putText( frame, class_name + "-" + str(track.track_id), (int(bbox[0]), int(bbox[1] - 10)), 0, 0.75, (255, 255, 255), 2, ) # if enable info flag then print details about each track if FLAGS.info: print( "Tracker ID: {}, Class: {}, BBox Coords (xmin, ymin, xmax, ymax): {}" .format( str(track.track_id), class_name, (int(bbox[0]), int(bbox[1]), int(bbox[2]), int( bbox[3])), )) if len(bbox_array) >= 1: array_centroids, array_groundpoints = get_centroids_and_groundpoints( bbox_array) transformed_downoids = compute_point_perspective_transformation( matrix, array_centroids) # Show every point on the top view image for point in transformed_downoids: x, y = point cv2.circle(black_img, (x, y), 60, (0, 255, 0), 2) cv2.circle(black_img, (x, y), 3, (0, 255, 0), -1) # calculate frames per second of running detections fps = 1.0 / (time.time() - start_time) print("FPS: %.2f" % fps) result = np.asarray(frame) # result = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR) if not FLAGS.dont_show: cv2.imshow("Output Video", result) # if output flag is set, save video file if FLAGS.output: if output_video_1 is None and output_video_2 is None: fourcc1 = cv2.VideoWriter_fourcc(*"MJPG") output_video_1 = cv2.VideoWriter( "./video.avi", fourcc1, 25, (frame.shape[1], frame.shape[0]), True) fourcc2 = cv2.VideoWriter_fourcc(*"MJPG") output_video_2 = cv2.VideoWriter( "./bird_view.avi", fourcc2, 25, (black_img.shape[1], black_img.shape[0]), True, ) elif output_video_1 is not None and output_video_2 is not None: output_video_1.write(frame) output_video_2.write(black_img) if cv2.waitKey(1) & 0xFF == ord("q"): break cv2.destroyAllWindows()
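# The bird's-eye-view main() above leans on two helpers,
# compute_perspective_transform and compute_point_perspective_transformation.
# Sketches of how such helpers are commonly built are below (assumptions: the
# project's own versions may order the corners differently or post-process
# the warped output):
import cv2
import numpy as np

def compute_perspective_transform_sketch(corner_points, width, height, image):
    # Map the four annotated ground-plane corners onto an axis-aligned rectangle;
    # the destination ordering must match the order of the annotated points.
    src = np.float32(np.array(corner_points))
    dst = np.float32([[0, 0], [width, 0], [0, height], [width, height]])
    matrix = cv2.getPerspectiveTransform(src, dst)
    warped = cv2.warpPerspective(image, matrix, (width, height))
    return matrix, warped

def compute_point_perspective_transformation_sketch(matrix, points):
    # cv2.perspectiveTransform expects points shaped (N, 1, 2) as float32.
    pts = np.float32(np.array(points)).reshape(-1, 1, 2)
    transformed = cv2.perspectiveTransform(pts, matrix)
    return [(int(p[0][0]), int(p[0][1])) for p in transformed]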
def main(_argv): # Definition of the parameters max_cosine_distance = 0.5 nn_budget = None nms_max_overlap = 1 #initialize deep sort model_filename = 'model_data/mars-small128.pb' encoder = gdet.create_box_encoder(model_filename, batch_size=1) metric = nn_matching.NearestNeighborDistanceMetric("cosine", max_cosine_distance, nn_budget) tracker = Tracker(metric, max_age=40) physical_devices = tf.config.experimental.list_physical_devices('GPU') if len(physical_devices) > 0: tf.config.experimental.set_memory_growth(physical_devices[0], True) if FLAGS.tiny: yolo = YoloV3Tiny(classes=FLAGS.num_classes) else: yolo = YoloV3(classes=FLAGS.num_classes) yolo.load_weights(FLAGS.weights) logging.info('weights loaded') class_names = [c.strip() for c in open(FLAGS.classes).readlines()] logging.info('classes loaded') try: vid = cv2.VideoCapture(int(FLAGS.video)) except: vid = cv2.VideoCapture(FLAGS.video) out = None if FLAGS.output: # by default VideoCapture returns float instead of int width = int(vid.get(cv2.CAP_PROP_FRAME_WIDTH)) height = int(vid.get(cv2.CAP_PROP_FRAME_HEIGHT)) fps = int(vid.get(cv2.CAP_PROP_FPS)) codec = cv2.VideoWriter_fourcc(*FLAGS.output_format) out = cv2.VideoWriter(FLAGS.output, codec, fps, (width, height)) list_file = open('detection.txt', 'w') frame_index = -1 fps = 0.0 count = 0 while True: _, img = vid.read() if img is None: logging.warning("Empty Frame") time.sleep(0.1) count += 1 if count < 3: continue else: break img_in = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) img_in = tf.expand_dims(img_in, 0) img_in = transform_images(img_in, FLAGS.size) t1 = time.time() boxes, scores, classes, nums = yolo.predict(img_in) classes = classes[0] names = [] for i in range(len(classes)): names.append(class_names[int(classes[i])]) names = np.array(names) converted_boxes = convert_boxes(img, boxes[0]) features = encoder(img, converted_boxes) detections = [ Detection(bbox, score, class_name, feature) for bbox, score, class_name, feature in zip( converted_boxes, scores[0], names, features) ] #initialize color map cmap = plt.get_cmap('tab20b') colors = [cmap(i)[:3] for i in np.linspace(0, 1, 20)] # run non-maxima suppresion boxs = np.array([d.tlwh for d in detections]) scores = np.array([d.confidence for d in detections]) classes = np.array([d.class_name for d in detections]) indices = preprocessing.non_max_suppression(boxs, classes, nms_max_overlap, scores) detections = [detections[i] for i in indices] # Call the tracker tracker.predict() tracker.update(detections) objects = 0 for track in tracker.tracks: if not track.is_confirmed() or track.time_since_update > 1: continue bbox = track.to_tlbr() class_name = track.get_class() if (FLAGS.class_1 == 'all'): objects += 1 color = colors[int(track.track_id) % len(colors)] color = [i * 255 for i in color] cv2.rectangle(img, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), color, 2) cv2.rectangle( img, (int(bbox[0]), int(bbox[1] - 30)), (int(bbox[0]) + (len(class_name) + len(str(track.track_id))) * 17, int(bbox[1])), color, -1) cv2.putText(img, class_name + "-" + str(track.track_id), (int(bbox[0]), int(bbox[1] - 10)), 0, 0.75, (255, 255, 255), 2) elif (FLAGS.class_1 != 'all'): if (class_name == FLAGS.class_1): objects += 1 color = colors[int(track.track_id) % len(colors)] color = [i * 255 for i in color] cv2.rectangle(img, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), color, 2) cv2.rectangle( img, (int(bbox[0]), int(bbox[1] - 30)), (int(bbox[0]) + (len(class_name) + len(str(track.track_id))) * 17, int(bbox[1])), color, -1) 
                    cv2.putText(img, class_name + "-" + str(track.track_id),
                                (int(bbox[0]), int(bbox[1] - 10)), 0, 0.75,
                                (255, 255, 255), 2)
        # print("Objetos filtrados:{}".format(objects))

        # print N_objects on screen
        cv2.putText(img, "# Objetos: {}".format(objects), (0, 30),
                    cv2.FONT_HERSHEY_COMPLEX_SMALL, 1, (255, 0, 0), 2)

        ### UNCOMMENT BELOW IF YOU WANT CONSTANTLY CHANGING YOLO DETECTIONS TO BE SHOWN ON SCREEN
        for det in detections:
            bbox = det.to_tlbr()
            cv2.rectangle(img, (int(bbox[0]), int(bbox[1])),
                          (int(bbox[2]), int(bbox[3])), (255, 0, 0), 2)

        # print fps on screen
        # fps = (fps + (1. / (time.time() - t1))) / 2
        # cv2.putText(img, "FPS: {:.2f}".format(fps), (0, 30),
        #             cv2.FONT_HERSHEY_COMPLEX_SMALL, 1, (0, 0, 255), 2)

        cv2.imshow('output', img)

        if FLAGS.output:
            out.write(img)
            frame_index = frame_index + 1
            list_file.write(str(frame_index) + ' ')
            if len(converted_boxes) != 0:
                for i in range(0, len(converted_boxes)):
                    list_file.write(
                        str(converted_boxes[i][0]) + ' ' + str(converted_boxes[i][1]) + ' ' +
                        str(converted_boxes[i][2]) + ' ' + str(converted_boxes[i][3]) + ' ')
            list_file.write('\n')

        # press q to quit
        if cv2.waitKey(1) == ord('q'):
            break

    vid.release()
    if FLAGS.output:  # fixed typo: was FLAGS.ouput, which crashes when saving output
        out.release()
    list_file.close()
    cv2.destroyAllWindows()
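# The main(_argv) above passes YOLO output through convert_boxes() before the
# appearance encoder, because Deep SORT's Detection expects pixel-space
# (top-left x, top-left y, width, height) while yolov3-tf2 emits normalized
# (x1, y1, x2, y2). A sketch of that conversion (an assumption: the repo's
# own helper may also cast to int or filter padding rows):
import numpy as np

def convert_boxes_sketch(image, boxes):
    h, w = image.shape[0], image.shape[1]
    converted = []
    for x1, y1, x2, y2 in boxes:
        if x2 - x1 <= 0 or y2 - y1 <= 0:
            continue  # skip zero-area padding rows from the fixed-size output
        converted.append([x1 * w, y1 * h, (x2 - x1) * w, (y2 - y1) * h])
    return converted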
def main(yolo):
    # Definition of the parameters
    max_cosine_distance = 0.2
    nn_budget = None
    nms_max_overlap = 1.0

    # deep_sort
    model_filename = 'model_data/mars-small128.pb'
    encoder = gdet.create_box_encoder(model_filename, batch_size=1)
    metric = nn_matching.NearestNeighborDistanceMetric("cosine", max_cosine_distance, nn_budget)
    tracker = Tracker(metric)

    video_capture = cv2.VideoCapture(0)
    fps = 0.0
    while True:
        ret, frame = video_capture.read()  # frame shape 640*480*3
        if not ret:
            break
        t1 = time.time()

        image = Image.fromarray(frame[..., ::-1])  # bgr to rgb, as in the other pipelines
        boxs = yolo.detect_image(image)
        features = encoder(frame, boxs)
        # The detector reports no confidence, so score is fixed to 1.0 here.
        detections = [Detection(bbox, 1.0, feature) for bbox, feature in zip(boxs, features)]

        # Run non-maxima suppression.
        boxes = np.array([d.tlwh for d in detections])
        scores = np.array([d.confidence for d in detections])
        indices = preprocessing.non_max_suppression(boxes, nms_max_overlap, scores)
        detections = [detections[i] for i in indices]

        # Call the tracker
        tracker.predict()
        tracker.update(detections)

        # Tracks and detections are not index-aligned lists, so draw them
        # separately instead of zipping them together as the original did.
        for track in tracker.tracks:
            if not track.is_confirmed() or track.time_since_update > 1:
                continue
            bbox = track.to_tlbr()
            cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])),
                          (int(bbox[2]), int(bbox[3])), (255, 255, 255), 4)
            cv2.putText(frame, str(track.track_id), (int(bbox[0]), int(bbox[1])),
                        0, 5e-3 * 480, (124, 252, 0), 4)
        for det in detections:
            bbox = det.to_tlbr()
            cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])),
                          (int(bbox[2]), int(bbox[3])), (255, 0, 0), 4)

        cv2.imshow('', frame)
        fps = (fps + (1. / (time.time() - t1))) / 2
        print("fps= %f" % (fps))

        # Press Q to stop!
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

    video_capture.release()
    cv2.destroyAllWindows()
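# Every pipeline in this file calls preprocessing.non_max_suppression on the
# (t, l, w, h) boxes before tracking. For reference, here is a compact greedy
# IoU-suppression sketch equivalent in spirit to the deep_sort helper (not
# the library's exact code):
import numpy as np

def non_max_suppression_sketch(boxes, max_overlap, scores):
    if len(boxes) == 0:
        return []
    boxes = boxes.astype(float)
    x1, y1 = boxes[:, 0], boxes[:, 1]
    x2, y2 = boxes[:, 0] + boxes[:, 2], boxes[:, 1] + boxes[:, 3]
    area = (x2 - x1 + 1) * (y2 - y1 + 1)
    order = np.argsort(scores)  # ascending; the best candidate is last
    keep = []
    while len(order) > 0:
        i = order[-1]
        keep.append(i)
        # Intersection of the best box with all remaining candidates.
        xx1 = np.maximum(x1[i], x1[order[:-1]])
        yy1 = np.maximum(y1[i], y1[order[:-1]])
        xx2 = np.minimum(x2[i], x2[order[:-1]])
        yy2 = np.minimum(y2[i], y2[order[:-1]])
        w = np.maximum(0, xx2 - xx1 + 1)
        h = np.maximum(0, yy2 - yy1 + 1)
        overlap = (w * h) / area[order[:-1]]
        # Drop candidates that overlap the kept box too much.
        order = order[:-1][overlap <= max_overlap]
    return keep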
def main(yolo, input): #拡張子ありのファイル名 basename = os.path.basename(input) print(" START YOLOv4 + DeepSort input file is ", basename) # Definition of the parameters max_cosine_distance = 0.3 nn_budget = None nms_max_overlap = 1.0 # Deep SORT model_filename = '../model/mars-small128.pb' cencoder = gdet.create_box_encoder(model_filename, batch_size=1) pencoder = gdet.create_box_encoder(model_filename, batch_size=1) cmetric = nn_matching.NearestNeighborDistanceMetric( "cosine", max_cosine_distance, nn_budget) pmetric = nn_matching.NearestNeighborDistanceMetric( "cosine", max_cosine_distance, nn_budget) ctracker = Tracker(cmetric) ptracker = Tracker(pmetric) tracking = True writeVideo_flag = True #推論したいカテゴリを設定 cl_list = ['Pedestrian', 'Car'] video_capture = cv2.VideoCapture(input) fps = 0.0 fps_imutils = imutils.video.FPS().start() if writeVideo_flag: basename_without_ext = os.path.splitext(os.path.basename(input))[0] fname = basename_without_ext + 'output_yolov4.mp4' output_path = '../output/' + fname video_FourCC = int(video_capture.get(cv2.CAP_PROP_FOURCC)) video_fps = video_capture.get(cv2.CAP_PROP_FPS) video_size = (int(video_capture.get(cv2.CAP_PROP_FRAME_WIDTH)), int(video_capture.get(cv2.CAP_PROP_FRAME_HEIGHT))) out = cv2.VideoWriter(output_path, video_FourCC, video_fps, video_size) frame_index = -1 Nm_fr = 0 all_result = [] while True: Car_result_ALL = [] Pedestrian_result_ALL = [] Nm_fr = Nm_fr + 1 ret, frame = video_capture.read() # frame shape 1920*1216*3 if ret != True: break print("Frame no. = ", Nm_fr) t1 = time.time() image = Image.fromarray(frame[..., ::-1]) cboxes, cconfidence, cclasses = yolo.detect_image(image, cl_list[1]) pboxes, pconfidence, pclasses = yolo.detect_image(image, cl_list[0]) if tracking: cfeatures = cencoder(frame, cboxes) pfeatures = pencoder(frame, pboxes) cdetections = [Detection(cbbox, cconfidence, ceach_class, cfeature) for cbbox, cconfidence, ceach_class, cfeature in \ zip(cboxes, cconfidence, cclasses, cfeatures)] pdetections = [Detection(pbbox, pconfidence, peach_class, pfeature) for pbbox, pconfidence, peach_class, pfeature in \ zip(pboxes, pconfidence, pclasses, pfeatures)] #else: # detections = [Detection_YOLO(bbox, confidence, each_class) for bbox, confidence, each_class in \ # zip(boxes, confidence, classes)] # Run non-maxima suppression. 
cboxes = np.array([d.tlwh for d in cdetections]) cscores = np.array([d.confidence for d in cdetections]) cindices = preprocessing.non_max_suppression( cboxes, nms_max_overlap, cscores) cdetections = [cdetections[i] for i in cindices] pboxes = np.array([d.tlwh for d in pdetections]) pscores = np.array([d.confidence for d in pdetections]) pindices = preprocessing.non_max_suppression( pboxes, nms_max_overlap, pscores) pdetections = [pdetections[i] for i in pindices] if tracking: # Call the tracker ctracker.predict() ctracker.update(cdetections) ptracker.predict() ptracker.update(pdetections) for ctrack in ctracker.tracks: if not ctrack.is_confirmed( ) or ctrack.time_since_update > 1: continue cbbox = ctrack.to_tlbr() cv2.rectangle(frame, (int(cbbox[0]), int(cbbox[1])), (int(cbbox[2]), int(cbbox[3])), (0, 0, 255), 3) cv2.putText(frame, "ID: " + str(ctrack.track_id), (int(cbbox[0]), int(cbbox[1])), 0, \ 1.5e-3 * frame.shape[0], (0, 0, 255), 3) #OUTPUT TRACKING ID = int(ctrack.track_id) left = int(cbbox[0]) top = int(cbbox[1]) right = int(cbbox[2]) bottom = int(cbbox[3]) Car_result = { 'id': ID, 'box2d': [left, top, right, bottom] } #予測結果 print("Car_result = ", Car_result) Car_result_ALL.append(Car_result) for ptrack in ptracker.tracks: if not ptrack.is_confirmed( ) or ptrack.time_since_update > 1: continue pbbox = ptrack.to_tlbr() cv2.rectangle(frame, (int(pbbox[0]), int(pbbox[1])), (int(pbbox[2]), int(pbbox[3])), (255, 0, 0), 3) cv2.putText(frame, "ID: " + str(ptrack.track_id), (int(pbbox[0]), int(pbbox[1])), 0, \ 1.5e-3 * frame.shape[0], (255, 0, 0), 3) #OUTPUT TRACKING ID = int(ptrack.track_id) left = int(pbbox[0]) top = int(pbbox[1]) right = int(pbbox[2]) bottom = int(pbbox[3]) Pedestrian_result = { 'id': ID, 'box2d': [left, top, right, bottom] } #予測結果 print("Pedestrian_result = ", Pedestrian_result) Pedestrian_result_ALL.append(Pedestrian_result) #YOLOv4 output to frame for Car for cdet in cdetections: cbbox = cdet.to_tlbr() cscore = "%.2f" % round(cdet.confidence * 100, 2) + "%" cv2.rectangle(frame, (int(cbbox[0]), int(cbbox[1])), (int(cbbox[2]), int(cbbox[3])), (255, 255, 255), 2) if len(cclasses) > 0: ceach_class = cdet.cls cv2.putText(frame, str(ceach_class) + " " + cscore, (int(cbbox[0]), int(cbbox[3])), 0, \ 1.5e-3 * frame.shape[0], (255, 255, 255), 2) #YOLOv4 output to frame for Pedestrian for pdet in pdetections: pbbox = pdet.to_tlbr() pscore = "%.2f" % round(pdet.confidence * 100, 2) + "%" cv2.rectangle(frame, (int(pbbox[0]), int(pbbox[1])), (int(pbbox[2]), int(pbbox[3])), (127, 127, 127), 2) if len(pclasses) > 0: peach_class = pdet.cls cv2.putText(frame, str(peach_class) + " " + pscore, (int(pbbox[0]), int(pbbox[3])), 0, \ 1.5e-3 * frame.shape[0], (127, 127, 127), 2) # Each frame result all_result.append({ 'Car': Car_result_ALL, 'Pedestrian': Pedestrian_result_ALL }) if writeVideo_flag: # save a frame out.write(frame) frame_index = frame_index + 1 fps_imutils.update() fps = (fps + (1. / (time.time() - t1))) / 2 print(" FPS = %f" % (fps)) if writeVideo_flag: out.release() video_capture.release() fps_imutils.stop() print('imutils FPS: {}'.format(fps_imutils.fps())) return {basename: all_result}
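# The main(yolo, input) above duplicates the encoder/metric/tracker plumbing
# once for 'Car' and once for 'Pedestrian'. The same idea generalizes to any
# class list with a dictionary of independent trackers. A sketch, assuming
# the usual deep_sort package layout (not this file's code):
from deep_sort import nn_matching
from deep_sort.tracker import Tracker

def build_per_class_trackers(class_names, max_cosine_distance=0.3, nn_budget=None):
    # One appearance metric and one Kalman/association state per class,
    # so track IDs never leak between categories.
    return {
        name: Tracker(nn_matching.NearestNeighborDistanceMetric(
            "cosine", max_cosine_distance, nn_budget))
        for name in class_names
    }

# usage: trackers = build_per_class_trackers(['Pedestrian', 'Car'])
#        trackers['Car'].predict(); trackers['Car'].update(car_detections)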
def Object_tracking(Yolo, video_path, output_path, input_size=416, show=False, CLASSES=YOLO_COCO_CLASSES, score_threshold=0.3, iou_threshold=0.45, rectangle_colors='', Track_only=[]): # Definition of the parameters max_cosine_distance = 0.7 nn_budget = None #initialize deep sort object model_filename = 'model_data/mars-small128.pb' encoder = gdet.create_box_encoder(model_filename, batch_size=1) metric = nn_matching.NearestNeighborDistanceMetric("cosine", max_cosine_distance, nn_budget) tracker = Tracker(metric) times, times_2 = [], [] if video_path: vid = cv2.VideoCapture(video_path) # detect on video else: vid = cv2.VideoCapture(0) # detect from webcam # by default VideoCapture returns float instead of int width = int(vid.get(cv2.CAP_PROP_FRAME_WIDTH)) height = int(vid.get(cv2.CAP_PROP_FRAME_HEIGHT)) fps = int(vid.get(cv2.CAP_PROP_FPS)) codec = cv2.VideoWriter_fourcc(*'XVID') out = cv2.VideoWriter(output_path, codec, fps, (width, height)) # output_path must be .mp4 NUM_CLASS = read_class_names(CLASSES) key_list = list(NUM_CLASS.keys()) val_list = list(NUM_CLASS.values()) while True: _, frame = vid.read() try: original_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB) original_frame = cv2.cvtColor(original_frame, cv2.COLOR_BGR2RGB) except: break image_data = image_preprocess(np.copy(original_frame), [input_size, input_size]) #image_data = tf.expand_dims(image_data, 0) image_data = image_data[np.newaxis, ...].astype(np.float32) t1 = time.time() if YOLO_FRAMEWORK == "tf": pred_bbox = Yolo.predict(image_data) elif YOLO_FRAMEWORK == "trt": batched_input = tf.constant(image_data) result = Yolo(batched_input) pred_bbox = [] for key, value in result.items(): value = value.numpy() pred_bbox.append(value) #t1 = time.time() #pred_bbox = Yolo.predict(image_data) t2 = time.time() pred_bbox = [tf.reshape(x, (-1, tf.shape(x)[-1])) for x in pred_bbox] pred_bbox = tf.concat(pred_bbox, axis=0) bboxes = postprocess_boxes(pred_bbox, original_frame, input_size, score_threshold) bboxes = nms(bboxes, iou_threshold, method='nms') # extract bboxes to boxes (x, y, width, height), scores and names boxes, scores, names = [], [], [] for bbox in bboxes: if len(Track_only) != 0 and NUM_CLASS[int( bbox[5])] in Track_only or len(Track_only) == 0: boxes.append([ bbox[0].astype(int), bbox[1].astype(int), bbox[2].astype(int) - bbox[0].astype(int), bbox[3].astype(int) - bbox[1].astype(int) ]) scores.append(bbox[4]) names.append(NUM_CLASS[int(bbox[5])]) # Obtain all the detections for the given frame. boxes = np.array(boxes) names = np.array(names) scores = np.array(scores) features = np.array(encoder(original_frame, boxes)) detections = [ Detection(bbox, score, class_name, feature) for bbox, score, class_name, feature in zip( boxes, scores, names, features) ] # Pass detections to the deepsort object and obtain the track information. 
tracker.predict() tracker.update(detections) # Obtain info from the tracks tracked_bboxes = [] for track in tracker.tracks: if not track.is_confirmed() or track.time_since_update > 5: continue bbox = track.to_tlbr() # Get the corrected/predicted bounding box class_name = track.get_class( ) #Get the class name of particular object tracking_id = track.track_id # Get the ID for the particular track index = key_list[val_list.index( class_name)] # Get predicted object index by object name tracked_bboxes.append( bbox.tolist() + [tracking_id, index] ) # Structure data, that we could use it with our draw_bbox function # draw detection on frame image = draw_bbox(original_frame, tracked_bboxes, CLASSES=CLASSES, tracking=True) t3 = time.time() times.append(t2 - t1) times_2.append(t3 - t1) times = times[-20:] times_2 = times_2[-20:] ms = sum(times) / len(times) * 1000 fps = 1000 / ms fps2 = 1000 / (sum(times_2) / len(times_2) * 1000) image = cv2.putText(image, "Time: {:.1f} FPS".format(fps), (0, 30), cv2.FONT_HERSHEY_COMPLEX_SMALL, 1, (0, 0, 255), 2) # draw original yolo detection #image = draw_bbox(image, bboxes, CLASSES=CLASSES, show_label=False, rectangle_colors=rectangle_colors, tracking=True) print( "Time: {:.2f}ms, Detection FPS: {:.1f}, total FPS: {:.1f}".format( ms, fps, fps2)) if output_path != '': out.write(image) if show: cv2.imshow('output', image) if cv2.waitKey(25) & 0xFF == ord("q"): cv2.destroyAllWindows() break cv2.destroyAllWindows()
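# Object_tracking() above keeps the last 20 per-frame latencies
# (times = times[-20:]) and averages them for a stable FPS readout. The same
# smoothing, factored into a small reusable helper:
import time
from collections import deque

class FpsMeter:
    def __init__(self, window=20):
        self.samples = deque(maxlen=window)  # sliding window of frame latencies

    def tick(self, start_time):
        self.samples.append(time.time() - start_time)

    @property
    def fps(self):
        # N frames over the total time they took.
        return len(self.samples) / sum(self.samples) if self.samples else 0.0

# usage: meter = FpsMeter(); t1 = time.time(); ...; meter.tick(t1); print(meter.fps)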
def main(_argv): # Definition of the parameters max_cosine_distance = 0.4 nn_budget = None nms_max_overlap = 1.0 # initialize deep sort model_filename = 'model_data/mars-small128.pb' encoder = gdet.create_box_encoder(model_filename, batch_size=1) # calculate cosine distance metric metric = nn_matching.NearestNeighborDistanceMetric("cosine", max_cosine_distance, nn_budget) # initialize tracker tracker = Tracker(metric) # load configuration for object detector config = ConfigProto() config.gpu_options.allow_growth = True session = InteractiveSession(config=config) STRIDES, ANCHORS, NUM_CLASS, XYSCALE = utils.load_config(FLAGS) input_size = FLAGS.size video_path = FLAGS.video # load tflite model if flag is set if FLAGS.framework == 'tflite': interpreter = tf.lite.Interpreter(model_path=FLAGS.weights) interpreter.allocate_tensors() input_details = interpreter.get_input_details() output_details = interpreter.get_output_details() print(input_details) print(output_details) # otherwise load standard tensorflow saved model else: saved_model_loaded = tf.saved_model.load(FLAGS.weights, tags=[tag_constants.SERVING]) infer = saved_model_loaded.signatures['serving_default'] # begin video capture try: vid = cv2.VideoCapture(int(video_path)) except: vid = cv2.VideoCapture(video_path) out = None # get video ready to save locally if flag is set if FLAGS.output: # by default VideoCapture returns float instead of int width = int(vid.get(cv2.CAP_PROP_FRAME_WIDTH)) height = int(vid.get(cv2.CAP_PROP_FRAME_HEIGHT)) fps = int(vid.get(cv2.CAP_PROP_FPS)) codec = cv2.VideoWriter_fourcc(*FLAGS.output_format) out = cv2.VideoWriter(FLAGS.output, codec, fps, (width, height)) frame_num = 0 # while video is running while True: return_value, frame = vid.read() if return_value: frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB) image = Image.fromarray(frame) else: print('Video has ended or failed, try a different video format!') break frame_num += 1 print('Frame #: ', frame_num) frame_size = frame.shape[:2] image_data = cv2.resize(frame, (input_size, input_size)) image_data = image_data / 255. 
image_data = image_data[np.newaxis, ...].astype(np.float32) start_time = time.time() # run detections on tflite if flag is set if FLAGS.framework == 'tflite': interpreter.set_tensor(input_details[0]['index'], image_data) interpreter.invoke() pred = [ interpreter.get_tensor(output_details[i]['index']) for i in range(len(output_details)) ] # run detections using yolov3 if flag is set if FLAGS.model == 'yolov3' and FLAGS.tiny == True: boxes, pred_conf = filter_boxes(pred[1], pred[0], score_threshold=0.25, input_shape=tf.constant( [input_size, input_size])) else: boxes, pred_conf = filter_boxes(pred[0], pred[1], score_threshold=0.25, input_shape=tf.constant( [input_size, input_size])) else: batch_data = tf.constant(image_data) pred_bbox = infer(batch_data) for key, value in pred_bbox.items(): boxes = value[:, :, 0:4] pred_conf = value[:, :, 4:] boxes, scores, classes, valid_detections = tf.image.combined_non_max_suppression( boxes=tf.reshape(boxes, (tf.shape(boxes)[0], -1, 1, 4)), scores=tf.reshape( pred_conf, (tf.shape(pred_conf)[0], -1, tf.shape(pred_conf)[-1])), max_output_size_per_class=50, max_total_size=50, iou_threshold=FLAGS.iou, score_threshold=FLAGS.score) # convert data to numpy arrays and slice out unused elements num_objects = valid_detections.numpy()[0] bboxes = boxes.numpy()[0] bboxes = bboxes[0:int(num_objects)] scores = scores.numpy()[0] scores = scores[0:int(num_objects)] classes = classes.numpy()[0] classes = classes[0:int(num_objects)] # format bounding boxes from normalized ymin, xmin, ymax, xmax ---> xmin, ymin, width, height original_h, original_w, _ = frame.shape bboxes = utils.format_boxes(bboxes, original_h, original_w) # store all predictions in one parameter for simplicity when calling functions pred_bbox = [bboxes, scores, classes, num_objects] # read in all class names from config class_names = utils.read_class_names(cfg.YOLO.CLASSES) # by default allow all classes in .names file allowed_classes = list(class_names.values()) # custom allowed classes (uncomment line below to customize tracker for only people) #allowed_classes = ['person'] # loop through objects and use class index to get class name, allow only classes in allowed_classes list names = [] deleted_indx = [] for i in range(num_objects): class_indx = int(classes[i]) class_name = class_names[class_indx] if class_name not in allowed_classes: deleted_indx.append(i) else: names.append(class_name) names = np.array(names) count = len(names) if FLAGS.count: cv2.putText(frame, "Objects being tracked: {}".format(count), (5, 35), cv2.FONT_HERSHEY_COMPLEX_SMALL, 2, (0, 255, 0), 2) print("Objects being tracked: {}".format(count)) # delete detections that are not in allowed_classes bboxes = np.delete(bboxes, deleted_indx, axis=0) scores = np.delete(scores, deleted_indx, axis=0) # encode yolo detections and feed to tracker features = encoder(frame, bboxes) detections = [ Detection(bbox, score, class_name, feature) for bbox, score, class_name, feature in zip( bboxes, scores, names, features) ] # initialize color map cmap = plt.get_cmap('tab20b') colors = [cmap(i)[:3] for i in np.linspace(0, 1, 20)] # run non-maxima supression boxs = np.array([d.tlwh for d in detections]) scores = np.array([d.confidence for d in detections]) classes = np.array([d.class_name for d in detections]) indices = preprocessing.non_max_suppression(boxs, classes, nms_max_overlap, scores) detections = [detections[i] for i in indices] # Call the tracker tracker.predict() tracker.update(detections) # update tracks for track in tracker.tracks: if not 
track.is_confirmed() or track.time_since_update > 1: continue bbox = track.to_tlbr() class_name = track.get_class() # draw bbox on screen # names = {'6_d': 'Thomas Delaney', # '10_b': 'Leroy Sane', # '18_b': 'Leon Goretzka', # '25_b': 'Thomas Muller', # '5_d': 'Dan-Axel Zagadou', # '12_d': 'Zaragoza', # '4_b': 'Niklas Sule', # '14_d': 'Nico Schulz', # '11_d': 'Marco Reus', # 'Referee': 'Referee', # 'ball': 'ball', # '10_d': 'Thorgan Hazard', # '6_b': 'Joshua Kimmich ', # 'gk_b': 'Ron-Thorben Hoffmann(GK)', # '17_b': 'Jérôme Boateng', # '27_b': 'David Alaba', # '9_d': 'Erling Haaland', # '8_d': 'Mahmoud Dahoud', # 'gk_d': 'Luca Unbehaun(GK)', # '19_b': 'Alphonso Davies', # '29_b': 'Kingsley Coman', # '24_d': 'Marcel Schmelzer', # '9_b': 'Robert Lewandowski', # "23_d": 'Emre Can', # } # if class_name == 'Referee': # color = (0, 0, 0) if class_name == 'ball': # color = (255, 255, 255) cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), (255, 255, 255), 1) # else: # try: # colors = {'b': (252, 3, 78), 'd': (250, 247, 80)} # color = colors[str(class_name.split('_')[-1])] # except KeyError: # pass # class_name = names[str(class_name)] # color = (250, 247, 80) # color = colors[int(track.track_id) % len(colors)] # color = [i * 255 for i in color] # cv2.rectangle(frame, (int(bbox[0]), int( # bbox[1])), (int(bbox[2]), int(bbox[3])), color, 1) # cv2.rectangle(frame, (int(bbox[0]), int( # bbox[1]-30)), (int(bbox[0])+(len(str(class_name)))*17, int(bbox[1])), color, -1) cv2.putText(frame, class_name, (int(bbox[0]), int(bbox[1] - 10)), 0, 0.75, (255, 251, 46), 2) # if enable info flag then print details about each track if FLAGS.info: print( "Tracker ID: {}, Class: {}, BBox Coords (xmin, ymin, xmax, ymax): {}" .format(str(track.track_id), class_name, (int( bbox[0]), int(bbox[1]), int(bbox[2]), int(bbox[3])))) # calculate frames per second of running detections fps = 1.0 / (time.time() - start_time) print("FPS: %.2f" % fps) result = np.asarray(frame) result = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR) if not FLAGS.dont_show: cv2.imshow("Output Video", result) # if output flag is set, save video file if FLAGS.output: out.write(result) if cv2.waitKey(1) & 0xFF == ord('q'): break cv2.destroyAllWindows()
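# Several mains above derive a stable drawing color from the track id via the
# matplotlib 'tab20b' colormap. The same lookup as a standalone helper; note
# that matplotlib returns RGB floats in [0, 1] while OpenCV draws BGR ints:
import matplotlib.pyplot as plt

def track_color(track_id, n_colors=20):
    cmap = plt.get_cmap('tab20b')
    r, g, b = cmap((track_id % n_colors) / n_colors)[:3]
    return int(b * 255), int(g * 255), int(r * 255)  # BGR for cv2.rectangle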
def run_cam_mode(detection_model, args): ''' Run the tracker on camera stream ''' cam = cv2.VideoCapture(0) cam.set(cv2.CAP_PROP_FRAME_WIDTH, FRAME_WIDTH) cam.set(cv2.CAP_PROP_FRAME_HEIGHT, FRAME_HEIGHT) metric = nn_matching.NearestNeighborDistanceMetric("cosine", args.max_cosine_distance, NN_BUDGET) tracker = Tracker(metric) encoder = gen_det.create_box_encoder(HUMAN_ENCODER_PATH, batch_size=32) # Cam loop while True: ret, frame = cam.read() t1 = time.time() # Detect humans in the frame frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB) bboxes, detection_scores = detect_humans(detection_model, frame_rgb) detection_list = cvt_to_detection_objects(frame_rgb, bboxes, detection_scores, encoder) # Run non-maxima suppression. detection_list = [d for d in detection_list if d.confidence >= args.min_confidence] boxes = np.array([d.tlwh for d in detection_list]) scores = np.array([d.confidence for d in detection_list]) indices = dsutil_prep.non_max_suppression(boxes, NMS_MAX_OVERLAP, scores) detection_list = [detection_list[i] for i in indices] # Update tracker tracker.predict() tracker.update(detection_list) # FPS counter fps = 1/(time.time()-t1) # Update visualization output_img = frame.copy() ## Visualize all detections for i, detection in enumerate(detection_list): x,y,w,h = detection.tlwh pt1 = int(x), int(y) pt2 = int(x + w), int(y + h) cv2.rectangle(output_img, pt1, pt2, (0, 0, 255), 2) ## Visualize confirmed tracks tracks = tracker.tracks for track in tracks: if not track.is_confirmed() or track.time_since_update > 0: continue color = dsutil_viz.create_unique_color_uchar(track.track_id) x,y,w,h = track.to_tlwh() pt1 = int(x), int(y) pt2 = int(x + w), int(y + h) cv2.rectangle(output_img, pt1, pt2, color, 2) text_size = cv2.getTextSize(str(track.track_id), cv2.FONT_HERSHEY_PLAIN, 1, 2) center = pt1[0] + 5, pt1[1] + 5 + text_size[0][1] pt2 = pt1[0] + 10 + text_size[0][0], pt1[1] + 10 + text_size[0][1] cv2.rectangle(output_img, pt1, pt2, color, -1) cv2.putText(output_img, str(track.track_id), center, cv2.FONT_HERSHEY_PLAIN, 1, (255, 255, 255), 2) cv2.putText(output_img, 'FPS: {:.1f}'.format(fps), (50,50), cv2.FONT_HERSHEY_SIMPLEX, 1, (0,255,0), 2, cv2.LINE_AA) cv2.imshow('detection output', output_img) if cv2.waitKey(1) & 0xFF == ord('q'): break cv2. destroyAllWindows() cam.release()
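# run_cam_mode() colors confirmed tracks with
# dsutil_viz.create_unique_color_uchar(track_id). A sketch of the idea behind
# that helper (golden-ratio hue stepping so consecutive ids land on distant
# hues); the library's exact constants may differ:
import colorsys

def unique_color_uchar_sketch(tag, hue_step=0.41):
    h = (tag * hue_step) % 1.0
    v = 1.0 - (int(tag * hue_step) % 4) / 5.0  # cycle brightness a little too
    r, g, b = colorsys.hsv_to_rgb(h, 1.0, v)
    return int(r * 255), int(g * 255), int(b * 255)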
def Display(): print("Start Displaying") yolo = YOLO() # Definition of the parameters max_cosine_distance = 0.3 nn_budget = None nms_max_overlap = 1.0 # deep_sort model_filename = 'model_data/mars-small128.pb' encoder = gdet.create_box_encoder(model_filename, batch_size=1) metric = nn_matching.NearestNeighborDistanceMetric("cosine", max_cosine_distance, nn_budget) tracker = Tracker(metric) writeVideo_flag = True w = 768 h = 432 fourcc = cv2.VideoWriter_fourcc(*'MJPG') out = cv2.VideoWriter('output.avi', fourcc, 15, (w, h)) list_file = open('detection.txt', 'w') frame_index = -1 fps = 0.0 max_boxs = 0 person_track = {} yolo2 = YOLO2() while True: if q.empty() != True: #读取打卡信息 face = [] cur1 = conn.cursor() # 获取一个游标 sql1 = "select * from worker" cur1.execute(sql1) data = cur1.fetchall() for d in data: # 注意int类型需要使用str函数转义 name = str(d[1]) + '_' + d[2] face.append(name) cur1.close() # 关闭游标 #获取队列帧 frame = q.get() t1 = time.time() #进行安全措施检测 image = Image.fromarray(frame[..., ::-1]) # bgr to rgb img = Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)) frame, wear = yolo2.detect_image(img) frame = np.array(frame) frame = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR) # 获取警戒线 #cv2.line(frame, (132,368), (229, 368), (0, 255, 255), 3) cv2.line(frame, (275,360), (378, 360), (0, 255, 255), 1) transboundaryline = t.line_detect_possible_demo(frame) #yolo目标检测 boxs = yolo.detect_image(image) features = encoder(frame, boxs) # score to 1.0 here). detections = [Detection(bbox, 1.0, feature) for bbox, feature in zip(boxs, features)] # Run non-maxima suppression. boxes = np.array([d.tlwh for d in detections]) scores = np.array([d.confidence for d in detections]) indices = preprocessing.non_max_suppression(boxes, nms_max_overlap, scores) detections = [detections[i] for i in indices] if len(boxs) > max_boxs: max_boxs = len(boxs) # Call the tracker tracker.predict() tracker.update(detections) for track in tracker.tracks: if max_boxs < track.track_id: tracker.tracks.remove(track) tracker._next_id = max_boxs + 1 if not track.is_confirmed() or track.time_since_update > 1: continue bbox = track.to_tlbr() PointX = bbox[0] + ((bbox[2] - bbox[0]) / 2) PointY = bbox[3] if track.track_id not in person_track: track2 = copy.deepcopy(track) person_track[track.track_id] = track2 else: track2 = copy.deepcopy(track) bbox2 = person_track[track.track_id].to_tlbr() PointX2 = bbox2[0] + ((bbox2[2] - bbox2[0]) / 2) PointY2 = bbox2[3] distance = math.sqrt(pow(PointX - PointX2, 2) + pow(PointY - PointY2, 2)) #修正 if distance < 120: person_track[track.track_id] = track2 else: # print('last',track.track_id) dis = {} for key in person_track: bbox3 = person_track[key].to_tlbr() PointX3 = bbox3[0] + ((bbox3[2] - bbox3[0]) / 2) PointY3 = bbox3[3] d = math.sqrt(pow(PointX3 - PointX, 2) + pow(PointY3 - PointY, 2)) dis[key] = d dis = sorted(dis.items(), key=operator.itemgetter(1), reverse=False) track2.track_id = dis[0][0] person_track[dis[0][0]] = track2 tracker.tracks.remove(track) tracker.tracks.append(person_track[track.track_id]) # 写入class try: box_title = face[track2.track_id - 1] except Exception as e: box_title = str(track2.track_id) + "_" + "unknow" if box_title not in workers: wid = box_title.split('_')[0] localtime = time.asctime(time.localtime(time.time())) workers[box_title] = wk.Worker() workers[box_title].set(box_title, localtime, (int(PointX), int(PointY))) cur2 = conn.cursor() # 获取一个游标 sql2 = "UPDATE worker SET in_time='" + localtime + "' WHERE worker_id= '" + wid + "'" cur2.execute(sql2) cur2.close() # 关闭游标 else: localtime = 
time.asctime(time.localtime(time.time())) yoloPoint = (int(PointX), int(PointY)) wear_dic = {} workers[box_title].current_point = yoloPoint workers[box_title].track_point.append(workers[box_title].current_point) mytrack = str(workers[box_title].track_point) wid = box_title.split('_')[0] # 卡尔曼滤波预测 if wid not in utils.KalmanNmae: utils.myKalman(wid) if wid not in utils.lmp: utils.setLMP(wid) cpx, cpy = utils.predict(workers[box_title].current_point[0], workers[box_title].current_point[1], wid) if cpx[0] == 0.0 or cpy[0] == 0.0: cpx[0] = workers[box_title].current_point[0] cpy[0] = workers[box_title].current_point[1] workers[box_title].next_point = (int(cpx), int(cpy)) cur3 = conn.cursor() # 获取一个游标 sql3 = "UPDATE worker SET current_point= '" + str(workers[box_title].current_point) + "' ,track_point = '" + mytrack + "',next_point = '" + str(workers[box_title].next_point) + "' WHERE worker_id= '" + wid + "'" cur3.execute(sql3) cur3.close() # 写入安全措施情况 if len(wear) > 0: for w in wear: wear_dis = int(math.sqrt(pow(w[0] - yoloPoint[0], 2) + pow(w[1] - yoloPoint[1], 2))) wear_dic[wear_dis] = w wear_dic = sorted(wear_dic.items(), key=operator.itemgetter(0), reverse=False) if wear_dic[0][0] < 120: cur4 = conn.cursor() # 获取一个游标 if wear[wear_dic[0][1]] == 1: if len(workers[box_title].wear['no helmet']) == 0: workers[box_title].wear['no helmet'].append(localtime) sql = "INSERT INTO wear SET worker_id = '" + wid + "', type = 'no_helmet',abnormal_time = '" + localtime + "'" cur4.execute(sql) cur4.close() # 关闭游标 else: print(box_title,workers[box_title].wear['no helmet']) if localtime not in workers[box_title].wear['no helmet']: workers[box_title].wear['no helmet'].append(localtime) sql = "INSERT INTO wear SET worker_id = '" + wid + "', type = 'no_helmet',abnormal_time = '" + localtime + "'" cur4.execute(sql) cur4.close() # 关闭游标 elif wear[wear_dic[0][1]] == 2: if len(workers[box_title].wear['no work cloths']) == 0: workers[box_title].wear['no work cloths'].append(localtime) sql = "INSERT INTO wear SET worker_id = '" + wid + "', type = 'no work cloths',abnormal_time = '" + localtime + "'" cur4.execute(sql) cur4.close() # 关闭游标 else: if localtime not in workers[box_title].wear['no work cloths']: workers[box_title].wear['no work cloths'].append(localtime) sql = "INSERT INTO wear SET worker_id = '" + wid + "', type = 'no work cloths',abnormal_time = '" + localtime + "'" cur4.execute(sql) cur4.close() # 关闭游标 elif wear[wear_dic[0][1]] == 3: if len(workers[box_title].wear['unsafe wear']) == 0: workers[box_title].wear['unsafe wear'].append(localtime) sql = "INSERT INTO wear SET worker_id = '" + wid + "', type = 'unsafe wear',abnormal_time = '" + localtime + "'" cur4.execute(sql) cur4.close() # 关闭游标 else: if localtime not in workers[box_title].wear['unsafe wear']: workers[box_title].wear['unsafe wear'].append(localtime) sql = "INSERT INTO wear SET worker_id = '" + wid + "', type = 'unsafe wear',abnormal_time = '" + localtime + "'" cur4.execute(sql) cur4.close() # 关闭游标 # 写入越线情况 if len(workers[box_title].track_point) > 4: for i in range(len(transboundaryline)): p1 = (transboundaryline[i][0], transboundaryline[i][1]) p2 = (transboundaryline[i][2], transboundaryline[i][3]) p3 = workers[box_title].track_point[-2] p4 = workers[box_title].track_point[-1] a = t.IsIntersec(p1, p2, p3, p4) if a == '有交点': cur5 = conn.cursor() # 获取一个游标 cur6 = conn.cursor() # 获取一个游标 cur5.execute( "select time from transboundary where worker_id = '" + wid + "' ") qurrytime = cur5.fetchone() cur5.close() # 关闭游标 if qurrytime == None: print('越线') sql 
= "INSERT INTO transboundary SET worker_id = '" + wid + "',time = '" + localtime + "'" cur6.execute(sql) cur6.close() # 关闭游标 else: temp1 = 0 for qt in qurrytime: if qt == localtime: temp1 = 1 if temp1 == 0: print('越线') sql = "INSERT INTO transboundary SET worker_id = '" + wid + "',time = '" + localtime + "'" cur6.execute(sql) cur6.close() # 关闭游标 if len(workers[box_title].track_point) >= 20: workers[box_title].previous_point = workers[box_title].track_point[-5] conn.commit() try: cv2.putText(frame, face[track2.track_id - 1], (int(bbox[0]), int(bbox[1])), 0, 5e-3 * 200, (0, 255, 0), 2) except Exception as e: cv2.putText(frame, "unknow", (int(bbox[0]), int(bbox[1])), 0, 5e-3 * 200, (0, 255, 0), 2) cv2.imshow('', frame) if writeVideo_flag: # save a frame out.write(frame) frame_index = frame_index + 1 list_file.write(str(frame_index) + ' ') if len(boxs) != 0: for i in range(0, len(boxs)): list_file.write(str(boxs[i][0]) + ' ' + str(boxs[i][1]) + ' ' + str(boxs[i][2]) + ' ' + str( boxs[i][3]) + ' ') list_file.write('\n') fps = (fps + (1. / (time.time() - t1))) / 2 if cv2.waitKey(1) & 0xFF == ord('q'): break
def run_eval_mode(detection_model, args): ''' Evaluate the tracker on MOT-16 training set ''' train_sequence_names = ['MOT16-02', 'MOT16-04', 'MOT16-05', 'MOT16-09', 'MOT16-10', 'MOT16-11', 'MOT16-13'] # Print parameters before starting ---------------- logger.info("\nConfig ---- \n----Detector: {} \n----Online detection: {}".format(args.detector, args.online_detection)) for sequence_name in train_sequence_names: logger.info("Processing sequence: {}".format(sequence_name)) sequence_dir = MOT16_TRAIN_DATA_DIR + sequence_name + '/' if args.online_detection == 1: if args.detector == 'DPM': raise Exception(" Online detection is possible only when using SSD") detection_file = None else: detection_dir = MOT16_DETECTION_DIR_BASE + args.detector + "/MOT16_POI_train/" detection_file = detection_dir + sequence_name + '.npy' detections = np.load(detection_file, allow_pickle=True) #print("detections shape:", detections.shape) seq_info = gather_sequence_info(sequence_dir, detection_file) metric = nn_matching.NearestNeighborDistanceMetric("cosine", args.max_cosine_distance, NN_BUDGET) tracker = Tracker(metric) results = [] encoder = gen_det.create_box_encoder(HUMAN_ENCODER_PATH, batch_size=32) n_frames = len(seq_info['image_filenames']) logger.info("----Total frames:{}".format(n_frames)) logger.info("----Detections file:{}".format(detection_file)) def frame_callback(vis, frame_idx): logger.info("Processing Sequence {}, Frame {:05d}" .format(sequence_name, frame_idx)) frame = cv2.imread(seq_info['image_filenames'][frame_idx]) t1 = time.time() if args.online_detection == 0: # If not online detection- Use pre-computed detections # Load image and generate detections. detection_list = create_detections(detections, frame_idx, MIN_DETECTION_HEIGHT) detection_list = [d for d in detection_list if d.confidence >= args.min_confidence] # Run non-maxima suppression. boxes = np.array([d.tlwh for d in detection_list]) scores = np.array([d.confidence for d in detection_list]) indices = dsutil_prep.non_max_suppression(boxes, NMS_MAX_OVERLAP, scores) detection_list = [detection_list[i] for i in indices] else: # Use Mobilenet-SSD on the fly # Detect humans frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB) bboxes, detection_scores = detect_humans(detection_model, frame_rgb) detection_list = cvt_to_detection_objects(frame, bboxes, detection_scores, encoder) # Update tracker. tracker.predict() tracker.update(detection_list) # FPS counter fps = 1/(time.time()-t1) # Update visualization. if args.display == 1: vis.set_image(frame.copy()) vis.draw_detections(detection_list) vis.draw_trackers(tracker.tracks) # Store results. for track in tracker.tracks: if not track.is_confirmed() or track.time_since_update > 1: continue bbox = track.to_tlwh() results.append([frame_idx, track.track_id, bbox[0], bbox[1], bbox[2], bbox[3], fps]) # Visualize if args.display == 1: visualizer = dsutil_viz.Visualization(seq_info, update_ms=5) else: visualizer = dsutil_viz.NoVisualization(seq_info) visualizer.run(frame_callback) # Store results. 
        if args.online_detection == 0:  # If not online detection
            output_dir = (RESULTS_DIR + 'Pedestrian Tracking/' + 'EVAL_' + args.detector +
                          '/trackOutput-{}minConf/'.format(args.min_confidence))
            output_file_path = output_dir + sequence_name + '.txt'
        else:
            output_file_path = './Results/temp_hypotheses.txt'
        with open(output_file_path, 'w') as output_file:
            avg_fps = 0
            for row in results:
                output_file.write("{:d},{:d},{:.2f},{:.2f},{:.2f},{:.2f}\n".format(
                    row[0], row[1], row[2], row[3], row[4], row[5]))
                avg_fps += row[6]
        # fps was recorded once per result row, so average over rows rather
        # than over n_frames (the original divisor under-counted).
        if results:
            avg_fps /= len(results)
            logger.info("----Average FPS: {:.2f}".format(avg_fps))
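# run_eval_mode() writes MOT-style hypothesis rows (frame, id, x, y, w, h).
# One common way to score such files against MOT16 ground truth is the
# motmetrics package. A sketch; the per-frame dict layout and thresholds here
# are assumptions, not this repo's evaluation code:
import numpy as np
import motmetrics as mm

def score_sequence_sketch(gt_rows, hyp_rows):
    # gt_rows / hyp_rows: dict frame_idx -> (list of ids, (N, 4) tlwh boxes)
    acc = mm.MOTAccumulator(auto_id=True)
    for frame_idx in sorted(gt_rows):
        gt_ids, gt_boxes = gt_rows[frame_idx]
        hyp_ids, hyp_boxes = hyp_rows.get(frame_idx, ([], np.empty((0, 4))))
        dists = mm.distances.iou_matrix(gt_boxes, hyp_boxes, max_iou=0.5)
        acc.update(gt_ids, hyp_ids, dists)
    mh = mm.metrics.create()
    return mh.compute(acc, metrics=['mota', 'motp', 'num_switches'], name='seq')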
def main(args): logger.info('Start Tracking...') ctx = mx.gpu(0) if args.gpu else mx.cpu() fps = max(0, min(BASE_FPS, args.fps)) net = model_zoo.get_model(args.network, pretrained=True, ctx=ctx) net.reset_class(classes=['person'], reuse_weights=['person']) # Definition of the parameters max_cosine_distance = 0.3 nn_budget = None nms_max_overlap = 1.0 # feature extractor for deepsort re-id encoder = gdet.BoxEncoder() metric = nn_matching.NearestNeighborDistanceMetric('cosine', max_cosine_distance, nn_budget) tracker = Tracker(metric) capture = cv2.VideoCapture(args.src) frame_interval = BASE_FPS // fps if fps > 0 else 0 frame_index = 0 while True: ret, frame = capture.read() if ret != True: break if 0 < fps and frame_index % frame_interval != 0: frame_index += 1 continue x, img = gcv.data.transforms.presets.yolo.transform_test( mx.nd.array(frame).astype('uint8'), short=args.short, ) class_IDs, det_scores, det_boxes = net(x.as_in_context(ctx)) boxs = [] person = mx.nd.array([0]) score_threshold = mx.nd.array([0.5]) for i, class_ID in enumerate(class_IDs[0]): if class_ID == person and det_scores[0][i] >= score_threshold: boxs.append(det_boxes[0][i].asnumpy()) if boxs: features = encoder(img, boxs) else: features = np.array([]) # score to 1.0 here). detections = [ Detection(bbox, 1.0, feature) for bbox, feature in zip(boxs, features) ] # Run non-maxima suppression. boxes = np.array([d.tlwh for d in detections]) scores = np.array([d.confidence for d in detections]) indices = preprocessing.non_max_suppression(boxes, nms_max_overlap, scores) detections = [detections[i] for i in indices] # Call the tracker tracker.predict() tracker.update(detections) frame_index += 1 # store original scene cv2.imwrite(os.path.join(args.out_dir, f'{frame_index}.jpg'), img) show_img = img.copy() # check missed for track in tracker.tracks: bbox = [max(0, int(x)) for x in track.to_tlbr()] if not track.is_confirmed() or track.time_since_update > 1: if 2 <= track.time_since_update < 10: try: cv2.imwrite( os.path.join( args.out_dir, f'missed-{frame_index}-{track.track_id}.jpg'), img, ) except: traceback.print_exc() logger.info('Skipped by time_since_update') continue logger.info(f'Frame #{frame_index} - Id: {track.track_id}') cv2.rectangle(show_img, (bbox[0], bbox[1]), (bbox[2], bbox[3]), (255, 255, 255), 2) cv2.putText(show_img, str(track.track_id), (bbox[0], bbox[1] + 30), 0, 5e-3 * 200, (0, 255, 0), 2) # show image cv2.imwrite(os.path.join(args.out_dir, f'anno-{frame_index}.jpg'), show_img) cv2.imshow('', show_img) # Press Q to stop! if cv2.waitKey(1) & 0xFF == ord('q'): break logger.info( f'Missed obj: {tracker.missed_obj}, Missed frame: {tracker.missed_frame}' ) capture.release() cv2.destroyAllWindows()
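# main(args) above throttles processing with frame_interval = BASE_FPS // fps
# and a modulo check. The same decimation written as a reusable generator
# (a sketch; source_fps is whatever the stream actually delivers):
import cv2

def sampled_frames(capture, source_fps, target_fps):
    interval = max(1, int(source_fps // target_fps)) if target_fps > 0 else 1
    index = 0
    while True:
        ret, frame = capture.read()
        if not ret:
            break
        if index % interval == 0:
            yield index, frame  # only every interval-th frame reaches the tracker
        index += 1

# usage: for idx, frame in sampled_frames(cv2.VideoCapture('in.mp4'), 30, 5): ...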
def process_frame(dataset): # Definition of the parameters to_process = {} max_cosine_distance = 0.3 nn_budget = None nms_max_overlap = 1.0 producer = KafkaProducer( bootstrap_servers='master:6667', value_serializer=lambda m: json.dumps(m).encode('utf8')) data = dataset.collect() # deep_sort model_filename = 'model_data/mars-small128.pb' encoder = gdet.create_box_encoder(model_filename, batch_size=1) metric = nn_matching.NearestNeighborDistanceMetric("cosine", max_cosine_distance, nn_budget) tracker = Tracker(metric) dt_now = dt.datetime.now() for datum in data: event = json.loads(datum[1]) dt_event = dt.datetime.strptime(event['timestamp'], '%Y-%m-%dT%H:%M:%S.%f') delta = dt_now - dt_event print("timestamp = " + str(dt_event)) if delta.seconds > 5: continue to_process[event['camera_id']] = event for key, event in to_process.items(): t1 = time.time() event = json.loads(datum[1]) try: decoded = base64.b64decode(event['image']) except TypeError: return filename = '/home/haohsiang/Vigilancia-Distributed/codev1frame.jpg' # I assume you have a way of picking unique filenames with open(filename, 'wb') as f: f.write(decoded) frame = cv2.imread(filename) #ret, frame = video_capture.read() # frame shape 640*480*3 image = Image.fromarray(frame[..., ::-1]) #bgr to rgb boxs = yolo.detect_image(image) print("box_num", len(boxs)) features = encoder(frame, boxs) # score to 1.0 here). detections = [ Detection(bbox, 1.0, feature) for bbox, feature in zip(boxs, features) ] # Run non-maxima suppression. boxes = np.array([d.tlwh for d in detections]) scores = np.array([d.confidence for d in detections]) indices = preprocessing.non_max_suppression(boxes, nms_max_overlap, scores) detections = [detections[i] for i in indices] # Call the tracker tracker.predict() tracker.update(detections) for track in tracker.tracks: if not track.is_confirmed() or track.time_since_update > 1: continue bbox = track.to_tlbr() cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), (255, 255, 255), 2) cv2.putText(frame, str(track.track_id), (int(bbox[0]), int(bbox[1])), 0, 5e-3 * 200, (0, 255, 0), 2) for det in detections: bbox = det.to_tlbr() cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), (255, 0, 0), 2) #sent result to kafka if len(boxs) > 0: print( str(track.track_id) + ' :' + str(bbox[0]) + ' ' + str(bbox[1]) + ' ' + str(bbox[2]) + ' ' + str(bbox[3])) print(dt.datetime.now().time()) result = { 'ID': str(track.track_id), 'timestamp': dt.datetime.now().isoformat(), 'location_x': str(bbox[0]), 'w': str(bbox[2]) } producer.send('position', result) producer.flush() # Press Q to stop! if cv2.waitKey(1) & 0xFF == ord('q'): break
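# process_frame() above publishes per-track JSON onto the 'position' topic.
# A matching kafka-python consumer for a downstream service might look like
# this (broker address copied from above; everything else is illustrative):
import json
from kafka import KafkaConsumer

consumer = KafkaConsumer(
    'position',
    bootstrap_servers='master:6667',
    value_deserializer=lambda m: json.loads(m.decode('utf8')))

for message in consumer:
    track = message.value
    # Fields mirror the producer's result dict above.
    print(track['ID'], track['timestamp'], track['location_x'], track['w'])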
def run(sequence_dir, detection_file, output_file, min_confidence,
        nms_max_overlap, min_detection_height, max_cosine_distance,
        nn_budget, display):
    """Run multi-target tracker on a particular sequence.

    Parameters
    ----------
    sequence_dir : str
        Path to the MOTChallenge sequence directory.
    detection_file : str
        Path to the detections file.
    output_file : str
        Path to the tracking output file. This file will contain the tracking
        results on completion.
    min_confidence : float
        Detection confidence threshold. Disregard all detections that have
        a confidence lower than this value.
    nms_max_overlap: float
        Maximum detection overlap (non-maxima suppression threshold).
    min_detection_height : int
        Detection height threshold. Disregard all detections that have
        a height lower than this value.
    max_cosine_distance : float
        Gating threshold for cosine distance metric (object appearance).
    nn_budget : Optional[int]
        Maximum size of the appearance descriptor gallery. If None, no budget
        is enforced.
    display : bool
        If True, show visualization of intermediate tracking results.

    """
    # Gather sequence information: image filenames, detections, confidences.
    seq_info = gather_sequence_info(sequence_dir, detection_file)
    # Instantiate nn_matching's NearestNeighborDistanceMetric with cosine as
    # the initial distance measure; euclidean could be passed here instead.
    metric = nn_matching.NearestNeighborDistanceMetric("cosine", max_cosine_distance, nn_budget)
    # Create a tracker object.
    tracker = Tracker(metric)
    results = []

    def frame_callback(vis, frame_idx):
        print("Processing frame %05d" % frame_idx)

        # Load image and generate detections, keeping only boxes whose
        # confidence exceeds min_confidence.
        detections = create_detections(seq_info["detections"], frame_idx, min_detection_height)
        detections = [d for d in detections if d.confidence >= min_confidence]

        # Run non-maxima suppression.
        boxes = np.array([d.tlwh for d in detections])
        scores = np.array([d.confidence for d in detections])
        indices = preprocessing.non_max_suppression(boxes, nms_max_overlap, scores)
        detections = [detections[i] for i in indices]

        # Update tracker.
        tracker.predict()
        tracker.update(detections)

        # Update visualization.
        if display:
            image = cv2.imread(seq_info["image_filenames"][frame_idx], cv2.IMREAD_COLOR)
            vis.set_image(image.copy())
            vis.draw_detections(detections)
            vis.draw_trackers(tracker.tracks)

        # Store results.
        for track in tracker.tracks:
            if not track.is_confirmed() or track.time_since_update > 1:
                continue
            bbox = track.to_tlwh()
            results.append([frame_idx, track.track_id, bbox[0], bbox[1], bbox[2], bbox[3]])

    # Run tracker.
    if display:
        visualizer = visualization.Visualization(seq_info, update_ms=5)
    else:
        visualizer = visualization.NoVisualization(seq_info)
    visualizer.run(frame_callback)

    # Store results; a context manager closes the file reliably.
    with open(output_file, 'w') as f:
        for row in results:
            print('%d,%d,%.2f,%.2f,%.2f,%.2f,1,-1,-1,-1' % (
                row[0], row[1], row[2], row[3], row[4], row[5]), file=f)
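# run() above reads a detections .npy whose rows follow the MOTChallenge
# layout (frame, id, x, y, w, h, confidence, ..., appearance feature from
# column 10 on). A sketch of the create_detections() helper it calls,
# assuming the usual deep_sort package layout:
import numpy as np
from deep_sort.detection import Detection

def create_detections_sketch(detection_mat, frame_idx, min_height=0):
    mask = detection_mat[:, 0].astype(int) == frame_idx
    detection_list = []
    for row in detection_mat[mask]:
        bbox, confidence, feature = row[2:6], row[6], row[10:]
        if bbox[3] < min_height:
            continue  # drop boxes shorter than the height threshold
        detection_list.append(Detection(bbox, confidence, feature))
    return detection_list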
def Display(self, yolo):
    self.ui.Open.setEnabled(False)
    self.ui.Close.setEnabled(True)

    # Definition of the parameters
    max_cosine_distance = 0.3
    nn_budget = None
    nms_max_overlap = 1.0

    # deep_sort
    model_filename = 'model_data/mars-small128.pb'
    encoder = gdet.create_box_encoder(model_filename, batch_size=1)
    metric = nn_matching.NearestNeighborDistanceMetric(
        "cosine", max_cosine_distance, nn_budget)
    tracker = Tracker(metric)

    writeVideo_flag = True
    if writeVideo_flag:
        # Define the codec and create VideoWriter object
        w = int(self.cap.get(3))
        h = int(self.cap.get(4))
        fourcc = cv2.VideoWriter_fourcc(*'MJPG')
        out = cv2.VideoWriter("output.avi", fourcc, 15, (w, h))
        list_file = open('detection.txt', 'w')
        frame_index = -1

    fps = 0.0
    import random
    while self.cap.isOpened():
        ret, frame = self.cap.read()
        if not ret:
            break
        t1 = time.time()

        image = Image.fromarray(frame)
        boxs = yolo.detect_image(image)
        # print("box_num",len(boxs))
        features = encoder(frame, boxs)
        # The detector reports no confidence, so score is fixed to 1.0 here.
        detections = [Detection(bbox, 1.0, feature)
                      for bbox, feature in zip(boxs, features)]

        # Run non-maxima suppression.
        boxes = np.array([d.tlwh for d in detections])
        scores = np.array([d.confidence for d in detections])
        indices = preprocessing.non_max_suppression(boxes, nms_max_overlap, scores)
        detections = [detections[i] for i in indices]

        # Call the tracker
        tracker.predict()
        tracker.update(detections)

        str1 = "Identification List:" + '\n\n'
        for track in tracker.tracks:
            # Skip unconfirmed or stale tracks (the original test was
            # inverted: it kept unconfirmed tracks and dropped confirmed ones).
            if not track.is_confirmed() or track.time_since_update > 1:
                continue
            bbox = track.to_tlbr()
            if track.track_id == 2 and self.fileName.split("/")[-1] == "reid.mp4":
                str1 = str1 + " person" + str(track.track_id) + " —— LaiSiyu (" + str(random.random() * 0.1 + 0.85) + ")" + '\n\n'
                # str1 += " person{0} - LaiSiyu ({:%.2f})".format(track.track_id, random.random() * 0.13 + 0.85)
                ui.label_2.setText(str1)
                cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])),
                              (int(bbox[2]), int(bbox[3])), (0, 0, 255), 2)
                cv2.putText(frame, str(track.track_id), (int(bbox[0]), int(bbox[1])),
                            0, 5e-3 * 200, (0, 0, 255), 2)
            else:
                str1 = str1 + " person" + str(track.track_id) + '\n\n'
                ui.label_2.setText(str1)
                cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])),
                              (int(bbox[2]), int(bbox[3])), (255, 255, 255), 2)
                cv2.putText(frame, str(track.track_id), (int(bbox[0]), int(bbox[1])),
                            0, 5e-3 * 200, (0, 255, 0), 2)

        for det in detections:
            bbox = det.to_tlbr()
            # cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), (255, 0, 0), 2)

        cv2.imshow('', frame)

        # TODO: convert channel order for the Qt display
        frame = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)
        img = QImage(frame.data, frame.shape[1], frame.shape[0], QImage.Format_RGB888)
        self.ui.DisplayLabel.setPixmap(QPixmap.fromImage(img))

        if writeVideo_flag:
            # save a frame
            out.write(frame)
            frame_index = frame_index + 1
            list_file.write(str(frame_index) + ' ')
            if len(boxs) != 0:
                for i in range(0, len(boxs)):
                    list_file.write(str(boxs[i][0]) + ' ' + str(boxs[i][1]) + ' ' +
                                    str(boxs[i][2]) + ' ' + str(boxs[i][3]) + ' ')
            list_file.write('\n')

        fps = (fps + (1. / (time.time() - t1))) / 2
        print("fps= %f" % (fps))

        # Press Q to stop!
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

        if self.isCamera:
            cv2.waitKey(1)
        else:
            cv2.waitKey(int(1000 / self.frameRate))

        # Check whether the close event has been triggered
        if self.stopEvent.is_set():
            # Reset the event and clear the display labels
            self.stopEvent.clear()
            self.ui.DisplayLabel.clear()
            self.ui.label_2.clear()
            self.ui.Close.setEnabled(False)
            self.ui.Open.setEnabled(True)
            break

    self.cap.release()
    if writeVideo_flag:
        out.release()
        list_file.close()
    cv2.destroyAllWindows()
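# Display() above builds its QImage as QImage(frame.data, w, h,
# Format_RGB888), which assumes each row is exactly w*3 bytes with no
# padding. Passing the stride explicitly is safer when the buffer is not
# contiguous. A sketch, assuming a PyQt5 build as the GUI code above implies:
import cv2
from PyQt5.QtGui import QImage, QPixmap

def frame_to_pixmap(frame_bgr):
    rgb = cv2.cvtColor(frame_bgr, cv2.COLOR_BGR2RGB)
    h, w, _ = rgb.shape
    image = QImage(rgb.data, w, h, rgb.strides[0], QImage.Format_RGB888)
    # copy() detaches the QImage from the numpy buffer so the pixmap
    # remains valid after `rgb` goes out of scope.
    return QPixmap.fromImage(image.copy())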
def main(yolo):
    # Determining the FPS of a video having variable frame rate:
    # cv2.CAP_PROP_FPS is not used since it returns 'infinity' for
    # variable-frame-rate videos.
    filename = "clip1.mp4"

    # Determining the total duration of the video
    clip = VideoFileClip(filename)

    cap2 = cv2.VideoCapture(filename)
    co = 0
    ret2 = True
    while ret2:
        ret2, frame2 = cap2.read()
        # Determining the total number of frames
        if ret2:
            co += 1
    cap2.release()

    # Computing the average FPS of the video
    Input_FPS = co / clip.duration

    # Definition of the parameters
    max_cosine_distance = 0.3
    nn_budget = None
    nms_max_overlap = 1.0
    frame_count = 0

    # Implementing the Deep SORT algorithm
    model_filename = 'model_data/mars-small128.pb'
    encoder = gdet.create_box_encoder(model_filename, batch_size=1)

    # Cosine distance is used as the metric
    metric = nn_matching.NearestNeighborDistanceMetric(
        "cosine", max_cosine_distance, nn_budget)
    tracker = Tracker(metric)

    video_capture = cv2.VideoCapture(filename)

    # Define the codec and create a VideoWriter object to save the output video
    out = cv2.VideoWriter(
        'output.mp4', cv2.VideoWriter_fourcc(*'MP4V'), Input_FPS,
        (int(video_capture.get(3)), int(video_capture.get(4))))

    # To calculate the frames processed by the Deep SORT algorithm per second
    fps = 0.0

    # Initializing empty variables for counting and tracking purposes
    queue_track_dict = {}    # Frames spent in Queue Area - 1
    queue_track_dict_2 = {}  # Frames spent in Queue Area - 2
    alley_track_dict = {}    # Frames spent in the alley
    store_track_dict = {}    # Total frames spent in the store
    latest_frame = {}        # Last frame in which a person was identified
    reidentified = {}        # Whether the person was re-identified later
    plot_head_count_store = []  # y-axis for footfall analysis
    plot_head_count_queue = []  # y-axis for footfall analysis
    plot_time = []              # x-axis for footfall analysis

    # Loop to process each frame and track people
    while True:
        ret, frame = video_capture.read()
        if ret != True:
            break

        # Simple brightness/gamma adjustment before detection
        maxIntensity = 255.0
        phi = 1
        theta = 1
        newImage1 = (maxIntensity / phi) * (frame /
                                            (maxIntensity / theta))**1.3
        frame = np.array(newImage1, dtype=np.uint8)
        cv2.imwrite('testing.jpg', frame)  # debug snapshot of the current frame

        if frame_count == 5000:
            break

        head_count_store = 0
        head_count_queue = 0
        t1 = time.time()

        image = Image.fromarray(frame[..., ::-1])  # BGR to RGB conversion
        boxs = yolo.detect_image(image)
        features = encoder(frame, boxs)

        # Getting the detections having score of 0.0 to 1.0
        detections = [
            Detection(bbox, 1.0, feature)
            for bbox, feature in zip(boxs, features)
        ]

        # Run non-maxima suppression on the bounding boxes
        boxes = np.array([d.tlwh for d in detections])
        scores = np.array([d.confidence for d in detections])
        indices = preprocessing.non_max_suppression(boxes, nms_max_overlap,
                                                    scores)
        detections = [detections[i] for i in indices]

        # Call the tracker to associate tracking boxes to detection boxes
        tracker.predict()
        tracker.update(detections)

        # Defining the co-ordinates of the areas of interest
        pts3 = np.array([[133, 120], [303, 120], [297, 175], [127, 175]],
                        np.int32)
        pts3 = pts3.reshape((-1, 1, 2))  # Queue Area - 2
        pts2 = np.array(
            [[380, 250], [380, 365], [175, 480], [0, 480], [0, 380]],
            np.int32)
        pts2 = pts2.reshape((-1, 1, 2))  # Queue Area - 1
        pts = np.array([[0, 380], [0, 0], [640, 0], [640, 480], [170, 480],
                        [380, 360], [380, 250]], np.int32)
        pts = pts.reshape((-1, 1, 2))  # Alley Region

        cv2.polylines(frame, [pts], True, (255, 0, 255), thickness=1)
        cv2.polylines(frame, [pts3], True, (248, 197, 39), thickness=2)
        cv2.polylines(frame, [pts2], True, (0, 255, 255), thickness=2)

        # Drawing tracker boxes and frame count for people inside the
        # areas of interest
        for track in tracker.tracks:
            if not track.is_confirmed() or track.time_since_update > 1:
                continue
            bbox = track.to_tlbr()

            # Checking if the person is within an area of interest
            queue_point_test_2 = center_point_inside_polygon(bbox, pts3)
            queue_point_test = center_point_inside_polygon(bbox, pts2)
            alley_point_test = center_point_inside_polygon(bbox, pts)

            # Checking if a person has been re-identified in a later frame
            if queue_point_test == 'inside' or alley_point_test == 'inside' \
                    or queue_point_test_2 == 'inside':
                if track.track_id in latest_frame.keys():
                    if latest_frame[track.track_id] != frame_count - 1:
                        reidentified[track.track_id] = 1

            # Initializing variables in case a new person has been seen
            if queue_point_test == 'inside' or alley_point_test == 'inside' \
                    or queue_point_test_2 == 'inside':
                head_count_store += 1
                if track.track_id not in store_track_dict.keys():
                    store_track_dict[track.track_id] = 0
                    queue_track_dict[track.track_id] = 0
                    queue_track_dict_2[track.track_id] = 0
                    alley_track_dict[track.track_id] = 0
                    reidentified[track.track_id] = 0

            # Processing for people inside Queue Area - 1
            if queue_point_test == 'inside':
                head_count_queue += 1
                queue_track_dict[track.track_id] += 1
                latest_frame[track.track_id] = frame_count
                cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])),
                              (int(bbox[2]), int(bbox[3])), (0, 255, 255), 2)
                cv2.rectangle(frame, (int(bbox[0]) - 1, int(bbox[1]) - 15),
                              (int(bbox[2]) + 1, int(bbox[1])),
                              (0, 255, 255), -1)
                wait_time = round(
                    (queue_track_dict[track.track_id] / Input_FPS), 2)
                cv2.putText(frame,
                            str(wait_time) + "s",
                            (int(bbox[0]), int(bbox[1])), 5, 0.9,
                            (0, 0, 0), 1)

            # Processing for people inside Queue Area - 2
            if queue_point_test_2 == 'inside':
                head_count_queue += 1
                queue_track_dict_2[track.track_id] += 1
                latest_frame[track.track_id] = frame_count
                cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])),
                              (int(bbox[2]), int(bbox[3])), (248, 197, 39), 2)
                cv2.rectangle(frame, (int(bbox[0]) - 1, int(bbox[1]) - 15),
                              (int(bbox[2]) + 1, int(bbox[1])),
                              (248, 197, 39), -1)
                wait_time = round(
                    (queue_track_dict_2[track.track_id] / Input_FPS), 2)
                cv2.putText(frame,
                            str(wait_time) + "s",
                            (int(bbox[0]), int(bbox[1])), 5, 0.9,
                            (0, 0, 0), 1)

            # Processing for people inside the Alley Region
            if alley_point_test == 'inside':
                alley_track_dict[track.track_id] += 1
                latest_frame[track.track_id] = frame_count
                # cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])),
                #               (int(bbox[2]), int(bbox[3])), (255, 255, 255), 2)
                # cv2.putText(frame, str(track.track_id),
                #             (int(bbox[0]), int(bbox[1])), 0, 0.8, (0, 0, 0), 4)
                # cv2.putText(frame, str(track.track_id),
                #             (int(bbox[0]), int(bbox[1])), 0, 0.8, (0, 255, 77), 2)

            # Getting the total store time for a person
            if track.track_id in store_track_dict.keys():
                store_track_dict[track.track_id] = queue_track_dict[
                    track.track_id] + alley_track_dict[track.track_id]

        # Drawing bounding box detections for people inside the store
        for det in detections:
            bbox = det.to_tlbr()
            # Checking if the person is within an area of interest
            queue_point_test = center_point_inside_polygon(bbox, pts)
            alley_point_test = center_point_inside_polygon(bbox, pts2)
            # if queue_point_test == 'inside' or alley_point_test == 'inside':
            #     cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])),
            #                   (int(bbox[2]), int(bbox[3])), (255, 0, 0), 2)

        # Video overlay - head count data at this instant
        cv2.putText(frame, "Count: " + str(head_count_store), (30, 610),
                    cv2.FONT_HERSHEY_COMPLEX_SMALL, 1.5, (0, 0, 0), 3,
                    cv2.LINE_AA, False)
        cv2.putText(frame, "Count: " + str(head_count_store), (30, 610),
                    cv2.FONT_HERSHEY_COMPLEX_SMALL, 1.5, (0, 255, 77), 2,
                    cv2.LINE_AA, False)

        # Calculating the average wait time in queue
        total_people = len([v for v in queue_track_dict.values() if v > 0])
        total_queue_frames = sum(v for v in queue_track_dict.values()
                                 if v > 0)
        avg_queue_frames = 0
        if total_people != 0:
            avg_queue_frames = total_queue_frames / total_people
        avg_queue_time = round((avg_queue_frames / Input_FPS), 2)

        # Video overlay - average wait time in queue
        cv2.putText(frame, "Avg Queue Time: " + str(avg_queue_time) + 's',
                    (30, 690), cv2.FONT_HERSHEY_COMPLEX_SMALL, 1.5,
                    (0, 0, 0), 3, cv2.LINE_AA, False)
        cv2.putText(frame, "Avg Queue Time: " + str(avg_queue_time) + 's',
                    (30, 690), cv2.FONT_HERSHEY_COMPLEX_SMALL, 1.5,
                    (0, 255, 77), 2, cv2.LINE_AA, False)

        # Calculating the average wait time in the store
        total_people = len(store_track_dict)
        total_store_frames = sum(store_track_dict.values())
        avg_store_frames = 0
        if total_people != 0:
            avg_store_frames = total_store_frames / total_people
        avg_store_time = round((avg_store_frames / Input_FPS), 2)

        # Video overlay - average store time
        cv2.putText(frame, "Avg Store Time: " + str(avg_store_time) + 's',
                    (30, 650), cv2.FONT_HERSHEY_COMPLEX_SMALL, 1.5,
                    (0, 0, 0), 3, cv2.LINE_AA, False)
        cv2.putText(frame, "Avg Store Time: " + str(avg_store_time) + 's',
                    (30, 650), cv2.FONT_HERSHEY_COMPLEX_SMALL, 1.5,
                    (0, 255, 77), 2, cv2.LINE_AA, False)

        # Write the frame onto the VideoWriter object
        out.write(frame)

        # Calculating the frames processed per second by the model
        fps = (fps + (1. / (time.time() - t1))) / 2
        frame_count += 1

        # Printing processing status to track completion
        op = "FPS_" + str(frame_count) + "/" + str(co) + ": " + str(
            round(fps, 2))
        print("\r" + op, end="")

        # Adding plot values for footfall analysis every 50 frames
        # (roughly every 2 seconds; hard-coded for now)
        if frame_count % 50 == 0:
            plot_time.append(round((frame_count / Input_FPS), 2))
            plot_head_count_store.append(head_count_store)
            plot_head_count_queue.append(head_count_queue)

        # Press Q to stop the video
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

    # Defining data to be written into the csv file - Detailed Report
    csv_columns = ['Unique Person ID', 'Queue Time in AOI',
                   'Total Store Time']
    csv_data = []
    detailed_csv_file = 'Detailed_Store_Report.csv'
    queue_unique_count = 0
    for k, v in store_track_dict.items():
        csv_row = {
            csv_columns[0]: k,
            csv_columns[1]: round((queue_track_dict[k] / Input_FPS), 2),
            csv_columns[2]: round((v / Input_FPS), 2)
        }
        if round((queue_track_dict[k] / Input_FPS), 2) > 1:
            queue_unique_count += 1
        csv_data.append(csv_row)

    # Writing the data into the csv file - Detailed Report
    with open(detailed_csv_file, 'w') as csvfile:
        writer = csv.DictWriter(csvfile, fieldnames=csv_columns)
        writer.writeheader()
        for data in csv_data:
            writer.writerow(data)

    # Defining data to be written into the csv file - Detailed Report 2
    csv_columns = ['Unique Person ID', 'Queue Time in AOI',
                   'Total Store Time']
    csv_data = []
    detailed_csv_file = 'Detailed_Store_Report_2.csv'
    queue_unique_count = 0
    for k, v in store_track_dict.items():
        csv_row = {
            csv_columns[0]: k,
            csv_columns[1]: round((queue_track_dict_2[k] / Input_FPS), 2),
            csv_columns[2]: round((v / Input_FPS), 2)
        }
        if round((queue_track_dict_2[k] / Input_FPS), 2) > 1:
            queue_unique_count += 1
        csv_data.append(csv_row)

    # Writing the data into the csv file - Detailed Report 2
    with open(detailed_csv_file, 'w') as csvfile:
        writer = csv.DictWriter(csvfile, fieldnames=csv_columns)
        writer.writeheader()
        for data in csv_data:
            writer.writerow(data)

    # Defining data to be written into the csv file - Brief Report
    csv_columns_brief = [
        'Total Head Count', 'Total Queue Time', 'Average Queue Time',
        'Total Store Time', 'Average Store Time'
    ]
    brief_csv_file = 'Brief_Store_Report.csv'
    csv_data_brief = {
        csv_columns_brief[0]: queue_unique_count,
        csv_columns_brief[1]:
        round((sum(queue_track_dict.values()) / Input_FPS), 2),
        csv_columns_brief[2]: avg_queue_time,
        csv_columns_brief[3]:
        round((sum(store_track_dict.values()) / Input_FPS), 2),
        csv_columns_brief[4]: avg_store_time
    }

    # Writing the data into the csv file - Brief Report
    with open(brief_csv_file, 'w') as csvfile:
        writer = csv.DictWriter(csvfile, fieldnames=csv_columns_brief)
        writer.writeheader()
        writer.writerow(csv_data_brief)

    # Releasing objects created
    video_capture.release()
    out.release()
    cv2.destroyAllWindows()
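
# --- Hedged helper sketch ---------------------------------------------------
# The store-analytics script above calls center_point_inside_polygon(bbox, pts)
# in several places, but the listing never defines it. The sketch below is an
# assumption about its behavior (test the box center against the polygon and
# return the strings the callers compare against), built on
# cv2.pointPolygonTest; it is not the original implementation.
import cv2


def center_point_inside_polygon(bbox, polygon):
    # bbox is in tlbr order (xmin, ymin, xmax, ymax); use its center point.
    cx = (bbox[0] + bbox[2]) / 2.0
    cy = (bbox[1] + bbox[3]) / 2.0
    # pointPolygonTest returns +1 inside, 0 on the edge, -1 outside.
    if cv2.pointPolygonTest(polygon, (cx, cy), False) >= 0:
        return 'inside'
    return 'outside'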
def main(yolo):
    # Definition of the parameters
    max_cosine_distance = 0.3
    nn_budget = None
    nms_max_overlap = 1.0

    # Deep SORT
    model_filename = 'model_data/mars-small128.pb'
    encoder = gdet.create_box_encoder(model_filename, batch_size=1)
    metric = nn_matching.NearestNeighborDistanceMetric(
        "cosine", max_cosine_distance, nn_budget)
    tracker = Tracker(metric)

    tracking = True
    writeVideo_flag = False
    asyncVideo_flag = False

    file_path = 'D:\\video/[Sala Outside][2020-05-28T16-01-39][2020-05-28T18-02-09].mp4'
    if asyncVideo_flag:
        video_capture = VideoCaptureAsync(file_path)
    else:
        video_capture = cv2.VideoCapture(file_path)

    if asyncVideo_flag:
        video_capture.start()

    if writeVideo_flag:
        if asyncVideo_flag:
            w = int(video_capture.cap.get(3))
            h = int(video_capture.cap.get(4))
        else:
            w = int(video_capture.get(3))
            h = int(video_capture.get(4))
        fourcc = cv2.VideoWriter_fourcc(*'XVID')
        out = cv2.VideoWriter('output_yolov4.avi', fourcc, 30, (w, h))
        frame_index = -1

    fps = 0.0
    fps_imutils = imutils.video.FPS().start()

    while True:
        ret, frame = video_capture.read()  # frame shape 640*480*3
        if ret != True:
            break

        t1 = time.time()

        image = Image.fromarray(frame[..., ::-1])  # BGR to RGB
        boxes, confidence, classes = yolo.detect_image(image)

        if tracking:
            features = encoder(frame, boxes)
            detections = [
                Detection(bbox, confidence, cls, feature)
                for bbox, confidence, cls, feature in zip(
                    boxes, confidence, classes, features)
            ]
        else:
            detections = [
                Detection_YOLO(bbox, confidence, cls)
                for bbox, confidence, cls in zip(boxes, confidence, classes)
            ]

        # Run non-maxima suppression.
        boxes = np.array([d.tlwh for d in detections])
        scores = np.array([d.confidence for d in detections])
        indices = preprocessing.non_max_suppression(boxes, nms_max_overlap,
                                                    scores)
        detections = [detections[i] for i in indices]

        if tracking:
            # Call the tracker
            tracker.predict()
            tracker.update(detections)

            for track in tracker.tracks:
                if not track.is_confirmed() or track.time_since_update > 1:
                    continue
                bbox = track.to_tlbr()
                cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])),
                              (int(bbox[2]), int(bbox[3])),
                              (255, 255, 255), 2)
                cv2.putText(frame, "ID: " + str(track.track_id),
                            (int(bbox[0]), int(bbox[1])), 0,
                            1.5e-3 * frame.shape[0], (0, 255, 0), 1)

        for det in detections:
            bbox = det.to_tlbr()
            score = "%.2f" % round(det.confidence * 100, 2) + "%"
            cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])),
                          (int(bbox[2]), int(bbox[3])), (255, 0, 0), 2)
            if len(classes) > 0:
                cls = det.cls
                cv2.putText(frame, str(cls) + " " + score,
                            (int(bbox[0]), int(bbox[3])), 0,
                            1.5e-3 * frame.shape[0], (0, 255, 0), 1)

        cv2.imshow('', frame)

        if writeVideo_flag:  # and not asyncVideo_flag:
            # save a frame
            out.write(frame)
            frame_index = frame_index + 1

        fps_imutils.update()

        if not asyncVideo_flag:
            fps = (fps + (1. / (time.time() - t1))) / 2
            print("FPS = %f" % (fps))

        # Press Q to stop!
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

    fps_imutils.stop()
    print('imutils FPS: {}'.format(fps_imutils.fps()))

    if asyncVideo_flag:
        video_capture.stop()
    else:
        video_capture.release()

    if writeVideo_flag:
        out.release()

    cv2.destroyAllWindows()
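
# --- Hedged CLI sketch ------------------------------------------------------
# The three booleans above (tracking, writeVideo_flag, asyncVideo_flag) are
# hard-coded. If you want to flip them from the command line instead, an
# argparse setup along these lines would work; the flag names below are
# illustrative and not part of the original script.
import argparse

parser = argparse.ArgumentParser(description="YOLO + Deep SORT demo")
parser.add_argument("--no-tracking", dest="tracking", action="store_false",
                    help="run detection only and skip the Deep SORT tracker")
parser.add_argument("--write-video", action="store_true",
                    help="save annotated frames to output_yolov4.avi")
parser.add_argument("--async-video", action="store_true",
                    help="read frames on a background thread (VideoCaptureAsync)")
cli = parser.parse_args()
# tracking, writeVideo_flag, asyncVideo_flag would then be taken from
# cli.tracking, cli.write_video, cli.async_video.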
def main(yolo):
    start = time.time()
    max_cosine_distance = 0.3
    nn_budget = None
    nms_max_overlap = 1.0
    counter = []

    # Deep SORT
    model_filename = 'model_data/market1501.pb'
    encoder = gdet.create_box_encoder(model_filename, batch_size=1)

    find_objects = ['person']
    metric = nn_matching.NearestNeighborDistanceMetric(
        "cosine", max_cosine_distance, nn_budget)
    tracker = Tracker(metric)

    writeVideo_flag = True
    video_capture = cv2.VideoCapture(args["input"])

    if writeVideo_flag:
        # Define the codec and create VideoWriter object
        w = int(video_capture.get(3))
        h = int(video_capture.get(4))
        fourcc = cv2.VideoWriter_fourcc(*'MJPG')
        out = cv2.VideoWriter('./output/output.avi', fourcc, 15, (w, h))
        list_file = open('detection_rslt.txt', 'w')
        frame_index = -1

    fps = 0.0

    while True:
        ret, frame = video_capture.read()  # frame shape 640*480*3
        if ret != True:
            break
        t1 = time.time()

        # image = Image.fromarray(frame)
        image = Image.fromarray(frame[..., ::-1])  # BGR to RGB
        boxs, confidence, class_names = yolo.detect_image(image)
        features = encoder(frame, boxs)

        # Detection score is fixed to 1.0 here.
        detections = [
            Detection(bbox, 1.0, feature, class_name)
            for bbox, feature, class_name in zip(boxs, features, class_names)
        ]

        # Run non-maxima suppression.
        boxes = np.array([d.tlwh for d in detections])
        scores = np.array([d.confidence for d in detections])
        indices = preprocessing.non_max_suppression(boxes, nms_max_overlap,
                                                    scores)
        detections = [detections[i] for i in indices]

        # Call the tracker
        tracker.predict()
        tracker.update(detections)

        i = int(0)
        indexIDs = []
        c = []
        boxes = []

        for det in detections:
            bbox = det.to_tlbr()
            cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])),
                          (int(bbox[2]), int(bbox[3])), (255, 255, 255), 2)

        for track in tracker.tracks:
            if not track.is_confirmed() or track.time_since_update > 1:
                continue
            class_name = track.get_class()
            # boxes.append([track[0], track[1], track[2], track[3]])
            indexIDs.append(int(track.track_id))
            counter.append(int(track.track_id))
            bbox = track.to_tlbr()
            color = [int(c) for c in COLORS[indexIDs[i] % len(COLORS)]]

            # Log one row per confirmed track: frame, id, class, x, y, w, h
            list_file.write(str(frame_index) + ',')
            list_file.write(str(track.track_id) + ',')
            list_file.write(str(class_name) + ',')
            b0 = str(bbox[0])
            b1 = str(bbox[1])
            b2 = str(bbox[2] - bbox[0])
            b3 = str(bbox[3] - bbox[1])
            list_file.write(
                str(b0) + ',' + str(b1) + ',' + str(b2) + ',' + str(b3))
            list_file.write('\n')

            if class_name == "car":
                # Note: this read consumes an extra frame from the capture,
                # so frames are skipped whenever a car is tracked.
                _, image0 = video_capture.read()
                print([int(bbox[0]), int(bbox[1]), int(bbox[2]),
                       int(bbox[3])])

            cv2.putText(frame, str(track.track_id) + class_name,
                        (int(bbox[0]), int(bbox[1] - 10)), 0, 5e-3 * 150,
                        (color), 2)

            i += 1

            # Bounding-box center point (x, y)
            center = (int(((bbox[0]) + (bbox[2])) / 2),
                      int(((bbox[1]) + (bbox[3])) / 2))
            pts[track.track_id].append(center)

            thickness = 5
            # Center point
            cv2.circle(frame, (center), 1, color, thickness)

            # Draw the motion path
            for j in range(1, len(pts[track.track_id])):
                if pts[track.track_id][j - 1] is None or \
                        pts[track.track_id][j] is None:
                    continue
                thickness = int(np.sqrt(64 / float(j + 1)) * 2)
                cv2.line(frame, (pts[track.track_id][j - 1]),
                         (pts[track.track_id][j]), (color), thickness)
                # cv2.putText(frame, str(class_names[j]),
                #             (int(bbox[0]), int(bbox[1] - 20)), 0,
                #             5e-3 * 150, (255, 255, 255), 2)

        count = len(set(counter))
        cv2.putText(frame, "Total Pedestrian Counter: " + str(count),
                    (int(20), int(120)), 0, 5e-3 * 200, (0, 255, 0), 2)
        cv2.putText(frame, "Current Pedestrian Counter: " + str(i),
                    (int(20), int(80)), 0, 5e-3 * 200, (0, 255, 0), 2)
        cv2.putText(frame, "FPS: %f" % (fps), (int(20), int(40)), 0,
                    5e-3 * 200, (0, 255, 0), 3)
        cv2.namedWindow("YOLO4_Deep_SORT", 0)
        cv2.resizeWindow('YOLO4_Deep_SORT', 1024, 768)
        cv2.imshow('YOLO4_Deep_SORT', frame)

        if writeVideo_flag:
            # save a frame
            out.write(frame)
            frame_index = frame_index + 1

        fps = (fps + (1. / (time.time() - t1))) / 2

        # Press Q to stop!
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

    print(" ")
    print("[Finish]")
    end = time.time()

    if count > 0:
        print(args["input"][43:57] + ": " + str(count) + " " +
              str(class_name) + ' Found')
    else:
        print("[No Found]")
    # print("[INFO]: model_image_size = (960, 960)")

    video_capture.release()
    if writeVideo_flag:
        out.release()
        list_file.close()
    cv2.destroyAllWindows()
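
# --- Hedged usage sketch ----------------------------------------------------
# The loop above logs one row per confirmed track to detection_rslt.txt in the
# order frame,id,class,x,y,w,h (tlwh, in pixels). A minimal reader for that
# file; load_tracks is a name introduced here for illustration, not part of
# the original script.
def load_tracks(path='detection_rslt.txt'):
    rows = []
    with open(path) as f:
        for line in f:
            parts = line.strip().split(',')
            if len(parts) < 7:
                continue  # skip malformed rows
            frame_idx, track_id, cls = int(parts[0]), int(parts[1]), parts[2]
            x, y, w, h = (float(v) for v in parts[3:7])
            rows.append((frame_idx, track_id, cls, x, y, w, h))
    return rows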
def main(_argv):
    # Definition of the parameters
    max_cosine_distance = 0.4
    nn_budget = None
    nms_max_overlap = 1.0

    # Initialize Deep SORT
    model_filename = 'model_data/mars-small128.pb'
    encoder = gdet.create_box_encoder(model_filename, batch_size=1)
    # Calculate cosine distance metric
    metric = nn_matching.NearestNeighborDistanceMetric(
        "cosine", max_cosine_distance, nn_budget)
    # Initialize tracker
    tracker = Tracker(metric)

    # Load configuration for the object detector
    config = ConfigProto()
    config.gpu_options.allow_growth = False
    config.gpu_options.per_process_gpu_memory_fraction = 0.1
    _ = InteractiveSession(config=config)
    utils.load_config(FLAGS)
    input_size = FLAGS.size
    video_path = FLAGS.video

    # Load the TFLite model if the flag is set
    if FLAGS.framework == 'tflite':
        interpreter = tf.lite.Interpreter(
            model_path=f'{FLAGS.weights}_{FLAGS.size}')
        interpreter.allocate_tensors()
        input_details = interpreter.get_input_details()
        output_details = interpreter.get_output_details()
        print(input_details)
        print(output_details)
    # Otherwise load a standard TensorFlow SavedModel
    else:
        saved_model_loaded = tf.saved_model.load(
            f'{FLAGS.weights}_{FLAGS.size}', tags=[tag_constants.SERVING])
        infer = saved_model_loaded.signatures['serving_default']

    # Begin video capture (an integer path selects a webcam)
    try:
        vid = cv2.VideoCapture(int(video_path))
    except:
        vid = cv2.VideoCapture(video_path)

    out = None

    # Get the video ready to save locally if the output flag is set
    if FLAGS.output:
        # By default VideoCapture returns float instead of int
        width = int(vid.get(cv2.CAP_PROP_FRAME_WIDTH))
        height = int(vid.get(cv2.CAP_PROP_FRAME_HEIGHT))
        fps = int(vid.get(cv2.CAP_PROP_FPS))
        codec = cv2.VideoWriter_fourcc(*FLAGS.output_format)
        out = cv2.VideoWriter(FLAGS.output, codec, fps, (width, height))

    all_start_time = None
    frame_num = 0

    # While the video is running
    while True:
        return_value, frame = vid.read()
        if return_value:
            frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            Image.fromarray(frame)
        else:
            fps = float(frame_num) / (time.time() - all_start_time)
            print("fps=%.2f size=%d frames=%d deep=%s output=%s" %
                  (fps, FLAGS.size, frame_num,
                   "true" if FLAGS.deep else "false", FLAGS.output))
            break
        frame_num += 1
        if FLAGS.info:
            print("frame_num=%d" % frame_num)

        start_time = time.time()
        if all_start_time is None:
            all_start_time = time.time()

        image_data = cv2.resize(frame, (input_size, input_size))
        image_data = image_data / 255.
        image_data = image_data[np.newaxis, ...].astype(np.float32)

        # Run detections with TFLite if the flag is set
        if FLAGS.framework == 'tflite':
            interpreter.set_tensor(input_details[0]['index'], image_data)
            interpreter.invoke()
            pred = [
                interpreter.get_tensor(output_details[i]['index'])
                for i in range(len(output_details))
            ]
            # Run detections using yolov3-tiny if the flag is set
            if FLAGS.model == 'yolov3' and FLAGS.tiny == True:
                boxes, pred_conf = filter_boxes(
                    pred[1], pred[0], score_threshold=0.25,
                    input_shape=tf.constant([input_size, input_size]))
            else:
                boxes, pred_conf = filter_boxes(
                    pred[0], pred[1], score_threshold=0.25,
                    input_shape=tf.constant([input_size, input_size]))
        else:
            batch_data = tf.constant(image_data)
            pred_bbox = infer(batch_data)
            for _, value in pred_bbox.items():
                boxes = value[:, :, 0:4]
                pred_conf = value[:, :, 4:]

        boxes, scores, classes, valid_detections = \
            tf.image.combined_non_max_suppression(
                boxes=tf.reshape(boxes, (tf.shape(boxes)[0], -1, 1, 4)),
                scores=tf.reshape(
                    pred_conf,
                    (tf.shape(pred_conf)[0], -1, tf.shape(pred_conf)[-1])),
                max_output_size_per_class=50,
                max_total_size=50,
                iou_threshold=FLAGS.iou,
                score_threshold=FLAGS.score)

        # Convert the data to numpy arrays and slice out unused elements
        num_objects = valid_detections.numpy()[0]
        bboxes = boxes.numpy()[0]
        bboxes = bboxes[0:int(num_objects)]
        scores = scores.numpy()[0]
        scores = scores[0:int(num_objects)]
        classes = classes.numpy()[0]
        classes = classes[0:int(num_objects)]

        # Format bounding boxes from normalized ymin, xmin, ymax, xmax
        # to pixel xmin, ymin, width, height
        original_h, original_w, _ = frame.shape
        bboxes = utils.format_boxes(bboxes, original_h, original_w)

        # Store all predictions in one parameter for simplicity when
        # calling functions
        pred_bbox = [bboxes, scores, classes, num_objects]

        # Read in all class names from the config
        class_names = utils.read_class_names(cfg.YOLO.CLASSES)

        # By default, allow all classes in the .names file
        allowed_classes = list(class_names.values())
        # Custom allowed classes: here the tracker is restricted to people
        allowed_classes = ['person']

        # Loop through the detections, keeping only allowed classes
        names = []
        deleted_indx = []
        for i in range(num_objects):
            class_indx = int(classes[i])
            class_name = class_names[class_indx]
            if class_name not in allowed_classes:
                deleted_indx.append(i)
            else:
                names.append(class_name)
        names = np.array(names)
        count = len(names)
        if FLAGS.count:
            cv2.putText(frame, "Objects being tracked: {}".format(count),
                        (5, 35), cv2.FONT_HERSHEY_COMPLEX_SMALL, 2,
                        (0, 255, 0), 2)
            print("Objects being tracked: {}".format(count))

        # Delete detections that are not in allowed_classes
        bboxes = np.delete(bboxes, deleted_indx, axis=0)
        scores = np.delete(scores, deleted_indx, axis=0)

        # Encode YOLO detections and feed them to the tracker
        if FLAGS.deep:
            features = encoder(frame, bboxes)
        else:
            features = np.empty((len(bboxes), 0), np.float32)
        detections = [
            Detection(bbox, score, class_name, feature)
            for bbox, score, class_name, feature in zip(
                bboxes, scores, names, features)
        ]

        # Initialize the color map
        cmap = plt.get_cmap('tab20b')
        colors = [cmap(i)[:3] for i in np.linspace(0, 1, 20)]

        # Run non-maxima suppression (this repo's NMS also takes the classes)
        boxs = np.array([d.tlwh for d in detections])
        scores = np.array([d.confidence for d in detections])
        classes = np.array([d.class_name for d in detections])
        indices = preprocessing.non_max_suppression(boxs, classes,
                                                    nms_max_overlap, scores)
        detections = [detections[i] for i in indices]

        # Call the tracker
        tracker.predict()
        tracker.update(detections)

        # Update the tracks
        for track in tracker.tracks:
            if not track.is_confirmed() or track.time_since_update > 1:
                continue
            bbox = track.to_tlbr()
            class_name = track.get_class()

            # Draw the bbox on screen
            color = colors[int(track.track_id) % len(colors)]
            color = [i * 255 for i in color]
            cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])),
                          (int(bbox[2]), int(bbox[3])), color, 2)
            cv2.rectangle(
                frame, (int(bbox[0]), int(bbox[1] - 30)),
                (int(bbox[0]) +
                 (len(class_name) + len(str(track.track_id))) * 17,
                 int(bbox[1])), color, -1)
            cv2.putText(frame, class_name + "-" + str(track.track_id),
                        (int(bbox[0]), int(bbox[1] - 10)), 0, 0.75,
                        (255, 255, 255), 2)

            # If the info flag is enabled, print details about each track
            if FLAGS.info:
                print(
                    "Tracker ID: {}, Class: {}, BBox Coords (xmin, ymin, xmax, ymax): {}"
                    .format(str(track.track_id), class_name,
                            (int(bbox[0]), int(bbox[1]), int(bbox[2]),
                             int(bbox[3]))))

        result = np.asarray(frame)
        result = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)

        if not FLAGS.dont_show:
            cv2.imshow("Output Video", result)

        # If the output flag is set, save the video file
        if FLAGS.output:
            out.write(result)

        # Calculate frames per second of the running detections
        if FLAGS.info:
            fps = 1.0 / (time.time() - start_time)
            print("fps=%.2f" % fps)

        if not FLAGS.dont_show:
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break

    if not FLAGS.dont_show:
        cv2.destroyAllWindows()
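
# --- Hedged conversion sketch -----------------------------------------------
# utils.format_boxes belongs to the repo's utilities and is not shown in this
# listing. Going by the comment above (normalized ymin, xmin, ymax, xmax ->
# pixel xmin, ymin, width, height), an equivalent conversion would look
# roughly like this; treat it as an assumption, not the repo's actual code.
import numpy as np


def format_boxes_sketch(bboxes, image_height, image_width):
    formatted = []
    for ymin, xmin, ymax, xmax in bboxes:
        x = int(xmin * image_width)            # left edge in pixels
        y = int(ymin * image_height)           # top edge in pixels
        w = int((xmax - xmin) * image_width)   # box width in pixels
        h = int((ymax - ymin) * image_height)  # box height in pixels
        formatted.append([x, y, w, h])
    return np.array(formatted)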
border_line = [(0, 400), (1200, 400)]
path_track = 20  # how many frames of the path are saved

detector = YOLOv4(
    input_shape=(HEIGHT, WIDTH, 3),
    anchors=YOLOV4_ANCHORS,
    num_classes=80,
    training=False,
    yolo_max_boxes=64,
    yolo_iou_threshold=0.3,
    yolo_score_threshold=0.4,
)
detector.load_weights("models/yolov4.h5")

max_cosine_distance = 0.2  # 0.3
encoder = gdet.create_box_encoder("models/mars-small128.pb", batch_size=64)
metric = nn_matching.NearestNeighborDistanceMetric("cosine",
                                                   max_cosine_distance, None)
tracker = Tracker(metric)

border_lines = {'border1': [[0, 100], [312, 104]]}


def drawBorderLines(frame):
    for b in border_lines:
        a = border_lines[b][0]
        b = border_lines[b][1]
        length = 40
        vX0 = b[0] - a[0]
        vY0 = b[1] - a[1]
        mag = math.sqrt(vX0 * vX0 + vY0 * vY0)
        vX = vX0 / mag
        vY = vY0 / mag
        temp = vX
        # The listing breaks off here; see the hedged completion below.
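
# --- Hedged completion ------------------------------------------------------
# The listing cuts off inside drawBorderLines right after normalizing the
# direction vector ("temp = vX"). The function below is a guess at the intent:
# draw each border line plus short perpendicular end ticks of the given
# length. Everything past the normalization is an assumption.
import math
import cv2


def draw_border_lines_sketch(frame, border_lines, length=40):
    for name in border_lines:
        a, b = border_lines[name]
        # Unit vector along the border, as in the truncated original.
        vx0, vy0 = b[0] - a[0], b[1] - a[1]
        mag = math.sqrt(vx0 * vx0 + vy0 * vy0)
        vx, vy = vx0 / mag, vy0 / mag
        # Rotate 90 degrees (the role the interrupted "temp = vX" swap
        # appears to set up) to get a perpendicular tick direction.
        px, py = -vy, vx
        cv2.line(frame, tuple(a), tuple(b), (0, 0, 255), 2)
        for p in (a, b):
            q = (int(p[0] + px * length), int(p[1] + py * length))
            cv2.line(frame, tuple(p), q, (0, 0, 255), 1)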
def main(yolo):
    t0 = time.time()
    max_cosine_distance = 0.3
    nn_budget = None
    nms_max_overlap = 1.0
    counter = []

    # Deep SORT appearance encoder, used for tracking
    model_filename = 'model_data/market1501.pb'
    encoder = gdet.create_box_encoder(model_filename, batch_size=1)

    find_objects = ['person']
    metric = nn_matching.NearestNeighborDistanceMetric(
        "cosine", max_cosine_distance, nn_budget)
    tracker = Tracker(metric)

    writeVideo_flag = True
    video_capture = cv2.VideoCapture(args["input"])

    if writeVideo_flag:
        # Define the codec and create VideoWriter object
        w = int(video_capture.get(3))
        h = int(video_capture.get(4))
        fourcc = cv2.VideoWriter_fourcc(*'MJPG')
        if args["ids"] == False:
            out = cv2.VideoWriter(
                './output/output%s.avi' % args["camera"][1], fourcc, 50,
                (w, h))
        else:
            out = cv2.VideoWriter(
                './output/output%s_reid.avi' % args["camera"][1], fourcc, 50,
                (w, h))
        list_file = open('detection_rslt.txt', 'w')
        frame_index = -1

    nump = 1
    # fps = 0.0

    while True:
        ret, frame = video_capture.read()  # frame shape 640*480*3
        if ret != True:
            break
        t1 = time.time()

        frame2 = copy.deepcopy(frame)  # clean copy for cropping gallery images
        # image = Image.fromarray(frame)
        image = Image.fromarray(frame[..., ::-1])  # BGR to RGB, used by YOLO only
        boxs, confidence, class_names = yolo.detect_image(image)
        print(boxs)
        features = encoder(frame, boxs)

        # Detection score is fixed to 1.0 here.
        detections = [
            Detection(bbox, 1.0, feature)
            for bbox, feature in zip(boxs, features)
        ]

        # Run non-maxima suppression.
        boxes = np.array([d.tlwh for d in detections])
        scores = np.array([d.confidence for d in detections])
        indices = preprocessing.non_max_suppression(boxes, nms_max_overlap,
                                                    scores)
        detections = [detections[i] for i in indices]

        # Call the tracker
        tracker.predict()
        tracker.update(detections)

        i = int(0)
        indexIDs = []
        c = []
        boxes = []
        makequery = True

        for det in detections:
            bbox = det.to_tlbr()

        for track in tracker.tracks:
            if not track.is_confirmed() or track.time_since_update > 1:
                continue
            indexIDs.append(int(track.track_id))
            counter.append(int(track.track_id))
            bbox = track.to_tlbr()
            cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])),
                          (int(bbox[2]), int(bbox[3])), (255, 255, 255),
                          2)  # tracking box
            color = [int(c) for c in COLORS[indexIDs[i] % len(COLORS)]]

            list_file.write(str(frame_index) + ',')  # 3-5-7-9
            list_file.write(str(track.track_id) + ',')  # ids of everyone in frame
            b0 = str(bbox[0])
            b1 = str(bbox[1])
            b2 = str(bbox[2] - bbox[0])
            b3 = str(bbox[3] - bbox[1])
            list_file.write(
                str(b0) + ',' + str(b1) + ',' + str(b2) + ',' + str(b3))
            list_file.write('\n')

            if len(class_names) > 0:
                class_name = class_names[0]
                cv2.putText(frame, str(class_names[0]),
                            (int(bbox[0]), int(bbox[1] - 20)), 0, 5e-3 * 150,
                            (255, 255, 255), 2)  # person

            i += 1
            center = (int(((bbox[0]) + (bbox[2])) / 2),
                      int(((bbox[1]) + (bbox[3])) / 2))
            pts[track.track_id].append(center)
            thickness1 = 5

            for j in range(1, len(pts[track.track_id])):
                if pts[track.track_id][j - 1] is None or \
                        pts[track.track_id][j] is None:
                    continue
                thickness = int(np.sqrt(64 / float(j + 1)) * 2)
                cv2.line(frame, (pts[track.track_id][j - 1]),
                         (pts[track.track_id][j]), (255, 255, 255),
                         thickness)

            if args["ids"] == False:
                cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])),
                              (int(bbox[2]), int(bbox[3])), (color), 3)
                cv2.putText(frame, str(track.track_id),
                            (int(bbox[0]), int(bbox[1] - 50)), 0,
                            5e-3 * 150, (color), 2)  # id
                cv2.circle(frame, (center), 1, color, thickness1)
                for j in range(1, len(pts[track.track_id])):
                    if pts[track.track_id][j - 1] is None or \
                            pts[track.track_id][j] is None:
                        continue
                    thickness = int(np.sqrt(64 / float(j + 1)) * 2)
                    cv2.line(frame, (pts[track.track_id][j - 1]),
                             (pts[track.track_id][j]), (color), thickness)
                try:
                    num = (int(args["camera"][1]) - 1) * 200
                    path = 'Z:\\pro2\\whole\\person\\gallery\\%04d' % int(
                        track.track_id + num)
                    if not os.path.exists(path):
                        os.makedirs(path)
                    if len(os.listdir(path)) <= 150:  # store at most 150 crops per id
                        crop = frame2[int(bbox[1]):int(bbox[3]),
                                      int(bbox[0]):int(bbox[2])]
                        # INTER_CUBIC suits enlarging, INTER_AREA suits shrinking
                        crop = cv2.resize(crop, (64, 128),
                                          interpolation=cv2.INTER_AREA)
                        filepath = path + '\\' + '%04d' % int(
                            track.track_id + num
                        ) + '_%s_' % args["camera"] + '%04d' % int(
                            len(os.listdir(path)) + 1) + '_%.2f' % (
                                video_capture.get(0) / 1000) + '.jpg'
                        cv2.imwrite(filepath, crop)
                except:
                    continue
            # Highlight a single requested id only
            else:
                makequery = False
                id1 = int(args["ids"])
                if int(track.track_id) == id1:
                    cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])),
                                  (int(bbox[2]), int(bbox[3])), (color), 3)
                    cv2.putText(frame, str(track.track_id),
                                (int(bbox[0]), int(bbox[1] - 50)), 0,
                                5e-3 * 150, (color), 2)  # id
                    cv2.circle(frame, (center), 1, color, thickness1)
                    for j in range(1, len(pts[track.track_id])):
                        if pts[track.track_id][j - 1] is None or \
                                pts[track.track_id][j] is None:
                            continue
                        thickness = int(np.sqrt(64 / float(j + 1)) * 2)
                        cv2.line(frame, (pts[track.track_id][j - 1]),
                                 (pts[track.track_id][j]), (color),
                                 thickness)
                    cv2.putText(frame, str(class_names[0]),
                                (int(bbox[0]), int(bbox[1] - 20)), 0,
                                5e-3 * 150, (color), 2)  # person
                else:
                    continue

        count = len(set(counter))
        cv2.putText(frame, "Total Pedestrian Counter: " + str(count),
                    (int(20), int(80)), 0, 5e-3 * 200, (0, 255, 0), 2)
        cv2.putText(frame, "Current Pedestrian Counter: " + str(i),
                    (int(20), int(40)), 0, 5e-3 * 200, (0, 255, 0), 2)
        cv2.namedWindow("YOLO4_Deep_SORT", 0)
        cv2.resizeWindow('YOLO4_Deep_SORT', 1024, 768)
        cv2.imshow('YOLO4_Deep_SORT', frame)

        if writeVideo_flag:
            # save a frame
            out.write(frame)
            frame_index = frame_index + 1

        # Press Q to stop!
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

    # Build the query set: copy the middle gallery image of every id
    if makequery == True:
        root_path = 'Z:\\pro2\\whole\\person\\gallery\\'
        copy_path = 'Z:\\pro2\\whole\\person\\query\\'
        ids = os.listdir(root_path)
        for i in ids:
            img_path = root_path + i
            img = os.listdir(img_path)
            indeximg = img[int(len(img) / 2)]
            old_name = img_path + '\\' + indeximg
            new_path = copy_path + i
            new_name = new_path + '\\' + indeximg
            if not os.path.exists(new_path):
                os.makedirs(new_path)
            shutil.copyfile(old_name, new_name)

    print(" ")
    print("[Finish]")
    end = time.time()
    print("the whole time ", end - t0)

    if count > 0:
        print(args["input"][43:57] + ": " + str(count) + " " +
              str(class_name) + ' Found')
    else:
        print("[No Found]")

    video_capture.release()
    if writeVideo_flag:
        out.release()
        list_file.close()
    cv2.destroyAllWindows()
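
# --- Hedged CLI sketch ------------------------------------------------------
# The script above reads args["input"], args["camera"], and args["ids"], but
# the parser itself is not shown. A setup consistent with those accesses might
# look like this; flag names and defaults are guesses, and a camera tag such
# as "c1" is assumed since args["camera"][1] is indexed as the camera number.
import argparse

ap = argparse.ArgumentParser(
    description="Per-camera tracking with re-ID gallery export")
ap.add_argument("-i", "--input", required=True,
                help="path to the input video")
ap.add_argument("-c", "--camera", default="c1",
                help="camera tag such as 'c1'; its second character is used "
                     "as the camera number")
ap.add_argument("--ids", default=False,
                help="a single track id to highlight; leave unset to export "
                     "gallery crops for every id")
args = vars(ap.parse_args())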