def main(args):
    logger.info('Start Tracking...')
    ctx = mx.gpu(0) if args.gpu else mx.cpu()
    fps = max(0, min(BASE_FPS, args.fps))
    net = model_zoo.get_model(args.network, pretrained=True, ctx=ctx)
    net.reset_class(classes=['person'], reuse_weights=['person'])

    # Definition of the parameters
    max_cosine_distance = 0.3
    nn_budget = None
    nms_max_overlap = 1.0

    # feature extractor for deepsort re-id
    encoder = gdet.BoxEncoder()
    metric = nn_matching.NearestNeighborDistanceMetric('cosine', max_cosine_distance, nn_budget)
    tracker = Tracker(metric)

    capture = cv2.VideoCapture(args.src)
    frame_interval = BASE_FPS // fps if fps > 0 else 0
    frame_index = 0
    while True:
        ret, frame = capture.read()
        if not ret:
            break
        if 0 < fps and frame_index % frame_interval != 0:
            frame_index += 1
            continue

        x, img = gcv.data.transforms.presets.yolo.transform_test(
            mx.nd.array(frame).astype('uint8'),
            short=args.short,
        )
        class_IDs, det_scores, det_boxes = net(x.as_in_context(ctx))

        boxs = []
        person = mx.nd.array([0])
        score_threshold = mx.nd.array([0.5])
        for i, class_ID in enumerate(class_IDs[0]):
            if class_ID == person and det_scores[0][i] >= score_threshold:
                boxs.append(det_boxes[0][i].asnumpy())

        if boxs:
            features = encoder(img, boxs)
        else:
            features = np.array([])

        # score to 1.0 here).
        detections = [
            Detection(bbox, 1.0, feature)
            for bbox, feature in zip(boxs, features)
        ]

        # Run non-maxima suppression.
        boxes = np.array([d.tlwh for d in detections])
        scores = np.array([d.confidence for d in detections])
        indices = preprocessing.non_max_suppression(boxes, nms_max_overlap, scores)
        detections = [detections[i] for i in indices]

        # Call the tracker
        tracker.predict()
        tracker.update(detections)

        frame_index += 1

        # store original scene
        cv2.imwrite(os.path.join(args.out_dir, f'{frame_index}.jpg'), img)
        show_img = img.copy()

        # check missed
        for track in tracker.tracks:
            bbox = [max(0, int(x)) for x in track.to_tlbr()]
            if not track.is_confirmed() or track.time_since_update > 1:
                if 2 <= track.time_since_update < 10:
                    try:
                        cv2.imwrite(
                            os.path.join(
                                args.out_dir,
                                f'missed-{frame_index}-{track.track_id}.jpg'),
                            img,
                        )
                    except Exception:
                        traceback.print_exc()
                logger.info('Skipped by time_since_update')
                continue
            logger.info(f'Frame #{frame_index} - Id: {track.track_id}')
            cv2.rectangle(show_img, (bbox[0], bbox[1]), (bbox[2], bbox[3]),
                          (255, 255, 255), 2)
            cv2.putText(show_img, str(track.track_id), (bbox[0], bbox[1] + 30),
                        0, 5e-3 * 200, (0, 255, 0), 2)

        # show image
        cv2.imwrite(os.path.join(args.out_dir, f'anno-{frame_index}.jpg'), show_img)
        cv2.imshow('', show_img)

        # Press Q to stop!
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

    logger.info(
        f'Missed obj: {tracker.missed_obj}, Missed frame: {tracker.missed_frame}'
    )
    capture.release()
    cv2.destroyAllWindows()
def main(yolo):
    # Definition of the parameters
    max_cosine_distance = 0.3
    nn_budget = None
    nms_max_overlap = 1.0

    # deep_sort
    model_filename = 'model_data/mars-small128.pb'
    encoder = gdet.create_box_encoder(model_filename, batch_size=1)
    metric = nn_matching.NearestNeighborDistanceMetric("cosine", max_cosine_distance, nn_budget)
    tracker = Tracker(metric)

    writeVideo_flag = True

    video_capture = cv2.VideoCapture(0)

    if writeVideo_flag:
        # Define the codec and create VideoWriter object
        w = int(video_capture.get(3))
        h = int(video_capture.get(4))
        fourcc = cv2.VideoWriter_fourcc(*'MJPG')
        out = cv2.VideoWriter('output.avi', fourcc, 15, (w, h))
        list_file = open('detection.txt', 'w')
        frame_index = -1

    fps = 0.0
    while True:
        ret, frame = video_capture.read()  # frame shape 640*480*3
        if not ret:
            break
        t1 = time.time()

        # image = Image.fromarray(frame)
        image = Image.fromarray(frame[..., ::-1])  # bgr to rgb
        boxs = yolo.detect_image(image)
        # print("box_num", len(boxs))
        features = encoder(frame, boxs)

        # score to 1.0 here).
        detections = [
            Detection(bbox, 1.0, feature)
            for bbox, feature in zip(boxs, features)
        ]

        # Run non-maxima suppression.
        boxes = np.array([d.tlwh for d in detections])
        scores = np.array([d.confidence for d in detections])
        indices = preprocessing.non_max_suppression(boxes, nms_max_overlap, scores)
        detections = [detections[i] for i in indices]

        # Call the tracker
        tracker.predict()
        tracker.update(detections)

        for track in tracker.tracks:
            if not track.is_confirmed() or track.time_since_update > 1:
                continue
            bbox = track.to_tlbr()
            cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])),
                          (int(bbox[2]), int(bbox[3])), (255, 255, 255), 2)
            cv2.putText(frame, str(track.track_id), (int(bbox[0]), int(bbox[1])),
                        0, 5e-3 * 200, (0, 255, 0), 2)

        for det in detections:
            bbox = det.to_tlbr()
            cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])),
                          (int(bbox[2]), int(bbox[3])), (255, 0, 0), 2)

        cv2.imshow('', frame)

        if writeVideo_flag:
            # save a frame
            out.write(frame)
            frame_index = frame_index + 1
            list_file.write(str(frame_index) + ' ')
            if len(boxs) != 0:
                for i in range(0, len(boxs)):
                    list_file.write(
                        str(boxs[i][0]) + ' ' + str(boxs[i][1]) + ' ' +
                        str(boxs[i][2]) + ' ' + str(boxs[i][3]) + ' ')
            list_file.write('\n')

        fps = (fps + (1. / (time.time() - t1))) / 2
        print("fps= %f" % (fps))

        # Press Q to stop!
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

    video_capture.release()
    if writeVideo_flag:
        out.release()
        list_file.close()
    cv2.destroyAllWindows()
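
# The per-frame body repeated in every variant below is essentially identical:
# run the detector, embed each box with the re-id encoder, wrap the boxes in
# Detection objects, suppress overlaps, then advance the tracker. A minimal
# sketch of that shared step factored into one helper, assuming only the
# deep_sort API already used above (Detection(tlwh, confidence, feature) and
# preprocessing.non_max_suppression(boxes, max_overlap, scores)); import paths
# vary between forks of the deep_sort repo.
import numpy as np
from deep_sort import preprocessing
from deep_sort.detection import Detection


def track_frame(frame, boxs, encoder, tracker, nms_max_overlap=1.0):
    # Appearance embeddings used for re-id matching.
    features = encoder(frame, boxs)
    detections = [Detection(bbox, 1.0, feature)
                  for bbox, feature in zip(boxs, features)]

    # Non-maxima suppression over the wrapped detections.
    boxes = np.array([d.tlwh for d in detections])
    scores = np.array([d.confidence for d in detections])
    indices = preprocessing.non_max_suppression(boxes, nms_max_overlap, scores)
    detections = [detections[i] for i in indices]

    tracker.predict()            # Kalman prediction step
    tracker.update(detections)   # association and track management
    return detections
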
def main(yolo):
    # Definition of the parameters
    max_cosine_distance = 0.3
    nn_budget = None
    nms_max_overlap = 1.0

    # deep_sort
    model_filename = 'model_data/mars-small128.pb'
    encoder = gdet.create_box_encoder(model_filename, batch_size=1)
    metric = nn_matching.NearestNeighborDistanceMetric("cosine", max_cosine_distance, nn_budget)
    tracker = Tracker(metric)

    # video_path = "C:/Users/Rashid Ali/Desktop/Person counting/V05.mp4"
    writeVideo_flag = True

    video_capture = cv2.VideoCapture(0)

    if writeVideo_flag:
        # Define the codec and create VideoWriter object
        video_width = int(video_capture.get(3))
        video_height = int(video_capture.get(4))
        video_fps = int(video_capture.get(5))
        video_size = (int(video_width), int(video_height))
        fourcc = cv2.VideoWriter_fourcc('m', 'p', '4', 'v')
        out = cv2.VideoWriter('output_V09.mp4', fourcc, video_fps, video_size)
        list_file = open('detection_v2.txt', 'w')
        frame_index = -1

    fps = 0.0
    while True:
        ret, frame = video_capture.read()  # frame shape 640*480*3
        if not ret:
            break
        t1 = time.time()

        # image = Image.fromarray(frame)
        image = Image.fromarray(frame[..., ::-1])  # bgr to rgb
        boxs = yolo.detect_image(image)
        # print("box_num", len(boxs))
        features = encoder(frame, boxs)

        # score to 1.0 here).
        detections = [
            Detection(bbox, 1.0, feature)
            for bbox, feature in zip(boxs, features)
        ]

        # Run non-maxima suppression.
        boxes = np.array([d.tlwh for d in detections])
        scores = np.array([d.confidence for d in detections])
        indices = preprocessing.non_max_suppression(boxes, nms_max_overlap, scores)
        detections = [detections[i] for i in indices]

        # Call the tracker
        tracker.predict()
        tracker.update(detections)

        '''
        for track in tracker.tracks:
            if not track.is_confirmed() or track.time_since_update > 1:
                continue
            bbox = track.to_tlbr()
            cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), (255, 255, 255), 2)
            cv2.putText(frame, str(track.track_id), (int(bbox[0]), int(bbox[1])), 0, 5e-3 * 200, (0, 255, 0), 2)
        '''

        person_count = 0
        count1 = 0
        for track in tracker.tracks:
            if not track.is_confirmed() or track.time_since_update > 1:
                continue
            person_count = person_count + 1
            track_id = '{} {:.1f}'.format('Track_ID', track.track_id)
            count1 = '{} {:.1f}'.format('Total Persons Count', person_count)
            bbox = track.to_tlbr()
            cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])),
                          (int(bbox[2]), int(bbox[3])), (255, 255, 255), 2)
            cv2.putText(frame, str(track_id), (int(bbox[0]), int(bbox[1])),
                        0, 5e-3 * 100, (0, 255, 0), 2)
            cv2.putText(frame, str(count1), (20, 50), 0, 5e-3 * 100, (247, 7, 7), 2)

        cv2.putText(frame, '{:.2f}ms'.format((time.time() - t1) * 1000), (20, 20),
                    fontFace=cv2.FONT_HERSHEY_COMPLEX, fontScale=0.5,
                    color=(247, 7, 7), thickness=1)
        cv2.imshow('Detections Window', frame)

        '''
        # cv2.namedWindow("Detections Window", cv2.WINDOW_AUTOSIZE)
        # cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), (247, 7, 7), 1)
        # cv2.putText(frame, str(count), (int(bbox[0]), int(bbox[1])), fontFace=cv2.FONT_HERSHEY_COMPLEX,
        #             fontScale=0.5, color=(247, 7, 7), thickness=1)
        person_count = 0
        count1 = 0
        for det in detections:
            person_count = person_count + 1
            count = '{} {:.1f}'.format('Count', person_count)
            count1 = '{} {:.1f}'.format('Total Count', person_count)
            bbox = det.to_tlbr()
            cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), (247, 7, 7), 2)
            cv2.putText(frame, str(count), (int(bbox[0]), int(bbox[1])), fontFace=cv2.FONT_HERSHEY_COMPLEX,
                        fontScale=0.5, color=(247, 7, 7), thickness=2)
        '''

        if writeVideo_flag:
            # save a frame
            print("Writing detections in file")
            out.write(frame)
            frame_index = frame_index + 1
            list_file.write(str(frame_index) + ' ')
            if len(boxs) != 0:
                for i in range(0, len(boxs)):
                    list_file.write(
                        str(boxs[i][0]) + ' ' + str(boxs[i][1]) + ' ' +
                        str(boxs[i][2]) + ' ' + str(boxs[i][3]) + ' ')
            list_file.write(str(person_count) + '\n')

        fps = (fps + (1. / (time.time() - t1))) / 2
        print("fps= %f" % (fps))

        # Press Q to stop!
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

    video_capture.release()
    if writeVideo_flag:
        out.release()
        list_file.close()
    cv2.destroyAllWindows()
def Display():
    print("Start Displaying")
    yolo = YOLO()

    # Definition of the parameters
    max_cosine_distance = 0.3
    nn_budget = None
    nms_max_overlap = 1.0

    # deep_sort
    model_filename = 'model_data/mars-small128.pb'
    encoder = gdet.create_box_encoder(model_filename, batch_size=1)
    metric = nn_matching.NearestNeighborDistanceMetric("cosine", max_cosine_distance, nn_budget)
    tracker = Tracker(metric)

    writeVideo_flag = True
    w = 768
    h = 432
    fourcc = cv2.VideoWriter_fourcc(*'MJPG')
    out = cv2.VideoWriter('output.avi', fourcc, 15, (w, h))
    list_file = open('detection.txt', 'w')
    frame_index = -1
    fps = 0.0
    max_boxs = 0
    person_track = {}
    yolo2 = YOLO2()

    while True:
        if not q.empty():
            # read the check-in records
            face = []
            cur1 = conn.cursor()  # get a cursor
            sql1 = "select * from worker"
            cur1.execute(sql1)
            data = cur1.fetchall()
            for d in data:
                # note: int columns must be converted with str
                name = str(d[1]) + '_' + d[2]
                face.append(name)
            cur1.close()  # close the cursor

            # fetch a frame from the queue
            frame = q.get()
            t1 = time.time()

            # run the safety-equipment detection
            image = Image.fromarray(frame[..., ::-1])  # bgr to rgb
            img = Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
            frame, wear = yolo2.detect_image(img)
            frame = np.array(frame)
            frame = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)

            # get the warning line
            # cv2.line(frame, (132, 368), (229, 368), (0, 255, 255), 3)
            cv2.line(frame, (275, 360), (378, 360), (0, 255, 255), 1)
            transboundaryline = t.line_detect_possible_demo(frame)

            # YOLO person detection
            boxs = yolo.detect_image(image)
            features = encoder(frame, boxs)
            # score to 1.0 here).
            detections = [Detection(bbox, 1.0, feature)
                          for bbox, feature in zip(boxs, features)]

            # Run non-maxima suppression.
            boxes = np.array([d.tlwh for d in detections])
            scores = np.array([d.confidence for d in detections])
            indices = preprocessing.non_max_suppression(boxes, nms_max_overlap, scores)
            detections = [detections[i] for i in indices]

            if len(boxs) > max_boxs:
                max_boxs = len(boxs)

            # Call the tracker
            tracker.predict()
            tracker.update(detections)

            for track in tracker.tracks:
                if max_boxs < track.track_id:
                    tracker.tracks.remove(track)
                    tracker._next_id = max_boxs + 1
                if not track.is_confirmed() or track.time_since_update > 1:
                    continue
                bbox = track.to_tlbr()
                PointX = bbox[0] + ((bbox[2] - bbox[0]) / 2)
                PointY = bbox[3]

                if track.track_id not in person_track:
                    track2 = copy.deepcopy(track)
                    person_track[track.track_id] = track2
                else:
                    track2 = copy.deepcopy(track)
                    bbox2 = person_track[track.track_id].to_tlbr()
                    PointX2 = bbox2[0] + ((bbox2[2] - bbox2[0]) / 2)
                    PointY2 = bbox2[3]
                    distance = math.sqrt(pow(PointX - PointX2, 2) + pow(PointY - PointY2, 2))
                    # correction: re-assign the id when the jump is too large
                    if distance < 120:
                        person_track[track.track_id] = track2
                    else:
                        # print('last', track.track_id)
                        dis = {}
                        for key in person_track:
                            bbox3 = person_track[key].to_tlbr()
                            PointX3 = bbox3[0] + ((bbox3[2] - bbox3[0]) / 2)
                            PointY3 = bbox3[3]
                            d = math.sqrt(pow(PointX3 - PointX, 2) + pow(PointY3 - PointY, 2))
                            dis[key] = d
                        dis = sorted(dis.items(), key=operator.itemgetter(1), reverse=False)
                        track2.track_id = dis[0][0]
                        person_track[dis[0][0]] = track2
                        tracker.tracks.remove(track)
                        tracker.tracks.append(person_track[track.track_id])

                # resolve the display label for this track
                try:
                    box_title = face[track2.track_id - 1]
                except Exception:
                    box_title = str(track2.track_id) + "_" + "unknow"

                if box_title not in workers:
                    wid = box_title.split('_')[0]
                    localtime = time.asctime(time.localtime(time.time()))
                    workers[box_title] = wk.Worker()
                    workers[box_title].set(box_title, localtime, (int(PointX), int(PointY)))
                    cur2 = conn.cursor()  # get a cursor
                    sql2 = "UPDATE worker SET in_time='" + localtime + "' WHERE worker_id= '" + wid + "'"
                    cur2.execute(sql2)
                    cur2.close()  # close the cursor
                else:
                    localtime = time.asctime(time.localtime(time.time()))
                    yoloPoint = (int(PointX), int(PointY))
                    wear_dic = {}
                    workers[box_title].current_point = yoloPoint
                    workers[box_title].track_point.append(workers[box_title].current_point)
                    mytrack = str(workers[box_title].track_point)
                    wid = box_title.split('_')[0]

                    # Kalman filter prediction
                    if wid not in utils.KalmanNmae:
                        utils.myKalman(wid)
                    if wid not in utils.lmp:
                        utils.setLMP(wid)
                    cpx, cpy = utils.predict(workers[box_title].current_point[0],
                                             workers[box_title].current_point[1], wid)
                    if cpx[0] == 0.0 or cpy[0] == 0.0:
                        cpx[0] = workers[box_title].current_point[0]
                        cpy[0] = workers[box_title].current_point[1]
                    workers[box_title].next_point = (int(cpx), int(cpy))

                    cur3 = conn.cursor()  # get a cursor
                    sql3 = ("UPDATE worker SET current_point= '" + str(workers[box_title].current_point) +
                            "' ,track_point = '" + mytrack +
                            "',next_point = '" + str(workers[box_title].next_point) +
                            "' WHERE worker_id= '" + wid + "'")
                    cur3.execute(sql3)
                    cur3.close()

                    # record safety-equipment violations
                    if len(wear) > 0:
                        for w in wear:
                            wear_dis = int(math.sqrt(pow(w[0] - yoloPoint[0], 2) +
                                                     pow(w[1] - yoloPoint[1], 2)))
                            wear_dic[wear_dis] = w
                        wear_dic = sorted(wear_dic.items(), key=operator.itemgetter(0), reverse=False)

                        if wear_dic[0][0] < 120:
                            cur4 = conn.cursor()  # get a cursor
                            if wear[wear_dic[0][1]] == 1:
                                if len(workers[box_title].wear['no helmet']) == 0:
                                    workers[box_title].wear['no helmet'].append(localtime)
                                    sql = ("INSERT INTO wear SET worker_id = '" + wid +
                                           "', type = 'no_helmet',abnormal_time = '" + localtime + "'")
                                    cur4.execute(sql)
                                    cur4.close()  # close the cursor
                                else:
                                    print(box_title, workers[box_title].wear['no helmet'])
                                    if localtime not in workers[box_title].wear['no helmet']:
                                        workers[box_title].wear['no helmet'].append(localtime)
                                        sql = ("INSERT INTO wear SET worker_id = '" + wid +
                                               "', type = 'no_helmet',abnormal_time = '" + localtime + "'")
                                        cur4.execute(sql)
                                        cur4.close()  # close the cursor
                            elif wear[wear_dic[0][1]] == 2:
                                if len(workers[box_title].wear['no work cloths']) == 0:
                                    workers[box_title].wear['no work cloths'].append(localtime)
                                    sql = ("INSERT INTO wear SET worker_id = '" + wid +
                                           "', type = 'no work cloths',abnormal_time = '" + localtime + "'")
                                    cur4.execute(sql)
                                    cur4.close()  # close the cursor
                                else:
                                    if localtime not in workers[box_title].wear['no work cloths']:
                                        workers[box_title].wear['no work cloths'].append(localtime)
                                        sql = ("INSERT INTO wear SET worker_id = '" + wid +
                                               "', type = 'no work cloths',abnormal_time = '" + localtime + "'")
                                        cur4.execute(sql)
                                        cur4.close()  # close the cursor
                            elif wear[wear_dic[0][1]] == 3:
                                if len(workers[box_title].wear['unsafe wear']) == 0:
                                    workers[box_title].wear['unsafe wear'].append(localtime)
                                    sql = ("INSERT INTO wear SET worker_id = '" + wid +
                                           "', type = 'unsafe wear',abnormal_time = '" + localtime + "'")
                                    cur4.execute(sql)
                                    cur4.close()  # close the cursor
                                else:
                                    if localtime not in workers[box_title].wear['unsafe wear']:
                                        workers[box_title].wear['unsafe wear'].append(localtime)
                                        sql = ("INSERT INTO wear SET worker_id = '" + wid +
                                               "', type = 'unsafe wear',abnormal_time = '" + localtime + "'")
                                        cur4.execute(sql)
                                        cur4.close()  # close the cursor

                    # record line-crossing events
                    if len(workers[box_title].track_point) > 4:
                        for i in range(len(transboundaryline)):
                            p1 = (transboundaryline[i][0], transboundaryline[i][1])
                            p2 = (transboundaryline[i][2], transboundaryline[i][3])
                            p3 = workers[box_title].track_point[-2]
                            p4 = workers[box_title].track_point[-1]
                            a = t.IsIntersec(p1, p2, p3, p4)
                            if a == '有交点':  # i.e. the two segments intersect
                                cur5 = conn.cursor()  # get a cursor
                                cur6 = conn.cursor()  # get a cursor
                                cur5.execute(
                                    "select time from transboundary where worker_id = '" + wid + "' ")
                                qurrytime = cur5.fetchone()
                                cur5.close()  # close the cursor
                                if qurrytime is None:
                                    print('line crossed')
                                    sql = ("INSERT INTO transboundary SET worker_id = '" + wid +
                                           "',time = '" + localtime + "'")
                                    cur6.execute(sql)
                                    cur6.close()  # close the cursor
                                else:
                                    temp1 = 0
                                    for qt in qurrytime:
                                        if qt == localtime:
                                            temp1 = 1
                                    if temp1 == 0:
                                        print('line crossed')
                                        sql = ("INSERT INTO transboundary SET worker_id = '" + wid +
                                               "',time = '" + localtime + "'")
                                        cur6.execute(sql)
                                        cur6.close()  # close the cursor

                    if len(workers[box_title].track_point) >= 20:
                        workers[box_title].previous_point = workers[box_title].track_point[-5]

                conn.commit()

                try:
                    cv2.putText(frame, face[track2.track_id - 1], (int(bbox[0]), int(bbox[1])),
                                0, 5e-3 * 200, (0, 255, 0), 2)
                except Exception:
                    cv2.putText(frame, "unknow", (int(bbox[0]), int(bbox[1])),
                                0, 5e-3 * 200, (0, 255, 0), 2)

            cv2.imshow('', frame)

            if writeVideo_flag:
                # save a frame
                out.write(frame)
                frame_index = frame_index + 1
                list_file.write(str(frame_index) + ' ')
                if len(boxs) != 0:
                    for i in range(0, len(boxs)):
                        list_file.write(str(boxs[i][0]) + ' ' + str(boxs[i][1]) + ' ' +
                                        str(boxs[i][2]) + ' ' + str(boxs[i][3]) + ' ')
                list_file.write('\n')

            fps = (fps + (1. / (time.time() - t1))) / 2
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
def main(yolo):
    # Definition of the parameters
    max_cosine_distance = 0.3
    nn_budget = None
    nms_max_overlap = 1.0

    # Deep SORT
    model_filename = 'model_data/mars-small128.pb'
    encoder = gdet.create_box_encoder(model_filename, batch_size=1)

    metric = nn_matching.NearestNeighborDistanceMetric("cosine", max_cosine_distance, nn_budget)
    tracker = Tracker(metric)

    tracking = True
    writeVideo_flag = False
    asyncVideo_flag = False

    file_path = 'D:\\video/[Sala Outside][2020-05-28T16-01-39][2020-05-28T18-02-09].mp4'

    if asyncVideo_flag:
        video_capture = VideoCaptureAsync(file_path)
    else:
        video_capture = cv2.VideoCapture(file_path)

    if asyncVideo_flag:
        video_capture.start()

    if writeVideo_flag:
        if asyncVideo_flag:
            w = int(video_capture.cap.get(3))
            h = int(video_capture.cap.get(4))
        else:
            w = int(video_capture.get(3))
            h = int(video_capture.get(4))
        fourcc = cv2.VideoWriter_fourcc(*'XVID')
        out = cv2.VideoWriter('output_yolov4.avi', fourcc, 30, (w, h))
        frame_index = -1

    fps = 0.0
    fps_imutils = imutils.video.FPS().start()

    while True:
        ret, frame = video_capture.read()  # frame shape 640*480*3
        if not ret:
            break

        t1 = time.time()

        image = Image.fromarray(frame[..., ::-1])  # bgr to rgb
        boxes, confidence, classes = yolo.detect_image(image)

        if tracking:
            features = encoder(frame, boxes)
            detections = [
                Detection(bbox, confidence, cls, feature)
                for bbox, confidence, cls, feature in zip(boxes, confidence, classes, features)
            ]
        else:
            detections = [
                Detection_YOLO(bbox, confidence, cls)
                for bbox, confidence, cls in zip(boxes, confidence, classes)
            ]

        # Run non-maxima suppression.
        boxes = np.array([d.tlwh for d in detections])
        scores = np.array([d.confidence for d in detections])
        indices = preprocessing.non_max_suppression(boxes, nms_max_overlap, scores)
        detections = [detections[i] for i in indices]

        if tracking:
            # Call the tracker
            tracker.predict()
            tracker.update(detections)

            for track in tracker.tracks:
                if not track.is_confirmed() or track.time_since_update > 1:
                    continue
                bbox = track.to_tlbr()
                cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])),
                              (int(bbox[2]), int(bbox[3])), (255, 255, 255), 2)
                cv2.putText(frame, "ID: " + str(track.track_id),
                            (int(bbox[0]), int(bbox[1])), 0,
                            1.5e-3 * frame.shape[0], (0, 255, 0), 1)

        for det in detections:
            bbox = det.to_tlbr()
            score = "%.2f" % round(det.confidence * 100, 2) + "%"
            cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])),
                          (int(bbox[2]), int(bbox[3])), (255, 0, 0), 2)
            if len(classes) > 0:
                cls = det.cls
                cv2.putText(frame, str(cls) + " " + score,
                            (int(bbox[0]), int(bbox[3])), 0,
                            1.5e-3 * frame.shape[0], (0, 255, 0), 1)

        cv2.imshow('', frame)

        if writeVideo_flag:  # and not asyncVideo_flag:
            # save a frame
            out.write(frame)
            frame_index = frame_index + 1

        fps_imutils.update()

        if not asyncVideo_flag:
            fps = (fps + (1. / (time.time() - t1))) / 2
            print("FPS = %f" % (fps))

        # Press Q to stop!
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

    fps_imutils.stop()
    print('imutils FPS: {}'.format(fps_imutils.fps()))

    if asyncVideo_flag:
        video_capture.stop()
    else:
        video_capture.release()

    if writeVideo_flag:
        out.release()

    cv2.destroyAllWindows()
def main(yolo, input):
    # file name including the extension
    basename = os.path.basename(input)
    print(" START YOLOv4 + DeepSort input file is ", basename)

    # Definition of the parameters
    max_cosine_distance = 0.3
    nn_budget = None
    nms_max_overlap = 1.0

    # Deep SORT
    model_filename = '../model/mars-small128.pb'
    cencoder = gdet.create_box_encoder(model_filename, batch_size=1)
    pencoder = gdet.create_box_encoder(model_filename, batch_size=1)
    cmetric = nn_matching.NearestNeighborDistanceMetric(
        "cosine", max_cosine_distance, nn_budget)
    pmetric = nn_matching.NearestNeighborDistanceMetric(
        "cosine", max_cosine_distance, nn_budget)
    ctracker = Tracker(cmetric)  # cars
    ptracker = Tracker(pmetric)  # pedestrians

    tracking = True
    writeVideo_flag = True

    # set the categories to run inference on
    cl_list = ['Pedestrian', 'Car']

    video_capture = cv2.VideoCapture(input)

    fps = 0.0
    fps_imutils = imutils.video.FPS().start()

    if writeVideo_flag:
        basename_without_ext = os.path.splitext(os.path.basename(input))[0]
        fname = basename_without_ext + 'output_yolov4.mp4'
        output_path = '../output/' + fname
        video_FourCC = int(video_capture.get(cv2.CAP_PROP_FOURCC))
        video_fps = video_capture.get(cv2.CAP_PROP_FPS)
        video_size = (int(video_capture.get(cv2.CAP_PROP_FRAME_WIDTH)),
                      int(video_capture.get(cv2.CAP_PROP_FRAME_HEIGHT)))
        out = cv2.VideoWriter(output_path, video_FourCC, video_fps, video_size)
        frame_index = -1

    Nm_fr = 0
    all_result = []

    while True:
        Car_result_ALL = []
        Pedestrian_result_ALL = []
        Nm_fr = Nm_fr + 1

        ret, frame = video_capture.read()  # frame shape 1920*1216*3
        if not ret:
            break
        print("Frame no. = ", Nm_fr)

        t1 = time.time()

        image = Image.fromarray(frame[..., ::-1])
        cboxes, cconfidence, cclasses = yolo.detect_image(image, cl_list[1])
        pboxes, pconfidence, pclasses = yolo.detect_image(image, cl_list[0])

        if tracking:
            cfeatures = cencoder(frame, cboxes)
            pfeatures = pencoder(frame, pboxes)
            cdetections = [Detection(cbbox, cconfidence, ceach_class, cfeature)
                           for cbbox, cconfidence, ceach_class, cfeature in
                           zip(cboxes, cconfidence, cclasses, cfeatures)]
            pdetections = [Detection(pbbox, pconfidence, peach_class, pfeature)
                           for pbbox, pconfidence, peach_class, pfeature in
                           zip(pboxes, pconfidence, pclasses, pfeatures)]
        # else:
        #     detections = [Detection_YOLO(bbox, confidence, each_class)
        #                   for bbox, confidence, each_class in
        #                   zip(boxes, confidence, classes)]

        # Run non-maxima suppression.
        cboxes = np.array([d.tlwh for d in cdetections])
        cscores = np.array([d.confidence for d in cdetections])
        cindices = preprocessing.non_max_suppression(cboxes, nms_max_overlap, cscores)
        cdetections = [cdetections[i] for i in cindices]

        pboxes = np.array([d.tlwh for d in pdetections])
        pscores = np.array([d.confidence for d in pdetections])
        pindices = preprocessing.non_max_suppression(pboxes, nms_max_overlap, pscores)
        pdetections = [pdetections[i] for i in pindices]

        if tracking:
            # Call the trackers
            ctracker.predict()
            ctracker.update(cdetections)
            ptracker.predict()
            ptracker.update(pdetections)

            for ctrack in ctracker.tracks:
                if not ctrack.is_confirmed() or ctrack.time_since_update > 1:
                    continue
                cbbox = ctrack.to_tlbr()
                cv2.rectangle(frame, (int(cbbox[0]), int(cbbox[1])),
                              (int(cbbox[2]), int(cbbox[3])), (0, 0, 255), 3)
                cv2.putText(frame, "ID: " + str(ctrack.track_id),
                            (int(cbbox[0]), int(cbbox[1])), 0,
                            1.5e-3 * frame.shape[0], (0, 0, 255), 3)

                # OUTPUT TRACKING
                ID = int(ctrack.track_id)
                left = int(cbbox[0])
                top = int(cbbox[1])
                right = int(cbbox[2])
                bottom = int(cbbox[3])
                # prediction result
                Car_result = {'id': ID, 'box2d': [left, top, right, bottom]}
                print("Car_result = ", Car_result)
                Car_result_ALL.append(Car_result)

            for ptrack in ptracker.tracks:
                if not ptrack.is_confirmed() or ptrack.time_since_update > 1:
                    continue
                pbbox = ptrack.to_tlbr()
                cv2.rectangle(frame, (int(pbbox[0]), int(pbbox[1])),
                              (int(pbbox[2]), int(pbbox[3])), (255, 0, 0), 3)
                cv2.putText(frame, "ID: " + str(ptrack.track_id),
                            (int(pbbox[0]), int(pbbox[1])), 0,
                            1.5e-3 * frame.shape[0], (255, 0, 0), 3)

                # OUTPUT TRACKING
                ID = int(ptrack.track_id)
                left = int(pbbox[0])
                top = int(pbbox[1])
                right = int(pbbox[2])
                bottom = int(pbbox[3])
                # prediction result
                Pedestrian_result = {'id': ID, 'box2d': [left, top, right, bottom]}
                print("Pedestrian_result = ", Pedestrian_result)
                Pedestrian_result_ALL.append(Pedestrian_result)

        # YOLOv4 output to frame for Car
        for cdet in cdetections:
            cbbox = cdet.to_tlbr()
            cscore = "%.2f" % round(cdet.confidence * 100, 2) + "%"
            cv2.rectangle(frame, (int(cbbox[0]), int(cbbox[1])),
                          (int(cbbox[2]), int(cbbox[3])), (255, 255, 255), 2)
            if len(cclasses) > 0:
                ceach_class = cdet.cls
                cv2.putText(frame, str(ceach_class) + " " + cscore,
                            (int(cbbox[0]), int(cbbox[3])), 0,
                            1.5e-3 * frame.shape[0], (255, 255, 255), 2)

        # YOLOv4 output to frame for Pedestrian
        for pdet in pdetections:
            pbbox = pdet.to_tlbr()
            pscore = "%.2f" % round(pdet.confidence * 100, 2) + "%"
            cv2.rectangle(frame, (int(pbbox[0]), int(pbbox[1])),
                          (int(pbbox[2]), int(pbbox[3])), (127, 127, 127), 2)
            if len(pclasses) > 0:
                peach_class = pdet.cls
                cv2.putText(frame, str(peach_class) + " " + pscore,
                            (int(pbbox[0]), int(pbbox[3])), 0,
                            1.5e-3 * frame.shape[0], (127, 127, 127), 2)

        # Each frame result
        all_result.append({'Car': Car_result_ALL,
                           'Pedestrian': Pedestrian_result_ALL})

        if writeVideo_flag:
            # save a frame
            out.write(frame)
            frame_index = frame_index + 1

        fps_imutils.update()
        fps = (fps + (1. / (time.time() - t1))) / 2
        print(" FPS = %f" % (fps))

    if writeVideo_flag:
        out.release()
    video_capture.release()

    fps_imutils.stop()
    print('imutils FPS: {}'.format(fps_imutils.fps()))

    return {basename: all_result}
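
# The per-frame dictionaries accumulated above ({'id': ..., 'box2d':
# [left, top, right, bottom]}) are already JSON-serializable. A minimal sketch
# of persisting the mapping main() returns; the output path and helper name
# are illustrative, not part of the original code.
import json


def save_results(results, path='../output/predictions.json'):
    # `results` is the {basename: [{'Car': [...], 'Pedestrian': [...]}, ...]}
    # mapping returned by main() above.
    with open(path, 'w') as f:
        json.dump(results, f, indent=2)
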
def read(stack):
    print('Process to read: %s' % os.getpid())
    yolo = YOLO()

    # Definition of the parameters
    max_cosine_distance = 0.3
    nn_budget = None
    nms_max_overlap = 1.0

    # deep_sort
    d = os.path.dirname(__file__)
    model_filename = d + '/model_data/mars-small128.pb'
    encoder = gdet.create_box_encoder(model_filename, batch_size=1)
    metric = nn_matching.NearestNeighborDistanceMetric("cosine", max_cosine_distance, nn_budget)
    tracker = Tracker(metric)

    max_boxs = 0
    face = ['17208019']
    # each target's position in the previous frame
    history = {}
    # mapping from track id to label
    person = {}
    # ids that have already been given a new label
    change = []

    while True:
        if len(stack) != 0:
            frame = stack.pop()
            t1 = time.time()
            frame_count = 0
            localtime = time.asctime(time.localtime(time.time()))
            utils.draw(frame, line.readline())

            # get the warning line
            transboundaryline = line.readline()
            utils.draw(frame, transboundaryline)
            img = Image.fromarray(frame)
            # img.save('frame.jpg')
            '''
            cv2.line(frame, (837, 393), (930, 300), (0, 255, 255), 3)
            transboundaryline = t.line_detect_possible_demo(frame)
            '''

            # image = Image.fromarray(frame)
            image = Image.fromarray(frame[..., ::-1])  # bgr to rgb
            boxs = yolo.detect_image(image)
            # print("box_num", len(boxs))
            features = encoder(frame, boxs)

            # score to 1.0 here).
            detections = [
                Detection(bbox, 1.0, feature)
                for bbox, feature in zip(boxs, features)
            ]

            # Run non-maxima suppression.
            boxes = np.array([d.tlwh for d in detections])
            scores = np.array([d.confidence for d in detections])
            indices = preprocessing.non_max_suppression(boxes, nms_max_overlap, scores)
            detections = [detections[i] for i in indices]

            if len(boxs) > max_boxs:
                max_boxs = len(boxs)

            # Call the tracker
            tracker.predict()
            tracker.update(detections)

            # info for this frame
            info = {}
            target = []
            for track in tracker.tracks:
                # one target within the frame
                per_info = {}
                if not track.is_confirmed() or track.time_since_update > 1:
                    continue
                if track.track_id not in person:
                    person[track.track_id] = str(track.track_id)
                bbox = track.to_tlbr()
                PointX = bbox[0] + ((bbox[2] - bbox[0]) / 2)
                PointY = bbox[3]
                dis = int(PointX) - 1200
                try:
                    if dis < 15:
                        if track.track_id not in change:
                            person[track.track_id] = face.pop(0)
                            change.append(track.track_id)
                except:
                    print('illegal intrusion')

                # current target
                if track.track_id not in change:
                    per_info['worker_id'] = 'unknow' + str(track.track_id)
                else:
                    per_info['worker_id'] = person[track.track_id]

                # current target coordinates
                yoloPoint = (int(PointX), int(PointY))
                per_info['current_point'] = yoloPoint

                # Kalman filter prediction
                if per_info['worker_id'] not in utils.KalmanNmae:
                    utils.myKalman(per_info['worker_id'])
                if per_info['worker_id'] not in utils.lmp:
                    utils.setLMP(per_info['worker_id'])
                cpx, cpy = utils.predict(yoloPoint[0], yoloPoint[1], per_info['worker_id'])
                if cpx[0] == 0.0 or cpy[0] == 0.0:
                    cpx[0] = yoloPoint[0]
                    cpy[0] = yoloPoint[1]
                if frame_count > 20:
                    per_info['next_point'] = (int(cpx), int(cpy))
                else:
                    per_info['next_point'] = yoloPoint

                # record line-crossing
                if per_info['worker_id'] in history:
                    per_info['transboundary'] = 'no'
                    # print(transboundaryline)
                    line1 = [per_info['next_point'], history[per_info['worker_id']]]
                    a = line.IsIntersec2(transboundaryline, line1)
                    if a == '有交点':  # i.e. the two segments intersect
                        print('line-crossing alert')
                        per_info['transboundary'] = 'yes'

                history[per_info['worker_id']] = per_info['current_point']
                frame_count = frame_count + 1
                # print(per_info)

                # draw the target box
                # cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), (255, 255, 255), 2)
                cv2.putText(frame, per_info['worker_id'], (int(bbox[0]), int(bbox[1])),
                            0, 5e-3 * 200, (0, 255, 0), 2)
                target.append(per_info)

            info['time'] = localtime
            # info['frame'] = str(img.tolist()).encode('base64')
            info['frame'] = 'frame'
            info['target'] = target

            # write out as JSON
            info_json = json.dumps(info)
            info_queue.put(info_json)
            getInfo(info_queue)

            cv2.imshow("img", frame)
            key = cv2.waitKey(1) & 0xFF
            if key == ord('q'):
                break
def main(self, frame_data):
    # Definition of the parameters
    nms_max_overlap = 1.0

    # set HyperParams
    size = 416
    iou = 0.45
    score = 0.50
    info = False
    input_size = size

    self.person1.is_used = 0
    self.person2.is_used = 0
    self.person3.is_used = 0
    self.person4.is_used = 0

    out = None

    frame_data = cv2.cvtColor(frame_data, cv2.COLOR_BGR2RGB)
    image_data = cv2.resize(frame_data, (input_size, input_size))
    image_data = image_data / 255.
    image_data = image_data[np.newaxis, ...].astype(np.float32)
    # start_time = time.time()

    batch_data = tf.constant(image_data)
    # run the YOLO model and get the bounding-box predictions back
    pred_bbox = self.infer(batch_data)
    for key, value in pred_bbox.items():
        boxes = value[:, :, 0:4]     # box coordinates
        pred_conf = value[:, :, 4:]  # confidence vector

    boxes, scores, classes, valid_detections = tf.image.combined_non_max_suppression(
        boxes=tf.reshape(boxes, (tf.shape(boxes)[0], -1, 1, 4)),
        scores=tf.reshape(pred_conf,
                          (tf.shape(pred_conf)[0], -1, tf.shape(pred_conf)[-1])),
        max_output_size_per_class=50,
        max_total_size=50,
        iou_threshold=iou,
        score_threshold=score)

    # convert data to numpy arrays and slice out unused elements
    num_objects = valid_detections.numpy()[0]
    bboxes = boxes.numpy()[0]
    bboxes = bboxes[0:int(num_objects)]
    scores = scores.numpy()[0]
    scores = scores[0:int(num_objects)]
    classes = classes.numpy()[0]
    classes = classes[0:int(num_objects)]

    # format bounding boxes from normalized ymin, xmin, ymax, xmax ---> xmin, ymin, width, height
    original_h, original_w, _ = frame_data.shape
    bboxes = utils.format_boxes(bboxes, original_h, original_w)

    # store all predictions in one parameter for simplicity when calling functions
    pred_bbox = [bboxes, scores, classes, num_objects]

    # read in all class names from config
    class_names = utils.read_class_names(cfg.YOLO.CLASSES)

    # by default allow all classes in .names file
    # allowed_classes = list(class_names.values())

    # custom allowed classes (uncomment line below to customize tracker for only people)
    allowed_classes = ['person']

    # loop through objects and use class index to get class name, allow only classes in allowed_classes list
    names = []
    deleted_indx = []
    for i in range(num_objects):
        class_indx = int(classes[i])
        class_name = class_names[class_indx]
        if class_name not in allowed_classes:
            deleted_indx.append(i)
        else:
            names.append(class_name)
    names = np.array(names)
    count = len(names)
    if count:
        cv2.putText(frame_data, "Objects being tracked: {}".format(count),
                    (5, 35), cv2.FONT_HERSHEY_COMPLEX_SMALL, 2, (0, 255, 0), 2)
        # print("Objects being tracked: {}".format(count))

    # delete detections that are not in allowed_classes
    bboxes = np.delete(bboxes, deleted_indx, axis=0)
    scores = np.delete(scores, deleted_indx, axis=0)

    # encode yolo detections and feed to tracker
    features = self.encoder(frame_data, bboxes)
    detections = [
        Detection(bbox, score, class_name, feature)
        for bbox, score, class_name, feature in zip(bboxes, scores, names, features)
    ]

    # initialize color map
    cmap = plt.get_cmap('tab20b')
    colors = [cmap(i)[:3] for i in np.linspace(0, 1, 20)]

    # run non-maxima supression
    boxs = np.array([d.tlwh for d in detections])
    scores = np.array([d.confidence for d in detections])
    classes = np.array([d.class_name for d in detections])
    indices = preprocessing.non_max_suppression(boxs, classes, nms_max_overlap, scores)
    detections = [detections[i] for i in indices]

    # DeepSort tracking starts here
    # Call the tracker
    self.tracker.predict()  # load tracker
    self.tracker.update(detections)

    match_person = 0
    # reset unmatched for center compare
    unmatched = []

    # update tracks
    for track in self.tracker.tracks:
        if not track.is_confirmed() or track.time_since_update > 1:
            continue

        # draw bbox on screen
        # (it seems this should be fully processed before the frame is sent on)
        bbox = track.to_tlbr()
        class_name = track.get_class()

        # Matching index with index_stack
        if self.person1.is_exist(track.track_id):
            self.person1.centerX, self.person1.centerY = self.getCenter(bbox)
            self.draw_box(frame_data, self.person1.index_stack[0], colors, bbox)
            self.person1.is_used = 1
            match_person += 1
        elif self.person2.is_exist(track.track_id):
            self.person2.centerX, self.person2.centerY = self.getCenter(bbox)
            self.draw_box(frame_data, self.person2.index_stack[0], colors, bbox)
            self.person2.is_used = 1
            match_person += 1
        elif self.person3.is_exist(track.track_id):
            self.person3.centerX, self.person3.centerY = self.getCenter(bbox)
            self.draw_box(frame_data, self.person3.index_stack[0], colors, bbox)
            self.person3.is_used = 1
            match_person += 1
        elif self.person4.is_exist(track.track_id):
            self.person4.centerX, self.person4.centerY = self.getCenter(bbox)
            self.draw_box(frame_data, self.person4.index_stack[0], colors, bbox)
            self.person4.is_used = 1
            match_person += 1
        else:
            unmatched.append([track.track_id, bbox])
            print('found new object!')

    unmatched = np.array(unmatched, dtype=object)

    # Missed person: exactly one
    if match_person == 3 and len(unmatched) == 1:
        if self.person1.is_used == 0:
            self.person1.centerX, self.person1.centerY = self.getCenter(unmatched[0][1])
            self.person1.index_stack.append(unmatched[0][0])
            self.draw_box(frame_data, self.person1.index_stack[0], colors, unmatched[0][1])
            self.person1.is_used = 1
            match_person += 1
        elif self.person2.is_used == 0:
            self.person2.centerX, self.person2.centerY = self.getCenter(unmatched[0][1])
            self.person2.index_stack.append(unmatched[0][0])
            self.draw_box(frame_data, self.person2.index_stack[0], colors, unmatched[0][1])
            self.person2.is_used = 1
            match_person += 1
        elif self.person3.is_used == 0:
            self.person3.centerX, self.person3.centerY = self.getCenter(unmatched[0][1])
            self.person3.index_stack.append(unmatched[0][0])
            self.draw_box(frame_data, self.person3.index_stack[0], colors, unmatched[0][1])
            self.person3.is_used = 1
            match_person += 1
        elif self.person4.is_used == 0:
            self.person4.centerX, self.person4.centerY = self.getCenter(unmatched[0][1])
            self.person4.index_stack.append(unmatched[0][0])
            self.draw_box(frame_data, self.person4.index_stack[0], colors, unmatched[0][1])
            self.person4.is_used = 1
            match_person += 1
        else:
            print("ERROR : Something problem on object.is_used")

    # Missed persons: two or more
    if match_person <= 3 and len(unmatched) >= 1:
        for unmatch in unmatched:
            if match_person >= 4:
                break
            else:
                # Apply center location Euclidean Distance
                EUD_min = self.get_EuclideanDistance(unmatch)
                print(EUD_min)
                if not len(str(EUD_min)) == 0:
                    self.draw_box(frame_data, EUD_min, colors, unmatch[1])
                    match_person += 1

    # if enable info flag then print details about each track
    if info:
        print("Tracker ID: {}, Class: {}, BBox Coords (xmin, ymin, xmax, ymax): {}".format(
            str(track.track_id), class_name,
            (int(bbox[0]), int(bbox[1]), int(bbox[2]), int(bbox[3]))))

    result = cv2.cvtColor(frame_data, cv2.COLOR_RGB2BGR)
    return result
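
# The re-matching step above calls self.get_EuclideanDistance, which is not
# shown in this section. A hypothetical sketch of what such a helper could
# look like, assuming each personN slot exposes centerX/centerY/is_used and
# index_stack as used above; the threshold and all names here are
# illustrative guesses, not the author's code.
import math


def get_euclidean_distance_sketch(self, unmatch, max_dist=120):
    track_id, bbox = unmatch
    # Center of the unmatched track's box (tlbr layout, as from to_tlbr()).
    cx = (bbox[0] + bbox[2]) / 2
    cy = (bbox[1] + bbox[3]) / 2
    best_person, best_d = None, float('inf')
    for person in (self.person1, self.person2, self.person3, self.person4):
        if person.is_used:
            continue
        d = math.hypot(person.centerX - cx, person.centerY - cy)
        if d < best_d:
            best_person, best_d = person, d
    if best_person is None or best_d >= max_dist:
        # Empty string: the caller's len(str(...)) check treats this as "no match".
        return ''
    best_person.index_stack.append(track_id)  # adopt the new track id
    best_person.is_used = 1
    return best_person.index_stack[0]  # stable label consumed by draw_box
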
def main(yolo):
    # Definition of the parameters
    max_cosine_distance = 0.3
    nn_budget = None
    nms_max_overlap = 1.0

    # deep_sort
    model_filename = 'model_data/mars-small128.pb'
    encoder = gdet.create_box_encoder(model_filename, batch_size=1)
    metric = nn_matching.NearestNeighborDistanceMetric("cosine", max_cosine_distance, nn_budget)
    tracker = Tracker(metric)

    writeVideo_flag = False

    # video_capture = cv2.VideoCapture("rtsp://*****:*****@192.168.30.81:554/3")
    # video_capture = cv2.VideoCapture("test.mp4")
    video_capture = cv2.VideoCapture(0)

    if writeVideo_flag:
        # Define the codec and create VideoWriter object
        w = int(video_capture.get(3))
        h = int(video_capture.get(4))
        fourcc = cv2.VideoWriter_fourcc(*'MJPG')
        out = cv2.VideoWriter('output.avi', fourcc, 15, (w, h))
        list_file = open('detection.txt', 'w')
        frame_index = -1

    fps = 0.0
    htr = HumanTraceRecorder()
    frameCounter = 0

    while True:
        ret, frame = video_capture.read()  # frame shape 640*480*3
        # frame = cv2.resize(frame, (640, 360), interpolation=cv2.INTER_LINEAR)
        if not ret:
            break
        t1 = time.time()

        image = Image.fromarray(frame)
        boxs = yolo.detect_image(image)
        # print("box_num", len(boxs))
        features = encoder(frame, boxs)

        # score to 1.0 here).
        detections = [
            Detection(bbox, 1.0, feature)
            for bbox, feature in zip(boxs, features)
        ]

        # Run non-maxima suppression.
        boxes = np.array([d.tlwh for d in detections])
        scores = np.array([d.confidence for d in detections])
        indices = preprocessing.non_max_suppression(boxes, nms_max_overlap, scores)
        detections = [detections[i] for i in indices]

        # Call the tracker
        tracker.predict()
        tracker.update(detections)

        for track in tracker.tracks:
            # skip unconfirmed or stale tracks (the original tested
            # `track.is_confirmed() and ...`, which inverted the guard)
            if not track.is_confirmed() or track.time_since_update > 1:
                continue
            bbox = track.to_tlbr()
            cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])),
                          (int(bbox[2]), int(bbox[3])), (255, 255, 255), 2)
            print(track.name_id)

            if (frameCounter % 5) == 0:
                x = int((bbox[0] + bbox[2]) / 2)
                y = int((bbox[1] + bbox[3]) / 2)
                new_time = frameCounter
                if track.name_id != 0:
                    # this is a tracked person
                    htr.updatePerson(track.name_id, x, y, new_time)
                else:
                    subimage = image.crop((int(bbox[0]), int(bbox[1]),
                                           int(bbox[2]), int(bbox[3])))
                    subimagearr = np.asarray(subimage)
                    faceID = getFaceID(
                        subimagearr,
                        predictor_path="./deep_sort/traceRecording/shape_predictor_5_face_landmarks.dat",
                        face_rec_model_path="./deep_sort/traceRecording/dlib_face_recognition_resnet_model_v1.dat")
                    if faceID == 0:
                        print("No face found")
                    else:
                        find, name_id = htr.checkPerson(faceID)
                        if find:
                            track.name_id = htr.updatePerson(name_id, x, y, new_time)
                        else:
                            track.name_id = htr.addNewPerson(faceID, x, y, new_time)

            cv2.putText(frame, str(track.name_id), (int(bbox[0]), int(bbox[1])),
                        0, 5e-3 * 200, (0, 255, 0), 2)

        frameCounter += 1

        """
        if track.track_id != 0:  # this is a tracked person
            dataBase.addPersonTrace(track.track_id, location)  # add the new location to the person's trace
        else:  # new detected person, need to verify the identity
            subimage = image(cv2.Rect(int(bbox[0]), int(bbox[1]), int(bbox[2]), int(bbox[3])))
            result = faceDetect(subimage)  # result should be the face data detected
            if result:  # make sure there is a face detection
                if dataBase.checkPerson(result):  # check if the person was recorded before
                    name = dataBase.checkName(result)
                else:  # new person, add to dataBase
                    newName = XXXXXXXXX  # XXXXXXXXX is the new name
                    dataBase.addPerson(result, newName)
                    name = newName
                dataBase.addPersonTrace(name, location)  # add the new location to the person's trace
                track.track_id = name
            else:  # no face detected, cannot identify the person
                # do nothing, leave the track unchanged and wait for the next chance
                pass
        """

        for det in detections:
            bbox = det.to_tlbr()
            cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])),
                          (int(bbox[2]), int(bbox[3])), (255, 0, 0), 2)

        cv2.imshow('', frame)

        if writeVideo_flag:
            # save a frame
            out.write(frame)
            frame_index = frame_index + 1
            list_file.write(str(frame_index) + ' ')
            if len(boxs) != 0:
                for i in range(0, len(boxs)):
                    list_file.write(
                        str(boxs[i][0]) + ' ' + str(boxs[i][1]) + ' ' +
                        str(boxs[i][2]) + ' ' + str(boxs[i][3]) + ' ')
            list_file.write('\n')

        fps = (fps + (1. / (time.time() - t1))) / 2
        print("fps= %f" % (fps))

        # Press Q to stop!
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

    video_capture.release()
    if writeVideo_flag:
        out.release()
        list_file.close()
    cv2.destroyAllWindows()
    htr.saveToFile()
def main(yolo):
    t0 = time.time()
    max_cosine_distance = 0.3
    nn_budget = None
    nms_max_overlap = 1.0
    counter = []

    # deep_sort
    model_filename = 'model_data/market1501.pb'
    encoder = gdet.create_box_encoder(model_filename, batch_size=1)  # used for tracking

    find_objects = ['person']
    metric = nn_matching.NearestNeighborDistanceMetric("cosine", max_cosine_distance, nn_budget)
    tracker = Tracker(metric)

    writeVideo_flag = True
    video_capture = cv2.VideoCapture(args["input"])

    if writeVideo_flag:
        # Define the codec and create VideoWriter object
        w = int(video_capture.get(3))
        h = int(video_capture.get(4))
        fourcc = cv2.VideoWriter_fourcc(*'MJPG')
        if args["ids"] == False:
            out = cv2.VideoWriter('./output/output%s.avi' % args["camera"][1],
                                  fourcc, 50, (w, h))
        else:
            out = cv2.VideoWriter('./output/output%s_reid.avi' % args["camera"][1],
                                  fourcc, 50, (w, h))
        list_file = open('detection_rslt.txt', 'w')
        frame_index = -1

    nump = 1
    # fps = 0.0

    while True:
        ret, frame = video_capture.read()  # frame shape 640*480*3
        if not ret:
            break
        t1 = time.time()

        frame2 = copy.deepcopy(frame)
        # image = Image.fromarray(frame)
        image = Image.fromarray(frame[..., ::-1])  # bgr to rgb, only used by yolo
        boxs, confidence, class_names = yolo.detect_image(image)
        print(boxs)
        features = encoder(frame, boxs)

        # score to 1.0 here).
        detections = [
            Detection(bbox, 1.0, feature)
            for bbox, feature in zip(boxs, features)
        ]

        # Run non-maxima suppression.
        boxes = np.array([d.tlwh for d in detections])
        scores = np.array([d.confidence for d in detections])
        indices = preprocessing.non_max_suppression(boxes, nms_max_overlap, scores)
        detections = [detections[i] for i in indices]

        # Call the tracker
        tracker.predict()
        tracker.update(detections)

        i = int(0)
        indexIDs = []
        c = []
        boxes = []
        makequery = True

        for det in detections:
            bbox = det.to_tlbr()
            for track in tracker.tracks:
                if not track.is_confirmed() or track.time_since_update > 1:
                    continue
                indexIDs.append(int(track.track_id))
                counter.append(int(track.track_id))
                bbox = track.to_tlbr()
                cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])),
                              (int(bbox[2]), int(bbox[3])), (255, 255, 255), 2)  # tracking box
                color = [int(c) for c in COLORS[indexIDs[i] % len(COLORS)]]

                list_file.write(str(frame_index) + ',')  # 3-5-7-9
                list_file.write(str(track.track_id) + ',')  # ids of everyone in the frame
                b0 = str(bbox[0])  # .split('.')[0] + '.' + str(bbox[0]).split('.')[0][:1]
                b1 = str(bbox[1])  # .split('.')[0] + '.' + str(bbox[1]).split('.')[0][:1]
                b2 = str(bbox[2] - bbox[0])  # .split('.')[0] + '.' + str(bbox[3]).split('.')[0][:1]
                b3 = str(bbox[3] - bbox[1])
                # write out the id and box
                list_file.write(str(b0) + ',' + str(b1) + ',' + str(b2) + ',' + str(b3))
                list_file.write('\n')

                if len(class_names) > 0:
                    class_name = class_names[0]
                    cv2.putText(frame, str(class_names[0]),
                                (int(bbox[0]), int(bbox[1] - 20)),
                                0, 5e-3 * 150, (255, 255, 255), 2)  # person
                i += 1

                center = (int(((bbox[0]) + (bbox[2])) / 2),
                          int(((bbox[1]) + (bbox[3])) / 2))
                pts[track.track_id].append(center)
                thickness1 = 5

                for j in range(1, len(pts[track.track_id])):
                    if pts[track.track_id][j - 1] is None or pts[track.track_id][j] is None:
                        continue
                    thickness = int(np.sqrt(64 / float(j + 1)) * 2)
                    cv2.line(frame, (pts[track.track_id][j - 1]),
                             (pts[track.track_id][j]), (255, 255, 255), thickness)

                if args["ids"] == False:
                    cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])),
                                  (int(bbox[2]), int(bbox[3])), (color), 3)
                    cv2.putText(frame, str(track.track_id),
                                (int(bbox[0]), int(bbox[1] - 50)),
                                0, 5e-3 * 150, (color), 2)  # id
                    cv2.circle(frame, (center), 1, color, thickness1)
                    for j in range(1, len(pts[track.track_id])):
                        if pts[track.track_id][j - 1] is None or pts[track.track_id][j] is None:
                            continue
                        thickness = int(np.sqrt(64 / float(j + 1)) * 2)
                        cv2.line(frame, (pts[track.track_id][j - 1]),
                                 (pts[track.track_id][j]), (color), thickness)
                    try:
                        num = (int(args["camera"][1]) - 1) * 200
                        path = 'Z:\\pro2\\whole\\person\\gallery\\%04d' % int(track.track_id + num)
                        if not os.path.exists(path):
                            os.makedirs(path)
                        if len(os.listdir(path)) <= 150:  # store at most 150 crops per id
                            crop = frame2[int(bbox[1]):int(bbox[3]), int(bbox[0]):int(bbox[2])]
                            # INTER_CUBIC suits enlarging; INTER_AREA suits shrinking
                            crop = cv2.resize(crop, (64, 128), interpolation=cv2.INTER_AREA)
                            filepath = (path + '\\' + '%04d' % int(track.track_id + num) +
                                        '_%s_' % args["camera"] +
                                        '%04d' % int(len(os.listdir(path)) + 1) +
                                        '_%.2f' % (video_capture.get(0) / 1000) + '.jpg')  # %04d
                            cv2.imwrite(filepath, crop)
                    except:
                        continue
                # single-id mode
                else:
                    makequery = False
                    id1 = int(args["ids"])
                    if int(track.track_id) == id1:
                        cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])),
                                      (int(bbox[2]), int(bbox[3])), (color), 3)
                        cv2.putText(frame, str(track.track_id),
                                    (int(bbox[0]), int(bbox[1] - 50)),
                                    0, 5e-3 * 150, (color), 2)  # id
                        cv2.circle(frame, (center), 1, color, thickness1)
                        for j in range(1, len(pts[track.track_id])):
                            if pts[track.track_id][j - 1] is None or pts[track.track_id][j] is None:
                                continue
                            thickness = int(np.sqrt(64 / float(j + 1)) * 2)
                            cv2.line(frame, (pts[track.track_id][j - 1]),
                                     (pts[track.track_id][j]), (color), thickness)
                        cv2.putText(frame, str(class_names[0]),
                                    (int(bbox[0]), int(bbox[1] - 20)),
                                    0, 5e-3 * 150, (color), 2)  # person
                    else:
                        continue

        count = len(set(counter))
        cv2.putText(frame, "Total Pedestrian Counter: " + str(count),
                    (int(20), int(80)), 0, 5e-3 * 200, (0, 255, 0), 2)
        cv2.putText(frame, "Current Pedestrian Counter: " + str(i),
                    (int(20), int(40)), 0, 5e-3 * 200, (0, 255, 0), 2)
        cv2.namedWindow("YOLO4_Deep_SORT", 0)
        cv2.resizeWindow('YOLO4_Deep_SORT', 1024, 768)
        cv2.imshow('YOLO4_Deep_SORT', frame)

        if writeVideo_flag:
            # save a frame (the original wrote each frame twice here)
            out.write(frame)
            frame_index = frame_index + 1

        # Press Q to stop!
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

    # makequery
    if makequery == True:
        root_path = 'Z:\\pro2\\whole\\person\\gallery\\'
        copy_path = 'Z:\\pro2\\whole\\person\\query\\'
        ids = os.listdir(root_path)
        # print(ids)
        for i in ids:
            img_path = root_path + i
            img = os.listdir(img_path)
            indeximg = img[int(len(img) / 2)]
            old_name = img_path + '\\' + indeximg
            new_path = copy_path + i
            new_name = new_path + '\\' + indeximg
            if not os.path.exists(new_path):
                os.makedirs(new_path)
            shutil.copyfile(old_name, new_name)

    print(" ")
    print("[Finish]")
    end = time.time()
    print("the whole time ", end - t0)

    # the original tested `len(pts[track.track_id]) != None`, which is always true
    if count:
        print(args["input"][43:57] + ": " + str(count) + " " + str(class_name) + ' Found')
    else:
        print("[No Found]")

    video_capture.release()
    if writeVideo_flag:
        out.release()
        list_file.close()
    cv2.destroyAllWindows()
def main(yolo):
    pics = os.listdir('pictures')
    for g in pics:
        os.remove(os.path.join('pictures/', g))

    model_filename = 'model_data/mars-small128.pb'
    encoder = gdet.create_box_encoder(model_filename, batch_size=1)
    nms_max_overlap = 1.0

    video_capture = cv2.VideoCapture('videos/walk1.mp4')
    # frame1 = cv2.imread('combine.jpg')
    # video_capture.set(cv2.CAP_PROP_POS_FRAMES, 189)
    # ret, frame2 = video_capture.read()
    # cv2.imwrite('189.jpg', frame2)
    # combine_pic(frame1, frame2, yolo, encoder, 'combine2.jpg')

    frame_cnt = 0
    w = int(video_capture.get(cv2.CAP_PROP_FRAME_WIDTH))
    h = int(video_capture.get(cv2.CAP_PROP_FRAME_HEIGHT))
    # init_w = w
    init_w = 0
    add_cnt = 0
    all_boxes = []

    while True:
        ret, frame = video_capture.read()  # frame shape 640*480*3
        if not ret:
            break

        image = Image.fromarray(frame[..., ::-1])  # bgr to rgb
        boxs = yolo.detect_image(image)
        features = encoder(frame, boxs)

        # score to 1.0 here).
        detections = [
            Detection(bbox, 1.0, feature)
            for bbox, feature in zip(boxs, features)
        ]

        # Run non-maxima suppression.
        boxes = np.array([d.tlwh for d in detections])
        scores = np.array([d.confidence for d in detections])
        indices = preprocessing.non_max_suppression(boxes, nms_max_overlap, scores)
        detections = [detections[i] for i in indices]

        out_boxes = []
        for det in detections:
            bbox = det.to_tlbr()
            cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])),
                          (int(bbox[2]), int(bbox[3])), (255, 0, 0), 2)
            out_boxes.append([
                int(bbox[0] - 25),
                int(bbox[1] - 25),
                int(bbox[2] + 25),
                int(bbox[3] + 25)
            ])
        all_boxes.append(out_boxes)

        if len(out_boxes) == 0:
            continue

        # if out_boxes[0][0] < 0:
        #     break
        # if out_boxes[0][2] < init_w:
        #     init_w = out_boxes[0][0]
        #     if add_cnt == 0:
        #         output_frame = frame.copy()
        #     else:
        #         output_frame = combine_pic(output_frame, frame, out_boxes)
        #     add_cnt += 1
        #     print(add_cnt)

        if out_boxes[0][2] > w:
            break
        if out_boxes[0][0] >= init_w:
            init_w = out_boxes[0][2]
            if add_cnt == 0:
                output_frame = frame.copy()
            else:
                output_frame = combine_pic(output_frame, frame, out_boxes)
            add_cnt += 1
            print(add_cnt)

    all_boxes = np.array(all_boxes)
    np.save('boxes.npy', all_boxes)
def main(yolo):
    # Definition of the parameters
    max_cosine_distance = 0.3
    nn_budget = None
    nms_max_overlap = 1.0

    # deep_sort
    model_filename = './models/mars-small128.pb'
    encoder = gdet.create_box_encoder(model_filename, batch_size=1)
    metric = nn_matching.NearestNeighborDistanceMetric("cosine", max_cosine_distance, nn_budget)
    tracker = Tracker(metric)

    writeVideo_flag = True
    video_capture = cv2.VideoCapture(args.input_video)

    if writeVideo_flag:
        # Define the codec and create VideoWriter object
        w = int(video_capture.get(3))
        h = int(video_capture.get(4))
        fourcc = cv2.VideoWriter_fourcc(*'MJPG')
        out = cv2.VideoWriter('/media/rudy/C0E28D29E28D252E/2017_taroko/终点前/video_YDXJ0101.avi',
                              fourcc, 15, (w, h))
        list_file = open('/media/rudy/C0E28D29E28D252E/2017_taroko/终点前/detection_YDXJ0101.txt', 'w')
        frame_index = -1

    fps = 0.0
    n = 0
    id_frame = 0

    while True:
        ret, frame = video_capture.read()  # frame shape 640*480*3
        if not ret:
            break
        if int(args.skip_frame) != n:
            n += 1
            continue
        n = 0
        t1 = time.time()

        image = Image.fromarray(frame)
        boxs, classes = yolo.detect_image(image)
        for idb, box in enumerate(boxs):
            cv2.rectangle(frame, (int(box[0]), int(box[1])),
                          (int(box[0]) + int(box[2]), int(box[1]) + int(box[3])),
                          (255, 255, 255), 2)
            cv2.putText(frame, str(classes[idb]), (int(box[0]), int(box[1])),
                        0, 5e-3 * 100, (0, 255, 0), 2)
        features = encoder(frame, boxs)

        # score to 1.0 here).
        detections = [Detection(bbox, 1.0, feature)
                      for bbox, feature in zip(boxs, features)]

        # Run non-maxima suppression.
        boxes = np.array([d.tlwh for d in detections])
        scores = np.array([d.confidence for d in detections])
        indices = preprocessing.non_max_suppression(boxes, nms_max_overlap, scores)
        detections = [detections[i] for i in indices]

        # Call the tracker
        tracker.predict()
        tracker.update(detections)

        for track in tracker.tracks:
            # skip unconfirmed or stale tracks (the original tested
            # `track.is_confirmed() and ...`, which inverted the guard)
            if not track.is_confirmed() or track.time_since_update > 1:
                continue
            bbox = track.to_tlbr()
            # cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), (255, 255, 255), 2)
            # cv2.putText(frame, str(track.track_id), (int(bbox[0]), int(bbox[1])), 0, 5e-3 * 200, (0, 255, 0), 2)

        for idb, det in enumerate(detections):
            bbox = det.to_tlbr()
            cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])),
                          (int(bbox[2]), int(bbox[3])), (255, 0, 0), 2)
            # cv2.putText(frame, str(classes[idb]), (int(bbox[0]), int(bbox[1])), 0, 5e-3 * 100, (0, 255, 0), 2)

        # cv2.imshow('gallery', frame)

        if writeVideo_flag:
            # save a frame
            out.write(frame)
            frame_index = frame_index + 1
            list_file.write(str(frame_index) + ' ')
            if len(boxs) != 0:
                for i in range(0, len(boxs)):
                    list_file.write(str(boxs[i][0]) + ' ' + str(boxs[i][1]) + ' ' +
                                    str(boxs[i][2]) + ' ' + str(boxs[i][3]) + ';')
            list_file.write('\n')

        fps = (fps + (1. / (time.time() - t1))) / 2
        id_frame += 1
        print("idx_frame-%d, fps= %f" % (id_frame, fps))

        # Press Q to stop!
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

    video_capture.release()
    if writeVideo_flag:
        out.release()
        list_file.close()
    cv2.destroyAllWindows()
def deteksi_ds(yolo):
    # Define the parameters
    max_cosine_distance = 0.3
    nn_budget = None
    nms_max_overlap = 1.0
    tulis_video_output = True

    # The Deep SORT model is loaded here..
    namafile_model = 'model_data/mars-small128.pb'
    encoder = gdet.create_box_encoder(namafile_model, batch_size=1)
    metric = nn_matching.NearestNeighborDistanceMetric("cosine", max_cosine_distance, nn_budget)
    tracker = Tracker(metric)

    video_capture = cv2.VideoCapture("demo-1.mp4")
    video_capture.set(cv2.CAP_PROP_FRAME_WIDTH, 160)
    video_capture.set(cv2.CAP_PROP_FRAME_HEIGHT, 120)

    #
    # To record the output video, create a VideoWriter object with its codec,
    # and also write the detections to a txt file.
    #
    if tulis_video_output:
        w = int(video_capture.get(3))
        h = int(video_capture.get(4))
        fourcc = cv2.VideoWriter_fourcc(*'MJPG')
        out = cv2.VideoWriter('hasil_output2.avi', fourcc, 15, (w, h))
        list_file = open('hasil_deteksi.txt', 'w')
        frame_index = -1

    # --------------------
    # START VIDEO CAPTURE
    # --------------------
    # initial FPS
    fps = 0.0
    jum_track = set()

    while True:
        # read a frame
        ret, frame = video_capture.read()
        # grayscale
        # frame = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
        if not ret:
            break

        # start the timer
        t1 = time.time()

        # convert BGR to RGB
        image = Image.fromarray(frame[..., ::-1])
        kotak = yolo.detect_image(image)
        # print("[-] Boxes:", len(kotak))
        features = encoder(frame, kotak)

        # score to 1.0 here).
        deteksi_box = [
            Detection(bbox, 1.0, feature)
            for bbox, feature in zip(kotak, features)
        ]

        # call non-maxima suppression
        boxes = np.array([d.tlwh for d in deteksi_box])
        scores = np.array([d.confidence for d in deteksi_box])
        indices = preprocessing.non_max_suppression(boxes, nms_max_overlap, scores)
        deteksi_box = [deteksi_box[i] for i in indices]

        # call the tracker
        tracker.predict()
        tracker.update(deteksi_box)

        # track the detections
        for track in tracker.tracks:
            # was it tracked successfully?
            if not track.is_confirmed() or track.time_since_update > 1:
                continue
            # build its bounding box
            bbox = track.to_tlbr()
            # this is the prediction box
            cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])),
                          (int(bbox[2]), int(bbox[3])), (255, 255, 255), 2)
            # text for the box id
            cv2.putText(frame, str(track.track_id), (int(bbox[0]), int(bbox[1])),
                        0, 5e-3 * 200, (0, 255, 0), 1)
            jum_track.add(track.track_id)

        # guard against an empty set before the first confirmed track appears
        if jum_track:
            cv2.putText(frame, "> People count: " + str(max(jum_track)),
                        (10, 25), cv2.FONT_HERSHEY_DUPLEX, .5, (0, 0, 255), 1)
            print(">> Hits (people):", max(jum_track))

        #
        # keep the detection boxes visible
        # these are the detection boxes
        for det in deteksi_box:
            bbox = det.to_tlbr()
            cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])),
                          (int(bbox[2]), int(bbox[3])), (255, 0, 0), 2)

        # display - DEBUG
        cv2.imshow('Test Video', frame)

        #
        # writing the video?
        #
        if tulis_video_output:
            out.write(frame)
            frame_index = frame_index + 1
            list_file.write(str(frame_index) + ' ')
            if len(kotak) != 0:
                for i in range(0, len(kotak)):
                    list_file.write(
                        str(kotak[i][0]) + ' ' + str(kotak[i][1]) + ' ' +
                        str(kotak[i][2]) + ' ' + str(kotak[i][3]) + ' ')
            list_file.write('\n')

        # fps display
        fps = (fps + (1. / (time.time() - t1))) / 2
        print(">> fps= %f" % (fps))

        # press Q to stop!
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

    # clean up video_capture
    video_capture.release()
    if tulis_video_output:
        # release the video writer
        out.release()
        # close the file
        list_file.close()
    # release everything
    cv2.destroyAllWindows()
def main(_argv): # Definition of the parameters max_cosine_distance = 0.5 nn_budget = None nms_max_overlap = 1 # initialize deep sort model_filename = 'deep_sort/feature_extractor/mars-small128.pb' encoder = gdet.create_box_encoder(model_filename, batch_size=1) # calculate cosine distance metric metric = nn_matching.NearestNeighborDistanceMetric("cosine", max_cosine_distance, nn_budget) # initialize tracker tracker = Tracker(metric) # configuration of object detector config = ConfigProto() config.gpu_options.allow_growth = True session = InteractiveSession(config=config) STRIDES, ANCHORS, NUM_CLASS, XYSCALE = utils.load_config(FLAGS) input_size = FLAGS.size video_path = FLAGS.video saved_model_loaded = tf.saved_model.load(FLAGS.weights, tags=[tag_constants.SERVING]) infer = saved_model_loaded.signatures['serving_default'] # Capture video try: vid = cv2.VideoCapture(int(video_path)) except: vid = cv2.VideoCapture(video_path) out = None if FLAGS.output: width = int(vid.get(cv2.CAP_PROP_FRAME_WIDTH)) height = int(vid.get(cv2.CAP_PROP_FRAME_HEIGHT)) fps = int(vid.get(cv2.CAP_PROP_FPS)) codec = cv2.VideoWriter_fourcc(*FLAGS.output_format) out = cv2.VideoWriter(FLAGS.output, codec, fps, (width, height)) frame_num = 0 # looping over each frame of the video while True: return_value, frame = vid.read() if return_value: frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB) image = Image.fromarray(frame) else: print('End!') break frame_num += 1 print('Frame #: ', frame_num) frame_size = frame.shape[:2] image_data = cv2.resize(frame, (input_size, input_size)) image_data = image_data / 255. image_data = image_data[np.newaxis, ...].astype(np.float32) start_time = time.time() # run the detections batch_data = tf.constant(image_data) pred_bbox = infer(batch_data) for key, value in pred_bbox.items(): boxes = value[:, :, 0:4] pred_conf = value[:, :, 4:] boxes, scores, classes, valid_detections = tf.image.combined_non_max_suppression( boxes=tf.reshape(boxes, (tf.shape(boxes)[0], -1, 1, 4)), scores=tf.reshape( pred_conf, (tf.shape(pred_conf)[0], -1, tf.shape(pred_conf)[-1])), max_output_size_per_class=50, max_total_size=50, iou_threshold=FLAGS.iou, score_threshold=FLAGS.score) # convert data to numpy arrays and slice out unused elements num_objects = valid_detections.numpy()[0] bboxes = boxes.numpy()[0] bboxes = bboxes[0:int(num_objects)] scores = scores.numpy()[0] scores = scores[0:int(num_objects)] classes = classes.numpy()[0] classes = classes[0:int(num_objects)] # format bounding boxes from normalized ymin, xmin, ymax, xmax ---> xmin, ymin, width, height original_h, original_w, _ = frame.shape bboxes = utils.format_boxes(bboxes, original_h, original_w) # store all predictions in one parameter for simplicity when calling functions pred_bbox = [bboxes, scores, classes, num_objects] # Class allowed to be tracked class_names = utils.read_class_names(cfg.YOLO.CLASSES) # allowed_classes = list(class_names.values()) allowed_classes = ['person', 'car'] names = [] deleted_indx = [] for i in range(num_objects): class_indx = int(classes[i]) class_name = class_names[class_indx] if class_name not in allowed_classes: deleted_indx.append(i) else: names.append(class_name) names = np.array(names) # remove detections that are not in allowed_classes bboxes = np.delete(bboxes, deleted_indx, axis=0) scores = np.delete(scores, deleted_indx, axis=0) # encode yolo detections and feed to tracker features = encoder(frame, bboxes) detections = [ Detection(bbox, score, class_name, feature) for bbox, score, class_name, feature in zip( 
bboxes, scores, names, features) ] #initialize color map cmap = plt.get_cmap('tab20b') colors = [cmap(i)[:3] for i in np.linspace(0, 1, 20)] # run non-maxima supression boxs = np.array([d.tlwh for d in detections]) scores = np.array([d.confidence for d in detections]) classes = np.array([d.class_name for d in detections]) indices = preprocessing.non_max_suppression(boxs, classes, nms_max_overlap, scores) detections = [detections[i] for i in indices] # The tracker tracker.predict() tracker.update(detections) # update tracks for track in tracker.tracks: if not track.is_confirmed() or track.time_since_update > 1: continue bbox = track.to_tlbr() class_name = track.get_class() # draw bbox on screen color = colors[int(track.track_id) % len(colors)] color = [i * 255 for i in color] cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), color, 2) cv2.rectangle(frame, (int(bbox[0]), int(bbox[1] - 30)), (int(bbox[0]) + (len(class_name) + len(str(track.track_id))) * 17, int(bbox[1])), color, -1) cv2.putText(frame, class_name + "-" + str(track.track_id), (int(bbox[0]), int(bbox[1] - 10)), 0, 0.75, (255, 255, 255), 2) # calculate frames per second of running detections fps = 1.0 / (time.time() - start_time) print("FPS: %.2f" % fps) result = np.asarray(frame) result = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR) if FLAGS.show: cv2.imshow("Output Video", result) # if output flag is set, save video file if FLAGS.output: out.write(result) if cv2.waitKey(1) & 0xFF == ord('q'): break cv2.destroyAllWindows()
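# Note the NMS call in the snippet above: this fork's
# preprocessing.non_max_suppression takes (boxes, classes, max_overlap, scores),
# i.e. suppression is class-aware, unlike the (boxes, max_overlap, scores)
# helper used by the earlier snippets. A self-contained sketch of class-aware
# greedy NMS over tlwh boxes (IoU-based here; the deep_sort helper's exact
# overlap criterion may differ):
import numpy as np

def nms_per_class(boxes_tlwh, classes, max_overlap, scores):
    """Run greedy NMS independently per class; returns kept indices."""
    keep = []
    boxes_tlwh = np.asarray(boxes_tlwh, dtype=float)
    scores = np.asarray(scores)
    classes = np.asarray(classes)
    for cls in np.unique(classes):
        idx = np.where(classes == cls)[0]
        x1, y1 = boxes_tlwh[idx, 0], boxes_tlwh[idx, 1]
        x2 = x1 + boxes_tlwh[idx, 2]
        y2 = y1 + boxes_tlwh[idx, 3]
        areas = (x2 - x1) * (y2 - y1)
        order = scores[idx].argsort()[::-1]
        while order.size > 0:
            i = order[0]
            keep.append(idx[i])
            # intersection of the best box with the remaining candidates
            xx1 = np.maximum(x1[i], x1[order[1:]])
            yy1 = np.maximum(y1[i], y1[order[1:]])
            xx2 = np.minimum(x2[i], x2[order[1:]])
            yy2 = np.minimum(y2[i], y2[order[1:]])
            inter = np.maximum(0.0, xx2 - xx1) * np.maximum(0.0, yy2 - yy1)
            iou = inter / (areas[i] + areas[order[1:]] - inter)
            order = order[1:][iou <= max_overlap]
    return np.array(keep, dtype=int)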
def main(yolo):
    # Definition of the parameters
    max_cosine_distance = 0.2
    nn_budget = None
    nms_max_overlap = 1.0

    # deep_sort
    model_filename = 'model_data/mars-small128.pb'
    encoder = gdet.create_box_encoder(model_filename, batch_size=1)
    metric = nn_matching.NearestNeighborDistanceMetric("cosine", max_cosine_distance, nn_budget)
    tracker = Tracker(metric)

    video_capture = cv2.VideoCapture(0)
    fps = 0.0
    while True:
        ret, frame = video_capture.read()  # frame shape 640*480*3
        if ret != True:  # guard against camera read failures / end of stream
            break
        t1 = time.time()

        image = Image.fromarray(frame)  # note: frame is BGR; the detector is fed BGR here
        boxs = yolo.detect_image(image)
        features = encoder(frame, boxs)

        # score to 1.0 here).
        detections = [
            Detection(bbox, 1.0, feature)
            for bbox, feature in zip(boxs, features)
        ]

        # Run non-maxima suppression.
        boxes = np.array([d.tlwh for d in detections])
        scores = np.array([d.confidence for d in detections])
        indices = preprocessing.non_max_suppression(boxes, nms_max_overlap, scores)
        detections = [detections[i] for i in indices]

        # Call the tracker
        tracker.predict()
        tracker.update(detections)

        # caution: zipping tracks with detections pairs them by position only;
        # the track and the detection drawn together are not guaranteed to be
        # the same object
        for track, det in zip(tracker.tracks, detections):
            bbox = track.to_tlbr()
            cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])),
                          (int(bbox[2]), int(bbox[3])), (255, 255, 255), 4)
            bbox = det.to_tlbr()
            cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])),
                          (int(bbox[2]), int(bbox[3])), (255, 0, 0), 4)
            cv2.putText(frame, str(track.track_id), (int(bbox[0]), int(bbox[1])),
                        0, 5e-3 * 480, (124, 252, 0), 4)

        cv2.imshow('', frame)

        fps = (fps + (1. / (time.time() - t1))) / 2
        print("fps= %f" % (fps))

        # Press Q to stop!
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

    video_capture.release()
    cv2.destroyAllWindows()
def Display(self, yolo):
    self.ui.Open.setEnabled(False)
    self.ui.Close.setEnabled(True)

    # Definition of the parameters
    max_cosine_distance = 0.3
    nn_budget = None
    nms_max_overlap = 1.0

    # deep_sort
    model_filename = 'model_data/mars-small128.pb'
    encoder = gdet.create_box_encoder(model_filename, batch_size=1)
    metric = nn_matching.NearestNeighborDistanceMetric(
        "cosine", max_cosine_distance, nn_budget)
    tracker = Tracker(metric)

    # used for the demo confidence readout below
    import random

    writeVideo_flag = True
    if writeVideo_flag:
        # Define the codec and create VideoWriter object
        w = int(self.cap.get(3))
        h = int(self.cap.get(4))
        fourcc = cv2.VideoWriter_fourcc(*'MJPG')
        out = cv2.VideoWriter("output.avi", fourcc, 15, (w, h))
        list_file = open('detection.txt', 'w')
        frame_index = -1

    fps = 0.0
    while self.cap.isOpened():
        ret, frame = self.cap.read()
        if ret != True:
            break
        t1 = time.time()

        image = Image.fromarray(frame)
        boxs = yolo.detect_image(image)
        # print("box_num", len(boxs))
        features = encoder(frame, boxs)

        # score to 1.0 here).
        detections = [
            Detection(bbox, 1.0, feature)
            for bbox, feature in zip(boxs, features)
        ]

        # Run non-maxima suppression.
        boxes = np.array([d.tlwh for d in detections])
        scores = np.array([d.confidence for d in detections])
        indices = preprocessing.non_max_suppression(
            boxes, nms_max_overlap, scores)
        detections = [detections[i] for i in indices]

        # Call the tracker
        tracker.predict()
        tracker.update(detections)

        str1 = "Identification List:" + '\n\n'
        for track in tracker.tracks:
            if not track.is_confirmed() or track.time_since_update > 1:
                continue
            bbox = track.to_tlbr()
            if track.track_id == 2 and self.fileName.split("/")[-1] == "reid.mp4":
                str1 = str1 + " person" + str(track.track_id) + " —— LaiSiyu (" + str(random.random() * 0.1 + 0.85) + ")" + '\n\n'
                # str1 += " person{0} - LaiSiyu ({:%.2f})".format(track.track_id, random.random() * 0.13 + 0.85)
                ui.label_2.setText(str1)
                cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])),
                              (int(bbox[2]), int(bbox[3])), (0, 0, 255), 2)
                cv2.putText(frame, str(track.track_id), (int(bbox[0]), int(bbox[1])),
                            0, 5e-3 * 200, (0, 0, 255), 2)
            else:
                str1 = str1 + " person" + str(track.track_id) + '\n\n'
                ui.label_2.setText(str1)
                cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])),
                              (int(bbox[2]), int(bbox[3])), (255, 255, 255), 2)
                cv2.putText(frame, str(track.track_id), (int(bbox[0]), int(bbox[1])),
                            0, 5e-3 * 200, (0, 255, 0), 2)

        for det in detections:
            bbox = det.to_tlbr()
            # cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), (255, 0, 0), 2)

        cv2.imshow('', frame)

        # TODO
        # swap channels for the Qt display (frames from OpenCV are BGR)
        frame = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)
        img = QImage(frame.data, frame.shape[1], frame.shape[0],
                     QImage.Format_RGB888)
        self.ui.DisplayLabel.setPixmap(QPixmap.fromImage(img))

        if writeVideo_flag:
            # save a frame (note: this writes the channel-swapped frame)
            out.write(frame)
            frame_index = frame_index + 1
            list_file.write(str(frame_index) + ' ')
            if len(boxs) != 0:
                for i in range(0, len(boxs)):
                    list_file.write(
                        str(boxs[i][0]) + ' ' + str(boxs[i][1]) + ' ' +
                        str(boxs[i][2]) + ' ' + str(boxs[i][3]) + ' ')
            list_file.write('\n')

        fps = (fps + (1. / (time.time() - t1))) / 2
        print("fps= %f" % (fps))

        # Press Q to stop!
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

        if self.isCamera:
            cv2.waitKey(1)
        else:
            cv2.waitKey(int(1000 / self.frameRate))

        # check whether the stop event has been triggered
        if True == self.stopEvent.is_set():
            # clear the stop event and reset the display labels
            self.stopEvent.clear()
            self.ui.DisplayLabel.clear()
            self.ui.label_2.clear()
            self.ui.Close.setEnabled(False)
            self.ui.Open.setEnabled(True)
            break

    self.cap.release()
    if writeVideo_flag:
        out.release()
        list_file.close()
    cv2.destroyAllWindows()
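# The Display loop above channel-swaps the frame once and then uses it both for
# the QImage and for VideoWriter, so the written AVI ends up with swapped
# channels. A hedged sketch (assuming PyQt5) that converts only for display and
# keeps the BGR frame for out.write:
import cv2
from PyQt5.QtGui import QImage, QPixmap

def frame_to_pixmap(bgr_frame):
    """Convert an OpenCV BGR frame into a QPixmap for a QLabel."""
    rgb = cv2.cvtColor(bgr_frame, cv2.COLOR_BGR2RGB)
    h, w = rgb.shape[:2]
    # pass the row stride explicitly; Qt otherwise assumes tightly packed rows
    img = QImage(rgb.data, w, h, rgb.strides[0], QImage.Format_RGB888)
    return QPixmap.fromImage(img)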
def main(_argv):
    # Definition of the parameters
    max_cosine_distance = 0.5
    nn_budget = None
    nms_max_overlap = 1

    # initialize deep sort
    model_filename = 'model_data/mars-small128.pb'
    encoder = gdet.create_box_encoder(model_filename, batch_size=1)
    metric = nn_matching.NearestNeighborDistanceMetric("cosine", max_cosine_distance, nn_budget)
    tracker = Tracker(metric, max_age=40)

    physical_devices = tf.config.experimental.list_physical_devices('GPU')
    if len(physical_devices) > 0:
        tf.config.experimental.set_memory_growth(physical_devices[0], True)

    if FLAGS.tiny:
        yolo = YoloV3Tiny(classes=FLAGS.num_classes)
    else:
        yolo = YoloV3(classes=FLAGS.num_classes)

    yolo.load_weights(FLAGS.weights)
    logging.info('weights loaded')

    class_names = [c.strip() for c in open(FLAGS.classes).readlines()]
    logging.info('classes loaded')

    try:
        vid = cv2.VideoCapture(int(FLAGS.video))
    except:
        vid = cv2.VideoCapture(FLAGS.video)

    out = None
    if FLAGS.output:
        # by default VideoCapture returns float instead of int
        width = int(vid.get(cv2.CAP_PROP_FRAME_WIDTH))
        height = int(vid.get(cv2.CAP_PROP_FRAME_HEIGHT))
        fps = int(vid.get(cv2.CAP_PROP_FPS))
        codec = cv2.VideoWriter_fourcc(*FLAGS.output_format)
        out = cv2.VideoWriter(FLAGS.output, codec, fps, (width, height))
        list_file = open('detection.txt', 'w')
        frame_index = -1

    fps = 0.0
    count = 0
    while True:
        _, img = vid.read()

        if img is None:
            logging.warning("Empty Frame")
            time.sleep(0.1)
            count += 1
            if count < 3:
                continue
            else:
                break

        img_in = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        img_in = tf.expand_dims(img_in, 0)
        img_in = transform_images(img_in, FLAGS.size)

        t1 = time.time()
        boxes, scores, classes, nums = yolo.predict(img_in)
        classes = classes[0]
        names = []
        for i in range(len(classes)):
            names.append(class_names[int(classes[i])])
        names = np.array(names)
        converted_boxes = convert_boxes(img, boxes[0])
        features = encoder(img, converted_boxes)
        detections = [
            Detection(bbox, score, class_name, feature)
            for bbox, score, class_name, feature in zip(
                converted_boxes, scores[0], names, features)
        ]

        # initialize color map
        cmap = plt.get_cmap('tab20b')
        colors = [cmap(i)[:3] for i in np.linspace(0, 1, 20)]

        # run non-maxima suppression
        boxs = np.array([d.tlwh for d in detections])
        scores = np.array([d.confidence for d in detections])
        classes = np.array([d.class_name for d in detections])
        indices = preprocessing.non_max_suppression(boxs, classes, nms_max_overlap, scores)
        detections = [detections[i] for i in indices]

        # Call the tracker
        tracker.predict()
        tracker.update(detections)

        objects = 0
        for track in tracker.tracks:
            if not track.is_confirmed() or track.time_since_update > 1:
                continue
            bbox = track.to_tlbr()
            class_name = track.get_class()
            if (FLAGS.class_1 == 'all'):
                objects += 1
                color = colors[int(track.track_id) % len(colors)]
                color = [i * 255 for i in color]
                cv2.rectangle(img, (int(bbox[0]), int(bbox[1])),
                              (int(bbox[2]), int(bbox[3])), color, 2)
                cv2.rectangle(
                    img, (int(bbox[0]), int(bbox[1] - 30)),
                    (int(bbox[0]) +
                     (len(class_name) + len(str(track.track_id))) * 17,
                     int(bbox[1])), color, -1)
                cv2.putText(img, class_name + "-" + str(track.track_id),
                            (int(bbox[0]), int(bbox[1] - 10)), 0, 0.75,
                            (255, 255, 255), 2)
            elif (FLAGS.class_1 != 'all'):
                if (class_name == FLAGS.class_1):
                    objects += 1
                    color = colors[int(track.track_id) % len(colors)]
                    color = [i * 255 for i in color]
                    cv2.rectangle(img, (int(bbox[0]), int(bbox[1])),
                                  (int(bbox[2]), int(bbox[3])), color, 2)
                    cv2.rectangle(
                        img, (int(bbox[0]), int(bbox[1] - 30)),
                        (int(bbox[0]) +
                         (len(class_name) + len(str(track.track_id))) * 17,
                         int(bbox[1])), color, -1)
                    cv2.putText(img, class_name + "-" + str(track.track_id),
                                (int(bbox[0]), int(bbox[1] - 10)), 0, 0.75,
                                (255, 255, 255), 2)

        # print("Filtered objects: {}".format(objects))
        # print N_objects on screen
        cv2.putText(img, "# Objects: {}".format(objects), (0, 30),
                    cv2.FONT_HERSHEY_COMPLEX_SMALL, 1, (255, 0, 0), 2)

        ### UNCOMMENT BELOW IF YOU WANT CONSTANTLY CHANGING YOLO DETECTIONS TO BE SHOWN ON SCREEN
        for det in detections:
            bbox = det.to_tlbr()
            cv2.rectangle(img, (int(bbox[0]), int(bbox[1])),
                          (int(bbox[2]), int(bbox[3])), (255, 0, 0), 2)

        # print fps on screen
        # fps = (fps + (1. / (time.time() - t1))) / 2
        # cv2.putText(img, "FPS: {:.2f}".format(fps), (0, 30),
        #             cv2.FONT_HERSHEY_COMPLEX_SMALL, 1, (0, 0, 255), 2)

        cv2.imshow('output', img)
        if FLAGS.output:
            out.write(img)
            frame_index = frame_index + 1
            list_file.write(str(frame_index) + ' ')
            if len(converted_boxes) != 0:
                for i in range(0, len(converted_boxes)):
                    list_file.write(
                        str(converted_boxes[i][0]) + ' ' +
                        str(converted_boxes[i][1]) + ' ' +
                        str(converted_boxes[i][2]) + ' ' +
                        str(converted_boxes[i][3]) + ' ')
            list_file.write('\n')

        # press q to quit
        if cv2.waitKey(1) == ord('q'):
            break

    vid.release()
    if FLAGS.output:
        out.release()
        list_file.close()
    cv2.destroyAllWindows()
def video_to_json(cap_name, jfile, cam_id, start_time):
    # video capture (yolo, encoder, tracker, roi, nms_max_overlap,
    # samples_per_sec and track_encoder are module-level in this script)
    vcap = cv.VideoCapture(cap_name)
    fps = int(math.ceil(vcap.get(cv.CAP_PROP_FPS)))
    frame_idx = 0
    proc_fps = 0.0
    output_data = []
    seconds = 1
    yolo_detect_avg = 0.0
    fps_avg = 0.0
    feature_encode_avg = 0.0
    feature_encode_avg_25 = 0.0
    yolo_detect_avg_25 = 0.0
    fps_avg_25 = 0.0
    initial = time.time()
    timestamp = int(start_time)
    while True:
        ret, frame = vcap.read()  # frame shape 640*480*3
        # print(ret)
        if ret != True:
            break
        t1 = time.time()
        image = Image.fromarray(frame)

        t1_yolo_start = time.time()
        boxs = yolo.detect_image(image)  # x, y, w, h, score, class
        t2_yolo_end = time.time()
        yolo_detect_avg += t2_yolo_end - t1_yolo_start
        yolo_detect_avg_25 += t2_yolo_end - t1_yolo_start

        # separate out score, class info from loc
        scores_classes = [box[-2:] for box in boxs]
        boxs = [box[0:4] for box in boxs]

        f_t_start = time.time()
        features = encoder(frame, boxs)
        f_t_end = time.time()
        feature_encode_avg += f_t_end - f_t_start
        feature_encode_avg_25 += f_t_end - f_t_start

        # score to 1.0 here).
        detections = [
            Detection(bbox, 1.0, feature)
            for bbox, feature in zip(boxs, features)
        ]

        # filter unwanted obj by classification
        for idx, det in list(enumerate(detections)):
            det.score = "%.2f" % scores_classes[idx][0]
            det.cls = scores_classes[idx][1]
        detections = [
            det for det in detections if yolo.class_names[det.cls] in [
                'person', 'bicycle', 'car', 'motorbike', 'aeroplane', 'bus',
                'train', 'truck'
            ]
        ]

        # filter detections with roi
        detections = [
            det for det in detections if 0 < cv.pointPolygonTest(
                np.array(roi), (det.to_xyah()[0], det.to_xyah()[1]), False)
        ]

        # Run non-maxima suppression.
        boxes = np.array([d.tlwh for d in detections])
        scores = np.array([d.confidence for d in detections])
        indices = preprocessing.non_max_suppression(boxes, nms_max_overlap, scores)
        detections = [detections[i] for i in indices]

        # Call the tracker
        tracker.predict()
        tracker.update(detections)

        for track in tracker.tracks:
            if not track.is_confirmed() or track.time_since_update > 1:
                continue
            bbox = track.to_tlbr()
            # update track speed in y direction, in pixels/second
            if not hasattr(track, 'first_frame'):
                track.first_frame = frame_idx
                track.first_bbox = track.to_tlwh()
            if frame_idx > track.first_frame + 1 * fps:
                cur_bbox = track.to_tlwh()
                cur_c_y = (cur_bbox[1] + cur_bbox[3] / 2)
                first_c_y = (track.first_bbox[1] + track.first_bbox[3] / 2)
                track.speed = (cur_c_y - first_c_y) / (frame_idx - track.first_frame) * fps

        # Add aggregated data
        speeds = [
            abs(track.speed) for track in tracker.tracks
            if hasattr(track, 'speed')
        ]
        avg_spd = np.sum(speeds) / len(speeds) * 0.0005787 * 3600  # km/hr
        lane_area = cv.contourArea(np.array(roi))
        box_area = np.sum([det.tlwh[2] * det.tlwh[3] for det in detections])
        occupancy = box_area / lane_area

        proc_fps = (proc_fps + (1. / (time.time() - t1))) / 2
        # print("fps= %f" % (proc_fps))
        fps_avg += proc_fps
        fps_avg_25 += proc_fps

        if frame_idx % fps in np.linspace(0, fps, samples_per_sec).astype(int)[:-1]:
            output_data.append({
                "data_event_name": "vehicle_detection",
                "camera_id": cam_id,
                "lane_id": 1,
                "frame_idx": frame_idx,
                "average_speed": avg_spd,
                "lane_occupancy": occupancy,
                "time_stamp": float(start_time) + frame_idx / fps,
                "vehicles": [track_encoder(trk) for trk in tracker.tracks]
            })

        frame_idx += 1
        if frame_idx % 25 == 0:
            print("The {} second:".format(str(seconds)))
            seconds += 1
            print("Total frames: {}".format(str(frame_idx)))
            print(
                "Average FPS is {}, Yolo computing took {} seconds, feature encoding took {} seconds"
                .format(fps_avg_25 / float(25.0),
                        yolo_detect_avg_25 / float(25.0),
                        feature_encode_avg_25 / float(25.0)))
            yolo_detect_avg_25 = 0
            feature_encode_avg_25 = 0
            fps_avg_25 = 0

        # if frame_idx > 30: break

    vcap.release()
    t_final = time.time()
    # print("Final average FPS is {}, Yolo computing took {} seconds, feature encoding took {} seconds"
    #       .format(fps_avg / frame_idx, yolo_detect_avg / frame_idx, feature_encode_avg / frame_idx))
    # print("The total time for this process is {}".format(t_final - initial))
    with open(jfile, 'w') as jout:
        json.dump(output_data, jout)
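# track.speed above ends up in pixels/second (per-frame pixel displacement
# times fps); multiplying by 0.0005787 and then 3600 converts to km/h under
# the scene-specific assumption that one pixel spans about 0.5787 m
# (0.0005787 km). A small sketch making the conversion explicit:
def pixels_per_sec_to_kmh(speed_px_s, km_per_pixel=0.0005787):
    """km_per_pixel is a calibration constant for this particular camera."""
    return speed_px_s * km_per_pixel * 3600.0

# e.g. 10 px/s with the factor above -> 10 * 0.0005787 * 3600 ≈ 20.8 km/h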
def main(_argv): with open("./config_birdview.yml", "r") as ymlfile: bird_view_cfg = yaml.load(ymlfile) width_og, height_og = 0, 0 corner_points = [] for section in bird_view_cfg: corner_points.append(bird_view_cfg["image_parameters"]["p1"]) corner_points.append(bird_view_cfg["image_parameters"]["p2"]) corner_points.append(bird_view_cfg["image_parameters"]["p3"]) corner_points.append(bird_view_cfg["image_parameters"]["p4"]) width_og = int(bird_view_cfg["image_parameters"]["width_og"]) height_og = int(bird_view_cfg["image_parameters"]["height_og"]) img_path = bird_view_cfg["image_parameters"]["img_path"] size_height = bird_view_cfg["image_parameters"]["size_height"] size_width = bird_view_cfg["image_parameters"]["size_width"] tr = np.array([ bird_view_cfg["image_parameters"]["p4"][0], bird_view_cfg["image_parameters"]["p4"][1], ]) tl = np.array([ bird_view_cfg["image_parameters"]["p2"][0], bird_view_cfg["image_parameters"]["p2"][1], ]) br = np.array([ bird_view_cfg["image_parameters"]["p3"][0], bird_view_cfg["image_parameters"]["p3"][1], ]) bl = np.array([ bird_view_cfg["image_parameters"]["p1"][0], bird_view_cfg["image_parameters"]["p1"][1], ]) widthA = np.sqrt(((br[0] - bl[0])**2) + ((br[1] - bl[1])**2)) widthB = np.sqrt(((tr[0] - tl[0])**2) + ((tr[1] - tl[1])**2)) maxWidth = max(int(widthA), int(widthB)) heightA = np.sqrt(((tr[0] - br[0])**2) + ((tr[1] - br[1])**2)) heightB = np.sqrt(((tl[0] - bl[0])**2) + ((tl[1] - bl[1])**2)) maxHeight = max(int(heightA), int(heightB)) matrix, imgOutput = compute_perspective_transform(corner_points, maxWidth, maxHeight, cv2.imread(img_path)) height, width, _ = imgOutput.shape dim = (width, height) # Definition of the parameters max_cosine_distance = 0.4 nn_budget = None nms_max_overlap = 1.0 # initialize deep sort model_filename = "model_data/mars-small128.pb" encoder = gdet.create_box_encoder(model_filename, batch_size=1) # calculate cosine distance metric metric = nn_matching.NearestNeighborDistanceMetric("cosine", max_cosine_distance, nn_budget) # initialize tracker tracker = Tracker(metric) # load configuration for object detector config = ConfigProto() config.gpu_options.allow_growth = True session = InteractiveSession(config=config) STRIDES, ANCHORS, NUM_CLASS, XYSCALE = utils.load_config(FLAGS) input_size = FLAGS.size video_path = FLAGS.video # load tflite model if flag is set if FLAGS.framework == "tflite": interpreter = tf.lite.Interpreter(model_path=FLAGS.weights) interpreter.allocate_tensors() input_details = interpreter.get_input_details() output_details = interpreter.get_output_details() print(input_details) print(output_details) # otherwise load standard tensorflow saved model else: saved_model_loaded = tf.saved_model.load(FLAGS.weights, tags=[tag_constants.SERVING]) infer = saved_model_loaded.signatures["serving_default"] # begin video capture try: vid = cv2.VideoCapture(int(video_path)) except: vid = cv2.VideoCapture(video_path) output_video_1, output_video_2 = None, None # get video ready to save locally if flag is set if FLAGS.output: # by default VideoCapture returns float instead of int """ width = int(vid.get(cv2.CAP_PROP_FRAME_WIDTH)) height = int(vid.get(cv2.CAP_PROP_FRAME_HEIGHT)) """ fps = int(vid.get(cv2.CAP_PROP_FPS)) codec = cv2.VideoWriter_fourcc(*FLAGS.output_format) out = cv2.VideoWriter(FLAGS.output, codec, fps, (width, height)) frame_num = 0 # while video is running while True: black_img = cv2.imread("./black_bg.png") black_img = cv2.resize(black_img, dim, interpolation=cv2.INTER_AREA) return_value, frame = vid.read() 
if return_value: frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB) image = Image.fromarray(frame) else: print("Video has ended or failed, try a different video format!") break frame_num += 1 print("Frame #: ", frame_num) frame_size = frame.shape[:2] image_data = cv2.resize(frame, (input_size, input_size)) image_data = image_data / 255.0 image_data = image_data[np.newaxis, ...].astype(np.float32) start_time = time.time() # run detections on tflite if flag is set if FLAGS.framework == "tflite": interpreter.set_tensor(input_details[0]["index"], image_data) interpreter.invoke() pred = [ interpreter.get_tensor(output_details[i]["index"]) for i in range(len(output_details)) ] # run detections using yolov3 if flag is set if FLAGS.model == "yolov3" and FLAGS.tiny == True: boxes, pred_conf = filter_boxes( pred[1], pred[0], score_threshold=0.25, input_shape=tf.constant([input_size, input_size]), ) else: boxes, pred_conf = filter_boxes( pred[0], pred[1], score_threshold=0.25, input_shape=tf.constant([input_size, input_size]), ) else: batch_data = tf.constant(image_data) pred_bbox = infer(batch_data) for key, value in pred_bbox.items(): boxes = value[:, :, 0:4] pred_conf = value[:, :, 4:] ( boxes, scores, classes, valid_detections, ) = tf.image.combined_non_max_suppression( boxes=tf.reshape(boxes, (tf.shape(boxes)[0], -1, 1, 4)), scores=tf.reshape( pred_conf, (tf.shape(pred_conf)[0], -1, tf.shape(pred_conf)[-1])), max_output_size_per_class=50, max_total_size=50, iou_threshold=FLAGS.iou, score_threshold=FLAGS.score, ) # convert data to numpy arrays and slice out unused elements num_objects = valid_detections.numpy()[0] bboxes = boxes.numpy()[0] bboxes = bboxes[0:int(num_objects)] scores = scores.numpy()[0] scores = scores[0:int(num_objects)] classes = classes.numpy()[0] classes = classes[0:int(num_objects)] # format bounding boxes from normalized ymin, xmin, ymax, xmax ---> xmin, ymin, width, height original_h, original_w, _ = frame.shape bboxes = utils.format_boxes(bboxes, original_h, original_w) # store all predictions in one parameter for simplicity when calling functions pred_bbox = [bboxes, scores, classes, num_objects] # read in all class names from config class_names = utils.read_class_names(cfg.YOLO.CLASSES) # by default allow all classes in .names file # allowed_classes = list(class_names.values()) # custom allowed classes (uncomment line below to customize tracker for only people) allowed_classes = ["person"] # loop through objects and use class index to get class name, allow only classes in allowed_classes list names = [] deleted_indx = [] for i in range(num_objects): class_indx = int(classes[i]) class_name = class_names[class_indx] if class_name not in allowed_classes: deleted_indx.append(i) else: names.append(class_name) names = np.array(names) count = len(names) if FLAGS.count: cv2.putText( frame, "Objects being tracked: {}".format(count), (5, 35), cv2.FONT_HERSHEY_COMPLEX_SMALL, 2, (0, 255, 0), 2, ) print("Objects being tracked: {}".format(count)) # delete detections that are not in allowed_classes bboxes = np.delete(bboxes, deleted_indx, axis=0) scores = np.delete(scores, deleted_indx, axis=0) # encode yolo detections and feed to tracker features = encoder(frame, bboxes) detections = [ Detection(bbox, score, class_name, feature) for bbox, score, class_name, feature in zip( bboxes, scores, names, features) ] # initialize color map cmap = plt.get_cmap("tab20b") colors = [cmap(i)[:3] for i in np.linspace(0, 1, 20)] # run non-maxima supression boxs = np.array([d.tlwh for d in detections]) 
scores = np.array([d.confidence for d in detections]) classes = np.array([d.class_name for d in detections]) indices = preprocessing.non_max_suppression(boxs, classes, nms_max_overlap, scores) detections = [detections[i] for i in indices] # Call the tracker tracker.predict() tracker.update(detections) bbox_array = [] # update tracks for track in tracker.tracks: if not track.is_confirmed() or track.time_since_update > 1: continue bbox = track.to_tlbr() bbox_array.append( (int(bbox[0]), int(bbox[1]), int(bbox[2]), int(bbox[3]))) class_name = track.get_class() # draw bbox on screen color = colors[int(track.track_id) % len(colors)] color = [i * 255 for i in color] cv2.rectangle( frame, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), color, 2, ) cv2.rectangle( frame, (int(bbox[0]), int(bbox[1] - 30)), ( int(bbox[0]) + (len(class_name) + len(str(track.track_id))) * 17, int(bbox[1]), ), color, -1, ) cv2.putText( frame, class_name + "-" + str(track.track_id), (int(bbox[0]), int(bbox[1] - 10)), 0, 0.75, (255, 255, 255), 2, ) # if enable info flag then print details about each track if FLAGS.info: print( "Tracker ID: {}, Class: {}, BBox Coords (xmin, ymin, xmax, ymax): {}" .format( str(track.track_id), class_name, (int(bbox[0]), int(bbox[1]), int(bbox[2]), int( bbox[3])), )) if len(bbox_array) >= 1: array_centroids, array_groundpoints = get_centroids_and_groundpoints( bbox_array) transformed_downoids = compute_point_perspective_transformation( matrix, array_centroids) # Show every point on the top view image for point in transformed_downoids: x, y = point cv2.circle(black_img, (x, y), 60, (0, 255, 0), 2) cv2.circle(black_img, (x, y), 3, (0, 255, 0), -1) # calculate frames per second of running detections fps = 1.0 / (time.time() - start_time) print("FPS: %.2f" % fps) result = np.asarray(frame) # result = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR) if not FLAGS.dont_show: cv2.imshow("Output Video", result) # if output flag is set, save video file if FLAGS.output: if output_video_1 is None and output_video_2 is None: fourcc1 = cv2.VideoWriter_fourcc(*"MJPG") output_video_1 = cv2.VideoWriter( "./video.avi", fourcc1, 25, (frame.shape[1], frame.shape[0]), True) fourcc2 = cv2.VideoWriter_fourcc(*"MJPG") output_video_2 = cv2.VideoWriter( "./bird_view.avi", fourcc2, 25, (black_img.shape[1], black_img.shape[0]), True, ) elif output_video_1 is not None and output_video_2 is not None: output_video_1.write(frame) output_video_2.write(black_img) if cv2.waitKey(1) & 0xFF == ord("q"): break cv2.destroyAllWindows()
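# compute_perspective_transform and compute_point_perspective_transformation
# are helpers of this bird-view script whose bodies are not shown here. A
# hedged sketch consistent with how they are called above (the exact corner
# ordering is an assumption):
import cv2
import numpy as np

def compute_perspective_transform(corner_points, width, height, image):
    # map the four marked ground-plane corners onto a width x height rectangle
    src = np.float32(np.array(corner_points))
    dst = np.float32([[0, 0], [width, 0], [0, height], [width, height]])
    matrix = cv2.getPerspectiveTransform(src, dst)
    warped = cv2.warpPerspective(image, matrix, (width, height))
    return matrix, warped

def compute_point_perspective_transformation(matrix, points):
    # project (x, y) ground points into the bird's-eye view
    pts = np.float32(points).reshape(-1, 1, 2)
    transformed = cv2.perspectiveTransform(pts, matrix)
    return [(int(p[0][0]), int(p[0][1])) for p in transformed]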
def main(_argv): # Definition of the parameters max_cosine_distance = 0.4 nn_budget = None nms_max_overlap = 1.0 # initialize deep sort model_filename = 'model_data/mars-small128.pb' encoder = gdet.create_box_encoder(model_filename, batch_size=1) # calculate cosine distance metric metric = nn_matching.NearestNeighborDistanceMetric("cosine", max_cosine_distance, nn_budget) # initialize tracker tracker = Tracker(metric) # load configuration for object detector config = ConfigProto() config.gpu_options.allow_growth = False config.gpu_options.per_process_gpu_memory_fraction = 0.1 _ = InteractiveSession(config=config) utils.load_config(FLAGS) input_size = FLAGS.size video_path = FLAGS.video # load tflite model if flag is set if FLAGS.framework == 'tflite': interpreter = tf.lite.Interpreter( model_path=f'{FLAGS.weights}_{FLAGS.size}') interpreter.allocate_tensors() input_details = interpreter.get_input_details() output_details = interpreter.get_output_details() print(input_details) print(output_details) # otherwise load standard tensorflow saved model else: saved_model_loaded = tf.saved_model.load( f'{FLAGS.weights}_{FLAGS.size}', tags=[tag_constants.SERVING]) infer = saved_model_loaded.signatures['serving_default'] # begin video capture try: vid = cv2.VideoCapture(int(video_path)) except: vid = cv2.VideoCapture(video_path) out = None # get video ready to save locally if flag is set if FLAGS.output: # by default VideoCapture returns float instead of int width = int(vid.get(cv2.CAP_PROP_FRAME_WIDTH)) height = int(vid.get(cv2.CAP_PROP_FRAME_HEIGHT)) fps = int(vid.get(cv2.CAP_PROP_FPS)) codec = cv2.VideoWriter_fourcc(*FLAGS.output_format) out = cv2.VideoWriter(FLAGS.output, codec, fps, (width, height)) all_start_time = None frame_num = 0 # while video is running while True: return_value, frame = vid.read() if return_value: frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB) Image.fromarray(frame) else: fps = float(frame_num) / (time.time() - all_start_time) print("fps=%.2f size=%d frames=%d deep=%s output=%s" % (fps, FLAGS.size, frame_num, "true" if FLAGS.deep else "false", FLAGS.output)) break frame_num += 1 if FLAGS.info: print("frame_num=%d" % frame_num) start_time = time.time() if all_start_time is None: all_start_time = time.time() image_data = cv2.resize(frame, (input_size, input_size)) image_data = image_data / 255. 
image_data = image_data[np.newaxis, ...].astype(np.float32) # run detections on tflite if flag is set if FLAGS.framework == 'tflite': interpreter.set_tensor(input_details[0]['index'], image_data) interpreter.invoke() pred = [ interpreter.get_tensor(output_details[i]['index']) for i in range(len(output_details)) ] # run detections using yolov3 if flag is set if FLAGS.model == 'yolov3' and FLAGS.tiny == True: boxes, pred_conf = filter_boxes(pred[1], pred[0], score_threshold=0.25, input_shape=tf.constant( [input_size, input_size])) else: boxes, pred_conf = filter_boxes(pred[0], pred[1], score_threshold=0.25, input_shape=tf.constant( [input_size, input_size])) else: batch_data = tf.constant(image_data) pred_bbox = infer(batch_data) for _, value in pred_bbox.items(): boxes = value[:, :, 0:4] pred_conf = value[:, :, 4:] boxes, scores, classes, valid_detections = tf.image.combined_non_max_suppression( boxes=tf.reshape(boxes, (tf.shape(boxes)[0], -1, 1, 4)), scores=tf.reshape( pred_conf, (tf.shape(pred_conf)[0], -1, tf.shape(pred_conf)[-1])), max_output_size_per_class=50, max_total_size=50, iou_threshold=FLAGS.iou, score_threshold=FLAGS.score) # convert data to numpy arrays and slice out unused elements num_objects = valid_detections.numpy()[0] bboxes = boxes.numpy()[0] bboxes = bboxes[0:int(num_objects)] scores = scores.numpy()[0] scores = scores[0:int(num_objects)] classes = classes.numpy()[0] classes = classes[0:int(num_objects)] # format bounding boxes from normalized ymin, xmin, ymax, xmax ---> xmin, ymin, width, height original_h, original_w, _ = frame.shape bboxes = utils.format_boxes(bboxes, original_h, original_w) # store all predictions in one parameter for simplicity when calling functions pred_bbox = [bboxes, scores, classes, num_objects] # read in all class names from config class_names = utils.read_class_names(cfg.YOLO.CLASSES) # by default allow all classes in .names file allowed_classes = list(class_names.values()) # custom allowed classes (uncomment line below to customize tracker for only people) allowed_classes = ['person'] # loop through objects and use class index to get class name, allow only classes in allowed_classes list names = [] deleted_indx = [] for i in range(num_objects): class_indx = int(classes[i]) class_name = class_names[class_indx] if class_name not in allowed_classes: deleted_indx.append(i) else: names.append(class_name) names = np.array(names) count = len(names) if FLAGS.count: cv2.putText(frame, "Objects being tracked: {}".format(count), (5, 35), cv2.FONT_HERSHEY_COMPLEX_SMALL, 2, (0, 255, 0), 2) print("Objects being tracked: {}".format(count)) # delete detections that are not in allowed_classes bboxes = np.delete(bboxes, deleted_indx, axis=0) scores = np.delete(scores, deleted_indx, axis=0) # encode yolo detections and feed to tracker if FLAGS.deep: features = encoder(frame, bboxes) else: features = np.empty((len(bboxes), 0), np.float32) detections = [ Detection(bbox, score, class_name, feature) for bbox, score, class_name, feature in zip( bboxes, scores, names, features) ] #initialize color map cmap = plt.get_cmap('tab20b') colors = [cmap(i)[:3] for i in np.linspace(0, 1, 20)] # run non-maxima supression boxs = np.array([d.tlwh for d in detections]) scores = np.array([d.confidence for d in detections]) classes = np.array([d.class_name for d in detections]) indices = preprocessing.non_max_suppression(boxs, classes, nms_max_overlap, scores) detections = [detections[i] for i in indices] # Call the tracker tracker.predict() tracker.update(detections) # update 
tracks for track in tracker.tracks: if not track.is_confirmed() or track.time_since_update > 1: continue bbox = track.to_tlbr() class_name = track.get_class() # draw bbox on screen color = colors[int(track.track_id) % len(colors)] color = [i * 255 for i in color] cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), color, 2) cv2.rectangle(frame, (int(bbox[0]), int(bbox[1] - 30)), (int(bbox[0]) + (len(class_name) + len(str(track.track_id))) * 17, int(bbox[1])), color, -1) cv2.putText(frame, class_name + "-" + str(track.track_id), (int(bbox[0]), int(bbox[1] - 10)), 0, 0.75, (255, 255, 255), 2) # if enable info flag then print details about each track if FLAGS.info: print( "Tracker ID: {}, Class: {}, BBox Coords (xmin, ymin, xmax, ymax): {}" .format(str(track.track_id), class_name, (int( bbox[0]), int(bbox[1]), int(bbox[2]), int(bbox[3])))) result = np.asarray(frame) result = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR) if not FLAGS.dont_show: cv2.imshow("Output Video", result) # if output flag is set, save video file if FLAGS.output: out.write(result) # calculate frames per second of running detections if FLAGS.info: fps = 1.0 / (time.time() - start_time) print("fps=%.2f" % fps) if not FLAGS.dont_show: if cv2.waitKey(1) & 0xFF == ord('q'): break if not FLAGS.dont_show: cv2.destroyAllWindows()
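# This variant can run without the appearance model: when FLAGS.deep is off it
# builds zero-width feature vectors, so cosine matching carries no information
# and association presumably leans on the Kalman/IoU stage instead. A minimal
# sketch of that toggle (function name invented here):
import numpy as np

def encode_or_skip(encoder, frame, bboxes, use_deep=True):
    if use_deep:
        return encoder(frame, bboxes)
    # zero-width features keep Detection construction uniform while
    # carrying no appearance information
    return np.empty((len(bboxes), 0), np.float32)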
def main(yolo):
    # Definition of the parameters
    max_cosine_distance = 0.3
    nn_budget = None
    nms_max_overlap = 1.0

    # Deep SORT
    model_filename = 'model_data/mars-small128.pb'
    encoder = gdet.create_box_encoder(model_filename, batch_size=1)
    metric = nn_matching.NearestNeighborDistanceMetric("cosine", max_cosine_distance, nn_budget)
    tracker = Tracker(metric)

    np.random.seed(42)
    COLORS = np.random.randint(0, 255, size=(200, 3), dtype="uint8")

    writeVideo_flag = True
    asyncVideo_flag = False
    file_path = 'testvideo2.avi'

    # load the COCO class labels our YOLO model was trained on
    labelsPath = os.path.sep.join(["model_data", "coco_classes.txt"])
    LABELS = open(labelsPath).read().strip().split("\n")
    # print(str(len(LABELS)) + " labels loaded successfully")
    # print(LABELS)
    class_nums = np.zeros(80)
    counter = np.zeros(80)
    track_id_max = -1

    if asyncVideo_flag:
        video_capture = VideoCaptureAsync(file_path)
    else:
        video_capture = cv2.VideoCapture(file_path)

    if asyncVideo_flag:
        video_capture.start()

    if writeVideo_flag:
        if asyncVideo_flag:
            w = int(video_capture.cap.get(3))
            h = int(video_capture.cap.get(4))
        else:
            w = int(video_capture.get(3))
            h = int(video_capture.get(4))
        fourcc = cv2.VideoWriter_fourcc(*'XVID')
        out = cv2.VideoWriter('testvideo2_out.avi', fourcc, 30, (w, h))
        frame_index = -1

    fps = 0.0
    fps_imutils = imutils.video.FPS().start()
    img_num = 0
    frame_cnt = 0
    while True:
        ret, frame = video_capture.read()  # frame shape 640*480*3
        if ret != True or frame_cnt > 30:
            break
        frame_copy = frame.copy()  # copy only after a successful read
        t1 = time.time()

        image = Image.fromarray(frame[..., ::-1])  # bgr to rgb
        boxs, confidence, class_names = yolo.detect_image(image)
        features = encoder(frame, boxs)
        detections = [Detection(bbox, confidence, feature)
                      for bbox, confidence, feature in zip(boxs, confidence, features)]

        # Run non-maxima suppression.
        boxes = np.array([d.tlwh for d in detections])
        scores = np.array([d.confidence for d in detections])
        indices = preprocessing.non_max_suppression(boxes, nms_max_overlap, scores)
        # print(indices)
        detections = [detections[i] for i in indices]
        class_names = [class_names[i] for i in indices]
        print("class_name:" + str(class_names))

        class_IDs = []
        current_nums = np.zeros(80)
        for class_name in class_names:
            for i, LABEL in enumerate(LABELS):
                if class_name[0] == LABEL:
                    current_nums[i] += 1
                    class_IDs.append(i)
        # print("person:" + str(current_nums[0]))

        cv2.putText(frame, 'Current', (20, 70), cv2.FONT_HERSHEY_DUPLEX, 0.75, (255, 255, 255), 2)
        cv2.putText(frame, 'Total', (180, 70), cv2.FONT_HERSHEY_DUPLEX, 0.75, (0, 255, 255), 2)

        x1 = 20
        y1 = 100
        for i, cl in enumerate(current_nums):
            if cl > 0:
                # per-class counts for the current frame
                cv2.putText(frame, LABELS[i] + "=" + str(cl), (x1, y1),
                            cv2.FONT_HERSHEY_DUPLEX, 0.6, (255, 255, 255), 1)
                y1 = y1 + 20

        for i, det in enumerate(detections):
            bbox = det.to_tlbr()
            score = "%.2f" % round(det.confidence * 100, 2)
            cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])),
                          (int(bbox[2]), int(bbox[3])), (255, 255, 255), 1)
            cv2.putText(frame, score + '%', (int(bbox[0]), int(bbox[1]) + 10),
                        cv2.FONT_HERSHEY_DUPLEX, 0.3, (255, 255, 255), 1)
            # cv2.putText(frame, class_names[i], (int(bbox[0]), int(bbox[1]) - 5), 0, 5e-3 * 130, (0, 255, 0), 2)

        print("Total of detections:" + str(len(detections)))

        # Call the tracker
        tracker.predict()
        tracker.update(detections, class_IDs)

        for track in tracker.tracks:
            if not track.is_confirmed() or track.time_since_update > 1:
                print("not track.is_confirmed() or track.time_since_update > 1: " + str(track.track_id))
                continue
            bbox = track.to_tlbr()
            color = [int(c) for c in COLORS[track.class_id % len(COLORS)]]
            cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])),
                          (int(bbox[2]), int(bbox[3])), color, 2)
            cv2.putText(frame, str(track.track_id) + ' ' + LABELS[track.class_id],
                        (int(bbox[0]), int(bbox[1]) - 5), cv2.FONT_HERSHEY_DUPLEX, 0.4, color, 1)
            if track.track_id > track_id_max:
                counter[track.class_id] = counter[track.class_id] + 1
                track_id_max = track.track_id
                dest = frame_copy[int(bbox[1]):int(bbox[3]), int(bbox[0]):int(bbox[2])]
                pdest = "./output_image/" + str(LABELS[track.class_id]) + str(int(counter[track.class_id])) + ".png"
                cv2.imwrite(pdest, dest)
                img_num += 1

        print(str(counter))
        print("-------------------- frame done --------------------")

        # draw FPS
        cv2.putText(frame, "FPS: %f" % (fps), (int(20), int(40)), 0, 5e-3 * 200, (0, 255, 0), 3)

        x2 = 180
        y2 = 100
        for i, cl in enumerate(counter):
            if cl > 0:
                cv2.putText(frame, LABELS[i] + "=" + str(cl), (x2, y2),
                            cv2.FONT_HERSHEY_DUPLEX, 0.6, (0, 255, 255), 1)
                y2 = y2 + 20

        # cv2.imshow('', frame)

        if writeVideo_flag:  # and not asyncVideo_flag:
            # save a frame
            out.write(frame)
            frame_index = frame_index + 1

        fps_imutils.update()
        fps = (fps + (1. / (time.time() - t1))) / 2
        # print("FPS = %f" % (fps))
        frame_cnt += 1

        # Press Q to stop!
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

    fps_imutils.stop()
    print('imutils FPS: {}'.format(fps_imutils.fps()))

    if asyncVideo_flag:
        video_capture.stop()
    else:
        video_capture.release()

    if writeVideo_flag:
        out.release()

    cv2.destroyAllWindows()
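# The running totals above increment counter[class_id] whenever a track id
# exceeds track_id_max, which works because deep_sort issues strictly
# increasing ids, but it attributes each new id to whatever class the track
# carries at that moment. A hedged, order-independent alternative keyed on the
# ids actually seen per class:
from collections import defaultdict

seen_ids_per_class = defaultdict(set)

def count_track(class_id, track_id):
    """Record a confirmed track and return the running total for its class."""
    seen_ids_per_class[class_id].add(track_id)
    return len(seen_ids_per_class[class_id])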
def main(_argv): # Definition of the parameters max_cosine_distance = 0.4 nn_budget = None nms_max_overlap = 1.0 # initialize deep sort model_filename = 'model_data/mars-small128.pb' encoder = gdet.create_box_encoder(model_filename, batch_size=1) # calculate cosine distance metric metric = nn_matching.NearestNeighborDistanceMetric("cosine", max_cosine_distance, nn_budget) # initialize tracker tracker = Tracker(metric) # load configuration for object detector config = ConfigProto() config.gpu_options.allow_growth = True session = InteractiveSession(config=config) STRIDES, ANCHORS, NUM_CLASS, XYSCALE = utils.load_config(FLAGS) input_size = FLAGS.size video_path = FLAGS.video # load tflite model if flag is set if FLAGS.framework == 'tflite': interpreter = tf.lite.Interpreter(model_path=FLAGS.weights) interpreter.allocate_tensors() input_details = interpreter.get_input_details() output_details = interpreter.get_output_details() print(input_details) print(output_details) # otherwise load standard tensorflow saved model else: saved_model_loaded = tf.saved_model.load(FLAGS.weights, tags=[tag_constants.SERVING]) infer = saved_model_loaded.signatures['serving_default'] # begin video capture try: vid = cv2.VideoCapture(int(video_path)) except: vid = cv2.VideoCapture(video_path) out = None # get video ready to save locally if flag is set if FLAGS.output: # by default VideoCapture returns float instead of int width = int(vid.get(cv2.CAP_PROP_FRAME_WIDTH)) height = int(vid.get(cv2.CAP_PROP_FRAME_HEIGHT)) fps = int(vid.get(cv2.CAP_PROP_FPS)) codec = cv2.VideoWriter_fourcc(*FLAGS.output_format) out = cv2.VideoWriter(FLAGS.output, codec, fps, (width, height)) frame_num = 0 # while video is running while True: return_value, frame = vid.read() if return_value: frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB) image = Image.fromarray(frame) else: print('Video has ended or failed, try a different video format!') break frame_num += 1 print('Frame #: ', frame_num) frame_size = frame.shape[:2] image_data = cv2.resize(frame, (input_size, input_size)) image_data = image_data / 255. 
image_data = image_data[np.newaxis, ...].astype(np.float32) start_time = time.time() # run detections on tflite if flag is set if FLAGS.framework == 'tflite': interpreter.set_tensor(input_details[0]['index'], image_data) interpreter.invoke() pred = [ interpreter.get_tensor(output_details[i]['index']) for i in range(len(output_details)) ] # run detections using yolov3 if flag is set if FLAGS.model == 'yolov3' and FLAGS.tiny == True: boxes, pred_conf = filter_boxes(pred[1], pred[0], score_threshold=0.25, input_shape=tf.constant( [input_size, input_size])) else: boxes, pred_conf = filter_boxes(pred[0], pred[1], score_threshold=0.25, input_shape=tf.constant( [input_size, input_size])) else: batch_data = tf.constant(image_data) pred_bbox = infer(batch_data) for key, value in pred_bbox.items(): boxes = value[:, :, 0:4] pred_conf = value[:, :, 4:] boxes, scores, classes, valid_detections = tf.image.combined_non_max_suppression( boxes=tf.reshape(boxes, (tf.shape(boxes)[0], -1, 1, 4)), scores=tf.reshape( pred_conf, (tf.shape(pred_conf)[0], -1, tf.shape(pred_conf)[-1])), max_output_size_per_class=50, max_total_size=50, iou_threshold=FLAGS.iou, score_threshold=FLAGS.score) # convert data to numpy arrays and slice out unused elements num_objects = valid_detections.numpy()[0] bboxes = boxes.numpy()[0] bboxes = bboxes[0:int(num_objects)] scores = scores.numpy()[0] scores = scores[0:int(num_objects)] classes = classes.numpy()[0] classes = classes[0:int(num_objects)] # format bounding boxes from normalized ymin, xmin, ymax, xmax ---> xmin, ymin, width, height original_h, original_w, _ = frame.shape bboxes = utils.format_boxes(bboxes, original_h, original_w) # store all predictions in one parameter for simplicity when calling functions pred_bbox = [bboxes, scores, classes, num_objects] # read in all class names from config class_names = utils.read_class_names(cfg.YOLO.CLASSES) # by default allow all classes in .names file allowed_classes = list(class_names.values()) # custom allowed classes (uncomment line below to customize tracker for only people) #allowed_classes = ['person'] # loop through objects and use class index to get class name, allow only classes in allowed_classes list names = [] deleted_indx = [] for i in range(num_objects): class_indx = int(classes[i]) class_name = class_names[class_indx] if class_name not in allowed_classes: deleted_indx.append(i) else: names.append(class_name) names = np.array(names) count = len(names) if FLAGS.count: cv2.putText(frame, "Objects being tracked: {}".format(count), (5, 35), cv2.FONT_HERSHEY_COMPLEX_SMALL, 2, (0, 255, 0), 2) print("Objects being tracked: {}".format(count)) # delete detections that are not in allowed_classes bboxes = np.delete(bboxes, deleted_indx, axis=0) scores = np.delete(scores, deleted_indx, axis=0) # encode yolo detections and feed to tracker features = encoder(frame, bboxes) detections = [ Detection(bbox, score, class_name, feature) for bbox, score, class_name, feature in zip( bboxes, scores, names, features) ] # initialize color map cmap = plt.get_cmap('tab20b') colors = [cmap(i)[:3] for i in np.linspace(0, 1, 20)] # run non-maxima supression boxs = np.array([d.tlwh for d in detections]) scores = np.array([d.confidence for d in detections]) classes = np.array([d.class_name for d in detections]) indices = preprocessing.non_max_suppression(boxs, classes, nms_max_overlap, scores) detections = [detections[i] for i in indices] # Call the tracker tracker.predict() tracker.update(detections) # update tracks for track in tracker.tracks: if not 
track.is_confirmed() or track.time_since_update > 1: continue bbox = track.to_tlbr() class_name = track.get_class() # draw bbox on screen # names = {'6_d': 'Thomas Delaney', # '10_b': 'Leroy Sane', # '18_b': 'Leon Goretzka', # '25_b': 'Thomas Muller', # '5_d': 'Dan-Axel Zagadou', # '12_d': 'Zaragoza', # '4_b': 'Niklas Sule', # '14_d': 'Nico Schulz', # '11_d': 'Marco Reus', # 'Referee': 'Referee', # 'ball': 'ball', # '10_d': 'Thorgan Hazard', # '6_b': 'Joshua Kimmich ', # 'gk_b': 'Ron-Thorben Hoffmann(GK)', # '17_b': 'Jérôme Boateng', # '27_b': 'David Alaba', # '9_d': 'Erling Haaland', # '8_d': 'Mahmoud Dahoud', # 'gk_d': 'Luca Unbehaun(GK)', # '19_b': 'Alphonso Davies', # '29_b': 'Kingsley Coman', # '24_d': 'Marcel Schmelzer', # '9_b': 'Robert Lewandowski', # "23_d": 'Emre Can', # } # if class_name == 'Referee': # color = (0, 0, 0) if class_name == 'ball': # color = (255, 255, 255) cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), (255, 255, 255), 1) # else: # try: # colors = {'b': (252, 3, 78), 'd': (250, 247, 80)} # color = colors[str(class_name.split('_')[-1])] # except KeyError: # pass # class_name = names[str(class_name)] # color = (250, 247, 80) # color = colors[int(track.track_id) % len(colors)] # color = [i * 255 for i in color] # cv2.rectangle(frame, (int(bbox[0]), int( # bbox[1])), (int(bbox[2]), int(bbox[3])), color, 1) # cv2.rectangle(frame, (int(bbox[0]), int( # bbox[1]-30)), (int(bbox[0])+(len(str(class_name)))*17, int(bbox[1])), color, -1) cv2.putText(frame, class_name, (int(bbox[0]), int(bbox[1] - 10)), 0, 0.75, (255, 251, 46), 2) # if enable info flag then print details about each track if FLAGS.info: print( "Tracker ID: {}, Class: {}, BBox Coords (xmin, ymin, xmax, ymax): {}" .format(str(track.track_id), class_name, (int( bbox[0]), int(bbox[1]), int(bbox[2]), int(bbox[3])))) # calculate frames per second of running detections fps = 1.0 / (time.time() - start_time) print("FPS: %.2f" % fps) result = np.asarray(frame) result = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR) if not FLAGS.dont_show: cv2.imshow("Output Video", result) # if output flag is set, save video file if FLAGS.output: out.write(result) if cv2.waitKey(1) & 0xFF == ord('q'): break cv2.destroyAllWindows()
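# The commented-out block above maps jersey-number classes like '9_b' / '9_d'
# to player names and per-team colours. A small lookup consistent with those
# commented constants (the fallback colour matches the active putText call):
TEAM_COLORS = {'b': (252, 3, 78), 'd': (250, 247, 80)}

def team_color(class_name, default=(255, 251, 46)):
    # the 'b' / 'd' suffix selects the team; 'Referee' and 'ball' fall back to default
    return TEAM_COLORS.get(str(class_name).split('_')[-1], default)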
def main(yolo): # Definition of the parameters max_cosine_distance = 0.3 nn_budget = None nms_max_overlap = 1.0 # Deep SORT model_filename = 'model_data/mars-small128.pb' encoder = gdet.create_box_encoder(model_filename, batch_size=1) metric = nn_matching.NearestNeighborDistanceMetric("cosine", max_cosine_distance, nn_budget) tracker = Tracker(metric) writeVideo_flag = True asyncVideo_flag = False file_path = 'input_video.avi' if asyncVideo_flag: video_capture = VideoCaptureAsync(file_path) else: video_capture = cv2.VideoCapture(file_path) if asyncVideo_flag: video_capture.start() if writeVideo_flag: if asyncVideo_flag: w = int(video_capture.cap.get(3)) h = int(video_capture.cap.get(4)) else: w = int(video_capture.get(3)) h = int(video_capture.get(4)) fourcc = cv2.VideoWriter_fourcc(*'XVID') out = cv2.VideoWriter('output_yolov3.mp4', fourcc, 25, (w, h)) frame_index = -1 fps = 0.0 fps_imutils = imutils.video.FPS().start() while True: ret, frame = video_capture.read() # frame shape 640*480*3 if ret != True: break t1 = time.time() image = Image.fromarray(frame[..., ::-1]) # bgr to rgb boxes, confidences = yolo.detect_image(image) boxes = np.array(boxes) confidences = np.array(confidences) # Run non-maxima suppression. indices = preprocessing.non_max_suppression(boxes, nms_max_overlap, confidences) boxes = [boxes[i] for i in indices] confidences = [confidences[i] for i in indices] time1 = time.time() features = encoder(frame, boxes) time2 = time.time() detections = [ Detection(bbox, confidence, feature) for bbox, confidence, feature in zip(boxes, confidences, features) ] # Call the tracker tracker.predict() tracker.update(detections) time3 = time.time() for track in tracker.tracks: if not track.is_confirmed() or track.time_since_update > 1: continue bbox = track.to_tlbr() cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), (255, 255, 255), 2) cv2.putText(frame, str(track.track_id), (int(bbox[0]), int(bbox[1])), 0, 5e-3 * 200, (0, 255, 0), 2) for det in detections: bbox = det.to_tlbr() score = "%.2f" % round(det.confidence * 100, 2) cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), (255, 0, 0), 2) cv2.putText(frame, score + '%', (int(bbox[0]), int(bbox[3])), 0, 5e-3 * 130, (0, 255, 0), 2) cv2.imshow('', frame) if writeVideo_flag: # and not asyncVideo_flag: # save a frame out.write(frame) frame_index = frame_index + 1 fps_imutils.update() fps = (fps + (1. / (time.time() - t1))) / 2 print("FPS = %f" % (fps)) time4 = time.time() time_sum = time4 - t1 print("time:", (time1 - t1) / time_sum, (time2 - time1) / time_sum, (time3 - time2) / time_sum, (time4 - time3) / time_sum) # Press Q to stop! if cv2.waitKey(1) & 0xFF == ord('q'): break fps_imutils.stop() print('imutils FPS: {}'.format(fps_imutils.fps())) if asyncVideo_flag: video_capture.stop() else: video_capture.release() if writeVideo_flag: out.release() cv2.destroyAllWindows()
'''Timer start'''
# out.write(original_frame)
# original_frame = original_frame[:, 240:1680]
fframe = cv2.resize(original_frame, (640, 480))
# frame = fframe.copy()
# frame = infer(frame)
ti1 = time.time()

'''yolov3 + deepsort'''
image = Image.fromarray(fframe[..., ::-1])  # bgr to rgb
boxs = yolo.detect_image(image)
features = encoder(fframe, boxs)
detections = [Detection(bbox, 1.0, feature) for bbox, feature in zip(boxs, features)]
boxes = np.array([d.tlwh for d in detections])
scores = np.array([d.confidence for d in detections])
indices = preprocessing.non_max_suppression(boxes, nms_max_overlap, scores)
detections = [detections[i] for i in indices]
tracker.predict()
tracker.update(detections)

for track in tracker.tracks:
    if not track.is_confirmed() or track.time_since_update > 1:
        continue
    bbox = track.to_tlbr()
    # bbox -----> (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3]))  corresponding coordinates
    bbox = np.maximum(bbox, 0)  # clamp negative coordinates to the frame
    ID = track.track_id
    x1 = int(bbox[0])
    y1 = int(bbox[1])
    box_h = int(bbox[3] - y1)
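# np.maximum(bbox, 0) in the fragment above clamps only the negative side;
# track boxes can still spill past the right and bottom edges. A hedged sketch
# that also clamps to the 640x480 frame produced by the resize above:
import numpy as np

def clamp_tlbr(bbox, frame_w=640, frame_h=480):
    """Clamp a (x1, y1, x2, y2) box to the frame bounds."""
    x1, y1, x2, y2 = np.maximum(bbox, 0).astype(int)
    return x1, y1, min(x2, frame_w - 1), min(y2, frame_h - 1)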
def startRecording_YOLO(): date_and_time = time.strftime("%Y%m%d-%H-%M-%S") #Stores current date and time in YYYY-MM-DD-HH:MM format vid_out_path = os.path.join(PROJECT_DIR, 'YoloV4', 'outputs', date_and_time + '.avi') #vid = cv2.VideoCapture(test_drive) #0 for webcam/Raspberry Pi Cam videothread = VideoThread(resolution=(640,480), framerate=30).start() width = int(videothread.stream.get(cv2.CAP_PROP_FRAME_WIDTH)) height = int(videothread.stream.get(cv2.CAP_PROP_FRAME_HEIGHT)) fps = int(videothread.stream.get(cv2.CAP_PROP_FPS)) codec = cv2.VideoWriter_fourcc(*'XVID') output_video = cv2.VideoWriter(vid_out_path, codec, fps, (width,height)) #width = int(vid.get(cv2.CAP_PROP_FRAME_WIDTH)) #height = int(vid.get(cv2.CAP_PROP_FRAME_HEIGHT)) #fps = int(vid.get(cv2.CAP_PROP_FPS)) #codec = cv2.VideoWriter_fourcc(*'XVID') #output_video = cv2.VideoWriter(vid_out_path, codec, fps, (width,height)) frame_number = 0 freq = cv2.getTickFrequency() avg_fps = 0 #while video is running/recording while True: return_val, frame = videothread.read() #return_val, frame = vid.read() if return_val: #frame = cv2.flip(frame, -1) frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB) image = Image.fromarray(frame) else: print('Video error, try another format') break frame_number += 1 #print('Frame #: ', frame_number) frame_size = frame.shape[:2] image_data = cv2.resize(frame, (input_size, input_size)) image_data = image_data/ 255. #mage_data = np.expand_dims(frame_resized, axis = 0) #if floating_model: # image_data = (np.float32(image_data) - 127.5)/127.5 image_data = image_data[np.newaxis, ...].astype(np.float32) #Converts image data to a float32 type start_time = time.time() #TFLite Detections interpreter.set_tensor(input_details[0]['index'], image_data) interpreter.invoke() prediction = [interpreter.get_tensor(output_details[i]['index']) for i in range(len(output_details))] #box = interpreter.get_tensor(output_details[0]['index'])[0] #scores = interpreter.get_tensor(output_details[2]['index'])[0] boxes, prediction_conf = filter_boxes(prediction[0], prediction[1], score_threshold=0.4, input_shape=tf.constant([input_size, input_size])) #Reshape = returns a new tensor that has the same values as tensor in the same order, but with a new shape given by shape #Shape = returns a 1-D integer tensor, represents the shape of the input boxes, scores, classes, valid_detections = tf.image.combined_non_max_suppression( boxes = tf.reshape(boxes, (tf.shape(boxes)[0], -1, 1, 4)), scores = tf.reshape(prediction_conf, (tf.shape(prediction_conf)[0], -1, tf.shape(prediction_conf)[-1])), max_output_size_per_class = 50, max_total_size = 50, iou_threshold = 0.45, score_threshold = 0.5 ) #convert the received data into numpy arrays, then slice out unused elements number_of_objects = valid_detections.numpy()[0] bboxes = boxes.numpy()[0] bboxes = bboxes[0 : int(number_of_objects)] scores = scores.numpy()[0] scores = scores[0 : int(number_of_objects)] classes = classes.numpy()[0] classes = classes[0 : int(number_of_objects)] #format bounding boxes with normalized minimums and maximums of x and y original_h, original_w, _ = frame.shape bboxes = utils.format_boxes(bboxes, original_h, original_w) prediction_bbox = [bboxes, scores, classes, number_of_objects] #Read in all the class names from config and only allow certain ones to be detected (eases computation power) class_names = utils.read_class_names(cfg.YOLO.CLASSES) allowed_classes = ['traffic light', 'person', 'car', 'stop sign'] #loop through objects and get classification name, using only the ones 
allows in allowed_classes names = [] deleted_indx = [] for i in range(number_of_objects): classification_index = int(classes[i]) class_name = class_names[classification_index] if class_name not in allowed_classes: deleted_indx.append(i) else: names.append(class_name) names = np.array(names) count = len(names) #delete irrelevant detections (not in allowed_classes) bboxes = np.delete(bboxes, deleted_indx, axis = 0) scores = np.delete(scores, deleted_indx, axis = 0) #Feed tracker with encoded yolo detections detections_features = encoder(frame, bboxes) detections = [Detection(bbox, score, class_name, detection_feature) for bbox, score, class_name, detection_feature in zip(bboxes, scores, names, detections_features)] #initialize color map cmap = plt.get_cmap('tab20b') colors = [cmap(i)[:3] for i in np.linspace(0, 1, 20)] #run non-maxima supression (reduces amount of detected entities to as little as possible) boxs = np.array([d.tlwh for d in detections]) scores = np.array([d.confidence for d in detections]) classes = np.array([d.class_name for d in detections]) indices = preprocessing.non_max_suppression(boxs, classes, nms_max_overlap, scores) detections = [detections[i] for i in indices] #Call tracker tracker.predict() tracker.update(detections) #update tracks for track in tracker.tracks: if not track.is_confirmed() or track.time_since_update > 1: continue bbox = track.to_tlbr() class_name = track.get_class() #if class_name == 'person': print('person found') #change frame to that which showcases the lane detection #frame = lane_detect.detect_edges(frame) #COMMENT OUT IF/WHEN ERROR OCCURS #distance approximation (barebones, needs more adjusting) cam_parameter = 18 #change with different cameras. Gets the detected distance closer to actual distance distance = (np.pi)/(bbox[2].item() + bbox[3].item()) * 1000 + cam_parameter det_dest = str(int(distance)) #draw bounded box on screen color = colors[int(track.track_id) % len(colors)] color = [i * 255 for i in color] cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), color, 2) cv2.rectangle(frame, (int(bbox[0]), int(bbox[1] - 30)), (int(bbox[0]) + (len(class_name) + len(det_dest)) * 18, int(bbox[1])), color, -1) #cv2.putText(frame, class_name + "-" + str(track.track_id), (int(bbox[0]), int(bbox[1] - 10)), 0, 0.75, (255, 255, 255), 2) cv2.putText(frame, class_name + ": " + str(int(distance)), (int(bbox[0]), int(bbox[1] - 10)), 0, 0.75, (255, 255, 255), 2) #calculate fps of running detections fps = 1.0/ (time.time() - start_time) avg_fps = avg_fps + fps #print("FPS: %.2f" % fps) cv2.putText(frame, "FPS: " + str(int(fps)), (width - 100, height - 20),0, 0.75, (255,255,255),2) result = np.asarray(frame) result = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR) cv2.imshow("Output Video", result) output_video.write(result) if cv2.waitKey(1) & 0xFF == ord('q'): break cv2.destroyAllWindows() print('Average FPS: ', (avg_fps/frame_number)) print('Number of Frames: ', frame_number) videothread.stop()
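# The distance heuristic above maps inverse box size to a distance-like value:
# distance = pi / (w + h) * 1000 + cam_parameter. For a detection with
# w + h = 500 px and cam_parameter = 18, that gives
# 3.1416 / 500 * 1000 + 18 ≈ 24.3. A sketch using the same constants (the
# calibration is camera-specific, as the comment in the snippet notes):
import numpy as np

def approx_distance(box_w, box_h, cam_parameter=18):
    return np.pi / (box_w + box_h) * 1000 + cam_parameter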
def iterate(lines, model, vid, frame_num):
    tracks = []
    return_value, frame = vid.read()
    if return_value:
        frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        image = Image.fromarray(frame)
    else:
        print('Video has ended or failed, try a different video format!')
        cv2.destroyAllWindows()
        return False, tracks

    frame_size = frame.shape[:2]
    image_data = cv2.resize(frame, (FLAGS.size, FLAGS.size))
    image_data = image_data / 255.
    image_data = image_data[np.newaxis, ...].astype(np.float32)
    start_time = time.time()

    # run detections on tflite if flag is set
    if FLAGS.framework == 'tflite':
        interpreter.set_tensor(input_details[0]['index'], image_data)
        interpreter.invoke()
        pred = [interpreter.get_tensor(output_details[i]['index'])
                for i in range(len(output_details))]
        # run detections using yolov3 if flag is set
        if FLAGS.model == 'yolov3' and FLAGS.tiny == True:
            boxes, pred_conf = filter_boxes(pred[1], pred[0],
                                            score_threshold=0.25,
                                            input_shape=tf.constant([FLAGS.size, FLAGS.size]))
        else:
            boxes, pred_conf = filter_boxes(pred[0], pred[1],
                                            score_threshold=0.25,
                                            input_shape=tf.constant([FLAGS.size, FLAGS.size]))
    else:
        batch_data = tf.constant(image_data)
        pred_bbox = model.signatures['serving_default'](batch_data)
        for key, value in pred_bbox.items():
            boxes = value[:, :, 0:4]
            pred_conf = value[:, :, 4:]

    boxes, scores, classes, valid_detections = tf.image.combined_non_max_suppression(
        boxes=tf.reshape(boxes, (tf.shape(boxes)[0], -1, 1, 4)),
        scores=tf.reshape(pred_conf, (tf.shape(pred_conf)[0], -1, tf.shape(pred_conf)[-1])),
        max_output_size_per_class=50,
        max_total_size=50,
        iou_threshold=FLAGS.iou,
        score_threshold=FLAGS.score)

    # convert data to numpy arrays and slice out unused elements
    num_objects = valid_detections.numpy()[0]
    bboxes = boxes.numpy()[0]
    bboxes = bboxes[0:int(num_objects)]
    scores = scores.numpy()[0]
    scores = scores[0:int(num_objects)]
    classes = classes.numpy()[0]
    classes = classes[0:int(num_objects)]

    # format bounding boxes from normalized ymin, xmin, ymax, xmax ---> xmin, ymin, width, height
    original_h, original_w, _ = frame.shape
    bboxes = utils.format_boxes(bboxes, original_h, original_w)

    # store all predictions in one parameter for simplicity when calling functions
    pred_bbox = [bboxes, scores, classes, num_objects]

    # read in all class names from config
    class_names = utils.read_class_names(cfg.YOLO.CLASSES)

    # by default allow all classes in .names file
    #allowed_classes = list(class_names.values())
    # custom allowed classes (uncomment line below to customize tracker for only people)
    #allowed_classes = ['person']
    allowed_classes = ['car', 'bus', 'truck']

    # loop through objects and use class index to get class name,
    # allow only classes in allowed_classes list
    names = []
    deleted_indx = []
    for i in range(num_objects):
        class_indx = int(classes[i])
        class_name = class_names[class_indx]
        if class_name not in allowed_classes:
            deleted_indx.append(i)
        else:
            names.append(class_name)
    names = np.array(names)
    count = len(names)
    if FLAGS.count:
        cv2.putText(frame, "Objects being tracked: {}".format(count), (5, 35),
                    cv2.FONT_HERSHEY_COMPLEX_SMALL, 2, (0, 255, 0), 2)
        print("Objects being tracked: {}".format(count))

    # delete detections that are not in allowed_classes
    bboxes = np.delete(bboxes, deleted_indx, axis=0)
    scores = np.delete(scores, deleted_indx, axis=0)

    # encode yolo detections and feed to tracker
    features = encoder(frame, bboxes)
    detections = [Detection(bbox, score, class_name, feature)
                  for bbox, score, class_name, feature
                  in zip(bboxes, scores, names, features)]

    # initialize color map
    cmap = plt.get_cmap('tab20b')
    colors = [cmap(i)[:3] for i in np.linspace(0, 1, 20)]

    # run non-maxima suppression
    boxs = np.array([d.tlwh for d in detections])
    scores = np.array([d.confidence for d in detections])
    classes = np.array([d.class_name for d in detections])
    indices = preprocessing.non_max_suppression(boxs, classes, nms_max_overlap, scores)
    detections = [detections[i] for i in indices]

    # Call the tracker
    tracker.predict()
    tracker.update(detections)

    # update tracks
    for track in tracker.tracks:
        if not track.is_confirmed() or track.time_since_update > 1:
            continue
        bbox = track.to_tlbr()
        class_name = track.get_class()
        tracks.append(Rect(track.track_id,
                           (int(bbox[0]), int(bbox[1])),
                           (int(bbox[2]) - int(bbox[0]), int(bbox[3]) - int(bbox[1]))))

        # draw bbox on screen
        color = colors[int(track.track_id) % len(colors)]
        color = [i * 255 for i in color]
        cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), color, 2)
        cv2.rectangle(frame, (int(bbox[0]), int(bbox[1] - 30)),
                      (int(bbox[0]) + (len(class_name) + len(str(track.track_id))) * 17, int(bbox[1])),
                      color, -1)
        cv2.putText(frame, class_name + "-" + str(track.track_id),
                    (int(bbox[0]), int(bbox[1] - 10)), 0, 0.75, (255, 255, 255), 2)

        # if enable info flag then print details about each track
        #if FLAGS.info:
        #    print("Tracker ID: {}, Class: {}, BBox Coords (xmin, ymin, xmax, ymax): {}".format(
        #        str(track.track_id), class_name,
        #        (int(bbox[0]), int(bbox[1]), int(bbox[2]), int(bbox[3]))))

    # draw the counting lines and their per-line counts
    for line in lines:
        cv2.line(frame, line.pt1, line.pt2, line.color, 3)
        cv2.line(frame, line.vertor_pt1, line.vertor_pt2, (255, 255, 0), 2)
        cv2.putText(frame, str(line.count), line.center, cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
    cv2.putText(frame, str(frame_num), (50, 50), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 0), 2)

    # calculate frames per second of running detections
    fps = 1.0 / (time.time() - start_time)
    print("FPS: %.2f" % fps)
    result = np.asarray(frame)
    result = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)

    if not FLAGS.dont_show:
        cv2.imshow("Output Video", result)

    # if output flag is set, save video file
    if FLAGS.output:
        out.write(result)
    if cv2.waitKey(1) & 0xFF == ord('q'):
        cv2.destroyAllWindows()
        return False, tracks
    return True, tracks
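# ---------------------------------------------------------------------------
# Illustrative sketch (not from the original scripts): the two snippets above
# call preprocessing.non_max_suppression(boxs, classes, nms_max_overlap,
# scores), while the stock deep_sort helper takes (boxes, max_overlap, scores)
# with no class argument. A class-aware variant can be built as a thin wrapper
# that applies the stock helper per class, so overlapping boxes of different
# classes (e.g. a car and a truck) do not suppress each other. The wrapper
# name is hypothetical.
import numpy as np
from deep_sort import preprocessing

def class_aware_nms(boxes, classes, max_overlap, scores):
    """Run deep_sort's NMS independently within each class label."""
    keep = []
    for cls in np.unique(classes):
        idx = np.where(classes == cls)[0]
        picked = preprocessing.non_max_suppression(boxes[idx], max_overlap, scores[idx])
        keep.extend(int(idx[p]) for p in picked)
    return keep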
def main(yolo):
    global p1_flag, p2_flag, vline1, mid_line, vline2, lane_1, lane_2, lane_3, lane_4, lane_5, lane_6, mask, preset
    global cnt_lane_1, cnt_lane_2, cnt_lane_3, cnt_lane_4, cnt_lane_5, cnt_lane_6, global_point
    global speed_lane_1, speed_lane_2, speed_lane_3, speed_lane_4, speed_lane_5, speed_lane_6

    ################# parameters ######################
    track_len = 2
    detect_interval = 4
    of_track = []
    preset = 0
    alpha = 0.3
    mm1, mm2, mm3, mm4, mm5, mm6 = 0, 0, 0, 0, 0, 0
    v1, v2, v3, v4, v5, v6 = 0, 0, 0, 0, 0, 0
    ptn1, ptn2, ptn3, ptn4, ptn5, ptn6 = 0, 0, 0, 0, 0, 0
    prv1, prv2, prv3, prv4, prv5, prv6 = 0, 0, 0, 0, 0, 0
    ms2kmh = 3.6
    fps = 30
    max_cosine_distance = 0.5
    nn_budget = None
    nms_max_overlap = 0.3
    counter = []
    ###################################################

    # Deep SORT
    model_filename = 'model_data/market1501.pb'
    encoder = gdet.create_box_encoder(model_filename, batch_size=1)
    metric = nn_matching.NearestNeighborDistanceMetric("cosine", max_cosine_distance, nn_budget)
    tracker = Tracker(metric)

    writeVideo_flag = True
    #video_path = "./output/output.avi"
    video_capture = cv2.VideoCapture(args["input"])

    if writeVideo_flag:
        # Define the codec and create VideoWriter object
        w = int(video_capture.get(3))
        h = int(video_capture.get(4))
        total_frame = int(video_capture.get(cv2.CAP_PROP_FRAME_COUNT))
        fourcc = cv2.VideoWriter_fourcc(*'FMP4')
        video_fps = video_capture.get(cv2.CAP_PROP_FPS)
        out = cv2.VideoWriter('output/CH4_output.avi', fourcc, video_fps, (w, h))
        list_file = open('detection.txt', 'w')
        frame_index = -1

    fps = 0.0
    frame_idx = 0
    speed_dict = OrderedDict()

    ret, first_frame = video_capture.read()
    cal_mask1 = np.zeros_like(first_frame[:, :, 0])
    cal_mask2 = np.zeros_like(first_frame[:, :, 0])

    while True:
        ret, frame = video_capture.read()
        if ret != True:  # stop once the video is exhausted, before touching the frame
            break
        glob = frame.copy()
        cmask = frame.copy()

        # Channel and preset setting
        if ch == 8:
            if (0 <= frame_idx <= 480 or 2415 <= frame_idx <= 4203 or 6140 <= frame_idx <= 7925
                    or 9864 <= frame_idx <= 11648 or 13585 <= frame_idx <= 15370 or frame_idx >= 17306):
                preset = 1
            elif (559 <= frame_idx <= 2340 or 4275 <= frame_idx <= 6064 or 8000 <= frame_idx <= 9787
                    or 11730 <= frame_idx <= 13513 or 15450 <= frame_idx <= 17237):
                preset = 2
            else:
                preset = 0

            if preset == 1:
                vline1 = p1_vline1
                mid_line = p1_mid_line
                vline2 = p1_vline2
                lane_1 = p1_lane_1
                lane_2 = p1_lane_2
                lane_3 = p1_lane_3
                lane_4 = p1_lane_4
                lane_5 = p1_lane_5
                lane_6 = p1_lane_6
                global_point = p1_global_point
                mask = p1_mask
                # Polyline
                cv2.polylines(frame, [lane_1], True, (153, 255, 255))
                cv2.polylines(frame, [lane_2], True, (255, 204, 204))
                cv2.polylines(frame, [lane_3], True, (204, 255, 204))
                cv2.polylines(frame, [lane_4], True, (255, 204, 255))
                cv2.polylines(frame, [lane_5], True, (153, 153, 255))
                cv2.polylines(frame, [lane_6], True, (102, 255, 153))
                frame = cv2.line(frame, vline1[0], vline1[1], (0, 0, 255), 1)
                frame = cv2.line(frame, mid_line[0], mid_line[1], (0, 0, 255), 1)
                frame = cv2.line(frame, vline2[0], vline2[1], (0, 0, 255), 1)
                p1_flag = True
                view_polygon = np.array([[10, 1920], [380, 250], [680, 250], [1080, 480], [1080, 1920]])
                cal_polygon = np.array([[361, 304], [755, 293], [1076, 480], [1077, 1067], [163, 1051]])
                pg1 = np.array([[382, 347], [359, 347], [236, 833], [272, 832]])    # RT, LT, LB, RB
                pg2 = np.array([[460, 347], [434, 346], [456, 833], [505, 833]])    # LB, RT, LT, RB
                pg3 = np.array([[544, 345], [514, 345], [686, 833], [755, 832]])    # LB, LT, RT, RB
                pg4 = np.array([[630, 342], [598, 343], [924, 829], [991, 829]])    # LB, LT, LB, RB
                pg5 = np.array([[725, 343], [696, 345], [996, 650], [1056, 646]])   # RT, LB, LT, RB
                pg6 = np.array([[798, 340], [761, 340], [1037, 535], [1070, 530]])  # RT, LB, LT, RB
                cv2.fillConvexPoly(cal_mask1, cal_polygon, 1)
                frame_gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
                frame_gray = cv2.bitwise_and(frame_gray, frame_gray, mask=cal_mask1)
                if pg1.size > 0: cv2.fillPoly(cmask, [pg1], (120, 0, 120), cv2.LINE_AA)
                if pg2.size > 0: cv2.fillPoly(cmask, [pg2], (120, 120, 0), cv2.LINE_AA)
                if pg3.size > 0: cv2.fillPoly(cmask, [pg3], (0, 120, 120), cv2.LINE_AA)
                if pg4.size > 0: cv2.fillPoly(cmask, [pg4], (80, 0, 255), cv2.LINE_AA)
                if pg5.size > 0: cv2.fillPoly(cmask, [pg5], (255, 0, 80), cv2.LINE_AA)
                if pg6.size > 0: cv2.fillPoly(cmask, [pg6], (120, 0, 0), cv2.LINE_AA)
            elif preset == 2:
                vline1 = p2_vline1
                mid_line = p2_mid_line
                vline2 = p2_vline2
                lane_1 = p2_lane_1
                lane_2 = p2_lane_2
                lane_3 = p2_lane_3
                lane_4 = p2_lane_4
                lane_5 = p2_lane_5
                lane_6 = p2_lane_6
                global_point = p2_global_point
                mask = p2_mask
                # Polyline
                cv2.polylines(frame, [lane_1], True, (153, 255, 255))
                cv2.polylines(frame, [lane_2], True, (255, 204, 204))
                cv2.polylines(frame, [lane_3], True, (204, 255, 204))
                cv2.polylines(frame, [lane_4], True, (255, 204, 255))
                cv2.polylines(frame, [lane_5], True, (153, 153, 255))
                cv2.polylines(frame, [lane_6], True, (102, 255, 153))
                frame = cv2.line(frame, vline1[0], vline1[1], (0, 0, 255), 1)
                frame = cv2.line(frame, mid_line[0], mid_line[1], (0, 0, 255), 1)
                frame = cv2.line(frame, vline2[0], vline2[1], (0, 0, 255), 1)
                p2_flag = True
                view_polygon = np.array([[284, 649], [0, 1629], [1076, 1574], [1079, 888], [676, 634]])
                cal_polygon = np.array([[896, 778], [244, 794], [105, 1271], [1077, 1245], [1077, 879]])
                pg1 = np.array([[276, 846], [234, 847], [134, 1200], [199, 1198]])    # RT, LT, LB, RB
                pg2 = np.array([[418, 844], [375, 844], [384, 1196], [442, 1198]])    # LB, RT, LT, RB
                pg3 = np.array([[553, 843], [508, 844], [637, 1194], [706, 1194]])    # LB, LT, RT, RB
                pg4 = np.array([[686, 841], [637, 843], [886, 1190], [968, 1189]])    # LB, LT, LB, RB
                pg5 = np.array([[817, 837], [773, 841], [1005, 1051], [1060, 1047]])  # RT, LB, LT, RB
                pg6 = np.array([[966, 837], [919, 840], [1043, 929], [1087, 927]])    # RT, LT, LB, RB
                cv2.fillConvexPoly(cal_mask2, cal_polygon, 1)
                frame_gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
                frame_gray = cv2.bitwise_and(frame_gray, frame_gray, mask=cal_mask2)
                if pg1.size > 0: cv2.fillPoly(cmask, [pg1], (120, 0, 120), cv2.LINE_AA)
                if pg2.size > 0: cv2.fillPoly(cmask, [pg2], (120, 120, 0), cv2.LINE_AA)
                if pg3.size > 0: cv2.fillPoly(cmask, [pg3], (0, 120, 120), cv2.LINE_AA)
                if pg4.size > 0: cv2.fillPoly(cmask, [pg4], (80, 0, 255), cv2.LINE_AA)
                if pg5.size > 0: cv2.fillPoly(cmask, [pg5], (255, 0, 80), cv2.LINE_AA)
                if pg6.size > 0: cv2.fillPoly(cmask, [pg6], (120, 0, 0), cv2.LINE_AA)

        elif ch == 4:
            if 0 <= frame_idx <= 1751 or frame_idx >= 3655:
                preset = 1
            elif 1797 <= frame_idx <= 3600:
                preset = 2
            else:
                preset = 0

            if preset == 1:
                lane_1 = p1_lane_1
                lane_2 = p1_lane_2
                lane_3 = p1_lane_3
                lane_4 = p1_lane_4
                lane_5 = p1_lane_5
                lane_6 = p1_lane_6
                global_point = p1_global_point
                mask = p1_mask
                # Polyline
                # cv2.polylines(frame, [lane_1], True, (153, 255, 255))
                # cv2.polylines(frame, [lane_2], True, (255, 204, 204))
                # cv2.polylines(frame, [lane_3], True, (204, 255, 204))
                # cv2.polylines(frame, [lane_4], True, (255, 204, 255))
                # cv2.polylines(frame, [lane_5], True, (153, 153, 255))
                # cv2.polylines(frame, [lane_6], True, (102, 255, 153))
                p1_flag = True
                view_polygon = np.array([[731, 563], [385, 567], [33, 1260], [1077, 1254], [1078, 812]])
                cal_polygon = np.array([[914, 669], [286, 675], [89, 1083], [1078, 1083], [1078, 772]])
                pg6 = np.array([[346, 686], [313, 686], [163, 992], [244, 996]])    # RT, LT, LB, RB
                pg5 = np.array([[430, 684], [401, 685], [338, 998], [420, 1000]])   # LB, RT, LT, RB
                pg4 = np.array([[534, 685], [506, 685], [547, 999], [631, 999]])    # LB, LT, RT, RB
                pg3 = np.array([[654, 685], [609, 684], [760, 1000], [839, 999]])   # LB, LT, LB, RB
                pg2 = np.array([[770, 685], [723, 684], [979, 999], [1051, 998]])   # RT, LB, LT, RB
                pg1 = np.array([[858, 683], [815, 683], [1031, 860], [1077, 857]])  # RT, LB, LT, RB
                cv2.fillConvexPoly(cal_mask1, cal_polygon, 1)
                frame_gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
                frame_gray = cv2.bitwise_and(frame_gray, frame_gray, mask=cal_mask1)
                if pg1.size > 0: cv2.fillPoly(cmask, [pg1], (120, 0, 120), cv2.LINE_AA)
                if pg2.size > 0: cv2.fillPoly(cmask, [pg2], (120, 120, 0), cv2.LINE_AA)
                if pg3.size > 0: cv2.fillPoly(cmask, [pg3], (0, 120, 120), cv2.LINE_AA)
                if pg4.size > 0: cv2.fillPoly(cmask, [pg4], (80, 0, 255), cv2.LINE_AA)
                if pg5.size > 0: cv2.fillPoly(cmask, [pg5], (255, 0, 80), cv2.LINE_AA)
                if pg6.size > 0: cv2.fillPoly(cmask, [pg6], (120, 0, 0), cv2.LINE_AA)
            elif preset == 2:
                lane_1 = p2_lane_1
                lane_2 = p2_lane_2
                lane_3 = p2_lane_3
                lane_4 = p2_lane_4
                lane_5 = p2_lane_5
                lane_6 = p2_lane_6
                global_point = p2_global_point
                mask = p2_mask
                # Polyline
                # cv2.polylines(frame, [lane_1], True, (153, 255, 255))
                # cv2.polylines(frame, [lane_2], True, (255, 204, 204))
                # cv2.polylines(frame, [lane_3], True, (204, 255, 204))
                # cv2.polylines(frame, [lane_4], True, (255, 204, 255))
                # cv2.polylines(frame, [lane_5], True, (153, 153, 255))
                # cv2.polylines(frame, [lane_6], True, (102, 255, 153))
                p2_flag = True
                view_polygon = np.array([[547, 609], [0, 1109], [1, 1271], [1078, 1278], [1079, 594]])
                cal_polygon = np.array([[529, 611], [8, 1105], [1077, 1110], [1078, 599]])
                pg6 = np.array([[556, 609], [493, 607], [108, 1033], [190, 1030]])   # RT, LT, LB, RB
                pg5 = np.array([[693, 604], [642, 602], [356, 1020], [455, 1020]])   # LB, RT, LT, RB
                pg4 = np.array([[812, 633], [765, 633], [604, 1026], [702, 1026]])   # LB, LT, RT, RB
                pg3 = np.array([[932, 638], [882, 636], [883, 1007], [953, 1001]])   # LB, LT, LB, RB
                pg2 = np.array([[1059, 641], [978, 638], [1028, 941], [1079, 916]])  # RT, LB, LT, RB
                pg1 = np.array([])
                cv2.fillConvexPoly(cal_mask2, cal_polygon, 1)
                frame_gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
                frame_gray = cv2.bitwise_and(frame_gray, frame_gray, mask=cal_mask2)
                if pg1.size > 0: cv2.fillPoly(cmask, [pg1], (120, 0, 120), cv2.LINE_AA)
                if pg2.size > 0: cv2.fillPoly(cmask, [pg2], (120, 120, 0), cv2.LINE_AA)
                if pg3.size > 0: cv2.fillPoly(cmask, [pg3], (0, 120, 120), cv2.LINE_AA)
                if pg4.size > 0: cv2.fillPoly(cmask, [pg4], (80, 0, 255), cv2.LINE_AA)
                if pg5.size > 0: cv2.fillPoly(cmask, [pg5], (255, 0, 80), cv2.LINE_AA)
                if pg6.size > 0: cv2.fillPoly(cmask, [pg6], (120, 0, 0), cv2.LINE_AA)

        t1 = time.time()
        cnt_lane_1 = cnt_lane_2 = cnt_lane_3 = cnt_lane_4 = cnt_lane_5 = cnt_lane_6 = 0

        image = Image.fromarray(frame[..., ::-1])  # bgr to rgb
        boxs, class_names = yolo.detect_image(image)
        # features is a 128-dimension vector for each bounding box
        features = encoder(frame, boxs)

        # score set to 1.0 here:
        # score 1.0 means that the bbox's class score after yolo is 100%
        detections = [Detection(bbox, 1.0, feature) for bbox, feature in zip(boxs, features)]

        # Run non-maxima suppression.
        boxes = np.array([d.tlwh for d in detections])
        scores = np.array([d.confidence for d in detections])
        indices = preprocessing.non_max_suppression(boxes, nms_max_overlap, scores)
        # detections is the object detection result at the current frame;
        # it may contain many bboxes or no bbox at all
        detections = [detections[i] for i in indices]

        # calculate lane-by-lane avg speed
        ## swhan method (optical flow)
        mm1, mm2, mm3, mm4, mm5, mm6 = 0, 0, 0, 0, 0, 0
        if len(of_track) > 0:
            # forward-backward Lucas-Kanade check: keep only points that
            # track consistently in both directions
            img0, img1 = prev_gray, frame_gray
            p0 = np.float32([tr[-1] for tr in of_track]).reshape(-1, 1, 2)
            p1, _st, _err = cv2.calcOpticalFlowPyrLK(img0, img1, p0, None, **lk_params)
            p0r, _st, _err = cv2.calcOpticalFlowPyrLK(img1, img0, p1, None, **lk_params)
            d = abs(p0 - p0r).reshape(-1, 2).max(-1)
            good = d < 1
            new_of_tracks = []
            for tr, (x, y), good_flag in zip(of_track, p1.reshape(-1, 2), good):
                if not good_flag:
                    continue
                tr.append((x, y))
                if len(tr) > track_len:
                    del tr[0]
                new_of_tracks.append(tr)
                cv2.circle(frame, (x, y), 3, (0, 255, 0), -1)
            of_track = new_of_tracks

            for idx, tr in enumerate(of_track):
                #print(frame_idx, tr)
                if pg1.size > 0:
                    result_pg1 = cv2.pointPolygonTest(pg1, tr[0], True)
                else:
                    result_pg1 = -999
                if pg2.size > 0:
                    result_pg2 = cv2.pointPolygonTest(pg2, tr[0], True)
                else:
                    result_pg2 = -999
                if pg3.size > 0:
                    result_pg3 = cv2.pointPolygonTest(pg3, tr[0], True)
                else:
                    result_pg3 = -999
                if pg4.size > 0:
                    result_pg4 = cv2.pointPolygonTest(pg4, tr[0], True)
                else:
                    result_pg4 = -999
                if pg5.size > 0:
                    result_pg5 = cv2.pointPolygonTest(pg5, tr[0], True)
                else:
                    result_pg5 = -999
                if pg6.size > 0:
                    result_pg6 = cv2.pointPolygonTest(pg6, tr[0], True)
                else:
                    result_pg6 = -999

                if frame_idx % detect_interval == 0:
                    if result_pg1 > 0:
                        ptn1 += 1
                        if preset == 1:
                            mm1 += convPtoR_1(tr[0][0], tr[0][1], tr[1][0], tr[1][1])
                        elif preset == 2:
                            mm1 += convPtoR_2(tr[0][0], tr[0][1], tr[1][0], tr[1][1])
                        mmm1 = mm1 / ptn1
                        v1 = mmm1 * fps * ms2kmh * 6
                    if result_pg2 > 0:
                        ptn2 += 1
                        if preset == 1:
                            mm2 += convPtoR_1(tr[0][0], tr[0][1], tr[1][0], tr[1][1])
                        elif preset == 2:
                            mm2 += convPtoR_2(tr[0][0], tr[0][1], tr[1][0], tr[1][1])
                        mmm2 = mm2 / ptn2
                        v2 = mmm2 * fps * ms2kmh * 6
                    if result_pg3 > 0:
                        ptn3 += 1
                        if preset == 1:
                            mm3 += convPtoR_1(tr[0][0], tr[0][1], tr[1][0], tr[1][1])
                        elif preset == 2:
                            mm3 += convPtoR_2(tr[0][0], tr[0][1], tr[1][0], tr[1][1])
                        mmm3 = mm3 / ptn3
                        v3 = mmm3 * fps * ms2kmh * 6
                    if result_pg4 > 0:
                        ptn4 += 1
                        if preset == 1:
                            mm4 += convPtoR_1(tr[0][0], tr[0][1], tr[1][0], tr[1][1])
                        elif preset == 2:
                            mm4 += convPtoR_2(tr[0][0], tr[0][1], tr[1][0], tr[1][1])
                        mmm4 = mm4 / ptn4
                        v4 = mmm4 * fps * ms2kmh * 6
                    if result_pg5 > 0:
                        ptn5 += 1
                        if preset == 1:
                            mm5 += convPtoR_1(tr[0][0], tr[0][1], tr[1][0], tr[1][1])
                        elif preset == 2:
                            mm5 += convPtoR_2(tr[0][0], tr[0][1], tr[1][0], tr[1][1])
                        mmm5 = mm5 / ptn5
                        v5 = mmm5 * fps * ms2kmh * 6
                    if result_pg6 > 0:
                        ptn6 += 1
                        if preset == 1:
                            mm6 += convPtoR_1(tr[0][0], tr[0][1], tr[1][0], tr[1][1])
                        elif preset == 2:
                            mm6 += convPtoR_2(tr[0][0], tr[0][1], tr[1][0], tr[1][1])
                        mmm6 = mm6 / ptn6
                        v6 = mmm6 * fps * ms2kmh * 6

        # re-seed optical-flow feature points away from existing tracks
        mask = np.zeros_like(frame_gray)
        mask[:] = 255
        for x, y in [np.int32(tr[-1]) for tr in of_track]:
            cv2.circle(mask, (x, y), 3, 0, -1)
        p = cv2.goodFeaturesToTrack(frame_gray, mask=mask, **feature_params)
        if p is not None:
            for x, y in np.float32(p).reshape(-1, 2):
                of_track.append([(x, y)])
        prev_gray = frame_gray

        ## swhan method
        if frame_idx % detect_interval == 0:
            if ptn1 > 0:
                avg_speed_lane_1 = v1
                prv1 = v1
            elif ptn1 == 0:
                avg_speed_lane_1 = 0
                prv1 = 0
            if ptn2 > 0:
                avg_speed_lane_2 = v2
                prv2 = v2
            elif ptn2 == 0:
                avg_speed_lane_2 = 0
                prv2 = 0
            if ptn3 > 0:
                avg_speed_lane_3 = v3
                prv3 = v3
            elif ptn3 == 0:
                avg_speed_lane_3 = 0
                prv3 = 0
            if ptn4 > 0:
                avg_speed_lane_4 = v4
                prv4 = v4
            elif ptn4 == 0:
                avg_speed_lane_4 = 0
                prv4 = 0
            if ptn5 > 0:
                avg_speed_lane_5 = v5
                prv5 = v5
            elif ptn5 == 0:
                avg_speed_lane_5 = 0
                prv5 = 0
            if ptn6 > 0:
                avg_speed_lane_6 = v6
                prv6 = v6
            elif ptn6 == 0:
                avg_speed_lane_6 = 0
                prv6 = 0
        else:
            avg_speed_lane_1 = prv1
            avg_speed_lane_2 = prv2
            avg_speed_lane_3 = prv3
            avg_speed_lane_4 = prv4
            avg_speed_lane_5 = prv5
            avg_speed_lane_6 = prv6
        ptn1, ptn2, ptn3, ptn4, ptn5, ptn6 = 0, 0, 0, 0, 0, 0

        # Call the tracker
        tracker.predict()
        tracker.update(detections)

        i = int(0)
        indexIDs = []
        c = []
        boxes = []
        for det in detections:
            bbox = det.to_tlbr()
        for track in tracker.tracks:
            if not track.is_confirmed() or track.time_since_update > 1:
                continue
            indexIDs.append(int(track.track_id))
            counter.append(int(track.track_id))
            bbox = track.to_tlbr()
            color = [int(c) for c in COLORS[indexIDs[i] % len(COLORS)]]
            cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), (color), 3)
            if len(class_names) > 0:
                class_name = class_names[0]
            i += 1
            center = (int(((bbox[0]) + (bbox[2])) / 2), int(((bbox[1]) + (bbox[3])) / 2))

            # global point matching
            track.matching_point[0] = center[0]
            track.matching_point[1] = center[1]
            temp_list = [track.matching_point[0], track.matching_point[1], frame_idx]
            if len(track.matching_point_list) == 4:
                track.matching_point_list.append(temp_list)
                x1 = track.matching_point_list[0][0]
                y1 = track.matching_point_list[0][1]
                x2 = track.matching_point_list[-1][0]
                y2 = track.matching_point_list[-1][1]
                # If the pixel doesn't change, the speed should be zero.
                if x1 == x2 and y1 == y2:
                    track.matching_point_list.pop(0)
                else:
                    time1 = track.matching_point_list[0][2]
                    time2 = track.matching_point_list[-1][2]
                    if preset == 1:
                        R_dist1 = convPtoR_1(x1, y1, x2, y2)
                        t_time1 = (time2 - time1) / 30
                        speed = int(3.6 * R_dist1 // t_time1)
                        #print("time1 : ", time1, "time2 : ", time2)
                    elif preset == 2:
                        R_dist1 = convPtoR_2(x1, y1, x2, y2)
                        t_time1 = (time2 - time1) / 30
                        speed = int(3.6 * R_dist1 // t_time1)
                        #print("time1 : ", time1, "time2 : ", time2)
                    track.matching_point_list.pop(0)
                    if frame_idx % 6 == 1:
                        track.speed = speed
                    cv2.putText(frame, str(int(track.speed)) + 'km/h',
                                (int(bbox[0]), int(bbox[1] + ((bbox[3] - bbox[1]) / 2))),
                                cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 4)
            elif len(track.matching_point_list) == 0:
                track.matching_point_list.append(temp_list)
            elif (track.matching_point_list[-1][0] != track.matching_point[0]
                  and track.matching_point_list[-1][1] != track.matching_point[1]):
                track.matching_point_list.append(temp_list)
            cv2.circle(frame, (track.matching_point[0], track.matching_point[1]), 5, (0, 0, 255), -1)

            # traffic lane by lane
            if cv2.pointPolygonTest(lane_1, center, False) >= 0:
                cnt_lane_1 += 1
                track.driving_lane = 1
            elif cv2.pointPolygonTest(lane_2, center, False) >= 0:
                cnt_lane_2 += 1
                track.driving_lane = 2
            elif cv2.pointPolygonTest(lane_3, center, False) >= 0:
                cnt_lane_3 += 1
                track.driving_lane = 3
            elif cv2.pointPolygonTest(lane_4, center, False) >= 0:
                cnt_lane_4 += 1
                track.driving_lane = 4
            elif cv2.pointPolygonTest(lane_5, center, False) >= 0:
                cnt_lane_5 += 1
                track.driving_lane = 5
            elif cv2.pointPolygonTest(lane_6, center, False) >= 0:
                cnt_lane_6 += 1
                track.driving_lane = 6
            cv2.putText(frame, "ID:" + str(track.track_id) + "/" + str(track.driving_lane),
                        (int(bbox[0]), int(bbox[1]) - 20), cv2.FONT_HERSHEY_SIMPLEX, 1, color, 2)
            pts[track.track_id].append(center)
            ctime[track.time].append(time.time())
            thickness = 5
            # center point
            cv2.circle(frame, (center), 1, color, thickness)

            # draw motion path
            for j in range(1, len(pts[track.track_id])):
                if pts[track.track_id][j - 1] is None or pts[track.track_id][j] is None:
                    continue
                thickness = int(np.sqrt(64 / float(j + 1)) * 2)
                cv2.line(frame, (pts[track.track_id][j - 1]), (pts[track.track_id][j]), (color), thickness)
                # cv2.putText(frame, str(class_names[j]), (int(bbox[0]), int(bbox[1] - 20)), 0, 5e-3 * 150, (255, 255, 255), 2)

        cv2.putText(frame, "Current Object Counter: " + str(i), (int(20), int(80)), 0, 5e-3 * 200, (0, 255, 0), 2)
        cv2.putText(frame, "FPS: %f" % (fps), (int(20), int(40)), 0, 5e-3 * 200, (0, 255, 0), 3)
        cv2.putText(frame, "Traffic, Avg_speed", (570, 40), 0, 1, (0, 255, 0), 2)
        cv2.putText(frame, "Lane_1: " + str(cnt_lane_1) + ', ' + str(int(avg_speed_lane_1)), (500, 80), 0, 1, (0, 255, 0), 2)
        cv2.putText(frame, "Lane_2: " + str(cnt_lane_2) + ', ' + str(int(avg_speed_lane_2)), (500, 120), 0, 1, (0, 255, 0), 2)
        cv2.putText(frame, "Lane_3: " + str(cnt_lane_3) + ', ' + str(int(avg_speed_lane_3)), (500, 160), 0, 1, (0, 255, 0), 2)
        cv2.putText(frame, "Lane_4: " + str(cnt_lane_4) + ', ' + str(int(avg_speed_lane_4)), (500, 200), 0, 1, (0, 255, 0), 2)
        cv2.putText(frame, "Lane_5: " + str(cnt_lane_5) + ', ' + str(int(avg_speed_lane_5)), (500, 240), 0, 1, (0, 255, 0), 2)
        cv2.putText(frame, "Lane_6: " + str(cnt_lane_6) + ', ' + str(int(avg_speed_lane_6)), (500, 280), 0, 1, (0, 255, 0), 2)
        frame_idx += 1

        if writeVideo_flag:
            # save a frame
            # cv2.fillPoly(frame, [mask], (0, 0, 0))
            # for i, v in enumerate(global_point):
            #     cv2.circle(frame, v, 1, (0, 255, 255), -1)
            cv2.addWeighted(cmask, alpha, frame, 1 - alpha, 0, frame)
            out.write(frame)
            frame_index = frame_index + 1
            list_file.write(str(frame_index) + ' ')
            if len(boxs) != 0:
                for i in range(0, len(boxs)):
                    list_file.write(str(boxs[i][0]) + ' ' + str(boxs[i][1]) + ' ' +
                                    str(boxs[i][2]) + ' ' + str(boxs[i][3]) + ' ')
            list_file.write('\n')

        fps = (fps + (1. / (time.time() - t1))) / 2

    print(" ")
    print("[Finish]")
    end = time.time()

    video_capture.release()
    if writeVideo_flag:
        out.release()
        list_file.close()
    cv2.destroyAllWindows()
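# ---------------------------------------------------------------------------
# Illustrative sketch (not from the original script): convPtoR_1/convPtoR_2
# are not defined in this snippet; judging from their use, they map a pixel
# displacement to a road-plane distance in metres for each camera preset. One
# common way to implement such a helper is a calibrated homography; H_ROAD
# below is a placeholder (identity) standing in for a matrix obtained offline,
# e.g. via cv2.getPerspectiveTransform from surveyed reference points. The
# function name and H_ROAD are hypothetical.
import numpy as np
import cv2

H_ROAD = np.eye(3, dtype=np.float32)  # placeholder; substitute a calibrated homography

def conv_p_to_r(x1, y1, x2, y2, H=H_ROAD):
    """Road-plane distance in metres between two pixel coordinates."""
    pts = np.array([[[x1, y1]], [[x2, y2]]], dtype=np.float32)
    road = cv2.perspectiveTransform(pts, H).reshape(2, 2)
    return float(np.linalg.norm(road[0] - road[1]))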
#         av_time = 1
#         continue
#     else:
#         av_time += end
#         ccnt += 1
#         continue

detections = []
for bbox, score, classe, color, feature in zip(boxes, scores, classes, colors, features):
    detections.append(Detection(bbox, score, classe, color, feature))

# Run non-maximum suppression
boxes = np.array([d.tlwh for d in detections])
scores = np.array([d.score for d in detections])
indices = preprocessing.non_max_suppression(boxes, 1.0, scores)
detections = [detections[i] for i in indices]

start_tr = time.time()
# Refresh the tracker
tracker.predict()
tracker.update(detections)
print("tracker update:", float(time.time() - start_tr))

# Iterate over tracks and draw the tracking info
track_count = 0
track_total = 0
for track in tracker.tracks:
    if not track.is_confirmed() or track.time_since_update > 1:
        continue
    y1, x1, y2, x2 = np.array(track.to_tlbr(), dtype=np.int32)
def process_frame(dataset):
    # Definition of the parameters
    to_process = {}
    max_cosine_distance = 0.3
    nn_budget = None
    nms_max_overlap = 1.0
    producer = KafkaProducer(
        bootstrap_servers='master:6667',
        value_serializer=lambda m: json.dumps(m).encode('utf8'))
    data = dataset.collect()

    # deep_sort
    model_filename = 'model_data/mars-small128.pb'
    encoder = gdet.create_box_encoder(model_filename, batch_size=1)
    metric = nn_matching.NearestNeighborDistanceMetric("cosine", max_cosine_distance, nn_budget)
    tracker = Tracker(metric)

    # keep only the most recent event (within 5 s) per camera
    dt_now = dt.datetime.now()
    for datum in data:
        event = json.loads(datum[1])
        dt_event = dt.datetime.strptime(event['timestamp'], '%Y-%m-%dT%H:%M:%S.%f')
        delta = dt_now - dt_event
        print("timestamp = " + str(dt_event))
        if delta.seconds > 5:
            continue
        to_process[event['camera_id']] = event

    for key, event in to_process.items():
        t1 = time.time()
        try:
            decoded = base64.b64decode(event['image'])
        except TypeError:
            return
        filename = '/home/haohsiang/Vigilancia-Distributed/codev1frame.jpg'  # I assume you have a way of picking unique filenames
        with open(filename, 'wb') as f:
            f.write(decoded)
        frame = cv2.imread(filename)
        #ret, frame = video_capture.read()  # frame shape 640*480*3
        image = Image.fromarray(frame[..., ::-1])  # bgr to rgb
        boxs = yolo.detect_image(image)
        print("box_num", len(boxs))
        features = encoder(frame, boxs)

        # detection confidence is fixed to 1.0 here
        detections = [Detection(bbox, 1.0, feature) for bbox, feature in zip(boxs, features)]

        # Run non-maxima suppression.
        boxes = np.array([d.tlwh for d in detections])
        scores = np.array([d.confidence for d in detections])
        indices = preprocessing.non_max_suppression(boxes, nms_max_overlap, scores)
        detections = [detections[i] for i in indices]

        # Call the tracker
        tracker.predict()
        tracker.update(detections)

        for track in tracker.tracks:
            if not track.is_confirmed() or track.time_since_update > 1:
                continue
            bbox = track.to_tlbr()
            cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), (255, 255, 255), 2)
            cv2.putText(frame, str(track.track_id), (int(bbox[0]), int(bbox[1])), 0, 5e-3 * 200, (0, 255, 0), 2)

        for det in detections:
            bbox = det.to_tlbr()
            cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), (255, 0, 0), 2)

        # send result to kafka
        if len(boxs) > 0:
            print(str(track.track_id) + ' :' + str(bbox[0]) + ' ' + str(bbox[1]) + ' ' +
                  str(bbox[2]) + ' ' + str(bbox[3]))
            print(dt.datetime.now().time())
            result = {
                'ID': str(track.track_id),
                'timestamp': dt.datetime.now().isoformat(),
                'location_x': str(bbox[0]),
                'w': str(bbox[2])
            }
            producer.send('position', result)
            producer.flush()

        # Press Q to stop!
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
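# ---------------------------------------------------------------------------
# Illustrative sketch (not from the original script): the producer above
# publishes per-track positions to the 'position' topic. A minimal consumer
# for reading those messages back, assuming the same kafka-python library and
# the same broker address as the producer:
import json
from kafka import KafkaConsumer

consumer = KafkaConsumer(
    'position',
    bootstrap_servers='master:6667',
    value_deserializer=lambda m: json.loads(m.decode('utf8')))

for message in consumer:
    pos = message.value  # dict with 'ID', 'timestamp', 'location_x', 'w'
    print(pos['ID'], pos['timestamp'], pos['location_x'], pos['w'])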
def main(yolo):
    # Determining the FPS of a video having variable frame rate
    # cv2.CAP_PROP_FPS is not used since it returns 'infinity' for variable frame rate videos
    filename = "clip1.mp4"

    # Determining the total duration of the video
    clip = VideoFileClip(filename)

    cap2 = cv2.VideoCapture(filename)
    co = 0
    ret2 = True
    while ret2:
        ret2, frame2 = cap2.read()
        # Determining the total number of frames
        co += 1
    cap2.release()

    # Computing the average FPS of the video
    Input_FPS = co / clip.duration

    # Definition of the parameters
    max_cosine_distance = 0.3
    nn_budget = None
    nms_max_overlap = 1.0
    frame_count = 0

    # Implementing Deep Sort algorithm
    model_filename = 'model_data/mars-small128.pb'
    encoder = gdet.create_box_encoder(model_filename, batch_size=1)

    # Cosine distance is used as the metric
    metric = nn_matching.NearestNeighborDistanceMetric("cosine", max_cosine_distance, nn_budget)
    tracker = Tracker(metric)

    video_capture = cv2.VideoCapture(filename)

    # Define the codec and create a VideoWriter object to save the output video
    out = cv2.VideoWriter('output.mp4', cv2.VideoWriter_fourcc(*'MP4V'), Input_FPS,
                          (int(video_capture.get(3)), int(video_capture.get(4))))

    # To calculate the frames processed by the deep sort algorithm per second
    fps = 0.0

    # Initializing empty variables for counting and tracking purpose
    queue_track_dict = {}    # Count time in queue
    queue_track_dict_2 = {}
    alley_track_dict = {}    # Count time in alley
    store_track_dict = {}    # Count total time in store
    latest_frame = {}        # Track the last frame in which a person was identified
    reidentified = {}        # Yes or No: whether the person has been re-identified at a later point in time
    plot_head_count_store = []  # y-axis for Footfall Analysis
    plot_head_count_queue = []  # y-axis for Footfall Analysis
    plot_time = []              # x-axis for Footfall Analysis

    # Loop to process each frame and track people
    while True:
        ret, frame = video_capture.read()
        if ret != True:
            break

        # Brighten the frame with a simple gamma-style intensity transform
        maxIntensity = 255.0
        phi = 1
        theta = 1
        newImage1 = (maxIntensity / phi) * (frame / (maxIntensity / theta)) ** 1.3
        frame = np.array(newImage1, dtype=np.uint8)
        cv2.imwrite('testing.jpg', frame)

        if frame_count == 5000:
            break

        head_count_store = 0
        head_count_queue = 0
        t1 = time.time()

        image = Image.fromarray(frame[..., ::-1])  # BGR to RGB conversion
        boxs = yolo.detect_image(image)
        features = encoder(frame, boxs)

        # Getting the detections having score of 0.0 to 1.0
        detections = [Detection(bbox, 1.0, feature) for bbox, feature in zip(boxs, features)]

        # Run non-maxima suppression on the bounding boxes
        boxes = np.array([d.tlwh for d in detections])
        scores = np.array([d.confidence for d in detections])
        indices = preprocessing.non_max_suppression(boxes, nms_max_overlap, scores)
        detections = [detections[i] for i in indices]

        # Call the tracker to associate tracking boxes to detection boxes
        tracker.predict()
        tracker.update(detections)

        # Defining the co-ordinates of the areas of interest
        pts3 = np.array([[133, 120], [303, 120], [297, 175], [127, 175]], np.int32)
        pts3 = pts3.reshape((-1, 1, 2))
        pts2 = np.array([[380, 250], [380, 365], [175, 480], [0, 480], [0, 380]], np.int32)
        pts2 = pts2.reshape((-1, 1, 2))  # Queue Area - 1
        pts = np.array([[0, 380], [0, 0], [640, 0], [640, 480], [170, 480], [380, 360], [380, 250]], np.int32)
        pts = pts.reshape((-1, 1, 2))  # Alley Region

        cv2.polylines(frame, [pts], True, (255, 0, 255), thickness=1)
        cv2.polylines(frame, [pts3], True, (248, 197, 39), thickness=2)
        cv2.polylines(frame, [pts2], True, (0, 255, 255), thickness=2)

        # Drawing tracker boxes and frame count for people inside the areas of interest
        for track in tracker.tracks:
            if not track.is_confirmed() or track.time_since_update > 1:
                continue
            bbox = track.to_tlbr()

            # Checking if the person is within an area of interest
            queue_point_test_2 = center_point_inside_polygon(bbox, pts3)
            queue_point_test = center_point_inside_polygon(bbox, pts2)
            alley_point_test = center_point_inside_polygon(bbox, pts)

            # Checking if a person has been re-identified in a later frame
            if queue_point_test == 'inside' or alley_point_test == 'inside' or queue_point_test_2 == 'inside':
                if track.track_id in latest_frame.keys():
                    if latest_frame[track.track_id] != frame_count - 1:
                        reidentified[track.track_id] = 1

            # Initializing variables in case a new person has been seen by the model
            if queue_point_test == 'inside' or alley_point_test == 'inside' or queue_point_test_2 == 'inside':
                head_count_store += 1
                if track.track_id not in store_track_dict.keys():
                    store_track_dict[track.track_id] = 0
                    queue_track_dict[track.track_id] = 0
                    queue_track_dict_2[track.track_id] = 0
                    alley_track_dict[track.track_id] = 0
                    reidentified[track.track_id] = 0

            # Processing for people inside the Queue Area - 1
            if queue_point_test == 'inside':
                head_count_queue += 1
                queue_track_dict[track.track_id] += 1
                latest_frame[track.track_id] = frame_count
                cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), (0, 255, 255), 2)
                cv2.rectangle(frame, (int(bbox[0]) - 1, int(bbox[1]) - 15), (int(bbox[2]) + 1, int(bbox[1])), (0, 255, 255), -1)
                wait_time = round((queue_track_dict[track.track_id] / Input_FPS), 2)
                cv2.putText(frame, str(wait_time) + "s", (int(bbox[0]), int(bbox[1])), 5, 0.9, (0, 0, 0), 1)

            # Processing for people inside the Queue Area - 2
            if queue_point_test_2 == 'inside':
                head_count_queue += 1
                queue_track_dict_2[track.track_id] += 1
                latest_frame[track.track_id] = frame_count
                cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), (248, 197, 39), 2)
                cv2.rectangle(frame, (int(bbox[0]) - 1, int(bbox[1]) - 15), (int(bbox[2]) + 1, int(bbox[1])), (248, 197, 39), -1)
                wait_time = round((queue_track_dict_2[track.track_id] / Input_FPS), 2)
                cv2.putText(frame, str(wait_time) + "s", (int(bbox[0]), int(bbox[1])), 5, 0.9, (0, 0, 0), 1)

            # Processing for people inside the Alley Region
            if alley_point_test == 'inside':
                alley_track_dict[track.track_id] += 1
                latest_frame[track.track_id] = frame_count
                # cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), (255, 255, 255), 2)
                # cv2.putText(frame, str(track.track_id), (int(bbox[0]), int(bbox[1])), 0, 0.8, (0, 0, 0), 4)
                # cv2.putText(frame, str(track.track_id), (int(bbox[0]), int(bbox[1])), 0, 0.8, (0, 255, 77), 2)

            # Getting the total Store time for a person
            if track.track_id in store_track_dict.keys():
                store_track_dict[track.track_id] = queue_track_dict[track.track_id] + alley_track_dict[track.track_id]

        # Drawing bounding box detections for people inside the store
        for det in detections:
            bbox = det.to_tlbr()
            # Checking if the person is within an area of interest
            queue_point_test = center_point_inside_polygon(bbox, pts)
            alley_point_test = center_point_inside_polygon(bbox, pts2)
            # if queue_point_test == 'inside' or alley_point_test == 'inside':
            #     cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), (255, 0, 0), 2)

        # Video Overlay - Head Count Data at that instant
        cv2.putText(frame, "Count: " + str(head_count_store), (30, 610),
                    cv2.FONT_HERSHEY_COMPLEX_SMALL, 1.5, (0, 0, 0), 3, cv2.LINE_AA, False)
        cv2.putText(frame, "Count: " + str(head_count_store), (30, 610),
                    cv2.FONT_HERSHEY_COMPLEX_SMALL, 1.5, (0, 255, 77), 2, cv2.LINE_AA, False)

        # Calculating the average wait time in queue
        total_people = len([v for v in queue_track_dict.values() if v > 0])
        total_queue_frames = sum(v for v in queue_track_dict.values() if v > 0)
        avg_queue_frames = 0
        if total_people != 0:
            avg_queue_frames = total_queue_frames / total_people
        avg_queue_time = round((avg_queue_frames / Input_FPS), 2)

        # Video Overlay - Average Wait Time in Queue
        cv2.putText(frame, "Avg Queue Time: " + str(avg_queue_time) + 's', (30, 690),
                    cv2.FONT_HERSHEY_COMPLEX_SMALL, 1.5, (0, 0, 0), 3, cv2.LINE_AA, False)
        cv2.putText(frame, "Avg Queue Time: " + str(avg_queue_time) + 's', (30, 690),
                    cv2.FONT_HERSHEY_COMPLEX_SMALL, 1.5, (0, 255, 77), 2, cv2.LINE_AA, False)

        # Calculating the average wait time in the store
        total_people = len(store_track_dict)
        total_store_frames = sum(store_track_dict.values())
        avg_store_frames = 0
        if total_people != 0:
            avg_store_frames = total_store_frames / total_people
        avg_store_time = round((avg_store_frames / Input_FPS), 2)

        # Video Overlay - Average Store time
        cv2.putText(frame, "Avg Store Time: " + str(avg_store_time) + 's', (30, 650),
                    cv2.FONT_HERSHEY_COMPLEX_SMALL, 1.5, (0, 0, 0), 3, cv2.LINE_AA, False)
        cv2.putText(frame, "Avg Store Time: " + str(avg_store_time) + 's', (30, 650),
                    cv2.FONT_HERSHEY_COMPLEX_SMALL, 1.5, (0, 255, 77), 2, cv2.LINE_AA, False)

        # Write the frame onto the VideoWriter object
        out.write(frame)

        # Calculating the frames processed per second by the model
        fps = (fps + (1. / (time.time() - t1))) / 2
        frame_count += 1

        # Printing processing status to track completion
        op = "FPS_" + str(frame_count) + "/" + str(co) + ": " + str(round(fps, 2))
        print("\r" + op, end="")

        # Adding plot values for Footfall Analysis every 2 seconds (hard coded for now)
        if frame_count % 50 == 0:
            plot_time.append(round((frame_count / Input_FPS), 2))
            plot_head_count_store.append(head_count_store)
            plot_head_count_queue.append(head_count_queue)

        # Press Q to stop the video
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

    # Defining data to be written into the csv file - Detailed Report
    csv_columns = ['Unique Person ID', 'Queue Time in AOI', 'Total Store Time']
    csv_data = []
    detailed_csv_file = 'Detailed_Store_Report.csv'
    queue_unique_count = 0
    for k, v in store_track_dict.items():
        csv_row = {
            csv_columns[0]: k,
            csv_columns[1]: round((queue_track_dict[k] / Input_FPS), 2),
            csv_columns[2]: round((v / Input_FPS), 2)
        }
        if round((queue_track_dict[k] / Input_FPS), 2) > 1:
            queue_unique_count += 1
        csv_data.append(csv_row)

    # Writing the data into the csv file - Detailed Report
    with open(detailed_csv_file, 'w') as csvfile:
        writer = csv.DictWriter(csvfile, fieldnames=csv_columns)
        writer.writeheader()
        for data in csv_data:
            writer.writerow(data)

    # Defining data to be written into the csv file - Detailed Report 2
    csv_columns = ['Unique Person ID', 'Queue Time in AOI', 'Total Store Time']
    csv_data = []
    detailed_csv_file = 'Detailed_Store_Report_2.csv'
    queue_unique_count = 0
    for k, v in store_track_dict.items():
        csv_row = {
            csv_columns[0]: k,
            csv_columns[1]: round((queue_track_dict_2[k] / Input_FPS), 2),
            csv_columns[2]: round((v / Input_FPS), 2)
        }
        if round((queue_track_dict_2[k] / Input_FPS), 2) > 1:
            queue_unique_count += 1
        csv_data.append(csv_row)

    # Writing the data into the csv file - Detailed Report 2
    with open(detailed_csv_file, 'w') as csvfile:
        writer = csv.DictWriter(csvfile, fieldnames=csv_columns)
        writer.writeheader()
        for data in csv_data:
            writer.writerow(data)

    # Defining data to be written into the csv file - Brief Report
    csv_columns_brief = ['Total Head Count', 'Total Queue Time', 'Average Queue Time',
                         'Total Store Time', 'Average Store Time']
    brief_csv_file = 'Brief_Store_Report.csv'
    csv_data_brief = {
        csv_columns_brief[0]: queue_unique_count,
        csv_columns_brief[1]: round((sum(queue_track_dict.values()) / Input_FPS), 2),
        csv_columns_brief[2]: avg_queue_time,
        csv_columns_brief[3]: round((sum(store_track_dict.values()) / Input_FPS), 2),
        csv_columns_brief[4]: avg_store_time
    }

    # Writing the data into the csv file - Brief Report
    with open(brief_csv_file, 'w') as csvfile:
        writer = csv.DictWriter(csvfile, fieldnames=csv_columns_brief)
        writer.writeheader()
        writer.writerow(csv_data_brief)

    # Releasing objects created
    video_capture.release()
    out.release()
    cv2.destroyAllWindows()
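# ---------------------------------------------------------------------------
# Illustrative sketch (not from the original script): the store-analytics
# code above calls center_point_inside_polygon(bbox, polygon) but never
# defines it. Judging from the comparisons against the string 'inside', a
# minimal implementation with cv2.pointPolygonTest could look like this; the
# 'inside'/'outside' return values are inferred from usage.
import cv2

def center_point_inside_polygon(bbox, polygon):
    """Return 'inside' if the bbox centre lies within (or on) the polygon."""
    cx = int((bbox[0] + bbox[2]) / 2)
    cy = int((bbox[1] + bbox[3]) / 2)
    result = cv2.pointPolygonTest(polygon, (cx, cy), False)
    return 'inside' if result >= 0 else 'outside'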
def main(yolo):
    # Definition of the parameters
    max_cosine_distance = 0.3
    nn_budget = None
    nms_max_overlap = 1.0

    # deep_sort
    model_filename = 'model_data/mars-small128.pb'
    encoder = gdet.create_box_encoder(model_filename, batch_size=1)
    metric = nn_matching.NearestNeighborDistanceMetric("cosine", max_cosine_distance, nn_budget)
    tracker = Tracker(metric)

    writeVideo_flag = True
    video_capture = cv2.VideoCapture(0)

    if writeVideo_flag:
        # Define the codec and create VideoWriter object
        w = int(video_capture.get(3))
        h = int(video_capture.get(4))
        fourcc = cv2.VideoWriter_fourcc(*'MJPG')
        out = cv2.VideoWriter('output.avi', fourcc, 15, (w, h))
        list_file = open('detection.txt', 'w')
        frame_index = -1

    fps = 0.0
    while True:
        ret, frame = video_capture.read()  # frame shape 640*480*3
        if ret != True:
            break
        t1 = time.time()

        image = Image.fromarray(frame)  # frame is still BGR here; other variants convert to RGB first
        boxs = yolo.detect_image(image)
        # print("box_num", len(boxs))
        features = encoder(frame, boxs)

        # detection confidence is fixed to 1.0 here
        detections = [Detection(bbox, 1.0, feature) for bbox, feature in zip(boxs, features)]

        # Run non-maxima suppression.
        boxes = np.array([d.tlwh for d in detections])
        scores = np.array([d.confidence for d in detections])
        indices = preprocessing.non_max_suppression(boxes, nms_max_overlap, scores)
        detections = [detections[i] for i in indices]

        # Call the tracker
        tracker.predict()
        tracker.update(detections)

        # skip unconfirmed or stale tracks, draw the rest
        for track in tracker.tracks:
            if not track.is_confirmed() or track.time_since_update > 1:
                continue
            bbox = track.to_tlbr()
            cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), (255, 255, 255), 2)
            cv2.putText(frame, str(track.track_id), (int(bbox[0]), int(bbox[1])), 0, 5e-3 * 200, (0, 255, 0), 2)

        for det in detections:
            bbox = det.to_tlbr()
            cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), (255, 0, 0), 2)

        cv2.imshow('', frame)

        if writeVideo_flag:
            # save a frame
            out.write(frame)
            frame_index = frame_index + 1
            list_file.write(str(frame_index) + ' ')
            if len(boxs) != 0:
                for i in range(0, len(boxs)):
                    list_file.write(str(boxs[i][0]) + ' ' + str(boxs[i][1]) + ' ' +
                                    str(boxs[i][2]) + ' ' + str(boxs[i][3]) + ' ')
            list_file.write('\n')

        fps = (fps + (1. / (time.time() - t1))) / 2
        print("fps= %f" % (fps))

        # Press Q to stop!
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

    video_capture.release()
    if writeVideo_flag:
        out.release()
        list_file.close()
    cv2.destroyAllWindows()
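# ---------------------------------------------------------------------------
# Illustrative sketch (not from the original scripts): every variant above
# smooths its FPS readout with fps = (fps + 1/dt) / 2, which halves the weight
# of all history on every frame. An exponential moving average with an
# explicit smoothing factor is steadier; ALPHA here is an arbitrary assumed
# value, not taken from any of the scripts.

ALPHA = 0.9  # closer to 1.0 = smoother but slower to react

def update_fps(prev_fps, frame_seconds, alpha=ALPHA):
    """Blend the instantaneous frame rate into a running estimate."""
    inst = 1.0 / max(frame_seconds, 1e-6)  # guard against division by zero
    return alpha * prev_fps + (1.0 - alpha) * inst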