def main():
    input_video_filepath = sys.argv[1]
    write_video_flag = False
    output_video_filepath = ""
    if len(sys.argv) > 2:
        output_video_filepath = sys.argv[2]
        write_video_flag = True
    async_video_flag = False

    config = OmegaConf.load("config.yaml")
    detector = LineCrossingDetector(config)
    counters = [PeopleCounter(**c) for c in config.people_counter]

    if async_video_flag:
        video_capture = VideoCaptureAsync(input_video_filepath)
    else:
        video_capture = cv2.VideoCapture(input_video_filepath)

    if async_video_flag:
        video_capture.start()

    if write_video_flag:
        w = int(video_capture.get(3))
        h = int(video_capture.get(4))
        fourcc = cv2.VideoWriter_fourcc(*"XVID")
        output_writer = cv2.VideoWriter(output_video_filepath, fourcc, 30, (w, h))

    frame_index = 0
    pbar = tqdm(total=int(video_capture.get(cv2.CAP_PROP_FRAME_COUNT)))

    while frame_index < 12000:
        ret, frame = video_capture.read()
        frame_index = frame_index + 1
        if not ret:
            break
        if frame_index < 10000:
            continue

        detections = detector.detect(frame, visualize=True)
        for counter in counters:
            counter.update(detections, frame_index)
            counter.visualize(frame)

        for d in detections:
            print(f"Frame: {frame_index}. Track id: {d.track_id}. Line id: {d.line_id}")

        if write_video_flag:
            output_writer.write(frame)
        pbar.update()

    if async_video_flag:
        video_capture.stop()
    else:
        video_capture.release()

    if write_video_flag:
        output_writer.release()
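# The loop above only assumes that each result returned by
# LineCrossingDetector.detect() exposes a track id and the id of the crossed
# line (the only fields it prints). A minimal sketch of such a result object;
# the class name and dataclass layout below are hypothetical, not part of the
# original code:
from dataclasses import dataclass


@dataclass
class LineCrossingEvent:
    track_id: int
    line_id: int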
def main(yolo): # Definition of the parameters max_cosine_distance = 0.2 nn_budget = None nms_max_overlap = 1.0 output_format = 'mp4' initialize_door_by_yourself = False door_array = None # Deep SORT model_filename = '../model_data/mars-small128.pb' encoder = gdet.create_box_encoder(model_filename, batch_size=1) metric = nn_matching.NearestNeighborDistanceMetric("cosine", max_cosine_distance, nn_budget) tracker = Tracker(metric) show_detections = True writeVideo_flag = True asyncVideo_flag = False error_values = [] check_gpu() files = sorted(os.listdir('data_files/videos')) for video_name in files: print("opening video: {}".format(video_name)) file_path = join('data_files/videos', video_name) output_name = 'save_data/out_' + video_name[0:-3] + output_format counter = Counter(counter_in=0, counter_out=0, track_id=0) truth = get_truth(video_name) if asyncVideo_flag: video_capture = VideoCaptureAsync(file_path) else: video_capture = cv2.VideoCapture(file_path) if asyncVideo_flag: video_capture.start() if writeVideo_flag: if asyncVideo_flag: w = int(video_capture.cap.get(3)) h = int(video_capture.cap.get(4)) else: w = int(video_capture.get(3)) h = int(video_capture.get(4)) fourcc = cv2.VideoWriter_fourcc(*'XVID') out = cv2.VideoWriter(output_name, fourcc, 15, (w, h)) frame_index = -1 fps = 0.0 fps_imutils = imutils.video.FPS().start() all_doors = read_door_info('data_files/doors_info_links.json') door_array = all_doors[video_name] rect_door = Rectangle(door_array[0], door_array[1], door_array[2], door_array[3]) border_door = door_array[3] while True: ret, frame = video_capture.read() # frame shape 640*480*3 if not ret: y1 = (counter.counter_in - truth.inside)**2 y2 = (counter.counter_out - truth.outside)**2 total_count = counter.return_total_count() true_total = truth.inside + truth.outside if true_total != 0: err = abs(total_count - true_total) / true_total else: err = abs(total_count - true_total) mse = (y1 + y2) / 2 log_res = "in video: {}\n predicted / true\n counter in: {} / {}\n counter out: {} / {}\n" \ " total: {} / {}\n error: {}\n mse error: {}\n______________\n".format(video_name, counter.counter_in, truth.inside, counter.counter_out, truth.outside, total_count, true_total, err, mse) with open('../log_results.txt', 'a') as log: log.write(log_res) print(log_res) error_values.append(err) break t1 = time.time() image = Image.fromarray(frame[..., ::-1]) # bgr to rgb boxes, confidence, classes = yolo.detect_image(image) features = encoder(frame, boxes) detections = [ Detection(bbox, confidence, cls, feature) for bbox, confidence, cls, feature in zip( boxes, confidence, classes, features) ] # Run non-maxima suppression. 
boxes = np.array([d.tlwh for d in detections]) scores = np.array([d.confidence for d in detections]) classes = np.array([d.cls for d in detections]) indices = preprocessing.non_max_suppression( boxes, nms_max_overlap, scores) detections = [detections[i] for i in indices] # Call the tracker tracker.predict() tracker.update(detections) cv2.rectangle(frame, (int(door_array[0]), int(door_array[1])), (int(door_array[2]), int(door_array[3])), (23, 158, 21), 3) for det in detections: bbox = det.to_tlbr() if show_detections and len(classes) > 0: score = "%.2f" % (det.confidence * 100) + "%" # rect_head = Rectangle(bbox[0], bbox[1], bbox[2], bbox[3]) # rect_door = Rectangle( int(door_array[0]), int(door_array[1]), int(door_array[2]), int(door_array[3]) ) # intersection = rect_head & rect_door # # if intersection: # squares_coeff = rect_square(*intersection)/ rect_square(*rect_head) # cv2.putText(frame, score + " inter: " + str(round(squares_coeff, 3)), (int(bbox[0]), int(bbox[3])), 0, # 1e-3 * frame.shape[0], (0, 100, 255), 5) cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), (255, 0, 0), 3) for track in tracker.tracks: if not track.is_confirmed() or track.time_since_update > 1: continue bbox = track.to_tlbr() # first appearence of object with id=track.id if track.track_id not in counter.people_init or counter.people_init[ track.track_id] == 0: counter.obj_initialized(track.track_id) rect_head = Rectangle(bbox[0], bbox[1], bbox[2], bbox[3]) intersection = rect_head & rect_door if intersection: intersection_square = rect_square(*intersection) head_square = rect_square(*rect_head) rat = intersection_square / head_square # 1e-3 * frame.shape[0], (0, 100, 255), 5) # was initialized in door, probably going in if rat >= 0.7: counter.people_init[track.track_id] = 2 # initialized in the bus, mb going out elif rat <= 0.4 or bbox[3] > border_door: counter.people_init[track.track_id] = 1 # initialized between the exit and bus, not obvious state elif rat > 0.4 and rat < 0.7: counter.people_init[track.track_id] = 3 counter.rat_init[track.track_id] = rat # res is None, means that object is not in door contour else: counter.people_init[track.track_id] = 1 counter.people_bbox[track.track_id] = bbox counter.cur_bbox[track.track_id] = bbox adc = "%.2f" % (track.adc * 100) + "%" # Average detection confidence cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), (255, 255, 255), 2) cv2.putText(frame, "ID: " + str(track.track_id), (int(bbox[0]), int(bbox[1])), 0, 1e-3 * frame.shape[0], (0, 255, 0), 5) if not show_detections: track_cls = track.cls cv2.putText(frame, str(track_cls), (int(bbox[0]), int(bbox[3])), 0, 1e-3 * frame.shape[0], (0, 255, 0), 1) cv2.putText( frame, 'ADC: ' + adc, (int(bbox[0]), int(bbox[3] + 2e-2 * frame.shape[1])), 0, 1e-3 * frame.shape[0], (0, 255, 0), 1) id_get_lost = [ track.track_id for track in tracker.tracks if track.time_since_update >= 35 ] # and track.age >= 29] # id_inside_tracked = [track.track_id for track in tracker.tracks if track.age > 60] for val in counter.people_init.keys(): # check bbox also inter_square = 0 cur_square = 0 ratio = 0 cur_c = find_centroid(counter.cur_bbox[val]) init_c = find_centroid(counter.people_bbox[val]) vector_person = (cur_c[0] - init_c[0], cur_c[1] - init_c[1]) if val in id_get_lost and counter.people_init[val] != -1: rect_сur = Rectangle(counter.cur_bbox[val][0], counter.cur_bbox[val][1], counter.cur_bbox[val][2], counter.cur_bbox[val][3]) inter = rect_сur & rect_door if inter: inter_square = 
rect_square(*inter) cur_square = rect_square(*rect_сur) try: ratio = inter_square / cur_square except ZeroDivisionError: ratio = 0 # if vector_person < 0 then current coord is less than initialized, it means that man is going # in the exit direction if vector_person[1] > 70 and counter.people_init[val] == 2 \ and ratio < 0.9: # and counter.people_bbox[val][3] > border_door \ counter.get_in() elif vector_person[1] < -70 and counter.people_init[val] == 1 \ and ratio >= 0.6: counter.get_out() elif vector_person[1] < -70 and counter.people_init[val] == 3 \ and ratio > counter.rat_init[val] and ratio >= 0.6: counter.get_out() elif vector_person[1] > 70 and counter.people_init[val] == 3 \ and ratio < counter.rat_init[val] and ratio < 0.6: counter.get_in() counter.people_init[val] = -1 del val ins, outs = counter.show_counter() cv2.rectangle(frame, (0, 0), (250, 50), (0, 0, 0), -1, 8) cv2.putText(frame, "in: {}, out: {} ".format(ins, outs), (10, 35), 0, 1e-3 * frame.shape[0], (255, 255, 255), 3) cv2.namedWindow('video', cv2.WINDOW_NORMAL) cv2.resizeWindow('video', 1422, 800) cv2.imshow('video', frame) if writeVideo_flag: # save a frame out.write(frame) frame_index = frame_index + 1 fps_imutils.update() if not asyncVideo_flag: fps = (fps + (1. / (time.time() - t1))) / 2 # print("FPS = %f" % fps) # Press Q to stop! if cv2.waitKey(1) & 0xFF == ord('q'): break fps_imutils.stop() print('imutils FPS: {}'.format(fps_imutils.fps())) if asyncVideo_flag: video_capture.stop() else: video_capture.release() if writeVideo_flag: out.release() cv2.destroyAllWindows() del door_array mean_error = np.mean(error_values) print("mean error for {} videos: {}".format(len(files), mean_error))
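# The door/head intersection logic above relies on small geometry helpers
# (Rectangle, rect_square, find_centroid) that are not defined in these
# snippets. A minimal sketch of what they could look like, assuming Rectangle
# is an (x1, y1, x2, y2) namedtuple whose & operator returns the overlapping
# box or None:
from collections import namedtuple

_Rect = namedtuple('_Rect', ['x1', 'y1', 'x2', 'y2'])


class Rectangle(_Rect):
    def __and__(self, other):
        # intersection of two axis-aligned boxes, None if they do not overlap
        x1, y1 = max(self.x1, other.x1), max(self.y1, other.y1)
        x2, y2 = min(self.x2, other.x2), min(self.y2, other.y2)
        if x1 < x2 and y1 < y2:
            return Rectangle(x1, y1, x2, y2)
        return None


def rect_square(x1, y1, x2, y2):
    # area of an axis-aligned box given its two corners
    return abs(x2 - x1) * abs(y2 - y1)


def find_centroid(bbox):
    # centre point of an (x1, y1, x2, y2) box
    return (bbox[0] + bbox[2]) / 2, (bbox[1] + bbox[3]) / 2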
def run(self):
    asyncVideo_flag = self.args.asyncVideo_flag
    writeVideo_flag = self.args.writeVideo_flag

    if asyncVideo_flag:
        video_capture = VideoCaptureAsync(self.video_path)
    else:
        video_capture = cv2.VideoCapture(self.video_path)

    if asyncVideo_flag:
        video_capture.start()

    if writeVideo_flag:
        if asyncVideo_flag:
            w = int(video_capture.cap.get(3))
            h = int(video_capture.cap.get(4))
        else:
            w = int(video_capture.get(3))
            h = int(video_capture.get(4))
        fourcc = cv2.VideoWriter_fourcc(*'XVID')
        out = cv2.VideoWriter('output_yolov4.avi', fourcc, 30, (w, h))
        frame_index = -1

    fps = 0.0
    fps_imutils = imutils.video.FPS().start()

    count_frame = 0
    while True:
        count_frame += 1
        t1 = time.time()

        ret, frame = video_capture.read()
        if ret != True:
            break

        _frame = frame

        # process
        frame = self.process(frame, _frame, count_frame)

        # visualize
        if self.args.visualize:
            frame = imutils.resize(frame, width=1000)
            cv2.imshow("Final result", frame)

        if writeVideo_flag:  # and not asyncVideo_flag:
            # save a frame
            out.write(frame)
            frame_index = frame_index + 1

        fps_imutils.update()

        if not asyncVideo_flag:
            fps = (fps + (1. / (time.time() - t1))) / 2
            print("FPS = %f" % (fps))

        # Press Q to stop!
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

    fps_imutils.stop()
    print('imutils FPS: {}'.format(fps_imutils.fps()))

    if asyncVideo_flag:
        video_capture.stop()
    else:
        video_capture.release()

    if writeVideo_flag:
        out.release()

    cv2.destroyAllWindows()
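# VideoCaptureAsync is used throughout these scripts but its implementation is
# not included here. A minimal sketch of a threaded wrapper exposing the
# interface the callers rely on (.cap, .start(), .read(), .stop()), assuming
# the latest grabbed frame simply overwrites the previous one:
import threading

import cv2


class VideoCaptureAsync:
    def __init__(self, src):
        self.cap = cv2.VideoCapture(src)
        self.ret, self.frame = False, None
        self.running = False
        self.lock = threading.Lock()

    def start(self):
        # grab frames in a background thread so read() never blocks on I/O
        self.running = True
        self.thread = threading.Thread(target=self._update, daemon=True)
        self.thread.start()
        return self

    def _update(self):
        while self.running:
            ret, frame = self.cap.read()
            with self.lock:
                self.ret, self.frame = ret, frame

    def read(self):
        with self.lock:
            frame = self.frame.copy() if self.frame is not None else None
            return self.ret, frame

    def stop(self):
        self.running = False
        self.thread.join()
        self.cap.release()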
def main(yolo): # Definition of the parameters max_cosine_distance = 0.2 nn_budget = None nms_max_overlap = 1.0 output_format = 'mp4' # Deep SORT model_filename = 'model_data/mars-small128.pb' encoder = gdet.create_box_encoder(model_filename, batch_size=1) metric = nn_matching.NearestNeighborDistanceMetric("cosine", max_cosine_distance, nn_budget) tracker = Tracker(metric) show_detections = True writeVideo_flag = True asyncVideo_flag = False fpeses = [] check_gpu() video_name = 'test1.mp4' print("opening video: {}".format(video_name)) file_path = join('data_files/videos', video_name) output_name = 'save_data/out_' + video_name[0:-3] + output_format counter = Counter(counter_in=0, counter_out=0, track_id=0) if asyncVideo_flag: video_capture = VideoCaptureAsync(file_path) else: video_capture = cv2.VideoCapture(file_path) if asyncVideo_flag: video_capture.start() w = int(video_capture.cap.get(3)) h = int(video_capture.cap.get(4)) else: w = int(video_capture.get(3)) h = int(video_capture.get(4)) if writeVideo_flag: fourcc = cv2.VideoWriter_fourcc(*'XVID') out = cv2.VideoWriter(output_name, fourcc, 15, (w, h)) left_array = [0, 0, w / 2, h] fps = 0.0 fps_imutils = imutils.video.FPS().start() rect_left = Rectangle(left_array[0], left_array[1], left_array[2], left_array[3]) border_door = left_array[3] while True: ret, frame = video_capture.read() # frame shape 640*480*3 if not ret: with open('log_results.txt', 'a') as log: log.write('1') break t1 = time.time() image = Image.fromarray(frame[..., ::-1]) # bgr to rgb boxes, confidence, classes = yolo.detect_image(image) features = encoder(frame, boxes) detections = [ Detection(bbox, confidence, cls, feature) for bbox, confidence, cls, feature in zip( boxes, confidence, classes, features) ] # Run non-maxima suppression. 
boxes = np.array([d.tlwh for d in detections]) scores = np.array([d.confidence for d in detections]) classes = np.array([d.cls for d in detections]) indices = preprocessing.non_max_suppression(boxes, nms_max_overlap, scores) detections = [detections[i] for i in indices] # Call the tracker tracker.predict() tracker.update(detections) cv2.rectangle(frame, (int(left_array[0]), int(left_array[1])), (int(left_array[2]), int(left_array[3])), (23, 158, 21), 3) if len(detections) != 0: counter.someone_inframe() for det in detections: bbox = det.to_tlbr() if show_detections and len(classes) > 0: score = "%.2f" % (det.confidence * 100) + "%" cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), (255, 0, 0), 3) else: if counter.need_to_clear(): counter.clear_all() for track in tracker.tracks: if not track.is_confirmed() or track.time_since_update > 1: continue bbox = track.to_tlbr() if track.track_id not in counter.people_init or counter.people_init[ track.track_id] == 0: counter.obj_initialized(track.track_id) ratio_init = find_ratio_ofbboxes(bbox=bbox, rect_compare=rect_left) if ratio_init > 0: if ratio_init >= 0.8 and bbox[3] < left_array[3]: counter.people_init[ track.track_id] = 2 # man in left side elif ratio_init < 0.8 and bbox[3] > left_array[ 3]: # initialized in the bus, mb going out counter.people_init[track.track_id] = 1 else: counter.people_init[track.track_id] = 1 counter.people_bbox[track.track_id] = bbox counter.cur_bbox[track.track_id] = bbox adc = "%.2f" % (track.adc * 100) + "%" # Average detection confidence cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), (255, 255, 255), 2) cv2.putText(frame, "ID: " + str(track.track_id), (int(bbox[0]), int(bbox[1]) + 50), 0, 1e-3 * frame.shape[0], (0, 255, 0), 5) if not show_detections: track_cls = track.cls cv2.putText(frame, str(track_cls), (int(bbox[0]), int(bbox[3])), 0, 1e-3 * frame.shape[0], (0, 255, 0), 1) cv2.putText( frame, 'ADC: ' + adc, (int(bbox[0]), int(bbox[3] + 2e-2 * frame.shape[1])), 0, 1e-3 * frame.shape[0], (0, 255, 0), 1) id_get_lost = [ track.track_id for track in tracker.tracks if track.time_since_update >= 5 ] # TODO clear people_init and other dicts for val in counter.people_init.keys(): ratio = 0 cur_c = find_centroid(counter.cur_bbox[val]) init_c = find_centroid(counter.people_bbox[val]) vector_person = (cur_c[0] - init_c[0], cur_c[1] - init_c[1]) if val in id_get_lost and counter.people_init[val] != -1: ratio = find_ratio_ofbboxes(bbox=counter.cur_bbox[val], rect_compare=rect_left) if vector_person[0] > 200 and counter.people_init[val] == 2 \ and ratio < 0.7: # and counter.people_bbox[val][3] > border_door \ counter.get_out() print(vector_person[0], counter.people_init[val], ratio) elif vector_person[0] < -100 and counter.people_init[val] == 1 \ and ratio >= 0.7: counter.get_in() print(vector_person[0], counter.people_init[val], ratio) counter.people_init[val] = -1 del val ins, outs = counter.show_counter() cv2.rectangle(frame, (700, 0), (950, 50), (0, 0, 0), -1, 8) cv2.putText(frame, "in: {}, out: {} ".format(ins, outs), (710, 35), 0, 1e-3 * frame.shape[0], (255, 255, 255), 3) cv2.namedWindow('video', cv2.WINDOW_NORMAL) # cv2.resizeWindow('video', 1422, 800) cv2.imshow('video', frame) if writeVideo_flag: out.write(frame) fps_imutils.update() if not asyncVideo_flag: fps = (fps + (1. 
/ (time.time() - t1))) / 2 print("FPS = %f" % fps) if len(fpeses) < 15: fpeses.append(round(fps, 2)) elif len(fpeses) == 15: # fps = round(np.median(np.array(fpeses))) median_fps = float(np.median(np.array(fpeses))) fps = round(median_fps, 1) print('max fps: ', fps) fps = 20 counter.fps = fps fpeses.append(fps) if cv2.waitKey(1) & 0xFF == ord('q'): break if asyncVideo_flag: video_capture.stop() else: video_capture.release() if writeVideo_flag: out.release() cv2.destroyAllWindows()
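# find_ratio_ofbboxes is called in the loop above but defined elsewhere.
# Judging by the inline door-intersection code in the other scripts, it likely
# returns the ratio of the overlap area between bbox and rect_compare to the
# bbox's own area. A self-contained sketch under that assumption:
def find_ratio_ofbboxes(bbox, rect_compare):
    # bbox and rect_compare are (x1, y1, x2, y2)
    ix1, iy1 = max(bbox[0], rect_compare[0]), max(bbox[1], rect_compare[1])
    ix2, iy2 = min(bbox[2], rect_compare[2]), min(bbox[3], rect_compare[3])
    if ix1 >= ix2 or iy1 >= iy2:
        return 0.0  # no overlap
    inter_area = (ix2 - ix1) * (iy2 - iy1)
    bbox_area = (bbox[2] - bbox[0]) * (bbox[3] - bbox[1])
    return inter_area / bbox_area if bbox_area > 0 else 0.0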
def main(yolo): # Definition of the parameters max_cosine_distance = 0.3 nn_budget = None nms_max_overlap = 1.0 frame_wid = 1280 frame_hei = 720 # Deep SORT model_filename = 'model_data/mars-small128.pb' encoder = gdet.create_box_encoder(model_filename, batch_size=1) metric = nn_matching.NearestNeighborDistanceMetric("cosine", max_cosine_distance, nn_budget) # tracker = Tracker(metric) show_detections = True writeVideo_flag = True asyncVideo_flag = False # 存放所有视频数据的上级目录 #root_video_path = '/home/yhzn/ws/clip/' root_video_path = '/home/yhzn/ws/citywalks/' all_video_path = [] read_video(root_video_path, all_video_path) break_flag = 0 result = [] myvideo_result = [] print("Video number = ", len(all_video_path)) just_video_cnt = 0 # 依次便利所有视频的路径并读取,each_video代表的是str类型路径 for each_video in all_video_path: tracker = Tracker(metric) file_path = each_video # 将城市名与视频编号识别出来 video_name_split = file_path.split('/') city_name = video_name_split[-2] video_number = video_name_split[-1] print(city_name, video_number) if asyncVideo_flag: video_capture = VideoCaptureAsync(file_path) else: video_capture = cv2.VideoCapture(file_path) if asyncVideo_flag: video_capture.start() if writeVideo_flag: if asyncVideo_flag: w = int(video_capture.cap.get(3)) h = int(video_capture.cap.get(4)) else: w = int(video_capture.get(3)) h = int(video_capture.get(4)) fourcc = cv2.VideoWriter_fourcc(*'XVID') out = cv2.VideoWriter('output_yolov4.avi', fourcc, 30, (w, h)) frame_index = -1 fps = 0.0 fps_imutils = imutils.video.FPS().start() frame_num = 0 # 确认保存路径的存在 city_dir = ''.join(['/home/yhzn/ws/crop_image/', city_name]) if not os.path.exists(city_dir): os.mkdir(city_dir) video_dir = ''.join( ['/home/yhzn/ws/crop_image/', city_name, '/', video_number]) if not os.path.exists(video_dir): os.mkdir(video_dir) # 计数功能,仅仅为了显示处理进度用 just_frame_cnt = 0 just_video_cnt += 1 while True: ret, frame = video_capture.read() if ret != True: break t1 = time.time() image = Image.fromarray(frame[..., ::-1]) # bgr to rgb boxes, confidence, classes = yolo.detect_image(image) features = encoder(frame, boxes) detections = [ Detection(bbox, confidence, cls, feature) for bbox, confidence, cls, feature in zip( boxes, confidence, classes, features) ] # Run non-maxima suppression. 
boxes = np.array([d.tlwh for d in detections]) scores = np.array([d.confidence for d in detections]) classes = np.array([d.cls for d in detections]) indices = preprocessing.non_max_suppression( boxes, nms_max_overlap, scores) detections = [detections[i] for i in indices] # Call the tracker tracker.predict() tracker.update(detections) # 检测框是否显示 # for det in detections: # bbox = det.to_tlbr() # if show_detections and len(classes) > 0: # det_cls = det.cls # score = "%.2f" % (det.confidence * 100) + "%" # cv2.putText(frame, str(det_cls) + " " + score, (int(bbox[0]), int(bbox[3])), 0, # 1e-3 * frame.shape[0], (0, 0, 255), 1) # cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), (255, 0, 0), 2) # 仅仅使用了跟踪框 tmp_frame = frame.copy() for track in tracker.tracks: if not track.is_confirmed() or track.time_since_update > 1: continue bbox = track.to_tlbr() # print("ord1: ", bbox[0], bbox[1], bbox[2], bbox[3]) if bbox[0] < 0: bbox[0] = 0 if bbox[0] >= frame_wid: bbox[0] = frame_wid - 1 if bbox[1] < 0: bbox[1] = 0 if bbox[1] >= frame_hei: bbox[1] = frame_hei - 1 if bbox[2] < 0: bbox[2] = 0 if bbox[2] >= frame_wid: bbox[2] = frame_wid - 1 if bbox[3] < 0: bbox[3] = 0 if bbox[3] >= frame_hei: bbox[3] = frame_hei - 1 # print("ord2: ", bbox[0], bbox[1], bbox[2], bbox[3]) # if int(bbox[0]) < 0 or int(bbox[1]) < 0 or int(bbox[2]) < 0 or int(bbox[3]) < 0: # continue # 行人图像的裁剪 crop_image = frame[int(bbox[1]):int(bbox[3]), int(bbox[0]):int(bbox[2])] # 裁剪 crop_image = cv2.resize(crop_image, (128, 256)) cv2.imwrite( video_dir + '/frame_' + str(frame_num).zfill(4) + '_ped_' + str(track.track_id) + '.png', crop_image) # Average detection confidence adc = "%.2f" % (track.adc * 100) + "%" cv2.rectangle(tmp_frame, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), (0, 255, 0), 2) cv2.putText(tmp_frame, "ID: " + str(track.track_id), (int(bbox[0]), int(bbox[1])), 0, 1e-3 * frame.shape[0], (0, 0, 255), 1) if not show_detections: track_cls = track.cls cv2.putText(tmp_frame, str(track_cls), (int(bbox[0]), int(bbox[3])), 0, 1e-3 * frame.shape[0], (0, 255, 0), 1) cv2.putText( tmp_frame, 'ADC: ' + adc, (int(bbox[0]), int(bbox[3] + 2e-2 * frame.shape[1])), 0, 1e-3 * frame.shape[0], (0, 255, 0), 1) cx = int((int(bbox[0]) + int(bbox[2])) / 2) cy = int((int(bbox[1]) + int(bbox[3])) / 2) w = int(int(bbox[2]) - int(bbox[0])) h = int(int(bbox[3]) - int(bbox[1])) # 下面所有的append操作是为了保存所有跟踪到的结果,方便后需存放npy文件 tmp_result = [] tmp_result.append(video_number) tmp_result.append(city_name) tmp_result.append(str(frame_num)) tmp_result.append(str(track.track_id)) tmp_result.append(str(cx)) tmp_result.append(str(cy)) tmp_result.append(str(w)) tmp_result.append(str(h)) tmp_result.append('0') tmp_result.append('0') tmp_result.append('0') if h > 50: result.append(tmp_result) # my video detection # 这个文件是保存成DTP项目所需要的格式,与上面的保存大致类似,测试用 tmp_myvideo = [] tmp_myvideo.append(city_name + '/' + video_number) tmp_myvideo.append(str(frame_num)) tmp_myvideo.append(int(bbox[0])) tmp_myvideo.append(int(bbox[1])) tmp_myvideo.append(int(bbox[2])) tmp_myvideo.append(int(bbox[3])) # track tmp_myvideo.append(str(track.track_id)) # detection length tmp_myvideo.append(str(0)) # Height tmp_myvideo.append(str(h)) if h > 50: myvideo_result.append(tmp_myvideo) frame_num += 1 # cv2.imshow('', tmp_frame) if writeVideo_flag: # save a frame out.write(frame) frame_index = frame_index + 1 fps_imutils.update() # 添加一些提示信息 just_frame_cnt += 1 fps = (fps + (1. 
/ (time.time() - t1))) / 2 print("[%d / %d]" % (just_video_cnt, len(all_video_path)), end=' ') print("[frame: %d] [fps: %f]" % (just_frame_cnt, (fps + (1. / (time.time() - t1))) / 2)) # 显示FPS # if not asyncVideo_flag: # fps = (fps + (1. / (time.time() - t1))) / 2 # print("FPS = %f" % (fps)) # Press Q to stop! #if cv2.waitKey(1) & 0xFF == ord('q'): # break_flag = 1 # break #if break_flag == 1: # break ''' 带label的跟踪结果保存 ''' f = open('./clip_data/tracking_result.csv', 'w') csv_writer = csv.writer(f) # 写入csv第一行 csv_writer.writerow([ "vid", "filename", "frame_num", "track", "cx", "cy", "w", "h", "track_length", "labeled", "requires_features" ]) # 按照行人序号、城市名的顺序排序,帧数自动排序了 result.sort(key=lambda x: (x[1], eval(x[3]))) # 处理track条目 for i in range(len(result)): if i == 0: pass else: if result[i][3] == result[i - 1][3] and eval( result[i][2]) == eval(result[i - 1][2]) + 1: result[i][8] = str(eval(result[i - 1][8]) + 1) else: result[i][8] = '0' # 处理labeled and require_feature条目 for i in range(len(result)): # 不满足过去30帧、并且存在未来60帧的不进行预测,因为数据帧不足 if i <= 28 or i >= len(result) - 60: pass # csv_writer.writerow(result[i]) else: track_index_now = eval(result[i][8]) track_index_pre29 = eval(result[i - 29][8]) track_index_post60 = eval(result[i + 60][8]) if result[i][3] == result[i - 29][3] and result[i][3] == result[i + 60][ 3] and track_index_now == track_index_pre29 + 29 and track_index_now == track_index_post60 - 60: result[i][9] = '1' result[i][10] = '1' for j in range(i - 29, i): result[j][10] = '1' # csv_writer.writerow(result[i]) for i in range(len(result)): csv_writer.writerow(result[i]) # 写DTP的tracking result csv train_file1 = open('./clip_data/myvideo_yolo_detection_train1.csv', 'w') train_file2 = open('./clip_data/myvideo_yolo_detection_train2.csv', 'w') train_file3 = open('./clip_data/myvideo_yolo_detection_train3.csv', 'w') val_file1 = open('./clip_data/myvideo_yolo_detection_val1.csv', 'w') val_file2 = open('./clip_data/myvideo_yolo_detection_val2.csv', 'w') val_file3 = open('./clip_data/myvideo_yolo_detection_val3.csv', 'w') test_file1 = open('./clip_data/myvideo_yolo_detection_test1.csv', 'w') test_file2 = open('./clip_data/myvideo_yolo_detection_test2.csv', 'w') test_file3 = open('./clip_data/myvideo_yolo_detection_test3.csv', 'w') # 定义csv_writer csv_train1 = csv.writer(train_file1) csv_train1.writerow([ "filename", "frame_num", "bb1", "bb2", "bb3", "bb4", "track", "detection_length", "Height" ]) csv_train2 = csv.writer(train_file2) csv_train2.writerow([ "filename", "frame_num", "bb1", "bb2", "bb3", "bb4", "track", "detection_length", "Height" ]) csv_train3 = csv.writer(train_file3) csv_train3.writerow([ "filename", "frame_num", "bb1", "bb2", "bb3", "bb4", "track", "detection_length", "Height" ]) csv_val1 = csv.writer(val_file1) csv_val1.writerow([ "filename", "frame_num", "bb1", "bb2", "bb3", "bb4", "track", "detection_length", "Height" ]) csv_val2 = csv.writer(val_file2) csv_val2.writerow([ "filename", "frame_num", "bb1", "bb2", "bb3", "bb4", "track", "detection_length", "Height" ]) csv_val3 = csv.writer(val_file3) csv_val3.writerow([ "filename", "frame_num", "bb1", "bb2", "bb3", "bb4", "track", "detection_length", "Height" ]) csv_test1 = csv.writer(test_file1) csv_test1.writerow([ "filename", "frame_num", "bb1", "bb2", "bb3", "bb4", "track", "detection_length", "Height" ]) csv_test2 = csv.writer(test_file2) csv_test2.writerow([ "filename", "frame_num", "bb1", "bb2", "bb3", "bb4", "track", "detection_length", "Height" ]) csv_test3 = csv.writer(test_file3) csv_test3.writerow([ "filename", 
"frame_num", "bb1", "bb2", "bb3", "bb4", "track", "detection_length", "Height" ]) for i in range(len(myvideo_result)): tmp_path = myvideo_result[i][0] city = tmp_path.split('/')[0] # train if city in Train_cities1: csv_train1.writerow(myvideo_result[i]) if city in Train_cities2: csv_train2.writerow(myvideo_result[i]) if city in Train_cities3: csv_train3.writerow(myvideo_result[i]) # val if city in Validation_cities1: csv_val1.writerow(myvideo_result[i]) if city in Validation_cities2: csv_val2.writerow(myvideo_result[i]) if city in Validation_cities3: csv_val3.writerow(myvideo_result[i]) # test if city in Test_cities1: csv_test1.writerow(myvideo_result[i]) if city in Test_cities2: csv_test2.writerow(myvideo_result[i]) if city in Test_cities3: csv_test3.writerow(myvideo_result[i]) f.close() train_file1.close() train_file2.close() train_file3.close() val_file1.close() val_file2.close() val_file3.close() test_file1.close() test_file2.close() test_file3.close() ''' 写入STED模型所需要的.npy文件 size = (n, 8, 25) ''' # box numpy数据保存 train_val_test_box(result) # output label数据保存 train_val_test_label(result) """ my video detection write 保存成DTP需要的跟踪格式 """ f = open('./clip_data/myvideo_yolo_detection.csv', 'w') csv_writer = csv.writer(f) csv_writer.writerow([ "filename", "frame_num", "bb1", "bb2", "bb3", "bb4", "track", "detection_length", "Height" ]) # 按照跟踪序号排序 myvideo_result.sort(key=lambda x: x[6]) # 处理track条目 for i in range(len(myvideo_result)): if i == 0: pass else: # 如何filename相同,track序号相同,帧数连续则表示跟踪 if myvideo_result[i][6] == myvideo_result[i - 1][6] and eval( myvideo_result[i][1]) == eval( myvideo_result[i - 1][1] ) + 1 and myvideo_result[i][0] == myvideo_result[i - 1][0]: myvideo_result[i][7] = str(eval(myvideo_result[i - 1][7]) + 1) else: myvideo_result[i][7] = '0' for i in range(len(myvideo_result)): csv_writer.writerow(myvideo_result[i]) f.close() fps_imutils.stop() print('imutils FPS: {}'.format(fps_imutils.fps())) if asyncVideo_flag: video_capture.stop() else: video_capture.release() if writeVideo_flag: out.release()
def main(yolo):
    # Definition of the parameters
    max_cosine_distance = 0.3
    nn_budget = None
    nms_max_overlap = 1.0

    # Deep SORT
    model_filename = 'model_data/mars-small128.pb'
    encoder = gdet.create_box_encoder(model_filename, batch_size=1)

    metric = nn_matching.NearestNeighborDistanceMetric("cosine", max_cosine_distance, nn_budget)
    tracker = Tracker(metric)

    writeVideo_flag = True
    asyncVideo_flag = False

    file_path = 'video.webm'
    if asyncVideo_flag:
        video_capture = VideoCaptureAsync(file_path)
    else:
        video_capture = cv2.VideoCapture(file_path)

    if asyncVideo_flag:
        video_capture.start()

    if writeVideo_flag:
        if asyncVideo_flag:
            w = int(video_capture.cap.get(3))
            h = int(video_capture.cap.get(4))
        else:
            w = int(video_capture.get(3))
            h = int(video_capture.get(4))
        fourcc = cv2.VideoWriter_fourcc(*'XVID')
        out = cv2.VideoWriter('output_yolov4.avi', fourcc, 30, (w, h))
        frame_index = -1

    fps = 0.0
    fps_imutils = imutils.video.FPS().start()

    while True:
        ret, frame = video_capture.read()  # frame shape 640*480*3
        if ret != True:
            break

        t1 = time.time()

        image = Image.fromarray(frame[..., ::-1])  # bgr to rgb
        # Run the detector once per frame and reuse the result for boxes and scores.
        detection_result = yolo.detect_image(image)
        boxs = detection_result[0]
        confidence = detection_result[1]

        features = encoder(frame, boxs)
        detections = [Detection(bbox, confidence, feature)
                      for bbox, confidence, feature in zip(boxs, confidence, features)]

        # Run non-maxima suppression.
        boxes = np.array([d.tlwh for d in detections])
        scores = np.array([d.confidence for d in detections])
        indices = preprocessing.non_max_suppression(boxes, nms_max_overlap, scores)
        detections = [detections[i] for i in indices]

        # Call the tracker
        tracker.predict()
        tracker.update(detections)

        for track in tracker.tracks:
            if not track.is_confirmed() or track.time_since_update > 1:
                continue
            bbox = track.to_tlbr()
            cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), (255, 255, 255), 2)
            cv2.putText(frame, str(track.track_id), (int(bbox[0]), int(bbox[1])), 0, 5e-3 * 200, (0, 255, 0), 2)

        for det in detections:
            bbox = det.to_tlbr()
            score = "%.2f" % round(det.confidence * 100, 2)
            cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), (255, 0, 0), 2)
            cv2.putText(frame, score + '%', (int(bbox[0]), int(bbox[3])), 0, 5e-3 * 130, (0, 255, 0), 2)

        cv2.imshow('', frame)

        if writeVideo_flag:  # and not asyncVideo_flag:
            # save a frame
            out.write(frame)
            frame_index = frame_index + 1

        fps_imutils.update()

        fps = (fps + (1. / (time.time() - t1))) / 2
        print("FPS = %f" % (fps))

        # Press Q to stop!
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

    fps_imutils.stop()
    print('imutils FPS: {}'.format(fps_imutils.fps()))

    if asyncVideo_flag:
        video_capture.stop()
    else:
        video_capture.release()

    if writeVideo_flag:
        out.release()

    cv2.destroyAllWindows()
def main(yolo): # Definition of the parameters max_cosine_distance = 0.3 nn_budget = None nms_max_overlap = 1.0 # Deep SORT model_filename = 'model_data/mars-small128.pb' encoder = gdet.create_box_encoder(model_filename, batch_size=1) metric = nn_matching.NearestNeighborDistanceMetric("cosine", max_cosine_distance, nn_budget) tracker = Tracker(metric) tracking = True writeVideo_flag = False asyncVideo_flag = False webcamera_flag = True ipcamera_flag = False udp_flag = True file_path = '/workspace/data/C0133_v4.mp4' if asyncVideo_flag : video_capture = VideoCaptureAsync(file_path) elif ipcamera_flag : video_capture = cv2.VideoCapture('rtsp://*****:*****@192.168.2.201/ONVIF/MediaInput?profile=def_profile1') elif webcamera_flag : video_capture = cv2.VideoCapture(0) else: video_capture = cv2.VideoCapture(file_path) if asyncVideo_flag: video_capture.start() if writeVideo_flag: if asyncVideo_flag: w = int(video_capture.cap.get(3)) h = int(video_capture.cap.get(4)) else: w = int(video_capture.get(3)) h = int(video_capture.get(4)) fourcc = cv2.VideoWriter_fourcc(*'XVID') out = cv2.VideoWriter('output_yolov4.avi', fourcc, 30, (w, h)) frame_index = -1 if udp_flag: HOST = '' PORT = 5000 address = '192.168.2.255' sock =socket(AF_INET, SOCK_DGRAM) sock.setsockopt(SOL_SOCKET, SO_BROADCAST, 1) sock.bind((HOST, PORT)) fps = 0.0 fps_imutils = imutils.video.FPS().start() savetime = 0 while True: nowtime = datetime.datetime.now().isoformat() ret, frame = video_capture.read() # frame shape 640*480*3 t1 = time.time() if time.time() - savetime >= 30: print('save data') cv2.imwrite("/workspace/images/image.png", frame) savetime = time.time() image = Image.fromarray(frame[...,::-1]) # bgr to rgb boxes, confidence, classes = yolo.detect_image(image) if tracking: features = encoder(frame, boxes) detections = [Detection(bbox, confidence, cls, feature) for bbox, confidence, cls, feature in zip(boxes, confidence, classes, features)] else: detections = [Detection_YOLO(bbox, confidence, cls) for bbox, confidence, cls in zip(boxes, confidence, classes)] # Run non-maxima suppression. 
boxes = np.array([d.tlwh for d in detections]) scores = np.array([d.confidence for d in detections]) indices = preprocessing.non_max_suppression(boxes, nms_max_overlap, scores) detections = [detections[i] for i in indices] if tracking: # Call the tracker tracker.predict() tracker.update(detections) for track in tracker.tracks: if not track.is_confirmed() or track.time_since_update > 1: continue bbox = track.to_tlbr() cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), (255, 255, 255), 2) cv2.putText(frame, "ID: " + str(track.track_id), (int(bbox[0]), int(bbox[1])), 0, 1.5e-3 * frame.shape[0], (0, 255, 0), 1) # socket message = str(nowtime + "," + str(track.track_id) + "," + str(int(bbox[0])) + "," + str(int(bbox[1])) + "," + str(int(bbox[2])) + "," + str(int(bbox[3]))) bmessage = message.encode('utf-8') print(type(bmessage)) if udp_flag: sock.sendto(message.encode('utf-8'), (address, PORT)) for det in detections: bbox = det.to_tlbr() score = "%.2f" % round(det.confidence * 100, 2) + "%" cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), (255, 0, 0), 2) if len(classes) > 0: cls = det.cls cv2.putText(frame, str(cls) + " " + score, (int(bbox[0]), int(bbox[3])), 0, 1.5e-3 * frame.shape[0], (0, 255, 0), 1) #cv2.imshow('', frame) if writeVideo_flag: # and not asyncVideo_flag: # save a frame out.write(frame) frame_index = frame_index + 1 fps_imutils.update() if not asyncVideo_flag: fps = (fps + (1./(time.time()-t1))) / 2 print("FPS = %f"%(fps)) # Press Q to stop! if cv2.waitKey(1) & 0xFF == ord('q'): break fps_imutils.stop() print('imutils FPS: {}'.format(fps_imutils.fps())) if asyncVideo_flag: video_capture.stop() else: video_capture.release() if writeVideo_flag: out.release() cv2.destroyAllWindows()
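# The loop above broadcasts one UDP datagram per confirmed track in the form
# "<iso timestamp>,<track id>,<x1>,<y1>,<x2>,<y2>". A minimal sketch of a
# matching receiver, assuming the same port (5000) on the listening host:
from socket import socket, AF_INET, SOCK_DGRAM


def receive_tracks(port=5000):
    sock = socket(AF_INET, SOCK_DGRAM)
    sock.bind(('', port))
    while True:
        data, addr = sock.recvfrom(1024)
        timestamp, track_id, x1, y1, x2, y2 = data.decode('utf-8').split(',')
        print(addr, timestamp, track_id, (x1, y1, x2, y2))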
def main(yolo): # Definition of the parameters max_cosine_distance = 0.2 nn_budget = None nms_max_overlap = 1.0 output_format = 'mp4' video_name = 'bus4_2in_4out.mp4' file_path = join('data_files/videos', video_name) output_name = 'save_data/out_' + video_name[0:-3] + output_format initialize_door_by_yourself = False door_array = None # Deep SORT model_filename = 'model_data/mars-small128.pb' encoder = gdet.create_box_encoder(model_filename, batch_size=1) metric = nn_matching.NearestNeighborDistanceMetric("cosine", max_cosine_distance, nn_budget) tracker = Tracker(metric) show_detections = True writeVideo_flag = True asyncVideo_flag = False counter = Counter(counter_in=0, counter_out=0, track_id=0) if asyncVideo_flag: video_capture = VideoCaptureAsync(file_path) else: video_capture = cv2.VideoCapture(file_path) if asyncVideo_flag: video_capture.start() if writeVideo_flag: if asyncVideo_flag: w = int(video_capture.cap.get(3)) h = int(video_capture.cap.get(4)) else: w = int(video_capture.get(3)) h = int(video_capture.get(4)) fourcc = cv2.VideoWriter_fourcc(*'XVID') out = cv2.VideoWriter(output_name, fourcc, 15, (w, h)) frame_index = -1 fps = 0.0 fps_imutils = imutils.video.FPS().start() ret, first_frame = video_capture.read() if door_array is None: if initialize_door_by_yourself: door_array = select_object(first_frame)[0] print(door_array) else: all_doors = read_door_info('data_files/doors_info.csv') door_array = all_doors[video_name] border_door = door_array[3] error_values = [] truth = get_truth(video_name) while True: ret, frame = video_capture.read() # frame shape 640*480*3 if not ret: total_count = counter.return_total_count() true_total = truth.inside + truth.outside err = abs(total_count - true_total) / true_total log_res = "in video: {}\n predicted / true\n counter in: {} / {}\n counter out: {} / {}\n" \ " total: {} / {}\n error: {}\n______________\n".format(video_name, counter.counter_in, truth.inside, counter.counter_out, truth.outside, total_count, true_total, err) with open('log_results.txt', 'w') as file: file.write(log_res) print(log_res) error_values.append(err) break t1 = time.time() image = Image.fromarray(frame[..., ::-1]) # bgr to rgb boxes, confidence, classes = yolo.detect_image(image) features = encoder(frame, boxes) detections = [ Detection(bbox, confidence, cls, feature) for bbox, confidence, cls, feature in zip( boxes, confidence, classes, features) ] # Run non-maxima suppression. 
boxes = np.array([d.tlwh for d in detections]) scores = np.array([d.confidence for d in detections]) classes = np.array([d.cls for d in detections]) indices = preprocessing.non_max_suppression(boxes, nms_max_overlap, scores) detections = [detections[i] for i in indices] # Call the tracker tracker.predict() tracker.update(detections) cv2.rectangle(frame, (int(door_array[0]), int(door_array[1])), (int(door_array[2]), int(door_array[3])), (23, 158, 21), 2) for det in detections: bbox = det.to_tlbr() if show_detections and len(classes) > 0: score = "%.2f" % (det.confidence * 100) + "%" rect_head = Rectangle(bbox[0], bbox[1], bbox[2], bbox[3]) rect_door = Rectangle(int(door_array[0]), int(door_array[1]), int(door_array[2]), int(door_array[3])) intersection = rect_head & rect_door if intersection: squares_coeff = rect_square(*intersection) / rect_square( *rect_head) cv2.putText( frame, score + " inter: " + str(round(squares_coeff, 3)), (int(bbox[0]), int(bbox[3])), 0, 1e-3 * frame.shape[0], (0, 100, 255), 5) cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), (255, 0, 0), 3) for track in tracker.tracks: if not track.is_confirmed() or track.time_since_update > 1: continue bbox = track.to_tlbr() # first appearence of object with id=track.id if track.track_id not in counter.people_init or counter.people_init[ track.track_id] == 0: counter.obj_initialized(track.track_id) rect_head = Rectangle(bbox[0], bbox[1], bbox[2], bbox[3]) rect_door = Rectangle(door_array[0], door_array[1], door_array[2], door_array[3]) res = rect_head & rect_door if res: inter_square = rect_square(*res) head_square = rect_square(*rect_head) # was initialized in door, probably going in if (inter_square / head_square) >= 0.8: counter.people_init[track.track_id] = 2 # initialized in the bus, mb going out elif (inter_square / head_square) <= 0.4 or bbox[3] > border_door: counter.people_init[track.track_id] = 1 # res is None, means that object is not in door contour else: counter.people_init[track.track_id] = 1 counter.people_bbox[track.track_id] = bbox counter.cur_bbox[track.track_id] = bbox adc = "%.2f" % (track.adc * 100) + "%" # Average detection confidence cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), (255, 255, 255), 2) cv2.putText(frame, "ID: " + str(track.track_id), (int(bbox[0]), int(bbox[1])), 0, 1e-3 * frame.shape[0], (0, 255, 0), 5) if not show_detections: track_cls = track.cls cv2.putText(frame, str(track_cls), (int(bbox[0]), int(bbox[3])), 0, 1e-3 * frame.shape[0], (0, 255, 0), 1) cv2.putText( frame, 'ADC: ' + adc, (int(bbox[0]), int(bbox[3] + 2e-2 * frame.shape[1])), 0, 1e-3 * frame.shape[0], (0, 255, 0), 1) id_get_lost = [ track.track_id for track in tracker.tracks if track.time_since_update >= 25 and track.age >= 29 ] id_inside_tracked = [ track.track_id for track in tracker.tracks if track.age > 60 ] for val in counter.people_init.keys(): # check bbox also cur_c = find_centroid(counter.cur_bbox[val]) init_c = find_centroid(counter.people_bbox[val]) vector_person = (cur_c[0] - init_c[0], cur_c[1] - init_c[1]) if val in id_get_lost and counter.people_init[val] != -1: # if vector_person < 0 then current coord is less than initialized, it means that man is going # in the exit direction if vector_person[1] > 70 and counter.people_init[ val] == 2: # and counter.people_bbox[val][3] > border_door \ counter.get_in() elif vector_person[1] < -70 and counter.people_init[val] == 1: counter.get_out() counter.people_init[val] = -1 print(f"person left frame") 
print(f"current centroid - init : {cur_c} - {init_c}\n") print(f"vector: {vector_person}\n") del val # elif val in id_inside_tracked and val not in id_get_lost and counter.people_init[val] == 1 \ # and bb_intersection_over_union(counter.cur_bbox[val], door_array) <= 0.3 \ # and vector_person[1] > 0: # and \ # # counter.people_bbox[val][3] > border_door: # counter.get_in() # # counter.people_init[val] = -1 # print(f"person is tracked for a long time") # print(f"current centroid - init : {cur_c} - {init_c}\n") # print(f"vector: {vector_person}\n") # imaggg = cv2.line(frame, find_centroid(counter.cur_bbox[val]), # find_centroid(counter.people_bbox[val]), # (0, 0, 255), 7) # cv2.imshow('frame', imaggg) # cv2.waitKey(0) ins, outs = counter.show_counter() cv2.putText(frame, "in: {}, out: {} ".format(ins, outs), (10, 30), 0, 1e-3 * frame.shape[0], (255, 0, 0), 5) cv2.namedWindow('image', cv2.WINDOW_NORMAL) cv2.resizeWindow('image', 1400, 800) cv2.imshow('image', frame) if writeVideo_flag: # save a frame out.write(frame) frame_index = frame_index + 1 fps_imutils.update() if not asyncVideo_flag: fps = (fps + (1. / (time.time() - t1))) / 2 # print("FPS = %f" % (fps)) # Press Q to stop! if cv2.waitKey(1) & 0xFF == ord('q'): break fps_imutils.stop() print('imutils FPS: {}'.format(fps_imutils.fps())) if asyncVideo_flag: video_capture.stop() else: video_capture.release() if writeVideo_flag: out.release() cv2.destroyAllWindows() mean_error = np.mean(error_values) print("mean error for {} video: {}".format(video_name, mean_error))
def main(): parser = argparse.ArgumentParser(description='Process some integers.') parser.add_argument('input', type=str, help='Input video path') parser.add_argument('bbox', type=str, help='Input bounding box path') parser.add_argument('output', type=str, help='Ouput video path') args = parser.parse_args() # Definition of the parameters max_cosine_distance = 0.3 nn_budget = None nms_max_overlap = 1.0 # Deep SORT model_filename = 'model_data/mars-small128.pb' encoder = gdet.create_box_encoder(model_filename, batch_size=1) metric = nn_matching.NearestNeighborDistanceMetric("cosine", max_cosine_distance, nn_budget) tracker = Tracker(metric) tracking = True writeVideo_flag = True asyncVideo_flag = False file_path = args.input if asyncVideo_flag: video_capture = VideoCaptureAsync(file_path) else: video_capture = cv2.VideoCapture(file_path) if asyncVideo_flag: video_capture.start() if writeVideo_flag: if asyncVideo_flag: w = int(video_capture.cap.get(3)) h = int(video_capture.cap.get(4)) else: w = int(video_capture.get(3)) h = int(video_capture.get(4)) fourcc = cv2.VideoWriter_fourcc(*'XVID') out = cv2.VideoWriter(args.output, fourcc, 30, (w, h)) frame_index = -1 fps = 0.0 # fps_imutils = imutils.video.FPS().start() with open(args.bbox) as f: data = json.load(f) frame_index = 0 while True: ret, frame = video_capture.read() # frame shape 640*480*3 if ret != True: break t1 = time.time() image = Image.fromarray(frame[..., ::-1]) # bgr to rgb boxes = np.asarray( [pred['bbox'] for pred in data[frame_index]['annotations']]) confidence = np.asarray( [pred['score'] for pred in data[frame_index]['annotations']]) classes = np.asarray( [pred['label'] for pred in data[frame_index]['annotations']]) if tracking: features = encoder(frame, boxes) detections = [ Detection(bbox, confidence, cls, feature) for bbox, confidence, cls, feature in zip( boxes, confidence, classes, features) ] # Run non-maxima suppression. boxes = np.array([d.tlwh for d in detections]) scores = np.array([d.confidence for d in detections]) indices = preprocessing.non_max_suppression(boxes, nms_max_overlap, scores) detections = [detections[i] for i in indices] if tracking: # Call the tracker tracker.predict() tracker.update(detections) for track in tracker.tracks: if not track.is_confirmed() or track.time_since_update > 1: continue bbox = track.to_tlbr() cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), (255, 255, 255), 2) cv2.putText(frame, "ID: " + str(track.track_id), (int(bbox[0]), int(bbox[1])), 0, 1.5e-3 * frame.shape[0], (0, 255, 0), 1) for det in detections: bbox = det.to_tlbr() score = "%.2f" % round(det.confidence * 100, 2) + "%" cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), (255, 0, 0), 2) if len(classes) > 0: cls = det.cls cv2.putText(frame, str(cls) + " " + score, (int(bbox[0]), int(bbox[3])), 0, 1.5e-3 * frame.shape[0], (0, 255, 0), 1) # cv2.imshow('', frame) if writeVideo_flag: # and not asyncVideo_flag: # save a frame out.write(frame) frame_index = frame_index + 1 # fps_imutils.update() # if not asyncVideo_flag: # fps = (fps + (1./(time.time()-t1))) / 2 # print("FPS = %f"%(fps)) # Press Q to stop! # if cv2.waitKey(1) & 0xFF == ord('q'): # break # fps_imutils.stop() # print('imutils FPS: {}'.format(fps_imutils.fps())) if asyncVideo_flag: video_capture.stop() else: video_capture.release() if writeVideo_flag: out.release()
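# The bbox argument above is consumed as data[frame_index]['annotations'], so
# the JSON file is expected to be a list with one entry per frame, each holding
# an "annotations" list of {"bbox", "score", "label"} dicts. A sketch of the
# assumed layout (bbox presumably in the tlwh form that Detection stores):
# [
#   {"annotations": [
#       {"bbox": [x, y, w, h], "score": 0.93, "label": "person"},
#       {"bbox": [x, y, w, h], "score": 0.81, "label": "person"}
#   ]},
#   {"annotations": []}
# ]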
def main(yolo): # Definition of the parameters max_cosine_distance = 0.25 max_cross_cosine_distance = 0.5 nn_budget = None nms_max_overlap = 0.5 frame_rate = 12 file_path = 'reid-wide2' file_path2 = 'reid-long2' show_detections = False writeVideo_flag = False asyncVideo_flag = False beta_calulate_flag = False predict_ns_flag = False predict_time = 0.5 multi_camera_flag = True # Deep SORT model_filename = 'model_data/mars-small128.pb' encoder = gdet.create_box_encoder(model_filename, batch_size=1) metric = nn_matching.NearestNeighborDistanceMetric("cosine", max_cosine_distance, nn_budget) tracker = Tracker(metric, frame_rate=frame_rate) if multi_camera_flag: metric2 = nn_matching.NearestNeighborDistanceMetric( "cosine", max_cosine_distance, nn_budget) tracker2 = Tracker(metric2, frame_rate=frame_rate) if asyncVideo_flag: video_capture = VideoCaptureAsync(file_path + ".mp4") else: video_capture = cv2.VideoCapture(file_path + ".mp4") if multi_camera_flag: video_capture2 = cv2.VideoCapture(file_path2 + ".mp4") if asyncVideo_flag: video_capture.start() if asyncVideo_flag: w = int(video_capture.cap.get(3)) h = int(video_capture.cap.get(4)) else: w = int(video_capture.get(3)) h = int(video_capture.get(4)) if writeVideo_flag: fourcc = cv2.VideoWriter_fourcc(*'XVID') out = cv2.VideoWriter(file_path2 + "-output.avi", fourcc, frame_rate, (w, h)) frame_index = -1 if multi_camera_flag: out2 = cv2.VideoWriter(file_path + "-output2.avi", fourcc, frame_rate, (w, h)) fps = 0.0 fps_imutils = imutils.video.FPS().start() alpha = np.arctan((326 + (369 - 326) * (250 - 110) / (300 - 110) - w / 2) / w * 6.4 / 3.6) * 180 / np.pi beta = 0 beta_last = 0 x_temp = 0 track_angle = np.array([]) x_axis = np.array([]) x_predict = np.array([]) x_current = np.array([]) x_kalman = np.array([]) matches_id = [] alert_mode_flag = False while True: ret, frame = video_capture.read() # frame shape 640*480*3 if ret != True: break t1 = time.time() image = Image.fromarray(frame[..., ::-1]) # bgr to rgb boxes, confidence, classes = yolo.detect_image(image) features = encoder(frame, boxes) detections = [ Detection(bbox, confidence, cls, feature) for bbox, confidence, cls, feature in zip( boxes, confidence, classes, features) ] # Run non-maxima suppression. 
boxes = np.array([d.tlwh for d in detections]) scores = np.array([d.confidence for d in detections]) classes = np.array([d.cls for d in detections]) indices = preprocessing.non_max_suppression(boxes, nms_max_overlap, scores) detections = [detections[i] for i in indices] # Call the tracker tracker.predict() tracker.update(detections) if predict_ns_flag: tracker.predict_ns(predict_time) if alert_mode_flag: if len(tracker.tracks) > p: ret = tracker.tracks[track1_idx].is_confirmed() if not ret: alert_mode_flag = False else: alert_mode_flag = False if not alert_mode_flag: for det in detections: bbox = det.to_tlbr() if show_detections and len(classes) > 0: det_cls = det.cls score = "%.2f" % (det.confidence * 100) + "%" cv2.putText(frame, str(det_cls) + " " + score, (int(bbox[0]), int(bbox[3])), 0, 1e-3 * frame.shape[0], (0, 255, 0), 1) cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), (255, 0, 0), 2) for track in tracker.tracks: if not track.is_confirmed() or track.time_since_update > 5: continue bbox = track.to_tlbr() cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), (255, 255, 255), 2) cv2.putText(frame, "ID: " + str(track.track_id), (int(bbox[0]), int(bbox[1])), 0, 1e-3 * frame.shape[0], (0, 255, 0), 1) if predict_ns_flag: if track.x_predict > 0 and track.x_predict < w: cv2.circle(frame, (track.x_predict, int(h / 2)), 15, (0, 0, 255), -1) x_temp += 1 / frame_rate x_axis = np.append(x_axis, x_temp) x_predict = np.append(x_predict, track.x_predict) x_current = np.append(x_current, track.x_current) x_kalman = np.append(x_kalman, track.mean[0]) if beta_calulate_flag: beta = np.arctan( (track.mean[0] - w / 2) / w * 6.4 / 16) * 180 / np.pi else: bbox = tracker.tracks[track1_idx].to_tlbr() cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), (0, 0, 255), 2) cv2.putText(frame, "ID: " + str(track.track_id), (int(bbox[0]), int(bbox[1])), 0, 1e-3 * frame.shape[0], (0, 255, 0), 1) # if not show_detections: # track_cls = track.cls # adc = "%.2f" % (track.adc * 100) + "%" # Average detection confidence # cv2.putText(frame, str(track_cls), (int(bbox[0]), int(bbox[3])), 0, 1e-3 * frame.shape[0], (0, 255, 0), # 1) # cv2.putText(frame, 'ADC: ' + adc, (int(bbox[0]), int(bbox[3] + 2e-2 * frame.shape[1])), 0, # 1e-3 * frame.shape[0], (0, 255, 0), 1) if beta_calulate_flag: if np.isclose(beta, beta_last) and abs(beta) > 7: beta = np.sign(beta) * np.arctan(3.2 / 16) * 180 / np.pi cv2.putText(frame, "Beta_angle: " + '{:4.2f}'.format(beta), (20, 20), 0, 1e-3 * frame.shape[0], (0, 0, 255), 1) beta_last = beta x_temp += 1 / frame_rate x_axis = np.append(x_axis, x_temp) track_angle = np.append(track_angle, alpha + beta) cv2.imshow('camera1', frame) if writeVideo_flag: # save a frame out.write(frame) frame_index = frame_index + 1 if multi_camera_flag: ret, frame = video_capture2.read() # frame shape 640*480*3 if ret != True: break image = Image.fromarray(frame[..., ::-1]) # bgr to rgb boxes, confidence, classes = yolo.detect_image(image) features = encoder(frame, boxes) detections = [ Detection(bbox, confidence, cls, feature) for bbox, confidence, cls, feature in zip( boxes, confidence, classes, features) ] # Run non-maxima suppression. 
boxes = np.array([d.tlwh for d in detections]) scores = np.array([d.confidence for d in detections]) classes = np.array([d.cls for d in detections]) indices = preprocessing.non_max_suppression( boxes, nms_max_overlap, scores) detections = [detections[i] for i in indices] # Call the tracker tracker2.predict() tracker2.update(detections) matches, unmatched_tracks_1, unmatched_tracks_2 = cross_tracker_match( tracker, tracker2, max_cross_cosine_distance) matches_id.clear() for track_1_idx, track_2_idx in matches: matches_id.append((tracker.tracks[track_1_idx].track_id, tracker2.tracks[track_2_idx].track_id)) print("Matches:", matches_id) if alert_mode_flag: if len(tracker2.tracks) > track2_idx: ret = tracker2.tracks[track2_idx].is_confirmed() if not ret: alert_mode_flag = False else: alert_mode_flag = False if not alert_mode_flag: for det in detections: bbox = det.to_tlbr() if show_detections and len(classes) > 0: det_cls = det.cls score = "%.2f" % (det.confidence * 100) + "%" cv2.putText(frame, str(det_cls) + " " + score, (int(bbox[0]), int(bbox[3])), 0, 1e-3 * frame.shape[0], (0, 255, 0), 1) cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), (255, 0, 0), 2) for track in tracker2.tracks: if not track.is_confirmed() or track.time_since_update > 5: continue bbox = track.to_tlbr() cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), (255, 255, 255), 2) cv2.putText(frame, "ID: " + str(track.track_id), (int(bbox[0]), int(bbox[1])), 0, 1e-3 * frame.shape[0], (0, 255, 0), 1) if cv2.waitKey(1) & 0xFF == ord('t'): roi = cv2.selectROI("Select Target", frame) Roi = [Detection(roi, 0, None, 0)] ret, track2_idx = ROI_target_match(tracker2, Roi) cv2.destroyWindow("Select Target") if ret: t_list = [i for (i, j) in matches if j == track2_idx] if t_list != []: track1_idx = t_list[0] alert_mode_flag = True else: bbox = tracker2.tracks[track2_idx].to_tlbr() cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), (0, 0, 255), 2) cv2.putText(frame, "ID: " + str(track.track_id), (int(bbox[0]), int(bbox[1])), 0, 1e-3 * frame.shape[0], (0, 255, 0), 1) cv2.imshow('camera2', frame) if writeVideo_flag: # save a frame out2.write(frame) # Press Q to stop! if cv2.waitKey(1) & 0xFF == ord('q'): break # if not asyncVideo_flag: # fps = (fps + (1. / (time.time() - t1))) / 2 # print("FPS = %.2f" % (fps)) fps_imutils.update() fps_imutils.stop() print('imutils FPS: {:.2f}'.format(fps_imutils.fps())) if asyncVideo_flag: video_capture.stop() else: video_capture.release() if writeVideo_flag: out.release() if multi_camera_flag: out2.release() if predict_ns_flag: np.save("x_axis.npy", x_axis) np.save("x_predict.npy", x_predict) np.save("x_current.npy", x_current) np.save("x_kalman.npy", x_kalman) plt.plot(x_axis, x_current, label='X_Measured') plt.plot(x_axis, x_kalman, label='X_Filtered') plt.plot(x_axis, x_predict, label='X_Predicted') plt.legend() plt.xlabel('Time (s)') plt.ylabel('Pixel') plt.title('Motion Prediction') plt.show() if beta_calulate_flag: np.save("x_axis.npy", x_axis) np.save("track_angle.npy", track_angle) plt.plot(x_axis, track_angle) plt.xlabel('Time (s)') plt.ylabel('Angle (deg)') plt.title('Target Angle') plt.show() cv2.destroyAllWindows()
    file_path = '/home/sohaibrabbani/PycharmProjects/Strong_Baseline_of_Pedestrian_Attribute_Recognition/videos/abc.mp4'

    if asyncVideo_flag:
        video_capture = VideoCaptureAsync(file_path)
    else:
        video_capture = cv2.VideoCapture(file_path)

    if asyncVideo_flag:
        video_capture.start()

    if writeVideo_flag:
        if asyncVideo_flag:
            w = int(video_capture.cap.get(3))
            h = int(video_capture.cap.get(4))
        else:
            w = int(video_capture.get(3))
            h = int(video_capture.get(4))
        fourcc = cv2.VideoWriter_fourcc(*'XVID')
        out = cv2.VideoWriter('output_custom.avi', fourcc, 30, (w, h))
        frame_index = -1

    fps = 0.0
    fps_imutils = imutils.video.FPS().start()

    # for img_path in tqdm(img_paths):
    count = 0
    while True:
        ret, frame = video_capture.read()  # frame shape 640*480*3
        if ret != True:
            break

        t1 = time.time()
def main(yolo): # Definition of the parameters max_cosine_distance = 0.3 nn_budget = None nms_max_overlap = 1.0 # Deep SORT model_filename = 'model_data/mars-small128.pb' encoder = gdet.create_box_encoder(model_filename, batch_size=1) metric = nn_matching.NearestNeighborDistanceMetric("cosine", max_cosine_distance, nn_budget) tracker = Tracker(metric) writeVideo_flag = True asyncVideo_flag = False showImg_flag = True file_path = 'C:\\Users\\Divided\\Desktop\\test_vid.mp4' if asyncVideo_flag: video_capture = VideoCaptureAsync(file_path) else: video_capture = cv2.VideoCapture(file_path) if asyncVideo_flag: video_capture.start() if writeVideo_flag: if asyncVideo_flag: w = int(video_capture.cap.get(3)) h = int(video_capture.cap.get(4)) else: w = int(video_capture.get(3)) h = int(video_capture.get(4)) fourcc = cv2.VideoWriter_fourcc(*'XVID') out = cv2.VideoWriter('output_yolov4.avi', fourcc, 30, (w, h)) frame_index = -1 fps = 0.0 fps_imutils = imutils.video.FPS().start() while True: preprocess_time_start = get_current_ms() ret, frame = video_capture.read() # frame shape 640*480*3 if ret != True: break overlay = frame.copy() t1 = time.time() image = Image.fromarray(frame[..., ::-1]) # bgr to rgb preprocess_time = get_current_ms() - preprocess_time_start # print("Preprocessing time:\t{}".format(preprocess_time)) detection_time_start = get_current_ms() boxs = yolo.detect_image(image)[0] confidence = yolo.detect_image(image)[1] detection_time = get_current_ms() - detection_time_start # print("Detection time:\t{}".format(detection_time)) features = encoder(frame, boxs) detections = [Detection(bbox, confidence, feature) for bbox, confidence, feature in zip(boxs, confidence, features)] nms_start_time = get_current_ms() # Run non-maxima suppression. boxes = np.array([d.tlwh for d in detections]) scores = np.array([d.confidence for d in detections]) indices = preprocessing.non_max_suppression(boxes, nms_max_overlap, scores) detections = [detections[i] for i in indices] nms_time = get_current_ms() - nms_start_time # print("NMS time:\t{}".format(nms_time)) tracker_start_time = get_current_ms() # Call the tracker tracker.predict() tracker.update(detections) tracker_time = get_current_ms() - tracker_start_time # print("Tracker time:\t{}".format(tracker_time)) postprocess_start_time = get_current_ms() for track in tracker.tracks: if not track.is_confirmed() or track.time_since_update > 1: continue track_bbox = track.to_tlbr() cv2.rectangle(frame, (int(track_bbox[0]), int(track_bbox[1])), (int(track_bbox[2]), int(track_bbox[3])), (255, 255, 255), 1) if track.det_hist: det_curr = track.det_hist[-1] det_bbox = det_curr.to_tlbr() score = "%.2f" % round(det_curr.confidence, 2) cv2.rectangle(overlay, (int(det_bbox[0]), int(det_bbox[1])), (int(det_bbox[2]), int(det_bbox[3])), track.color, 2) cv2.putText(overlay, score, (int(det_bbox[0]) + 5, int(det_bbox[3]) - 5), 0, 0.5, (255, 255, 255), 2) cv2.putText(overlay, str(track.track_id), (int(det_bbox[0]) + 5, int(det_bbox[1]) - 5), 0, 0.5, (255, 255, 255), 2) # centroid = det_curr.get_centroid() # cv2.circle(overlay, (int(centroid[0]), int(centroid[1])), radius=3, # color=(0, 0, 255), thickness=-1) alpha = 0.5 # Transparency factor. 
# Following line overlays transparent rectangle over the image frame = cv2.addWeighted(overlay, alpha, frame, 1 - alpha, 0) if showImg_flag: cv2.imshow('', frame) postprocess_time = get_current_ms() - postprocess_start_time # print("Postprocess time:\t{}".format(postprocess_time)) if writeVideo_flag: # and not asyncVideo_flag: # save a frame out.write(frame) frame_index = frame_index + 1 fps_imutils.update() fps = (fps + (1. / (time.time() - t1))) / 2 print("{}/{}/{}/{}/{}".format(preprocess_time, detection_time, nms_time, tracker_time, postprocess_time)) print("FPS = %f" % fps) # Press Q to stop! if cv2.waitKey(1) & 0xFF == ord('q'): break fps_imutils.stop() print('imutils FPS: {}'.format(fps_imutils.fps())) if asyncVideo_flag: video_capture.stop() else: video_capture.release() if writeVideo_flag: out.release() cv2.destroyAllWindows()
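# The detection step in the loop above calls yolo.detect_image(image) twice per frame
# (once for the boxes, once for the confidences), which runs the network twice. A
# minimal sketch of the single-call form, assuming the same (boxes, confidences)
# return order the indexing above relies on; 'detect_once' is an illustrative name:
def detect_once(yolo, encoder, frame, image):
    boxs, confidence = yolo.detect_image(image)[:2]  # one forward pass, unpack both values
    features = encoder(frame, boxs)
    return boxs, confidence, features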
def main(yolo): # Definition of the parameters h = 0 w = 0 frame_index = -1 fps = 0.0 flows = OrderedDict() max_cosine_distance = 0.3 nn_budget = None nms_max_overlap = 1.0 # Deep SORT model_filename = 'model_data/mars-small128.pb' encoder = gdet.create_box_encoder(model_filename, batch_size=1) metric = nn_matching.NearestNeighborDistanceMetric("cosine", max_cosine_distance, nn_budget) tracker = Tracker(metric) writeVideo_flag = True asyncVideo_flag = False file_path = '/home/sohaibrabbani/Downloads/overhead_people_clips/baggageclaim.mp4' if asyncVideo_flag: video_capture = VideoCaptureAsync(file_path) else: video_capture = cv2.VideoCapture(file_path) if asyncVideo_flag: video_capture.start() if writeVideo_flag: if asyncVideo_flag: w = int(video_capture.cap.get(3)) h = int(video_capture.cap.get(4)) else: w = int(video_capture.get(3)) h = int(video_capture.get(4)) fourcc = cv2.VideoWriter_fourcc(*'XVID') out = cv2.VideoWriter('output_yolov3.avi', fourcc, video_capture.get(cv2.CAP_PROP_FPS), (w, h)) # frame_index = -1 fps_imutils = imutils.video.FPS().start() while True: ret, frame = video_capture.read() # frame shape 640*480*3 if ret != True: break t1 = time.time() image = Image.fromarray(frame[..., ::-1]) # bgr to rgb boxs = yolo.detect_image(image)[0] confidence = yolo.detect_image(image)[1] features = encoder(frame, boxs) detections = [ Detection(bbox, confidence, feature) for bbox, confidence, feature in zip(boxs, confidence, features) ] # Run non-maxima suppression. boxes = np.array([d.tlwh for d in detections]) scores = np.array([d.confidence for d in detections]) indices = preprocessing.non_max_suppression(boxes, nms_max_overlap, scores) detections = [detections[i] for i in indices] # Call the tracker tracker.predict() tracker.update(detections) for track in tracker.tracks: if not track.is_confirmed() or track.time_since_update > 60: continue bbox = track.to_tlbr() cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), (255, 255, 255), 2) cv2.putText(frame, str(track.track_id), (int(bbox[0]), int(bbox[1])), 0, 5e-3 * 200, (0, 255, 0), 2) # Initializing the flow list for saving the flow data of each person - Sohaib if track.track_id not in flows: flows[track.track_id] = OrderedDict() # Saving location of a person in a frame - Sohaib flows[track.track_id][frame_index + 1] = np.array( [int(bbox[0]), int(bbox[1])]) for det in detections: bbox = det.to_tlbr() score = "%.2f" % round(det.confidence * 100, 2) cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), (255, 0, 0), 2) cv2.putText(frame, score + '%', (int(bbox[0]), int(bbox[3])), 0, 5e-3 * 130, (0, 255, 0), 2) cv2.imshow('', frame) if writeVideo_flag: # and not asyncVideo_flag: # save a frame out.write(frame) frame_index = frame_index + 1 fps_imutils.update() fps = (fps + (1. / (time.time() - t1))) / 2 print("FPS = %f" % (fps)) # Press Q to stop! if cv2.waitKey(1) & 0xFF == ord('q'): break # Calling the serialize function to save the pickle file - Sohaib serialize_flow_data(flows=flows, height=h, width=w, file_path=file_path, frame_count=video_capture.get( cv2.CAP_PROP_FRAME_COUNT), fps=video_capture.get(cv2.CAP_PROP_FPS)) fps_imutils.stop() print('imutils FPS: {}'.format(fps_imutils.fps())) if asyncVideo_flag: video_capture.stop() else: video_capture.release() if writeVideo_flag: out.release() cv2.destroyAllWindows()
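# The 'flows' mapping built above stores, for each track id, the box's top-left corner
# keyed by frame index before serialize_flow_data pickles it. A minimal sketch of
# summarising net displacement per track from the in-memory dict ('summarize_flows'
# is an illustrative helper, not part of the original code):
def summarize_flows(flows):
    for track_id, points in flows.items():
        frames = sorted(points)
        if len(frames) >= 2:
            displacement = points[frames[-1]] - points[frames[0]]  # numpy [dx, dy]
            print(track_id, 'moved', displacement, 'over', len(frames), 'frames')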
def main(yolo): # Definition of the parameters max_cosine_distance = 0.3 nn_budget = None nms_max_overlap = 1.0 # Deep SORT model_filename = 'model_data/mars-small128.pb' encoder = gdet.create_box_encoder(model_filename, batch_size=1) metric = nn_matching.NearestNeighborDistanceMetric("cosine", max_cosine_distance, nn_budget) tracker = Tracker(metric) show_detections = True writeVideo_flag = True asyncVideo_flag = False file_path = '/media/wangsen/新加卷/MOF_data/citywalks/clips/PADUA/clip_000001.mp4' if asyncVideo_flag: video_capture = VideoCaptureAsync(file_path) else: video_capture = cv2.VideoCapture(file_path) if asyncVideo_flag: video_capture.start() if writeVideo_flag: if asyncVideo_flag: w = int(video_capture.cap.get(3)) h = int(video_capture.cap.get(4)) else: w = int(video_capture.get(3)) h = int(video_capture.get(4)) fourcc = cv2.VideoWriter_fourcc(*'XVID') out = cv2.VideoWriter('output_yolov4.avi', fourcc, 30, (w, h)) frame_index = -1 fps = 0.0 fps_imutils = imutils.video.FPS().start() frame_num = 0 result = [] ped_cnt = 0 ped_index_store = [] while True: ret, frame = video_capture.read() # frame shape 640*480*3 if ret != True: break t1 = time.time() image = Image.fromarray(frame[..., ::-1]) # bgr to rgb boxes, confidence, classes = yolo.detect_image(image) features = encoder(frame, boxes) detections = [ Detection(bbox, confidence, cls, feature) for bbox, confidence, cls, feature in zip( boxes, confidence, classes, features) ] # Run non-maxima suppression. boxes = np.array([d.tlwh for d in detections]) scores = np.array([d.confidence for d in detections]) classes = np.array([d.cls for d in detections]) indices = preprocessing.non_max_suppression(boxes, nms_max_overlap, scores) detections = [detections[i] for i in indices] # Call the tracker tracker.predict() tracker.update(detections) # for det in detections: # bbox = det.to_tlbr() # if show_detections and len(classes) > 0: # det_cls = det.cls # score = "%.2f" % (det.confidence * 100) + "%" # cv2.putText(frame, str(det_cls) + " " + score, (int(bbox[0]), int(bbox[3])), 0, # 1e-3 * frame.shape[0], (0, 255, 0), 1) # cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), (255, 0, 0), 2) for track in tracker.tracks: if not track.is_confirmed() or track.time_since_update > 1: continue bbox = track.to_tlbr() if int(bbox[0]) < 0 or int(bbox[1]) < 0 or int(bbox[2]) < 0 or int( bbox[3]) < 0: continue # 行人图像的裁剪 crop_image = frame[int(bbox[1]):int(bbox[3]), int(bbox[0]):int(bbox[2])] # 裁剪 crop_image = cv2.resize(crop_image, (128, 256)) cv2.imwrite( './crop_image/frame_' + str(frame_num).zfill(4) + '_ped_' + str(track.track_id) + '.png', crop_image) adc = "%.2f" % (track.adc * 100) + "%" # Average detection confidence cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), (255, 255, 255), 2) cv2.putText(frame, "ID: " + str(track.track_id), (int(bbox[0]), int(bbox[1])), 0, 1e-3 * frame.shape[0], (0, 255, 0), 1) if not show_detections: track_cls = track.cls cv2.putText(frame, str(track_cls), (int(bbox[0]), int(bbox[3])), 0, 1e-3 * frame.shape[0], (0, 255, 0), 1) cv2.putText( frame, 'ADC: ' + adc, (int(bbox[0]), int(bbox[3] + 2e-2 * frame.shape[1])), 0, 1e-3 * frame.shape[0], (0, 255, 0), 1) w = int(int(bbox[2]) - int(bbox[0])) h = int(int(bbox[3]) - int(bbox[1])) tmp_result = [] # filename tmp_result.append("clip_000001.mp4") # frame_num tmp_result.append(str(frame_num)) # bb1~bb4 tmp_result.append(int(bbox[0])) tmp_result.append(int(bbox[1])) tmp_result.append(int(bbox[2])) 
tmp_result.append(int(bbox[3])) # track tmp_result.append(str(track.track_id)) # detection length tmp_result.append(str(0)) # Height tmp_result.append(str(h)) result.append(tmp_result) frame_num += 1 cv2.imshow('', frame) if writeVideo_flag: # save a frame out.write(frame) frame_index = frame_index + 1 fps_imutils.update() if not asyncVideo_flag: fps = (fps + (1. / (time.time() - t1))) / 2 print("FPS = %f" % (fps)) # Press Q to stop! if cv2.waitKey(1) & 0xFF == ord('q'): break # Write the results to CSV f = open('myvideo_yolo_detection.csv', 'w') csv_writer = csv.writer(f) # Write the header row csv_writer.writerow([ "filename", "frame_num", "bb1", "bb2", "bb3", "bb4", "track", "detection_length", "Height" ]) # Sort by track id result.sort(key=lambda x: x[6]) # Item handle: track for i in range(len(result)): if i == 0: pass else: # If the filename and track id match and the frame numbers are consecutive, this is the same continuous track if result[i][6] == result[i - 1][6] and int(result[i][1]) == int(result[i - 1][1]) + 1 and result[i][0] == \ result[i - 1][0]: result[i][7] = str(int(result[i - 1][7]) + 1) else: result[i][7] = '0' for i in range(len(result)): csv_writer.writerow(result[i]) f.close() fps_imutils.stop() print('imutils FPS: {}'.format(fps_imutils.fps())) if asyncVideo_flag: video_capture.stop() else: video_capture.release() if writeVideo_flag: out.release() cv2.destroyAllWindows()
def main(yolo): # Definition of the parameters max_cosine_distance = 0.3 nn_budget = None nms_max_overlap = 1.0 # Deep SORT model_filename = 'model_data/mars-small128.pb' encoder = gdet.create_box_encoder(model_filename, batch_size=1) metric = nn_matching.NearestNeighborDistanceMetric("cosine", max_cosine_distance, nn_budget) tracker = Tracker(metric) tracking = True writeVideo_flag = True asyncVideo_flag = False file_path = 'video.webm' if asyncVideo_flag: video_capture = VideoCaptureAsync(file_path) else: video_capture = cv2.VideoCapture(file_path) if asyncVideo_flag: video_capture.start() if writeVideo_flag: if asyncVideo_flag: w = int(video_capture.cap.get(3)) h = int(video_capture.cap.get(4)) else: w = int(video_capture.get(3)) h = int(video_capture.get(4)) fourcc = cv2.VideoWriter_fourcc(*'XVID') out = cv2.VideoWriter('output_yolov4.avi', fourcc, 30, (w, h)) frame_index = -1 fps = 0.0 fps_imutils = imutils.video.FPS().start() model_par, valid_transform = model_init_par() while True: ret, frame = video_capture.read() # frame shape 640*480*3 if ret != True: break t1 = time.time() image = Image.fromarray(frame[..., ::-1]) # bgr to rgb boxes, confidence, classes = yolo.detect_image(image) if tracking: features = encoder(frame, boxes) detections = [ Detection(bbox, confidence, cls, feature) for bbox, confidence, cls, feature in zip( boxes, confidence, classes, features) ] else: detections = [ Detection_YOLO(bbox, confidence, cls) for bbox, confidence, cls in zip(boxes, confidence, classes) ] # Run non-maxima suppression. boxes = np.array([d.tlwh for d in detections]) scores = np.array([d.confidence for d in detections]) indices = preprocessing.non_max_suppression(boxes, nms_max_overlap, scores) detections = [detections[i] for i in indices] for det in detections: bbox = det.to_tlbr() score = "%.2f" % round(det.confidence * 100, 2) + "%" cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), (255, 0, 0), 2) if len(classes) > 0: cls = det.cls cv2.putText(frame, str(cls) + " " + score, (int(bbox[0]), int(bbox[3])), 0, 1e-3 * frame.shape[0], (0, 255, 0), 1) if tracking: # Call the tracker tracker.predict() tracker.update(detections) for track in tracker.tracks: if not track.is_confirmed() or track.time_since_update > 1: continue bbox = track.to_tlbr() #crop_img = frame[int(bbox[1]):int(bbox[3]),int(bbox[0]):int(bbox[2])] crop_img = image.crop( [int(bbox[0]), int(bbox[1]), int(bbox[2]), int(bbox[3])]) #res_txt = demo_par(model_par, valid_transform, crop_img) #draw.rectangle(xy=person_bbox[:-1], outline='red', width=1) cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), (255, 255, 255), 2) cv2.putText(frame, "ID: " + str(track.track_id), (int(bbox[0]), int(bbox[1])), 0, 1e-3 * frame.shape[0], (0, 255, 0), 1) font = ImageFont.truetype( '/home/sohaibrabbani/PycharmProjects/Strong_Baseline_of_Pedestrian_Attribute_Recognition/arial.ttf', size=10) # positive_cnt = 1 # for txt in res_txt: # if 'personal' in txt: # #draw.text((x1, y1 + 20 * positive_cnt), txt, (255, 0, 0), font=font) # cv2.putText(frame, txt, (int(bbox[0]), int(bbox[1]) + 20 * positive_cnt), 0, # 1e-3 * frame.shape[0], (0, 255, 0), 1) # positive_cnt += 1 cv2.imshow('', frame) if writeVideo_flag: # and not asyncVideo_flag: # save a frame out.write(frame) frame_index = frame_index + 1 fps_imutils.update() if not asyncVideo_flag: fps = (fps + (1. / (time.time() - t1))) / 2 print("FPS = %f" % (fps)) # Press Q to stop! 
if cv2.waitKey(1) & 0xFF == ord('q'): break fps_imutils.stop() print('imutils FPS: {}'.format(fps_imutils.fps())) if asyncVideo_flag: video_capture.stop() else: video_capture.release() if writeVideo_flag: out.release() cv2.destroyAllWindows()
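# Every variant above repeats the same capture/writer boilerplate: pick an async or
# plain OpenCV capture, then read the frame size to build an XVID writer. A minimal
# helper sketch that factors it out; 'open_capture' and 'make_writer' are illustrative
# names, and the .cap attribute access mirrors how the code above treats VideoCaptureAsync:
import cv2

def open_capture(file_path, async_flag):
    capture = VideoCaptureAsync(file_path) if async_flag else cv2.VideoCapture(file_path)
    if async_flag:
        capture.start()
    return capture

def make_writer(capture, async_flag, output_path, fps=30):
    cap = capture.cap if async_flag else capture
    w = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))   # same as cap.get(3) above
    h = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))  # same as cap.get(4) above
    fourcc = cv2.VideoWriter_fourcc(*'XVID')
    return cv2.VideoWriter(output_path, fourcc, fps, (w, h))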
def main(yolo): # Definition of the parameters max_cosine_distance = 0.3 nn_budget = None nms_max_overlap = 1.0 # Deep SORT model_filename = 'model_data/mars-small128.pb' encoder = gdet.create_box_encoder(model_filename, batch_size=1) # Calculate cosine Distance Metric metric = nn_matching.NearestNeighborDistanceMetric("cosine", max_cosine_distance, nn_budget) tracker = Tracker(metric) # Flags for process tracking = True # Set False if you only want to do detection writeVideo_flag = True # Set False if you don't want to write frames locally asyncVideo_flag = False # It uses asynchronous processing for better FPS :Warning: Shuttering Problem # Video File Path file_path = '/mydrive/test.mp4' # Check if asyncVideo flag set to True if asyncVideo_flag : video_capture = VideoCaptureAsync(file_path) else: video_capture = cv2.VideoCapture(file_path) if asyncVideo_flag: video_capture.start() if writeVideo_flag: if asyncVideo_flag: w = int(video_capture.cap.get(3)) h = int(video_capture.cap.get(4)) else: w = int(video_capture.get(3)) h = int(video_capture.get(4)) fourcc = cv2.VideoWriter_fourcc(*'XVID') output_file = os.path.basename(file_path)[:-4] out = cv2.VideoWriter('./Output/' + output_file + "-prueba-test.avi", fourcc, 30, (w, h)) frame_index = -1 fps = 0.0 fps_imutils = imutils.video.FPS().start() while True: ret, frame = video_capture.read() # Capture frames if ret != True: break t1 = time.time() # bgr to rgb frame conversion image = Image.fromarray(frame[...,::-1]) # YOLOv4 Detection boxes, confidence, classes = yolo.detect_image(image) if tracking: # Encodes the frame and boxes for DeepSORT features = encoder(frame, boxes) # DeepSORT Detection detections = [Detection(bbox, confidence, cls, feature) for bbox, confidence, cls, feature in zip(boxes, confidence, classes, features)] else: # Only YOLOv4 Detection detections = [Detection_YOLO(bbox, confidence, cls) for bbox, confidence, cls in zip(boxes, confidence, classes)] # Run non-maxima suppression. 
boxes = np.array([d.tlwh for d in detections]) scores = np.array([d.confidence for d in detections]) indices = preprocessing.non_max_suppression(boxes, nms_max_overlap, scores) detections = [detections[i] for i in indices] if tracking: # Call the tracker tracker.predict() tracker.update(detections) for track in tracker.tracks: if not track.is_confirmed() or track.time_since_update > 1: continue bbox = track.to_tlbr() # Draw white bbox for DeepSORT cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), (255, 255, 255), 2) cv2.putText(frame, "ID: " + str(track.track_id), (int(bbox[0]), int(bbox[1])), cv2.FONT_HERSHEY_COMPLEX_SMALL, 1.5, (0, 255, 0), 1) for det in detections: bbox = det.to_tlbr() score = "%.2f" % round(det.confidence * 100, 2) # Check the class for colored bbox if len(classes) > 0: cls = det.cls center_bbox = (int(bbox[2]), int(bbox[2])) if str(cls) == 'person': # Draw Blue bbox for YOLOv4 person detection cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), (219, 152, 52), 2) elif str(cls) in ('backpack', 'handbag', 'suitcase'): # Draw Orange bbox for YOLOv4 handbag, backpack and suitcase detection cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), (65, 176, 245), 2) if not asyncVideo_flag: fps = (fps + (1./(time.time()-t1))) / 2 print("FPS = %f"%(fps)) cv2.putText(frame, "GPU: NVIDIA Tesla P100", (5, 70), cv2.FONT_HERSHEY_COMPLEX_SMALL, 0.8, (0, 255, 0), 1) cv2.putText(frame, "FPS: %.2f" % fps, (5, 50), cv2.FONT_HERSHEY_COMPLEX_SMALL, 0.8, (0, 255, 0), 1) # draw the timestamp on the frame timestamp = datetime.datetime.now() ts = timestamp.strftime("%d/%m/%Y, %H:%M:%S") cv2.putText(frame, ts, (5, 30), cv2.FONT_HERSHEY_COMPLEX_SMALL, 0.8, (0, 255, 0), 1) #cv2.imshow('', frame) if writeVideo_flag: # and not asyncVideo_flag: # save a frame out.write(frame) frame_index = frame_index + 1 fps_imutils.update() # Press Q to stop! if cv2.waitKey(1) & 0xFF == ord('q'): break fps_imutils.stop() print('imutils FPS: {}'.format(fps_imutils.fps())) if asyncVideo_flag: video_capture.stop() else: video_capture.release() if writeVideo_flag: out.release()
class CameraSource(object): def __init__(self, cameraSource, height, output_file=None, startFrame=0, async_read=False, outputToServer=False, capture_size=None): if async_read: self.camera = VideoCaptureAsync(cameraSource) else: self.camera = cv2.VideoCapture(cameraSource) if capture_size is not None: print(capture_size) self.camera.set(cv2.CAP_PROP_FRAME_WIDTH, capture_size[0]) self.camera.set(cv2.CAP_PROP_FRAME_HEIGHT, capture_size[1]) if async_read: self.ORIGINAL_WIDTH = self.camera.width self.ORIGINAL_HEIGHT = self.camera.height else: self.ORIGINAL_WIDTH = int(self.camera.get(cv2.CAP_PROP_FRAME_WIDTH)) self.ORIGINAL_HEIGHT = int(self.camera.get(cv2.CAP_PROP_FRAME_HEIGHT)) print('CameraSource') print('Requested capture size', capture_size) print('Actual capture size', self.ORIGINAL_WIDTH, self.ORIGINAL_HEIGHT) self.HEIGHT = height self.WIDTH = self.ORIGINAL_WIDTH * self.HEIGHT // self.ORIGINAL_HEIGHT self.WIDTH = self.WIDTH + self.WIDTH % 2 # Make it even. self.startFrame = startFrame self.nFrames = 0 self.writer = VideoWriter(output_file) if async_read: self.camera.start() self.outputToServer = outputToServer if outputToServer: # https://robotpy.readthedocs.io/en/stable/vision/code.html pass #self.outputStream = CameraServer.getInstance().putVideo( # 'ProcessedVisionFrame', self.WIDTH, self.HEIGHT) def GetFrame(self): # Processing on first call. if self.nFrames == 0: # Skip some frames if requested. if self.startFrame > 0: skippedFrames = 0 while True: ret, frame = self.camera.read() if not ret or frame is None: print('No more frames') return None skippedFrames += 1 if skippedFrames >= self.startFrame: break # Start timer for first frame. self.startTime = time.time() # Get frame. frame = None frameTime = time.time() if self.nFrames > 0 and self.nFrames % 50 == 0: print('FPS: ', self.nFrames / (frameTime - self.startTime)) self.nFrames += 1 ret, frame = self.camera.read() if ret and frame is not None: if frame.shape[0] != self.HEIGHT: frame = cv2.resize(frame, (self.WIDTH, self.HEIGHT)) return frame def ImageSize(self): return (self.WIDTH, self.HEIGHT) def OutputFrameAndTestContinue(self, message, frame, height=None): self.writer.OutputFrame(frame) if self.outputToServer: self.outputStream.putFrame(frame) return ShowFrameAndTestContinue(message, frame, height) def __del__(self): self.camera.release() cv2.destroyAllWindows()
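# A minimal usage sketch for the CameraSource wrapper above; the file name and display
# height are illustrative, and ShowFrameAndTestContinue / VideoWriter are assumed to
# come from the same module as the class:
source = CameraSource('test_vid.mp4', height=480, async_read=False)
while True:
    frame = source.GetFrame()
    if frame is None:
        break
    if not source.OutputFrameAndTestContinue('preview', frame):
        break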
def main(): PATH_TO_CKPT_PERSON = 'models/faster_rcnn_restnet50.pb' # Definition of the parameters max_cosine_distance = 0.3 nn_budget = 200 nms_max_overlap = 1.0 yolo = YOLO() reid = PERSON_REID() frozen_person = FROZEN_GRAPH_INFERENCE(PATH_TO_CKPT_PERSON) # # Deep SORT # model_filename = 'model_data/mars-small128.pb' # encoder = gdet.create_box_encoder(model_filename, batch_size=1) metric = nn_matching.NearestNeighborDistanceMetric("cosine", max_cosine_distance, nn_budget) tracker = Tracker(metric) # file_path = 0 if VIDEO_CAPTURE == 0 and asyncVideo_flag == True: video_capture = VideoCaptureAsync(file_path) elif VIDEO_CAPTURE == 1 and asyncVideo_flag == True: video_capture = myVideoCapture(file_path) else: video_capture = cv2.VideoCapture(file_path) im_width = int(video_capture.get(3)) im_height = int(video_capture.get(4)) if asyncVideo_flag: video_capture.start() if writeVideo_flag: if asyncVideo_flag: w = int(video_capture.cap.get(3)) h = int(video_capture.cap.get(4)) else: w = int(video_capture.get(3)) h = int(video_capture.get(4)) fourcc = cv2.VideoWriter_fourcc(*'XVID') out = cv2.VideoWriter(output_filename, fourcc, 30, (w, h)) frame_index = -1 fps = 0.0 fps_imutils = imutils.video.FPS().start() boxs = list() confidence = list() persons = list() frame_count = 0 track_count = 0 num_files = 0 while True: t1 = time.time() ret, frame = video_capture.read() # frame shape 640*480*3 if ret != True: break frame_org = frame.copy() frame_count += 1 # print('Frame count: {}'.format(frame_count)) # Person detection using Frozen Graph persons = frozen_person.run_frozen_graph(frame, im_width, im_height) boxs = [[ person['left'], person['top'], person['width'], person['height'] ] for person in persons] confidence = [person['confidence'] for person in persons] cropped_persons = list(person['cropped'] for person in persons) # # Person detection using YOLO - Keras-converted model # image = Image.fromarray(frame[...,::-1]) # bgr to rgb # boxs = yolo.detect_image(image)[0] # confidence = yolo.detect_image(image)[1] # cropped_persons = [np.array(frame[box[1]:box[1]+box[3], box[0]:box[0]+box[2]]) for box in boxs] #[x,y,w,h] # features = encoder(frame, boxs) if len(cropped_persons) > 0: features = reid.extract_feature_imgTensor(cropped_persons) # print(features.shape) detections = [ Detection(bbox, confidence, feature) for bbox, confidence, feature in zip( boxs, confidence, features) ] # Run non-maxima suppression.
boxes = np.array([d.tlwh for d in detections]) scores = np.array([d.confidence for d in detections]) indices = preprocessing.non_max_suppression( boxes, nms_max_overlap, scores) detections = [detections[i] for i in indices] # Call the tracker tracker.predict() tracker.update(detections) for track in tracker.tracks: if not track.is_confirmed() or track.time_since_update > 1: continue bbox = track.to_tlbr() cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), (0, 0, 255), 2) cv2.putText(frame, str(track.track_id), (int(bbox[0]), int(bbox[1])), 0, 5e-3 * 200, (0, 255, 0), 2) directory = os.path.join('output', str(track.track_id)) if not os.path.exists(directory): os.makedirs(directory) # file_count = len([name for name in os.listdir(directory+'/') if os.path.isfile(name)]) file_count = sum( [len(files) for root, dirs, files in os.walk(directory)]) # print(file_count) if file_count == 0: cv2.imwrite( directory + '/' + str(file_count + 1) + '.jpg', frame_org[int(bbox[1]):int(bbox[3]), int(bbox[0]):int(bbox[2])]) elif file_count > 0 and track_count % 10 == 0: cv2.imwrite( directory + '/' + str(file_count + 1) + '.jpg', frame_org[int(bbox[1]):int(bbox[3]), int(bbox[0]):int(bbox[2])]) track_count += 1 for det in detections: bbox = det.to_tlbr() score = "%.2f" % round(det.confidence * 100, 2) cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), (255, 0, 0), 2) cv2.putText(frame, score + '%', (int(bbox[0]), int(bbox[3])), 0, 5e-3 * 130, (0, 255, 0), 2) cv2.imshow('YOLO DeepSort', frame) if writeVideo_flag: # and not asyncVideo_flag: # save a frame out.write(frame) frame_index = frame_index + 1 fps_imutils.update() fps = (fps + (1. / (time.time() - t1))) / 2 # print("FPS = %f"%(fps)) # Press Q to stop! if cv2.waitKey(1) & 0xFF == ord('q'): break fps_imutils.stop() # print('imutils FPS: {}'.format(fps_imutils.fps())) if asyncVideo_flag: video_capture.stop() else: video_capture.release() if writeVideo_flag: out.release() cv2.destroyAllWindows()
def main(yolo): # Definition of the parameters max_cosine_distance = 0.3 nn_budget = None nms_max_overlap = 1.0 width = 1280 height = 720 rfps = 10 # Deep SORT model_filename = 'model_data/mars-small128.pb' encoder = gdet.create_box_encoder(model_filename, batch_size=1) metric = nn_matching.NearestNeighborDistanceMetric("cosine", max_cosine_distance, nn_budget) tracker = Tracker(metric) tracking = args.tracking writeVideo_flag = args.writeVideo_flag asyncVideo_flag = args.asyncVideo_flag webcamera_flag = args.webcamera_flag ipcamera_flag = args.ipcamera_flag udp_flag = args.udp_flag full_cam_addr, key = sd.set_address(args.ipaddress, args.cam_ip, args.cam_cmd, args.key) cam_ip = full_cam_addr.replace(args.cam_cmd, "").replace("rtsp://*****:*****@", "") if args.jpegmode: full_cam_addr = full_cam_addr.replace("rtsp", "http") print(full_cam_addr) print(key) if asyncVideo_flag: print("load videofile") video_capture = VideoCaptureAsync(args.videofile) elif ipcamera_flag or args.jpegmode: print("load ipcamera") video_capture = cv2.VideoCapture(full_cam_addr) # width = video_capture.get(cv2.CAP_PROP_FRAME_WIDTH) # height = video_capture.get(cv2.CAP_PROP_FRAME_HEIGHT) # rfps = video_capture.get(cv2.CAP_PROP_FPS) print("fps:{}width:{}height:{}".format(rfps, width, height)) elif webcamera_flag: print("load webcamera") video_capture = cv2.VideoCapture(0) else: print("load videofile") video_capture = cv2.VideoCapture(args.videofile) # video_capture.start() if writeVideo_flag: if asyncVideo_flag: w = int(video_capture.cap.get(3)) h = int(video_capture.cap.get(4)) else: w = int(video_capture.get(3)) h = int(video_capture.get(4)) fourcc = cv2.VideoWriter_fourcc(*'XVID') out = cv2.VideoWriter('output_yolov4.avi', fourcc, 30, (w, h)) frame_index = -1 if udp_flag: HOST = '' PORT = 5000 address = '192.168.2.255' sock = socket(AF_INET, SOCK_DGRAM) sock.setsockopt(SOL_SOCKET, SO_BROADCAST, 1) sock.bind((HOST, PORT)) fps = 0.0 i = 0 if not args.maskoff: maskbgi = Image.new('RGB', (int(width), int(height)), (0, 0, 0)) mask = Image.open(args.maskdir + 'mask' + args.ipaddress[-1] + '.png').convert("L").resize(size=(int(width), int(height)), resample=Image.NEAREST) while True: nowtime = datetime.datetime.now( timezone('Asia/Tokyo')).strftime('%Y-%m-%d %H:%M:%S.%f%z') ret, frame = video_capture.read() # frame shape 640*480*3 if not ret: print('cant read') video_capture = cv2.VideoCapture(full_cam_addr) continue t1 = time.time() try: image = Image.fromarray(frame[..., ::-1]) # bgr to rgb except TypeError: video_capture = cv2.VideoCapture(full_cam_addr) continue image = Image.composite(maskbgi, image, mask) boxes, confidence, classes = yolo.detect_image(image) if tracking: features = encoder(frame, boxes) detections = [ Detection(bbox, confidence, cls, feature) for bbox, confidence, cls, feature in zip( boxes, confidence, classes, features) ] else: detections = [ Detection_YOLO(bbox, confidence, cls) for bbox, confidence, cls in zip(boxes, confidence, classes) ] # Run non-maxima suppression. 
boxes = np.array([d.tlwh for d in detections]) scores = np.array([d.confidence for d in detections]) indices = preprocessing.non_max_suppression(boxes, nms_max_overlap, scores) detections = [detections[i] for i in indices] car_data = {} if tracking: # Call the tracker tracker.predict() tracker.update(detections) for track in tracker.tracks: if not track.is_confirmed() or track.time_since_update > 1: continue bbox = track.to_tlbr() car_data[str(track.track_id)] = [ int(bbox[0]), int(bbox[1]), int(bbox[2]), int(bbox[3]) ] sd.send_amqp( sd.create_jsondata(cam_ip, nowtime, time.time() - t1, car_data, args.jsonfile, args.json_path, i), key, args.AMQPHost) i += 1 if not asyncVideo_flag: fps = (fps + (1. / (time.time() - t1))) / 2 print("FPS = %f" % (fps)) ### Frame-skipping added to keep pace with the source FPS ### if not args.jsonfile and args.skip: if fps <= 10: for _i in range(int(math.ceil(rfps / fps)) - 1): ret, frame = video_capture.read() if asyncVideo_flag: video_capture.stop() else: video_capture.release() if writeVideo_flag: out.release() cv2.destroyAllWindows()
def main(yolo): # Definition of the parameters with open("cfg/detection_tracker_cfg.json") as detection_config: detect_config = json.load(detection_config) with open("cfg/doors_info.json") as doors_config: doors_config = json.load(doors_config) with open("cfg/around_doors_info.json") as around_doors_config: around_doors_config = json.load(around_doors_config) model_filename = detect_config["tracking_model"] input_folder, output_folder = detect_config["input_folder"], detect_config[ "output_folder"] meta_folder = detect_config["meta_folder"] output_format = detect_config["output_format"] # Deep SORT max_cosine_distance = 0.3 nn_budget = None nms_max_overlap = 1.0 encoder = gdet.create_box_encoder(model_filename, batch_size=1) metric = nn_matching.NearestNeighborDistanceMetric("cosine", max_cosine_distance, nn_budget) show_detections = True asyncVideo_flag = False check_gpu() # from here should start loop to process videos from folder # for video_name in os.listdir(input_folder): HOST = "localhost" PORT = 8075 with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as sock: sock.bind((HOST, PORT)) sock.listen() conn, addr = sock.accept() with conn: print('Connected by', addr) # loop over all videos while True: data = conn.recv(1000) video_motion_list = data.decode("utf-8").split(';') videos_que = deque() for video_motion in video_motion_list: videos_que.append(video_motion) video_name = videos_que.popleft() if not video_name.endswith(output_format): continue print('elements in que', len(videos_que)) print("opening video: {}".format(video_name)) full_video_path = join(input_folder, video_name) # full_video_path = "rtsp://*****:*****@192.168.1.52:554/1/h264major" meta_name = meta_folder + video_name[:-4] + ".json" with open(meta_name) as meta_config_json: meta_config = json.load(meta_config_json) camera_id = meta_config["camera_id"] if not os.path.exists(output_folder + str(camera_id)): os.mkdir(output_folder + str(camera_id)) output_name = output_folder + camera_id + '/out_' + video_name counter = Counter(counter_in=0, counter_out=0, track_id=0) tracker = Tracker(metric) if asyncVideo_flag: video_capture = VideoCaptureAsync(full_video_path) video_capture.start() w = int(video_capture.cap.get(3)) h = int(video_capture.cap.get(4)) else: video_capture = cv2.VideoCapture(full_video_path) w = int(video_capture.get(3)) h = int(video_capture.get(4)) fourcc = cv2.VideoWriter_fourcc(*'XVID') out = cv2.VideoWriter(output_name, fourcc, 25, (w, h)) door_array = doors_config["{}".format(camera_id)] around_door_array = tuple( around_doors_config["{}".format(camera_id)]) rect_door = Rectangle(door_array[0], door_array[1], door_array[2], door_array[3]) border_door = door_array[3] # loop over video save_video_flag = False while True: fps_imutils = imutils.video.FPS().start() ret, frame = video_capture.read() if not ret: with open('videos_saved/log_results.txt', 'a') as log: log.write( 'processed (ret). Time: {}, camera id: {}\n'. format(video_name, camera_id)) break t1 = time.time() # lost_ids = counter.return_lost_ids() image = Image.fromarray(frame[..., ::-1]) # bgr to rgb # image = image.crop(around_door_array) boxes, confidence, classes = yolo.detect_image(image) features = encoder(frame, boxes) detections = [ Detection(bbox, confidence, cls, feature) for bbox, confidence, cls, feature in zip( boxes, confidence, classes, features) ] # Run non-maxima suppression. 
boxes = np.array([d.tlwh for d in detections]) scores = np.array([d.confidence for d in detections]) classes = np.array([d.cls for d in detections]) indices = preprocessing.non_max_suppression( boxes, nms_max_overlap, scores) detections = [detections[i] for i in indices] # Call the tracker tracker.predict() tracker.update(detections) cv2.rectangle(frame, (int(door_array[0]), int(door_array[1])), (int(door_array[2]), int(door_array[3])), (23, 158, 21), 3) if len(detections) != 0: counter.someone_inframe() for det in detections: bbox = det.to_tlbr() if show_detections and len(classes) > 0: score = "%.2f" % (det.confidence * 100) + "%" cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), (255, 0, 0), 3) else: if counter.need_to_clear(): counter.clear_all() # identities = [track.track_id for track in tracker.tracks] # counter.update_identities(identities) for track in tracker.tracks: if not track.is_confirmed( ) or track.time_since_update > 1: continue bbox = track.to_tlbr() if track.track_id not in counter.people_init or counter.people_init[ track.track_id] == 0: # counter.obj_initialized(track.track_id) ratio_init = find_ratio_ofbboxes( bbox=bbox, rect_compare=rect_door) if ratio_init > 0: if ratio_init >= 0.5: # and bbox[3] < door_array[3]: counter.people_init[ track.track_id] = 2 # man in the door elif ratio_init < 0.5: # and bbox[3] > door_array[3]: # initialized in the outside counter.people_init[track.track_id] = 1 else: counter.people_init[track.track_id] = 1 counter.people_bbox[track.track_id] = bbox counter.cur_bbox[track.track_id] = bbox adc = "%.2f" % (track.adc * 100 ) + "%" # Average detection confidence cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), (255, 255, 255), 2) cv2.putText(frame, "ID: " + str(track.track_id), (int(bbox[0]), int(bbox[1]) + 50), 0, 1e-3 * frame.shape[0], (0, 255, 0), 3) if not show_detections: track_cls = track.cls cv2.putText(frame, str(track_cls), (int(bbox[0]), int(bbox[3])), 0, 1e-3 * frame.shape[0], (0, 255, 0), 3) cv2.putText(frame, 'ADC: ' + adc, (int(bbox[0]), int(bbox[3] + 2e-2 * frame.shape[1])), 0, 1e-3 * frame.shape[0], (0, 255, 0), 3) # if track.time_since_update >= 15: # id_get_lost.append(track.track_id) id_get_lost = [ track.track_id for track in tracker.tracks if track.time_since_update >= 15 ] for val in counter.people_init.keys(): ratio = 0 cur_c = find_centroid(counter.cur_bbox[val]) init_c = find_centroid(counter.people_bbox[val]) if val in id_get_lost and counter.people_init[ val] != -1: ratio = find_ratio_ofbboxes( bbox=counter.cur_bbox[val], rect_compare=rect_door) if counter.people_init[val] == 2 \ and ratio < 0.6: # and counter.people_bbox[val][3] > border_door \ counter.get_out() save_video_flag = True print(counter.people_init[val], ratio) elif counter.people_init[val] == 1 \ and ratio >= 0.6: counter.get_in() save_video_flag = True print(counter.people_init[val], ratio) counter.people_init[val] = -1 ins, outs = counter.return_counter() cv2.rectangle(frame, (frame.shape[1] - 150, 0), (frame.shape[1], 50), (0, 0, 0), -1, 8) cv2.putText(frame, "in: {}, out: {} ".format(ins, outs), (frame.shape[1] - 140, 20), 0, 1e-3 * frame.shape[0], (255, 255, 255), 3) out.write(frame) fps_imutils.update() if not asyncVideo_flag: pass # fps = (1. 
/ (time.time() - t1)) # print("FPS = %f" % fps) # if len(fpeses) < 15: # fpeses.append(round(fps, 2)) # # elif len(fpeses) == 15: # # fps = round(np.median(np.array(fpeses))) # median_fps = float(np.median(np.array(fpeses))) # fps = round(median_fps, 1) # print('max fps: ', fps) # # fps = 20 # counter.fps = fps # fpeses.append(fps) if cv2.waitKey(1) & 0xFF == ord('q'): break if asyncVideo_flag: video_capture.stop() del video_capture else: video_capture.release() if save_video_flag: with open('videos_saved/log_results.txt', 'a') as log: log.write( 'detected!!! time: {}, camera id: {}, detected move in: {}, out: {}\n' .format(video_name, camera_id, ins, outs)) log.write('video written {}\n\n'.format(output_name)) out.release() else: if out.isOpened(): out.release() if os.path.isfile(output_name): os.remove(output_name) if os.path.isfile(full_video_path): os.remove(full_video_path) if os.path.isfile(meta_name): os.remove(meta_name) save_video_flag = False cv2.destroyAllWindows()
def main(yolo): # Definition of the parameters max_cosine_distance = 0.3 nn_budget = None nms_max_overlap = 1.0 # Deep SORT model_filename = 'model_data/mars-small128.pb' encoder = gdet.create_box_encoder(model_filename, batch_size=1) metric = nn_matching.NearestNeighborDistanceMetric("cosine", max_cosine_distance, nn_budget) tracker = Tracker(metric) np.random.seed(42) COLORS = np.random.randint(0, 255, size=(200, 3), dtype="uint8") writeVideo_flag = True asyncVideo_flag = False file_path = 'testvideo2.avi' # load the COCO class labels our YOLO model was trained on labelsPath = os.path.sep.join(["model_data", "coco_classes.txt"]) LABELS = open(labelsPath).read().strip().split("\n") # print(str(len(LABELS))+"load successfully") # print(LABELS) class_nums = np.zeros(80) counter = np.zeros(80) track_id_max = -1 if asyncVideo_flag: video_capture = VideoCaptureAsync(file_path) else: video_capture = cv2.VideoCapture(file_path) if asyncVideo_flag: video_capture.start() if writeVideo_flag: if asyncVideo_flag: w = int(video_capture.cap.get(3)) h = int(video_capture.cap.get(4)) else: w = int(video_capture.get(3)) h = int(video_capture.get(4)) fourcc = cv2.VideoWriter_fourcc(*'XVID') out = cv2.VideoWriter('testvideo2_out.avi', fourcc, 30, (w, h)) frame_index = -1 fps = 0.0 fps_imutils = imutils.video.FPS().start() img_num = 0 frame_cnt = 0 while True: ret, frame = video_capture.read() # frame shape 640*480*3 if ret != True or frame_cnt > 30: break frame_copy = frame.copy() t1 = time.time() image = Image.fromarray(frame[..., ::-1]) # bgr to rgb boxs, confidence, class_names = yolo.detect_image(image) features = encoder(frame, boxs) detections = [Detection(bbox, confidence, feature) for bbox, confidence, feature in zip(boxs, confidence, features)] # Run non-maxima suppression.
boxes = np.array([d.tlwh for d in detections]) scores = np.array([d.confidence for d in detections]) indices = preprocessing.non_max_suppression(boxes, nms_max_overlap, scores) # print("print indices!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!") # print(indices) # for i in indices: # print(str(i)+class_names[i][0]) # print(indices.shape) detections = [detections[i] for i in indices] class_names = [class_names[i] for i in indices] print("class_name:" + str(class_names)) class_IDs = [] current_nums = np.zeros(80) # class_IDs=[] for class_name in class_names: for i, LABEL in enumerate(LABELS): if class_name[0] == LABEL: current_nums[i] += 1 class_IDs.append(i) # print("person:"+str(current_nums[0])) cv2.putText(frame, 'Current', (20, 70), cv2.FONT_HERSHEY_DUPLEX, 0.75, (255, 255, 255), 2) cv2.putText(frame, 'Total', (180, 70), cv2.FONT_HERSHEY_DUPLEX, 0.75, (0, 255, 255), 2) x1 = 20 y1 = 100 for i, cl in enumerate(current_nums): if cl > 0: cv2.putText(frame, LABELS[i] + "=" + str(cl), (x1, y1), cv2.FONT_HERSHEY_DUPLEX, 0.6, (255, 255, 255), 1) # 当前帧各类别数量 y1 = y1 + 20 for i, det in enumerate(detections): bbox = det.to_tlbr() score = "%.2f" % round(det.confidence * 100, 2) cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), (255, 255, 255), 1) cv2.putText(frame, score + '%', (int(bbox[0]), int(bbox[1]) + 10), cv2.FONT_HERSHEY_DUPLEX, 0.3, (255, 255, 255), 1) # cv2.putText(frame, class_names[i],(int(bbox[0]), int(bbox[1])-5), 0, 5e-3 * 130, (0, 255, 0), 2) # cv2.putText(frame, class_names[i], (int(bbox[0]), int(bbox[1])), 0, 5e-3 * 130,(0, 255, 255), 2) print("Total of detections:" + str(len(detections))) # Call the tracker tracker.predict() tracker.update(detections, class_IDs) # for i, cl in enumerate(class_nums): # if cl > 0: # print("add: " + LABELS[i] + str(cl - class_last_nums[i])) for track in tracker.tracks: if not track.is_confirmed() or track.time_since_update > 1: print("not track.is_confirmed() or track.time_since_update > 1: " + str(track.track_id)) continue bbox = track.to_tlbr() color = [int(c) for c in COLORS[track.class_id % len(COLORS)]] cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), color, 2) cv2.putText(frame, str(track.track_id) + ' ' + LABELS[track.class_id], (int(bbox[0]), int(bbox[1]) - 5), cv2.FONT_HERSHEY_DUPLEX, 0.4, color, 1) if track.track_id > track_id_max: counter[track.class_id] = counter[track.class_id] + 1 track_id_max = track.track_id dest = frame_copy[int(bbox[1]):int(bbox[3]), int(bbox[0]):int(bbox[2])] pdest = "./output_image/" + str(LABELS[track.class_id]) + str(int(counter[track.class_id])) + ".png" cv2.imwrite(pdest, dest) img_num += 1 # class_nums[track.class_id].append(track.track_id) # print(str(LABELS[track.class_id])+":"+class_nums[track.class_id]) # print("track.id: " + str(track.track_id)) # print("track.class_name: " + str(LABELS[track.class_id])) print(str(counter)) print("--------------------------该帧输出完毕!--------------------------------------") # cv2.putText(frame, 'Total', (200, 60), cv2.FONT_HERSHEY_DUPLEX, 0.75, (255, 0, 0), 2) # x2 = 200 # y2 = 100 # for i, cl in enumerate(class_nums): # if cl > 0: # cv2.putText(frame, LABELS[i] + "=" + str(cl), (x2, y2), cv2.FONT_HERSHEY_DUPLEX, 0.5, (255, 0, 0), 2) # y2 = y2 + 20 cv2.putText(frame, "FPS: %f" % (fps), (int(20), int(40)), 0, 5e-3 * 200, (0, 255, 0), 3) # !!!!!!!!!输出FPS x2 = 180 y2 = 100 for i, cl in enumerate(counter): if cl > 0: cv2.putText(frame, LABELS[i] + "=" + str(cl), (x2, y2), cv2.FONT_HERSHEY_DUPLEX, 0.6, (0, 255, 255), 1) y2 = 
y2 + 20 # cv2.imshow('', frame) if writeVideo_flag: # and not asyncVideo_flag: # save a frame out.write(frame) frame_index = frame_index + 1 fps_imutils.update() fps = (fps + (1. / (time.time() - t1))) / 2 # print("FPS = %f"%(fps)) frame_cnt += 1 # Press Q to stop! if cv2.waitKey(1) & 0xFF == ord('q'): break fps_imutils.stop() print('imutils FPS: {}'.format(fps_imutils.fps())) if asyncVideo_flag: video_capture.stop() else: video_capture.release() if writeVideo_flag: out.release() cv2.destroyAllWindows()
def main(yolo): # Definition of the parameters max_cosine_distance = 0.3 nn_budget = None nms_max_overlap = 1.0 # Deep SORT model_filename = 'model_data/mars-small128.pb' encoder = gdet.create_box_encoder(model_filename, batch_size=1) metric = nn_matching.NearestNeighborDistanceMetric("cosine", max_cosine_distance, nn_budget) tracker = Tracker(metric) tracking = True writeVideo_flag = True asyncVideo_flag = False file_path = 'IMG_3326.MOV' dfObj = pd.DataFrame( columns=['frame_num', 'track', 'cx', 'cy', 'w', 'h', 'track_temp']) dfObjDTP = pd.DataFrame(columns=[ 'filename', 'frame_num', 'bb1', 'bb2', 'bb3', 'bb4', 'track', 'track_temp', 'Height' ]) if asyncVideo_flag: video_capture = VideoCaptureAsync(file_path) else: video_capture = cv2.VideoCapture(file_path) if asyncVideo_flag: video_capture.start() if writeVideo_flag: if asyncVideo_flag: w = int(video_capture.cap.get(3)) h = int(video_capture.cap.get(4)) else: w = int(video_capture.get(3)) h = int(video_capture.get(4)) fourcc = cv2.VideoWriter_fourcc(*'XVID') out = cv2.VideoWriter('output_yolov4.avi', fourcc, 30, (w, h)) frame_index = -1 fps = 0.0 fps_imutils = imutils.video.FPS().start() while True: ret, frame = video_capture.read() # frame shape 640*480*3 if ret != True: break t1 = time.time() image = Image.fromarray(frame[..., ::-1]) # bgr to rgb boxes, confidence, classes = yolo.detect_image(image) if tracking: features = encoder(frame, boxes) detections = [ Detection(bbox, confidence, cls, feature) for bbox, confidence, cls, feature in zip( boxes, confidence, classes, features) ] else: detections = [ Detection_YOLO(bbox, confidence, cls) for bbox, confidence, cls in zip(boxes, confidence, classes) ] # Run non-maxima suppression. boxes = np.array([d.tlwh for d in detections]) scores = np.array([d.confidence for d in detections]) indices = preprocessing.non_max_suppression(boxes, nms_max_overlap, scores) detections = [detections[i] for i in indices] if tracking: # Call the tracker tracker.predict() tracker.update(detections) for track in tracker.tracks: if not track.is_confirmed() or track.time_since_update > 1: continue bbox = track.to_tlbr() # Crop the image per frame for this track #cropped_image = frame[int(bbox[1]):int(bbox[1])+(int(bbox[3])-int(bbox[1])),int(bbox[0]):int(bbox[0])+(int(bbox[2])-int(bbox[0]))] cropped_image = frame[int(bbox[1]):int(bbox[1]) + 256, int(bbox[0]):int(bbox[0]) + 128] # cropped_image = frame[2:5,6:10] # Disable or comment out so no white box is drawn # cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), (255, 255, 255), 2) # # cv2.putText(frame, "ID: " + str(track.track_id), (int(bbox[0]), int(bbox[1])), 0, # 1.5e-3 * frame.shape[0], (0, 255, 0), 1) # print(cropped_image) dirname = "output_crop/{}".format(track.track_id) if not os.path.exists(dirname): os.makedirs(dirname) if (cropped_image.size == 0): continue else: writeStatus = cv2.imwrite( "output_crop/{}/frame_{}.png".format( track.track_id, frame_index), cropped_image) print("output_crop/{}/frame_{}.png".format( track.track_id, frame_index)) # Write CSV dfObj = dfObj.append(pd.Series([ frame_index, track.track_id, int(bbox[0]), int(bbox[1]), int(bbox[2]) - int(bbox[0]), int(bbox[3]) - int(bbox[1]), track.time_since_update ], index=dfObj.columns), ignore_index=True) dfObjDTP = dfObjDTP.append(pd.Series([ file_path, frame_index, int(bbox[0]), int(bbox[1]), int(bbox[2]), int(bbox[3]), track.track_id, track.time_since_update, int(bbox[3]) - int(bbox[1]) ], index=dfObjDTP.columns), ignore_index=True) for det in detections: bbox =
det.to_tlbr() score = "%.2f" % round(det.confidence * 100, 2) + "%" # Disable or comment out so no white box appears in the cropped image # cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), (255, 0, 0), 2) # if len(classes) > 0: # cls = det.cls # cv2.putText(frame, str(cls) + " " + score, (int(bbox[0]), int(bbox[3])), 0, # 1.5e-3 * frame.shape[0], (0, 255, 0), 1) cv2.imshow('', frame) if writeVideo_flag: # and not asyncVideo_flag: # save a frame out.write(frame) frame_index = frame_index + 1 fps_imutils.update() if not asyncVideo_flag: fps = (fps + (1. / (time.time() - t1))) / 2 print("FPS = %f" % (fps)) # Press Q to stop! if cv2.waitKey(1) & 0xFF == ord('q'): break fps_imutils.stop() print('imutils FPS: {}'.format(fps_imutils.fps())) if asyncVideo_flag: video_capture.stop() else: video_capture.release() if writeVideo_flag: out.release() dfObj = dfObj.sort_values(["track", "frame_num"], ascending=(True, True)) dfObj.to_csv(r'result_temp.csv', index=False) dfObjDTP = dfObjDTP.sort_values(["track", "frame_num"], ascending=(True, True)) dfObjDTP.to_csv(r'result_temp_dtp.csv', index=False) convert_to_final() cv2.destroyAllWindows()
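# pandas.DataFrame.append, used inside the tracking loop above, is deprecated and was
# removed in pandas 2.x. A minimal sketch of equivalent bookkeeping: collect one plain
# dict per confirmed track and build the frame once at the end ('rows' and
# 'build_track_frame' are illustrative names; the keys match dfObj's columns above):
import pandas as pd

def build_track_frame(rows):
    # rows: list of dicts with keys frame_num, track, cx, cy, w, h, track_temp,
    # appended inside the loop instead of calling dfObj.append per detection.
    return pd.DataFrame(rows).sort_values(['track', 'frame_num'])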
def run_video(self): # init for classify module clf_model = None clf_labels = None encoder = gdet.create_box_encoder(self.cfg.DEEPSORT.MODEL, batch_size=4) metric = nn_matching.NearestNeighborDistanceMetric( "cosine", self.cfg.DEEPSORT.MAX_COSINE_DISTANCE, self.cfg.DEEPSORT.NN_BUDGET) tracker = Tracker(self.cfg, metric) tracking = True writeVideo_flag = self.args.out_video asyncVideo_flag = False list_classes = ['loai_1', 'loai_2', 'loai_3', 'loai_4'] arr_cnt_class = np.zeros((len(list_classes), self.number_MOI), dtype=int) fps = 0.0 fps_imutils = imutils.video.FPS().start() counted_obj = [] count_frame = 0 objs_dict = {} if asyncVideo_flag: video_capture = VideoCaptureAsync(self.video_path) else: video_capture = cv2.VideoCapture(self.video_path) if asyncVideo_flag: video_capture.start() if writeVideo_flag: if asyncVideo_flag: w = int(video_capture.cap.get(3)) h = int(video_capture.cap.get(4)) else: w = int(video_capture.get(3)) h = int(video_capture.get(4)) fourcc = cv2.VideoWriter_fourcc(*'XVID') output_camname = 'output' + '_' + self.video_name + '.avi' out = cv2.VideoWriter(output_camname, fourcc, 10, (1280, 720)) frame_index = -1 while True: count_frame += 1 print('frame processing .......: ', count_frame) ret, frame = video_capture.read() if ret != True: break frame_info = self.video_name + "_" + str(count_frame - 1) t1 = time.time() # frame = cv2.flip(frame, -1) _frame = self.process(frame, count_frame, frame_info, encoder, tracking, tracker, objs_dict, counted_obj, arr_cnt_class, clf_model, clf_labels) if writeVideo_flag: # and not asyncVideo_flag: # save a frame out.write(_frame) frame_index = frame_index + 1 # visualize if self.args.visualize: _frame = imutils.resize(_frame, width=1000) cv2.imshow("Final result", _frame) fps_imutils.update() if not asyncVideo_flag: fps = (fps + (1. / (time.time() - t1))) / 2 print("FPS = %f" % (fps)) # Press Q to stop! if cv2.waitKey(1) & 0xFF == ord('q'): break fps_imutils.stop() print('imutils FPS: {}'.format(fps_imutils.fps())) if asyncVideo_flag: video_capture.stop() else: video_capture.release() if writeVideo_flag: out.release() cv2.destroyAllWindows()
def run_video(self): # init for classify module clf_model = None clf_labels = None if self.use_classify: clf_model, clf_labels = mobileNet.load_model_clf(self.cfg) encoder = gdet.create_box_encoder(self.cfg.DEEPSORT.MODEL, batch_size=4) metric = nn_matching.NearestNeighborDistanceMetric( "cosine", self.cfg.DEEPSORT.MAX_COSINE_DISTANCE, self.cfg.DEEPSORT.NN_BUDGET) tracker = Tracker(metric) tracking = True writeVideo_flag = True asyncVideo_flag = False list_classes = ['loai_1', 'loai_2', 'loai_3', 'loai_4'] arr_cnt_class = np.zeros((len(list_classes), self.number_MOI), dtype=int) fps = 0.0 fps_imutils = imutils.video.FPS().start() counted_obj = [] count_frame = 0 objs_dict = {} # file_path = 'data/demo.MOV' if asyncVideo_flag: video_capture = VideoCaptureAsync(self.video_path) else: video_capture = cv2.VideoCapture(self.video_path) if asyncVideo_flag: video_capture.start() if writeVideo_flag: if asyncVideo_flag: w = int(video_capture.cap.get(3)) h = int(video_capture.cap.get(4)) else: w = int(video_capture.get(3)) h = int(video_capture.get(4)) fourcc = cv2.VideoWriter_fourcc(*'XVID') out = cv2.VideoWriter('output_yolov4.avi', fourcc, 30, (w, h)) frame_index = -1 while True: count_frame += 1 ret, frame = video_capture.read() if ret != True: break t1 = time.time() # frame = cv2.flip(frame, -1) _frame = frame _frame = MOI.config_cam(_frame, self.cfg) # draw board ROI_board = np.zeros((150, 170, 3), np.int) _frame[0:150, 0:170] = ROI_board _frame, list_col = init_board(_frame, self.number_MOI) _frame_height, _frame_width = _frame.shape[:2] cropped_frame = frame # cv2.rectangle(_frame, (int(frame_width*0), int(_frame_height*0.1)), (int(_frame_width*0.98), int(_frame_height*0.98)), (255, 0, 0), 2) print("[INFO] Detecting.....") detections, detections_in_ROI = self.run_detection( cropped_frame, encoder, tracking, count_frame) print("[INFO] Tracking....") _, objs_dict = self.draw_tracking(cropped_frame, tracker, tracking, detections_in_ROI, count_frame, objs_dict) print("[INFO] Counting....") _frame, arr_cnt_class, vehicles_detection_list = self.counting(count_frame, cropped_frame, _frame, \ objs_dict, counted_obj, arr_cnt_class, clf_model, clf_labels) # delete counted id for track in tracker.tracks: if int(track.track_id) in counted_obj: track.delete() # write result to txt with open(self.result_filename, 'a+') as result_file: for frame_id, movement_id, vehicle_class_id in vehicles_detection_list: result_file.write('{} {} {} {}\n'.format( self.video_name, frame_id, movement_id, vehicle_class_id)) # write number to scoreboard _frame = write_board(_frame, arr_cnt_class, list_col, self.number_MOI) # visualize if self.args.visualize: _frame = imutils.resize(_frame, width=1000) cv2.imshow("Final result", _frame) if writeVideo_flag: # and not asyncVideo_flag: # save a frame out.write(_frame) frame_index = frame_index + 1 fps_imutils.update() if not asyncVideo_flag: fps = (fps + (1. / (time.time() - t1))) / 2 print("FPS = %f" % (fps)) # Press Q to stop! if cv2.waitKey(1) & 0xFF == ord('q'): break fps_imutils.stop() print('imutils FPS: {}'.format(fps_imutils.fps())) if asyncVideo_flag: video_capture.stop() else: video_capture.release() if writeVideo_flag: out.release() cv2.destroyAllWindows()
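# Note on run_video above: np.zeros((150, 170, 3), np.int) relies on the np.int alias,
# which NumPy removed in 1.24; the builtin int keeps the same behaviour on newer NumPy:
import numpy as np
ROI_board = np.zeros((150, 170, 3), dtype=int)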