def run(self):
    """Detect and track class-0 objects on every sampled frame of ``self.vdo``.

    Only frames whose 1-based index is a multiple of
    ``self.args.frame_interval`` are processed.  For each processed frame
    the tracker output is drawn, optionally shown / written to
    ``self.writer``, and the accumulated results are written out via
    ``write_results`` in 'mot' format.
    """
    results = []
    frame_no = 0
    # grab() only advances the capture to the next frame and reports success.
    while self.vdo.grab():
        frame_no += 1
        # With frame_interval == 1 every frame runs; with 2 only even frames.
        if frame_no % self.args.frame_interval != 0:
            continue

        started = time.time()
        # retrieve() returns the image the capture currently points at.
        _, ori_im = self.vdo.retrieve()
        im = cv2.cvtColor(ori_im, cv2.COLOR_BGR2RGB)

        # Detection: boxes (xywh), per-box confidence, per-box class id.
        bbox_xywh, cls_conf, cls_ids = self.detector(im)

        # The detector reports many classes; class id 0 is the pedestrian
        # class according to the original author's comment.
        is_person = cls_ids == 0
        cls_conf = cls_conf[is_person]
        bbox_xywh = bbox_xywh[is_person]
        # Box dilation in case the box is too small; the original author
        # suggests deleting this with a better pedestrian detector.
        bbox_xywh[:, 3:] *= 1.2

        outputs = self.deepsort.update(bbox_xywh, cls_conf, im)

        # Draw boxes for visualisation and record them.
        if len(outputs) > 0:
            bbox_xyxy = outputs[:, :4]
            identities = outputs[:, -1]
            ori_im = draw_boxes(ori_im, bbox_xyxy, identities)
            bbox_tlwh = [
                self.deepsort._xyxy_to_tlwh(box) for box in bbox_xyxy
            ]
            results.append((frame_no - 1, bbox_tlwh, identities))

        elapsed = time.time() - started

        if self.args.display:
            cv2.imshow("test", ori_im)
            cv2.waitKey(1)

        if self.args.save_path:
            self.writer.write(ori_im)

        # The full result list is rewritten after every processed frame.
        write_results(self.save_results_path, results, 'mot')

        self.logger.info(
            "time: {:.03f}s, fps: {:.03f}, detection numbers: {}, tracking numbers: {}"
            .format(elapsed, 1 / elapsed, bbox_xywh.shape[0], len(outputs)))
def run(self):
    """Detect and track class-0 objects over an image sequence.

    Images are read from ``<self.images_path>/img1/<filename>`` for every
    name in ``self.imgs_filenames``; only images whose 1-based position is
    a multiple of ``self.args.frame_interval`` are processed.  Results are
    stored with that 1-based position as the frame index.
    """
    results = []
    for idx_frame, img_filename in enumerate(self.imgs_filenames, start=1):
        if idx_frame % self.args.frame_interval != 0:
            continue

        tick = time.time()
        img_path = os.path.join(self.images_path, 'img1', img_filename)
        ori_im = cv2.imread(img_path)
        im = cv2.cvtColor(ori_im, cv2.COLOR_BGR2RGB)

        # Detection, then keep only detections whose class id is 0.
        bbox_xywh, cls_conf, cls_ids = self.detector(im)
        keep = cls_ids == 0
        cls_conf = cls_conf[keep]
        bbox_xywh = bbox_xywh[keep]
        # Box dilation in case the box is too small (original author's note:
        # delete when using a better pedestrian detector).
        bbox_xywh[:, 3:] *= 1.2

        outputs = self.deepsort.update(bbox_xywh, cls_conf, im)

        if len(outputs) > 0:
            bbox_xyxy = outputs[:, :4]
            identities = outputs[:, -1]
            ori_im = draw_boxes(ori_im, bbox_xyxy, identities)
            bbox_tlwh = [
                self.deepsort._xyxy_to_tlwh(box) for box in bbox_xyxy
            ]
            results.append((idx_frame, bbox_tlwh, identities))

        took = time.time() - tick

        if self.args.display:
            cv2.imshow("test", ori_im)
            cv2.waitKey(1)

        if self.args.save_path:
            self.writer.write(ori_im)

        # Results are rewritten in full after each processed image.
        write_results(self.save_results_path, results, 'mot')

        self.logger.info(
            "time: {:.03f}s, fps: {:.03f}, detection numbers: {}, tracking numbers: {}"
            .format(took, 1 / took, bbox_xywh.shape[0], len(outputs)))
def main():
    """Run the configured model over the input posts and write predictions.

    Reads the data from ``args.input``, makes sure the directory that will
    hold ``args.output`` exists, loads the model, predicts one result per
    (post, POS-tags) pair and hands everything to ``write_results``.
    """
    args = parse_args()
    model = model_factory(args)
    word_id_lst, post_lsts, _, _, _, pos_lsts = read_data(args.input)

    # parents=True also creates missing ancestor directories, which a plain
    # os.mkdir() cannot do; exist_ok=True keeps the "already there" case
    # silent, like the previous is_dir() check.
    Path(args.output).parent.mkdir(parents=True, exist_ok=True)

    model.load()
    freqs_lst = [
        model.predict(post, pos_tags)
        for post, pos_tags in zip(post_lsts, pos_lsts)
    ]
    write_results(word_id_lst, post_lsts, freqs_lst, args.output)
    print("Output file created successfully")
def run(self):
    """Track people on paired colour/depth video and annotate each of them.

    For every sampled frame pair the method detects class-0 objects, tracks
    them, and for each track runs face recognition (``who``), pose
    estimation (``pose_estimate``) and fall detection (``fall``).  The
    best (smallest-distance) recognised name seen so far is kept per track
    id.  Annotated frames are optionally shown / written and the results
    are written via ``write_results``.
    """
    results = []
    names = {}      # track id -> name with the smallest distance so far
    dists_min = {}  # track id -> that smallest distance
    idx_frame = 0
    while self.vdo.grab() and self.vdo_depth.grab():
        idx_frame += 1
        if idx_frame % self.args.frame_interval:
            continue

        start = time.time()
        _, ori_im = self.vdo.retrieve()
        _, ori_im_depth = self.vdo_depth.retrieve()
        im = cv2.cvtColor(ori_im, cv2.COLOR_BGR2RGB)
        im_depth = cv2.cvtColor(ori_im_depth, cv2.COLOR_BGR2RGB)

        # do detection
        bbox_xywh, cls_conf, cls_ids = self.detector(im)

        # select person class
        mask = cls_ids == 0
        bbox_xywh = bbox_xywh[mask]
        # bbox dilation just in case bbox too small, delete this line if
        # using a better pedestrian detector
        bbox_xywh[:, 3:] *= 1.2
        cls_conf = cls_conf[mask]

        # do tracking
        outputs = self.deepsort.update(bbox_xywh, cls_conf, im)

        # draw boxes for visualization
        if len(outputs) > 0:
            bbox_tlwh = []
            bbox_xyxy = outputs[:, :4]
            identities = outputs[:, -1]
            # Separate RGB copy taken before anything is drawn on ori_im.
            im_t = cv2.cvtColor(ori_im, cv2.COLOR_BGR2RGB)
            for bb_xyxy, track_id in zip(bbox_xyxy, identities):
                bbox_tlwh.append(self.deepsort._xyxy_to_tlwh(bb_xyxy))
                x1, y1, x2, y2 = [int(v) for v in bb_xyxy]
                person_crop = im_t[y1:y2, x1:x2, :]

                # face recognition: keep the closest match per track id
                name, dist_min = who(person_crop, self.face_path)
                if track_id not in names or dists_min[track_id] > dist_min:
                    names[track_id] = name
                    dists_min[track_id] = dist_min

                # pose estimation on the crop scaled by 1/255
                pose, colors, pairs, colors_skeleton = pose_estimate(
                    person_crop / 255)
                # Shift key points from crop to full-frame coordinates.
                pose += np.array([x1, y1])
                # np.int was removed in NumPy 1.24; the builtin int is the
                # exact type it used to alias.
                pose = pose.astype(int)
                for idx in range(len(colors)):
                    cv2.circle(ori_im, (pose[idx, 0], pose[idx, 1]),
                               3,
                               colors[idx],
                               thickness=3,
                               lineType=8,
                               shift=0)
                for idx in range(len(colors_skeleton)):
                    joint_a, joint_b = pairs[idx][0], pairs[idx][1]
                    ori_im = cv2.line(
                        ori_im,
                        (pose[joint_a, 0], pose[joint_a, 1]),
                        (pose[joint_b, 0], pose[joint_b, 1]),
                        colors_skeleton[idx], 3)

                # fall down detection
                is_fall = fall(pose)
                if is_fall:
                    t_size = cv2.getTextSize('fall',
                                             cv2.FONT_HERSHEY_PLAIN, 2,
                                             2)[0]
                    cv2.putText(ori_im, 'fall',
                                (x1, y1 - t_size[1] - 4),
                                cv2.FONT_HERSHEY_PLAIN, 2, [0, 0, 255], 2)

            ori_im = draw_boxes(ori_im, bbox_xyxy, identities, names)
            # NOTE(review): ``pose`` here is the pose of the last track of
            # the loop above — confirm that is what draw_ID expects.
            ori_im = draw_ID(ori_im, im_depth, bbox_xyxy, pose)

            results.append((idx_frame - 1, bbox_tlwh, identities))

        end = time.time()

        if self.args.display:
            cv2.imshow("test", ori_im)
            cv2.waitKey(1)

        if self.args.save_path:
            self.writer.write(ori_im)

        # save results
        write_results(self.save_results_path, results, 'mot')

        # logging
        self.logger.info("time: {:.03f}s, fps: {:.03f}, detection numbers: {}, tracking numbers: {}" \
                         .format(end - start, 1 / (end - start), bbox_xywh.shape[0], len(outputs)))
def run(self):
    """Track people and draw their poses using RealSense colour/depth frames.

    The loop is driven by ``self.vdo`` / ``self.vdo_depth`` ``grab()``
    calls, but the images that are processed come from a RealSense
    pipeline that is configured and started inside the loop.  For every
    track a pose is estimated and drawn; annotated frames are optionally
    shown / written and the results are written via ``write_results``.
    """
    results = []
    idx_frame = 0
    while self.vdo.grab() and self.vdo_depth.grab():
        idx_frame += 1
        if idx_frame % self.args.frame_interval:
            continue

        start = time.time()

        # realsense2
        # NOTE(review): a new pipeline is created and started on every
        # processed frame and never explicitly stopped — confirm this is
        # intended rather than a one-off setup before the loop.
        pipeline = rs.pipeline()
        config = rs.config()
        config.enable_stream(rs.stream.color, 640, 480, rs.format.bgr8, 30)
        config.enable_stream(rs.stream.depth, 640, 480, rs.format.z16, 30)
        # start streaming
        pipeline.start(config)
        sensor = pipeline.get_active_profile().get_device().query_sensors()[1]
        sensor.set_option(rs.option.enable_auto_exposure, True)

        frames = pipeline.wait_for_frames()
        f_depth = frames.get_depth_frame()
        f_color = frames.get_color_frame()

        # NOTE(review): the frame objects are passed to OpenCV as returned
        # by the pipeline; presumably they need converting to arrays first
        # — TODO confirm.
        im = cv2.cvtColor(f_color, cv2.COLOR_BGR2RGB)
        im_depth = cv2.cvtColor(f_depth, cv2.COLOR_BGR2RGB)

        # do detection
        bbox_xywh, cls_conf, cls_ids = self.detector(im)

        # select person class
        mask = cls_ids == 0
        bbox_xywh = bbox_xywh[mask]
        # bbox dilation just in case bbox too small, delete this line if
        # using a better pedestrian detector
        bbox_xywh[:, 3:] *= 1.2
        cls_conf = cls_conf[mask]

        # do tracking
        outputs = self.deepsort.update(bbox_xywh, cls_conf, im)

        # draw boxes for visualization
        if len(outputs) > 0:
            bbox_tlwh = []
            bbox_xyxy = outputs[:, :4]
            identities = outputs[:, -1]
            im_t = cv2.cvtColor(f_color, cv2.COLOR_BGR2RGB)
            for bb_xyxy in bbox_xyxy:
                bbox_tlwh.append(self.deepsort._xyxy_to_tlwh(bb_xyxy))
                x1, y1, x2, y2 = [int(v) for v in bb_xyxy]
                im_person = im_t[y1:y2, x1:x2, :] / 255
                pose, colors, pairs, colors_skeleton = pose_estimate(
                    im_person)
                # Shift key points from crop to full-frame coordinates.
                pose += np.array([x1, y1])
                # np.int was removed in NumPy 1.24; the builtin int is the
                # exact type it used to alias.
                pose = pose.astype(int)
                for idx in range(len(colors)):
                    cv2.circle(f_color, (pose[idx, 0], pose[idx, 1]),
                               3,
                               colors[idx],
                               thickness=3,
                               lineType=8,
                               shift=0)
                for idx in range(len(colors_skeleton)):
                    joint_a, joint_b = pairs[idx][0], pairs[idx][1]
                    f_color = cv2.line(
                        f_color,
                        (pose[joint_a, 0], pose[joint_a, 1]),
                        (pose[joint_b, 0], pose[joint_b, 1]),
                        colors_skeleton[idx], 3)

            f_color = draw_boxes(f_color, bbox_xyxy, identities)
            f_color = draw_ID(f_color, im_depth, bbox_xyxy)

            results.append((idx_frame - 1, bbox_tlwh, identities))

        end = time.time()

        if self.args.display:
            cv2.imshow("test", f_color)
            cv2.waitKey(1)

        if self.args.save_path:
            self.writer.write(f_color)

        # save results
        write_results(self.save_results_path, results, 'mot')

        # logging
        self.logger.info("time: {:.03f}s, fps: {:.03f}, detection numbers: {}, tracking numbers: {}" \
                         .format(end - start, 1 / (end - start), bbox_xywh.shape[0], len(outputs)))
def run(self):
    """Track people on video and publish the first box's centre/angle on ROS.

    Publishes a ``Point`` on ``/bbox_center`` and a ``Float32`` on
    ``/target_angle`` once a tracking target has been chosen.  While the
    preview window is shown, pressing ``i`` prompts on stdin for the target
    number.  The loop ends when the video is exhausted or ROS shuts down.
    """
    results = []
    idx_frame = 0
    idx_tracked = None
    bbox_xyxy = []  # last tracker boxes; stays [] until the first track
    bbox_pub = rospy.Publisher("/bbox_center", Point, queue_size=10)
    angle_pub = rospy.Publisher("/target_angle", Float32, queue_size=10)

    while self.vdo.grab() and not rospy.is_shutdown():
        idx_frame += 1
        if idx_frame % self.args.frame_interval:
            continue
        # NOTE(review): reads the module-level ``args`` while the rest of
        # the method uses ``self.args`` — confirm both are the same object.
        if idx_frame < args.load_from:
            continue

        start = time.time()
        _, ori_im = self.vdo.retrieve()
        im = cv2.cvtColor(ori_im, cv2.COLOR_BGR2RGB)

        # do detection
        bbox_xywh, cls_conf, cls_ids = self.detector(im)

        # select person class
        mask = cls_ids == 0
        bbox_xywh = bbox_xywh[mask]
        # bbox dilation just in case bbox too small, delete this line if
        # using a better pedestrian detector
        bbox_xywh[:, 3:] *= 1.2
        cls_conf = cls_conf[mask]

        # do tracking
        if idx_tracked:
            outputs = self.deepsort.update(bbox_xywh,
                                           cls_conf,
                                           im,
                                           tracking_target=idx_tracked)
        else:
            outputs = self.deepsort.update(bbox_xywh, cls_conf, im)

        # draw boxes for visualization
        if len(outputs) > 0:
            bbox_xyxy = outputs[:, :4]
            identities = outputs[:, -1]
            ori_im = draw_boxes(ori_im, bbox_xyxy, identities)
            bbox_tlwh = [
                self.deepsort._xyxy_to_tlwh(bb_xyxy) for bb_xyxy in bbox_xyxy
            ]
            results.append((idx_frame - 1, bbox_tlwh, identities))

        end = time.time()

        # draw frame count
        font = cv2.FONT_HERSHEY_SIMPLEX
        fontScale = 1
        fontColor = (255, 255, 255)
        lineType = 2
        frame_count = ("Frame no: %d" % idx_frame)
        cv2.putText(ori_im, frame_count, (10, 500), font, fontScale,
                    fontColor, lineType)

        # draw tracking number
        if idx_tracked:
            tracking_str = ("Tracking: %d" % idx_tracked)
        else:
            tracking_str = ("Tracking: None")
        cv2.putText(ori_im, tracking_str, (10, 550), font, fontScale,
                    fontColor, lineType)

        # get user input on target to track
        if self.args.display:
            cv2.imshow("test", ori_im)
            if cv2.waitKey(1) == ord('i'):
                print("\nEnter target number for constant tracking")
                user_input = input()
                try:
                    idx_tracked = int(user_input)
                except ValueError:
                    # A typo must not abort the run; keep the old target.
                    print("Invalid target number: %r" % user_input)

        if self.args.save_path:
            self.writer.write(ori_im)

        # save results
        write_results(self.save_results_path, results, 'mot')

        # logging
        self.logger.info("frame: {}, time: {:.03f}s, fps: {:.03f}, detection numbers: {}, tracking numbers: {}" \
                         .format(idx_frame, end-start, 1/(end-start), bbox_xywh.shape[0], len(outputs)))

        # publishing
        # bbox_xyxy is still [] when a target is chosen before any track
        # exists; indexing it would raise IndexError, so wait for a box.
        if idx_tracked is not None and len(bbox_xyxy) > 0:
            x_center = (bbox_xyxy[0][0] + bbox_xyxy[0][2]) / 2
            y_center = (bbox_xyxy[0][1] + bbox_xyxy[0][3]) / 2
            fov = 60
            pixel_per_angle = im.shape[1] / fov
            x_center_adjusted = x_center - (im.shape[1] / 2)
            print(x_center_adjusted)
            angle = x_center_adjusted / pixel_per_angle
            print(angle)
            bbox_pub.publish(x_center, y_center, 0)
            angle_pub.publish(angle)

        rate = rospy.Rate(20)  # 20 Hz
        rate.sleep()

    rospy.loginfo("Stopped sending bounding boxes")
def run(self):
    """Track every detected class on the video and persist the tracks.

    All detections whose class id is not ``-1`` are forwarded to the
    tracker together with their class ids and the frame index.  After the
    video ends, the final frame counter and ``self.deepsort.get_tracks()``
    are pickled to ``self.save_length_path`` / ``self.save_tracks_path``.
    """
    results = []
    idx_frame = 0
    while self.vdo.grab():
        idx_frame += 1
        if idx_frame % self.args.frame_interval != 0:
            continue

        t0 = time.time()
        _, ori_im = self.vdo.retrieve()
        im = cv2.cvtColor(ori_im, cv2.COLOR_BGR2RGB)

        bbox_xywh, cls_conf, cls_ids = self.detector(im)

        # Keep every class except the id -1.
        valid = cls_ids != -1
        cls_conf = cls_conf[valid]
        cls_ids = cls_ids[valid]
        bbox_xywh = bbox_xywh[valid]
        # Box dilation in case the box is too small (original author's note).
        bbox_xywh[:, 3:] *= 1.2

        outputs = self.deepsort.update(bbox_xywh, cls_conf, cls_ids, im,
                                       idx_frame)

        if len(outputs) > 0:
            bbox_xyxy = outputs[:, :4]
            # The last two output columns are read as identity and class.
            identities = outputs[:, -2]
            class_idxs = outputs[:, -1]
            ori_im = draw_boxes(ori_im, bbox_xyxy, identities)
            bbox_tlwh = [
                self.deepsort._xyxy_to_tlwh(box) for box in bbox_xyxy
            ]
            results.append(
                (idx_frame - 1, bbox_tlwh, class_idxs, identities))

        dt = time.time() - t0

        if self.args.display:
            cv2.imshow("test", ori_im)
            cv2.waitKey(1)

        if self.args.save_path:
            self.writer.write(ori_im)

        write_results(self.save_results_path, results, 'mot')

        self.logger.info(
            "time: {:.03f}s, fps: {:.03f}, detection numbers: {}, tracking numbers: {}"
            .format(dt, 1 / dt, bbox_xywh.shape[0], len(outputs)))

    with open(self.save_length_path, 'wb') as length_file:
        pickle.dump(idx_frame, length_file)

    tracks = self.deepsort.get_tracks()
    with open(self.save_tracks_path, 'wb') as tracks_file:
        pickle.dump(tracks, tracks_file)
def run(self):
    """Track detections whose class id is non-zero on every sampled frame.

    The detector is fed a copy of the BGR frame as read (no colour
    conversion).  Frames without any remaining detection are skipped
    entirely: no tracker update, display, write or log happens for them.
    """
    results = []
    idx_frame = 0
    while self.vdo.grab():
        idx_frame += 1
        if idx_frame % self.args.frame_interval:
            continue

        start = time.time()
        _, ori_im = self.vdo.retrieve()
        im = np.copy(ori_im)

        # do detection
        bbox_xywh, cls_conf, cls_ids = self.detector(im)

        # Only the last of the three former mask assignments ever took
        # effect (class != 0, labelled "for liyang head"); the overwritten
        # ones (== 0 "for yolov3", == 1 "for weilei head") were dead code.
        mask = cls_ids != 0
        bbox_xywh = bbox_xywh[mask]

        # Nothing left to track on this frame.
        if len(bbox_xywh) == 0:
            continue

        cls_conf = cls_conf[mask]

        # do tracking
        # NOTE(review): the original author suspected that update() changes
        # the values inside bbox_xywh and distorts the boxes — unconfirmed.
        outputs = self.deepsort.update(bbox_xywh, cls_conf, im)

        # draw boxes for visualization
        if len(outputs) > 0:
            bbox_xyxy = outputs[:, :4]
            identities = outputs[:, -1]
            ori_im = draw_boxes(ori_im, bbox_xyxy, identities)
            bbox_tlwh = [
                self.deepsort._xyxy_to_tlwh(bb_xyxy) for bb_xyxy in bbox_xyxy
            ]
            results.append((idx_frame - 1, bbox_tlwh, identities))

        end = time.time()

        if self.args.display:
            cv2.imshow("test", ori_im)
            cv2.waitKey(1)

        if self.args.save_path:
            self.writer.write(ori_im)

        # save results
        write_results(self.save_results_path, results, 'mot')

        # logging
        self.logger.info("time: {:.03f}s, fps: {:.03f}, detection numbers: {}, tracking numbers: {}" \
                         .format(end - start, 1 / (end - start), bbox_xywh.shape[0], len(outputs)))
def run(self):
    """Track detections whose class id is listed in ``lst_move_life``.

    Detection comes from ``v4detector.YOLO``.  Frames that are empty or
    have no detection at all are skipped before tracking.
    """
    results = []
    idx_frame = 0
    while self.vdo.grab():
        idx_frame += 1
        if idx_frame % self.args.frame_interval != 0:
            continue

        t_begin = time.time()
        _, ori_im = self.vdo.retrieve()
        im = cv2.cvtColor(ori_im, cv2.COLOR_BGR2RGB)
        if len(im) == 0:
            continue

        bbox_xywh, cls_conf, cls_ids = v4detector.YOLO(im)
        if len(bbox_xywh) == 0:
            continue
        print("detection cls_ids:", cls_ids)

        # One boolean per detection: is its class one of the tracked ones?
        mask = [class_id in lst_move_life for class_id in cls_ids]
        print("mask cls_ids:", mask)

        cls_conf = cls_conf[mask]
        bbox_xywh = bbox_xywh[mask]
        # Box dilation in case the box is too small (original author's note).
        bbox_xywh[:, 3:] *= 1.2

        # NOTE(review): cls_ids is passed unfiltered while the boxes and
        # confidences are masked — confirm the tracker expects that.
        outputs = self.deepsort.update(bbox_xywh, cls_conf, im, cls_ids)

        if len(outputs) > 0:
            bbox_xyxy = outputs[:, :4]
            identities = outputs[:, 4:5]
            cls_id = outputs[:, -1]
            print("track res cls_id:", cls_id)
            ori_im = draw_boxes(ori_im, bbox_xyxy, cls_ids, identities)
            bbox_tlwh = [
                self.deepsort._xyxy_to_tlwh(box) for box in bbox_xyxy
            ]
            results.append((idx_frame - 1, bbox_tlwh, identities))

        spent = time.time() - t_begin

        if self.args.display:
            cv2.imshow("test", ori_im)
            cv2.waitKey(1)

        if self.args.save_path:
            self.writer.write(ori_im)

        write_results(self.save_results_path, results, 'mot')

        self.logger.info(
            "time: {:.03f}s, fps: {:.03f}, detection numbers: {}, tracking numbers: {}"
            .format(spent, 1 / spent, bbox_xywh.shape[0], len(outputs)))
def run(self):
    """Track all detections on the raw frames and report per-frame timing.

    The detector is called with the frame as read and ``self.model``.
    Frames without detections are only timed and logged.  Besides the
    per-frame log line, a running "Time / Fps" line measured from the
    start of the method is printed.
    """
    results = []
    idx_frame = 0
    frames = self.video_frams
    model = self.model
    all_time = time_synchronized()

    def report(frame_idx, began, finished, n_tracks):
        # Emits the per-frame log line and the cumulative timing print.
        self.logger.info(
            "time: {:.03f}s, fps: {:.03f}, frame: {}/{}, tracking numbers: {}"
            .format(finished - began, 1 / (finished - began), frame_idx,
                    frames, n_tracks))
        print("Time: {:.03f}s, Fps: {:.03f}".format(
            finished - all_time, frame_idx / (finished - all_time)))

    while self.vdo.grab():
        idx_frame += 1
        if idx_frame % self.args.frame_interval != 0:
            continue

        start = time_synchronized()
        _, ori_im = self.vdo.retrieve()

        bbox_xywh, cls_conf, cls_ids = self.detector(ori_im, model)

        # No detection: nothing to track, draw or save for this frame.
        if len(cls_ids) == 0:
            report(idx_frame, start, time_synchronized(), 0)
            continue

        outputs = self.deepsort.update(bbox_xywh, cls_conf, ori_im)

        if len(outputs) > 0:
            bbox_xyxy = outputs[:, :4]
            identities = outputs[:, -1]
            ori_im = draw_boxes(ori_im, bbox_xyxy, identities)
            bbox_tlwh = [
                self.deepsort._xyxy_to_tlwh(box) for box in bbox_xyxy
            ]
            results.append((idx_frame - 1, bbox_tlwh, identities))

        end = time_synchronized()

        if self.args.display:
            cv2.imshow("test", ori_im)
            cv2.waitKey(1)

        if self.args.save_path:
            self.writer.write(ori_im)

        write_results(self.save_results_path, results, 'mot')

        report(idx_frame, start, end, len(outputs))
def run(self):
    """Track class-0 objects on every frame and optionally dump detections.

    Detections come from ``self.get_next_detection(im, idx_frame)``.  When
    the module-level ``args.save_detection`` is set, the raw detections of
    every frame are collected and pickled under ``output/`` after the loop.
    The average fps is logged at the end when at least one frame was read.
    """
    results = []
    fps = []
    idx_frame = 0
    detection_dict = {}
    while self.vdo.grab():
        start = time.time()
        _, ori_im = self.vdo.retrieve()
        im = cv2.cvtColor(ori_im, cv2.COLOR_BGR2RGB)

        # do detection
        bbox_xywh, cls_conf, cls_ids = self.get_next_detection(
            im, idx_frame)

        # Stored before filtering, keyed by the 0-based frame index.
        if args.save_detection:
            detection_dict[idx_frame] = [bbox_xywh, cls_conf, cls_ids]

        idx_frame += 1

        # select person class
        mask = cls_ids == 0
        bbox_xywh = bbox_xywh[mask]
        # bbox dilation just in case bbox too small, delete this line if
        # using a better pedestrian detector
        bbox_xywh[:, 3:] *= 1.2
        cls_conf = cls_conf[mask]

        # do tracking
        outputs = self.deepsort.update(bbox_xywh, cls_conf, im)

        # draw boxes for visualization
        if len(outputs) > 0:
            bbox_xyxy = outputs[:, :4]
            identities = outputs[:, -1]
            ori_im = draw_boxes(ori_im,
                                bbox_xyxy,
                                identities,
                                force_resolution=args.force_resolution)
            bbox_tlwh = [
                self.deepsort._xyxy_to_tlwh(bb_xyxy) for bb_xyxy in bbox_xyxy
            ]
            results.append((idx_frame - 1, bbox_tlwh, identities))

        end = time.time()

        if self.args.display:
            cv2.imshow("test", ori_im)
            cv2.waitKey(1)

        if self.args.save_path:
            self.writer.write(ori_im)

        # save results
        write_results(self.save_results_path, results, 'mot')

        # logging
        fps.append(1 / (end - start))
        self.logger.info("time: {:.03f}s, fps: {:.03f}, detection numbers: {}, tracking numbers: {}" \
                         .format(end - start, 1 / (end - start), bbox_xywh.shape[0], len(outputs)))

    # An empty or unreadable video yields no samples; averaging would
    # divide by zero.
    if fps:
        self.logger.info("Average fps is {:.03f}".format(sum(fps) / len(fps)))

    if args.save_detection:
        # First run of digits found in the video path.
        res = re.findall(r'\d+', self.video_path)[0]
        save_path = str(args.detection_model) + "_" + str(
            args.sample_rate).replace(".", "") + "_" + res + ".pkl"
        self.logger.info(f"Saving detection results to {save_path}")
        with open(os.path.join("output", save_path), "wb") as f:
            pickle.dump(detection_dict, f)
def run(self):
    """Track class-0 boxes and pair them with class-1 boxes per frame.

    ``self.detector.infer_cv_img`` returns rows whose columns 0-3 are read
    as xyxy coordinates, column 4 as confidence and column 5 as class id.
    Class-1 boxes (faces, per the original comment) are kept in xyxy form
    and matched to tracks by ``get_person_face_box``; class-0 boxes are
    converted to centre-based xywh and fed to the tracker.
    """
    results = []
    idx_frame = 0
    while self.vdo.grab():
        idx_frame += 1
        if idx_frame % self.args.frame_interval != 0:
            continue

        t_start = time.time()
        _, ori_im = self.vdo.retrieve()
        im = cv2.cvtColor(ori_im, cv2.COLOR_BGR2RGB)

        det_result = self.detector.infer_cv_img(ori_im)
        if len(det_result) <= 0:
            det_result = np.empty((0, 6))

        cls_conf = det_result[:, 4]
        cls_ids = det_result[:, 5]

        # Face boxes must be extracted before the in-place conversion below.
        face_bbox_xyxy = det_result[:, 0:4][cls_ids == 1]

        # xyxy -> xywh: first width/height, then the box centre.
        det_result[:, 2:4] = det_result[:, 2:4] - det_result[:, 0:2]
        det_result[:, 0:2] = det_result[:, 0:2] + det_result[:, 2:4] * 0.5
        bbox_xywh = det_result[:, 0:4]

        end_yolo_time = time.time() - t_start
        self.logger.info("yolo det time:{:.03f}s".format(end_yolo_time))

        # Keep class 0 only for tracking.
        person_mask = cls_ids == 0
        bbox_xywh = bbox_xywh[person_mask]
        cls_conf = cls_conf[person_mask]

        outputs = self.deepsort.update(bbox_xywh, cls_conf, im)

        if len(outputs) > 0:
            bbox_xyxy = outputs[:, :4]
            identities = outputs[:, -1]
            person_face_boxes = get_person_face_box(
                bbox_xyxy, identities, face_bbox_xyxy)
            ori_im = draw_person_face_boxes(ori_im, person_face_boxes)
            bbox_tlwh = [
                self.deepsort._xyxy_to_tlwh(box) for box in bbox_xyxy
            ]
            results.append((idx_frame - 1, bbox_tlwh, identities))

        total = time.time() - t_start

        if self.args.display:
            cv2.imshow("test", ori_im)
            # NOTE(review): waitKey(0) waits for a key press on every
            # displayed frame — confirm this is intended.
            cv2.waitKey(0)

        if self.args.save_path:
            self.writer.write(ori_im)

        write_results(self.save_results_path, results, 'mot')

        self.logger.info(
            "time: {:.03f}s, fps: {:.03f}, detection numbers: {}, tracking numbers: {}"
            .format(total, 1 / total, bbox_xywh.shape[0], len(outputs)))
def run(self):
    """Track detections from either a yolov3 or a retinaface backbone.

    The backbone is selected by the module-level ``args.backbone``.  With
    ``'yolov3'`` class-0 boxes are tracked on the RGB frame; with
    ``'retinaface'`` the boxes returned by
    ``eval_widerface.get_detections`` are converted with
    ``_xyxy_to_xywh`` and tracked on the frame as read.  The tracker is
    only updated when at least one box is available.
    """
    results = []
    idx_frame = 0
    while self.vdo.grab():
        idx_frame += 1
        if idx_frame % self.args.frame_interval:
            continue

        start = time.time()
        _, ori_im = self.vdo.retrieve()
        outputs = []

        if args.backbone == 'yolov3':
            im = cv2.cvtColor(ori_im, cv2.COLOR_BGR2RGB)
            bbox_xywh, cls_conf, cls_ids = self.detector(im)

            # select person class
            mask = cls_ids == 0
            bbox_xywh = bbox_xywh[mask]
            # bbox dilation just in case bbox too small, delete this line
            # if using a better pedestrian detector
            bbox_xywh[:, 3:] *= 1.2
            cls_conf = cls_conf[mask]

            # do tracking
            if len(bbox_xywh) > 0:
                outputs = self.deepsort.update(bbox_xywh, cls_conf, im)

        elif args.backbone == 'retinaface':
            from retinaface import eval_widerface
            img = torch.from_numpy(ori_im).permute(
                2, 0, 1).unsqueeze(0).float().cuda()
            picked_boxes, _, picked_scores = eval_widerface.get_detections(
                img, self.detector, score_threshold=0.5, iou_threshold=0.3)

            boxes_arr = np.array(picked_boxes)
            if boxes_arr.ndim == 3:
                # Flatten the leading axes but keep one row per box.
                # np.squeeze would also drop the box axis when there is
                # exactly one detection, leaving a bare coordinate vector.
                picked_boxes = boxes_arr.reshape(-1, boxes_arr.shape[-1])

            bbox_xywh = []
            for box in picked_boxes:
                if box is None:
                    break
                x, y, w, h = _xyxy_to_xywh(box)
                bbox_xywh.append([x, y, w, h])
            bbox_xywh = np.array(bbox_xywh)

            cls_conf = np.array(picked_scores)
            if cls_conf.ndim == 2:
                # Same single-detection pitfall: squeeze would yield a 0-d
                # array; reshape keeps a 1-d vector of scores.
                cls_conf = cls_conf.reshape(-1)

            # do tracking
            if len(bbox_xywh) > 0:
                outputs = self.deepsort.update(bbox_xywh, cls_conf, ori_im)

        # draw boxes for visualization
        if len(outputs) > 0:
            bbox_xyxy = outputs[:, :4]
            identities = outputs[:, -1]
            ori_im = draw_boxes(ori_im, bbox_xyxy, identities)
            bbox_tlwh = [
                self.deepsort._xyxy_to_tlwh(bb_xyxy) for bb_xyxy in bbox_xyxy
            ]
            results.append((idx_frame - 1, bbox_tlwh, identities))

        end = time.time()

        if self.args.display:
            cv2.imshow("test", ori_im)
            cv2.waitKey(1)

        if self.args.save_path:
            self.writer.write(ori_im)

        # save results
        write_results(self.save_results_path, results, 'mot')

        # logging
        self.logger.info("time: {:.03f}s, fps: {:.03f}, detection numbers: {}, tracking numbers: {}" \
                         .format(end - start, 1 / (end - start), bbox_xywh.shape[0], len(outputs)))
def run(self):
    """Track a fixed set of classes using ``self.detector.new_detect``.

    The RGB frame is wrapped in a PIL image for detection.  Only detections
    whose class id is in ``[0, 1, 2, 3, 5, 7]`` (indexes into the COCO class
    list, per the original comment) are tracked.  A frame is skipped
    entirely when ``retrieve()`` does not return ``True`` or when the
    detector reports ``None`` confidences.
    """
    results = []
    idx_frame = 0
    while self.vdo.grab():
        idx_frame += 1
        if idx_frame % self.args.frame_interval != 0:
            continue

        t0 = time.time()
        ref, ori_im = self.vdo.retrieve()
        if ref is not True:
            continue

        im = cv2.cvtColor(ori_im, cv2.COLOR_BGR2RGB)

        # Detection runs on a PIL image built from the RGB frame.
        frame = Image.fromarray(np.uint8(im))
        bbox_xywh, cls_conf, cls_ids = self.detector.new_detect(frame)
        if cls_conf is None:
            continue

        # Copy the first four values of every box into a float32 array.
        new_bbox = np.array(
            [[b[0], b[1], b[2] * 1., b[3] * 1.] for b in bbox_xywh]
        ).astype(np.float32)

        # Positions of the detections whose class should be kept.
        wanted = [0, 1, 2, 3, 5, 7]
        kept = [pos for pos, cid in enumerate(cls_ids) if cid in wanted]

        new_bbox = np.array([new_bbox[pos] for pos in kept]).astype(
            np.float32)
        cls_conf = np.array([cls_conf[pos] for pos in kept]).astype(
            np.float32)
        cls_ids = np.array([cls_ids[pos] for pos in kept]).astype(
            np.float32)

        outputs = self.deepsort.update(new_bbox, cls_conf, im)

        if len(outputs) > 0:
            bbox_xyxy = outputs[:, :4]
            identities = outputs[:, -1]
            ori_im = draw_boxes(ori_im, bbox_xyxy, identities)
            bbox_tlwh = [
                self.deepsort._xyxy_to_tlwh(box) for box in bbox_xyxy
            ]
            results.append((idx_frame - 1, bbox_tlwh, identities))

        dt = time.time() - t0

        if self.args.display:
            cv2.imshow("test", ori_im)
            cv2.waitKey(1)

        if self.args.save_path:
            self.writer.write(ori_im)

        write_results(self.save_results_path, results, 'mot')

        self.logger.info(
            "time: {:.03f}s, fps: {:.03f}, detection numbers: {}, tracking numbers: {}"
            .format(dt, 1 / dt, new_bbox.shape[0], len(outputs)))
def run(self):
    """Detect with the TF2 detector and track with DEEPSORT or SORT.

    ``self.tracker_type`` selects how ``self.mot_tracker.update`` is
    called: ``"DEEPSORT"`` receives boxes, confidences, class ids and the
    RGB frame (via ``get_deep_format``), ``"SORT"`` receives the raw
    detection array.  Timing is logged every 200th frame index.
    """
    results = []
    idx_frame = 0
    while self.vdo.grab():
        idx_frame += 1
        if idx_frame % self.args.frame_interval:
            continue

        start = time.time()
        _, ori_im = self.vdo.retrieve()
        im = cv2.cvtColor(ori_im, cv2.COLOR_BGR2RGB)
        dict_images = {str(idx_frame): Image.fromarray(im)}

        # do detection
        detection_result = tensorflow_detection_tf2(
            value_threshold='0.7',
            dict_images=dict_images,
            num_classes=7,
            vectores_interes=VECTORES_INTERES,
            categories=CATEGORIES,
            max_width_crop=1920,
            max_height_crop=1080,
            detect_fn=self.detect_fn)
        # np.float was removed in NumPy 1.24; the builtin float is the
        # exact type it used to alias.
        objects_detected = np.array(detection_result["objects_detected"],
                                    dtype=float)

        # do tracking
        if self.tracker_type == "DEEPSORT":
            # The helper gets a copy of the detection array.
            bbox_xywh, cls_conf, cls_ids = get_deep_format(
                np.copy(objects_detected))
            trackers = self.mot_tracker.update(bbox_xywh, cls_conf, cls_ids,
                                               im)
        elif self.tracker_type == "SORT":
            trackers = self.mot_tracker.update(objects_detected)

        ori_im = custom_draw(ori_im, trackers)

        # collect boxes for the results file
        if len(trackers) > 0:
            bbox_xyxy = trackers[:, :4].astype(np.int32)
            identities = trackers[:, 4].astype(np.int32)
            bbox_tlwh = [
                self.mot_tracker._xyxy_to_tlwh(bb_xyxy)
                for bb_xyxy in bbox_xyxy
            ]
            results.append((idx_frame - 1, bbox_tlwh, identities))

        end = time.time()

        if self.args.display:
            cv2.imshow("test", ori_im)
            cv2.waitKey(1)

        if self.args.save_path:
            self.writer.write(ori_im)

        # save results
        write_results(self.save_results_path, results, 'mot')

        if idx_frame % 200 == 0:
            # logging
            self.logger.info("time: {:.03f}s, fps: {:.03f}, detection numbers: {}, tracking numbers: {}, idx_frame: {}" \
                             .format(end - start, 1 / (end - start), objects_detected.shape[0], len(trackers), idx_frame))
def run(self):
    """Track people on video with an optional user-selected target.

    Frames before the module-level ``args.load_from`` are skipped.  While
    the preview window is shown, pressing ``i`` prompts on stdin for a
    target number that is then passed to the tracker as
    ``tracking_target``.  The frame number and current target are drawn on
    every processed frame.
    """
    results = []
    idx_frame = 0
    idx_tracked = None
    while self.vdo.grab():
        idx_frame += 1
        if idx_frame % self.args.frame_interval:
            continue
        # NOTE(review): reads the module-level ``args`` while the rest of
        # the method uses ``self.args`` — confirm both are the same object.
        if idx_frame < args.load_from:
            continue

        start = time.time()
        _, ori_im = self.vdo.retrieve()
        im = cv2.cvtColor(ori_im, cv2.COLOR_BGR2RGB)

        # do detection
        bbox_xywh, cls_conf, cls_ids = self.detector(im)

        # select person class
        mask = cls_ids == 0
        bbox_xywh = bbox_xywh[mask]
        # bbox dilation just in case bbox too small, delete this line if
        # using a better pedestrian detector
        bbox_xywh[:, 3:] *= 1.2
        cls_conf = cls_conf[mask]

        # do tracking
        if idx_tracked:
            outputs = self.deepsort.update(bbox_xywh,
                                           cls_conf,
                                           im,
                                           tracking_target=idx_tracked)
        else:
            outputs = self.deepsort.update(bbox_xywh, cls_conf, im)

        # draw boxes for visualization
        if len(outputs) > 0:
            bbox_xyxy = outputs[:, :4]
            identities = outputs[:, -1]
            ori_im = draw_boxes(ori_im, bbox_xyxy, identities)
            bbox_tlwh = [
                self.deepsort._xyxy_to_tlwh(bb_xyxy) for bb_xyxy in bbox_xyxy
            ]
            results.append((idx_frame - 1, bbox_tlwh, identities))

        end = time.time()

        # draw frame count
        font = cv2.FONT_HERSHEY_SIMPLEX
        fontScale = 1
        fontColor = (255, 255, 255)
        lineType = 2
        frame_count = ("Frame no: %d" % idx_frame)
        cv2.putText(ori_im, frame_count, (10, 500), font, fontScale,
                    fontColor, lineType)

        # draw tracking number
        if idx_tracked:
            tracking_str = ("Tracking: %d" % idx_tracked)
        else:
            tracking_str = ("Tracking: None")
        cv2.putText(ori_im, tracking_str, (10, 550), font, fontScale,
                    fontColor, lineType)

        # get user input on target to track
        if self.args.display:
            cv2.imshow("test", ori_im)
            if cv2.waitKey(1) == ord('i'):
                print("\nEnter target number for constant tracking")
                user_input = input()
                try:
                    idx_tracked = int(user_input)
                except ValueError:
                    # A typo must not abort the run; keep the old target.
                    print("Invalid target number: %r" % user_input)

        if self.args.save_path:
            self.writer.write(ori_im)

        # save results
        write_results(self.save_results_path, results, 'mot')

        # logging
        self.logger.info("frame: {}, time: {:.03f}s, fps: {:.03f}, detection numbers: {}, tracking numbers: {}" \
                         .format(idx_frame, end-start, 1/(end-start), bbox_xywh.shape[0], len(outputs)))
def run(self):
    """Track class-0 objects using detections stored in per-image text files.

    For every image in ``self.images`` the detections are read from the
    file with the same path but a ``.txt`` extension, filtered by the
    module-level ``args.conf_scores``.  Images without detections are
    skipped before tracking.
    """
    import os

    results = []
    idx_frame = 0
    for img_file in self.images:
        # splitext only strips the final extension; splitting on the first
        # '.' would cut paths such as "./data/img.jpg" or "a.b/img.jpg"
        # at the wrong place.
        txt_file = os.path.splitext(img_file)[0] + '.txt'
        idx_frame += 1

        ori_im = cv2.imread(img_file)
        height, width, _ = ori_im.shape

        bbox_xywh, cls_conf, cls_ids = self.detector.read_txt(
            height, width, txt_file, args.conf_scores)

        if len(bbox_xywh) == 0:
            continue

        # select person class
        mask = cls_ids == 0
        bbox_xywh = bbox_xywh[mask]
        cls_conf = cls_conf[mask]

        im = cv2.cvtColor(ori_im, cv2.COLOR_BGR2RGB)

        # do tracking
        outputs = self.deepsort.update(bbox_xywh, cls_conf, im)

        if len(outputs) > 0:
            bbox_xyxy = outputs[:, :4]
            identities = outputs[:, -1]
            bbox_tlwh = [
                self.deepsort._xyxy_to_tlwh(bb_xyxy) for bb_xyxy in bbox_xyxy
            ]
            # NOTE(review): the stored frame index is idx_frame - 2 here —
            # confirm the intended offset.
            results.append((idx_frame - 2, bbox_tlwh, identities))

        # save results
        write_results(self.save_results_path, results, 'mot')
def run(self):
    """Track a fixed set of object classes through the video.

    Frames come from ``self._frame_from_video(self.vdo)`` and are numbered
    from 0; frames whose index is not a multiple of
    ``self.args.frame_interval`` are skipped. Detections whose class id is
    in the table below are passed, together with their class ids and
    confidences, to ``self.deepsort.update``.

    Results are flushed with ``write_results`` every 100th frame index and
    once more after the last frame.

    Returns:
        list: one ``(frame_index, boxes_tlwh, identities, object_types)``
        tuple per processed frame that produced at least one track.
    """
    # Class ids kept for tracking. The names are the ones the original code
    # used for the corresponding masks.
    tracked_class_ids = (
        0,   # person
        1,   # bicycle
        2,   # car
        3,   # motorcycle
        4,   # airplane
        5,   # bus
        7,   # truck
        9,   # traffic light
        11,  # stop sign
        12,  # parking meter
        13,  # bench
        15,  # cat
        16,  # dog
        25,  # umbrella
        26,  # handbag
        28,  # suitcase
        67,  # cell phone
    )

    results = []
    frame_gen = self._frame_from_video(self.vdo)
    total_frames = self.vdo.get(cv2.CAP_PROP_FRAME_COUNT)
    for idx_frame, ori_im in tqdm(enumerate(frame_gen), total=total_frames):
        if idx_frame % self.args.frame_interval:
            continue

        im = cv2.cvtColor(ori_im, cv2.COLOR_BGR2RGB)

        # do detection
        bbox_xywh, cls_conf, cls_ids = self.detector(im)

        # Union of the per-class masks; `+` on boolean masks acts as a
        # logical OR, exactly as in the original sum of masks.
        mask = cls_ids == tracked_class_ids[0]
        for class_id in tracked_class_ids[1:]:
            mask = mask + (cls_ids == class_id)

        bbox_xywh = bbox_xywh[mask]
        # bbox dilation just in case bbox too small, delete this line if
        # using a better pedestrian detector
        bbox_xywh[:, 3:] *= 1.2
        cls_conf = cls_conf[mask]
        cls_ids = cls_ids[mask]

        # do tracking
        outputs = self.deepsort.update(bbox_xywh, cls_ids, cls_conf, im)

        if len(outputs) > 0:
            bbox_xyxy = outputs[:, :4]
            identities = outputs[:, 4]
            # Last column of the tracker output; presumably the class id
            # that was passed in. TODO confirm against deepsort.update.
            object_types = outputs[:, -1]
            bbox_tlwh = [
                self.deepsort._xyxy_to_tlwh(bb_xyxy) for bb_xyxy in bbox_xyxy
            ]
            results.append((idx_frame, bbox_tlwh, identities, object_types))

        # Periodic checkpoint of the results file.
        if idx_frame % 100 == 0:
            write_results(self.save_results_path, results, 'mot')

    write_results(self.save_results_path, results, 'mot')
    return results
def run(self):
    """Track targets and draw a predicted trajectory for each of them.

    For every processed frame:

    * the highest-confidence detection of each of the classes 0, 1 and 2
      contributes its normalised box centre to ``self.Q`` under that class
      id;
    * class-3 detections are tracked with ``self.deepsort.update`` and each
      track contributes its normalised box centre under ``track_id + 5``;
    * every entry of ``self.Q`` holding exactly 8 points is fed to its
      trajectory model, which is decoded for 12 steps. The prediction is
      drawn in red and the observed history in green, and the oldest point
      is dropped.

    The frame is always shown in a window, written with ``self.writer`` when
    ``self.args.save_path`` is set, and ``write_results`` is called after
    every processed frame (``results`` is never appended to here).
    """
    results = []
    idx_frame = 0

    # (model, mean, std) per target id. mean/std normalise the displacement
    # inputs and de-normalise the predictions.
    predictors = {
        0: (self.traj_endeffector,
            torch.tensor((-2.6612e-05, -7.8652e-05)).to(device),
            torch.tensor((0.0025, 0.0042)).to(device)),
        1: (self.traj_arm,
            torch.tensor([-1.3265e-05, -6.5026e-06]).to(device),
            torch.tensor([0.0030, 0.0185]).to(device)),
        2: (self.traj_probe,
            torch.tensor([-5.1165e-05, -7.1806e-05]).to(device),
            torch.tensor([0.0038, 0.0185]).to(device)),
    }
    # Every other id (tracked class-3 objects) uses the pedestrian model.
    ped_predictor = (self.traj_ped,
                     torch.tensor([0.0001, 0.0001]).to(device),
                     torch.tensor([0.0001, 0.0001]).to(device))

    history_colour = (0, 255, 0)  # green
    prediction_colour = (0, 0, 255)  # red

    def remember(target_id, point):
        # self.Q maps id -> [[pt, pt, ...]]; the outer list supplies the
        # leading axis of size 1 when the history becomes an array.
        if target_id in self.Q:
            self.Q[target_id][0].append(point)
        else:
            self.Q[target_id] = [[point]]

    while self.vdo.grab():
        idx_frame += 1
        if idx_frame % self.args.frame_interval:
            continue

        _, ori_im = self.vdo.retrieve()
        im = cv2.cvtColor(ori_im, cv2.COLOR_BGR2RGB)
        height, width = ori_im.shape[:2]

        bbox_xywh, cls_conf, cls_ids = self.detector(im)
        print("cls_ids")
        print(cls_conf)
        print(cls_ids)

        # Classes 0..2: keep only the most confident detection per class.
        for class_id in range(3):
            mask = cls_ids == class_id
            class_conf = cls_conf[mask]
            if class_conf.size > 0:
                best_box = bbox_xywh[mask][np.argmax(class_conf)]
                remember(class_id,
                         [best_box[0] / width, best_box[1] / height])

        # Class 3 is the one handed to the tracker.
        mask = cls_ids == 3
        bbox_xywh = bbox_xywh[mask]
        cls_conf = cls_conf[mask]

        # do tracking
        outputs = self.deepsort.update(bbox_xywh, cls_conf, im)

        for track in outputs:
            # added with 5 so that ped id will not clash with id's of
            # end_effector arm and probe
            target_id = track[4] + 5
            centre = [
                (int(track[0]) + int(track[2])) / (2 * width),
                (int(track[1]) + int(track[3])) / (2 * height),
            ]
            remember(target_id, centre)

        for target_id, history in self.Q.items():
            # Predict only once exactly 8 observations are buffered.
            if len(history[0]) != 8:
                continue

            model, mean, std = predictors.get(target_id, ped_predictor)

            Q_np = np.array(history, dtype=np.float32)
            # Frame-to-frame displacements of the observed points.
            Q_d = Q_np[:, 1:, 0:2] - Q_np[:, :-1, 0:2]
            inp = (torch.from_numpy(Q_d).to(device) - mean) / std

            src_att = torch.ones((inp.shape[0], 1, inp.shape[1])).to(device)
            dec_inp = torch.Tensor([0, 0, 1]).unsqueeze(0).unsqueeze(1) \
                .repeat(inp.shape[0], 1, 1).to(device)

            print("predicting trajectory")
            # Pure inference: no autograd graph is needed while the decoder
            # input grows step by step.
            with torch.no_grad():
                for _ in range(12):
                    trg_att = subsequent_mask(dec_inp.shape[1]).repeat(
                        dec_inp.shape[0], 1, 1).to(device)
                    out = model(inp, dec_inp, src_att, trg_att)
                    dec_inp = torch.cat((dec_inp, out[:, -1:, :]), 1)

            # De-normalise the predicted displacements and integrate them
            # from the last observed position.
            pr = (dec_inp[:, 1:, 0:2] * std + mean) \
                .detach().cpu().numpy().cumsum(1) + Q_np[:, -1:, 0:2]

            # Drop the oldest observation.
            history[0].pop(0)

            # Predicted path: 12 points -> 11 segments.
            for j in range(11):
                pp1 = (int(pr[0, j, 0] * width), int(pr[0, j, 1] * height))
                pp2 = (int(pr[0, j + 1, 0] * width),
                       int(pr[0, j + 1, 1] * height))
                ori_im = cv2.line(ori_im, pp1, pp2, prediction_colour, 2)

            # Observed path: 8 points -> 7 segments.
            for j in range(7):
                op1 = (int(Q_np[0, j, 0] * width),
                       int(Q_np[0, j, 1] * height))
                op2 = (int(Q_np[0, j + 1, 0] * width),
                       int(Q_np[0, j + 1, 1] * height))
                ori_im = cv2.line(ori_im, op1, op2, history_colour, 2)

        cv2.imshow("test", ori_im)
        cv2.waitKey(1)

        if self.args.save_path:
            self.writer.write(ori_im)

        # save results
        write_results(self.save_results_path, results, 'mot')
def run(self):
    """Track people using precomputed boxes where available, else the detector.

    Frames are grabbed from ``self.vdo`` and every
    ``self.args.frame_interval``-th one is processed. When
    ``self.args.with_detection`` is set, the frame falls exactly on a whole
    second, and ``self.vt_box_dict`` has an entry for that second, the
    boxes stored there are used; otherwise ``self.detector`` is run and its
    class-0 detections are kept. The boxes are passed to
    ``self.deepsort.update`` and the tracks are drawn, optionally
    displayed and saved, and written with ``write_results`` after every
    processed frame.

    Raises:
        ValueError: if a precomputed entry is not a sequence of
            ``(box, confidence)`` pairs with four numbers per box.
    """
    results = []
    idx_frame = 0
    bar = tqdm(total=self.frames_num)
    while self.vdo.grab():
        idx_frame += 1
        if idx_frame % self.args.frame_interval:
            continue

        _, ori_im = self.vdo.retrieve()
        im = cv2.cvtColor(ori_im, cv2.COLOR_BGR2RGB)

        frame_offset = idx_frame - 1
        sec = frame_offset // self.fps
        # NOTE(review): keys look like "<video_name>.<second + 900>"; the
        # meaning of the 900 offset is not visible here, confirm against
        # the producer of vt_box_dict.
        sec_key = self.video_name + '.' + str(sec + 900)

        use_precomputed = (self.args.with_detection
                           and frame_offset % self.fps == 0
                           and sec_key in self.vt_box_dict)
        if use_precomputed:
            hboxes = self.vt_box_dict[sec_key]
            try:
                # Build both arrays in one go instead of growing them with
                # a concatenate per box. reshape keeps the (0, 4) / (0,)
                # shapes for an empty entry.
                bbox_xywh = np.array(
                    [box for box, _ in hboxes], dtype=float).reshape(-1, 4)
                cls_conf = np.array(
                    [conf for _, conf in hboxes], dtype=float).reshape(-1)
            except (TypeError, ValueError) as err:
                raise ValueError(
                    "malformed precomputed boxes for key %r" % sec_key
                ) from err

            # Stored boxes are multiplied by the image size, i.e. they are
            # treated as fractions of width/height in (x1, y1, x2, y2) order.
            bbox_xywh *= np.array([
                float(self.im_width), float(self.im_height),
                float(self.im_width), float(self.im_height),
            ])
            # Turn the second corner into width/height.
            # NOTE(review): x/y stay at the first corner here; confirm
            # whether deepsort.update expects box centres instead.
            bbox_xywh[:, 2] -= bbox_xywh[:, 0]
            bbox_xywh[:, 3] -= bbox_xywh[:, 1]
        else:
            # do detection
            bbox_xywh, cls_conf, cls_ids = self.detector(im)

            # select person class
            mask = cls_ids == 0
            bbox_xywh = bbox_xywh[mask]
            cls_conf = cls_conf[mask]

        # do tracking
        outputs = self.deepsort.update(bbox_xywh, cls_conf, im)

        # draw boxes for visualization
        if len(outputs) > 0:
            bbox_xyxy = outputs[:, :4]
            identities = outputs[:, -1]
            ori_im = draw_boxes(ori_im, bbox_xyxy, identities)
            bbox_tlwh = [
                self.deepsort._xyxy_to_tlwh(bb_xyxy) for bb_xyxy in bbox_xyxy
            ]
            results.append((idx_frame - 1, bbox_tlwh, identities))

        if self.args.display:
            cv2.imshow("test", ori_im)
            cv2.waitKey(1)

        if self.args.save_path:
            self.writer.write(ori_im)

        # save results
        write_results(self.save_results_path, results, 'mot')

        bar.update(1)
        self.video_bar.update(0)
def run(self):
    """Track people, optionally seeding the tracker with ground-truth boxes.

    Frames are grabbed from ``self.vdo`` and every
    ``self.args.frame_interval``-th one is processed. When ``self.args.gt``
    is set, frames 1, 2 and 3 use the rows of ``self.gt`` for that frame
    (columns 2..5, tlwh according to the original comments) instead of the
    detector output: every such box gets confidence 1 and class id 0, and
    the ground-truth boxes are drawn on the frame. All other frames use
    ``self.detector``.

    Class-0 detections are passed to ``self.deepsort.update``; tracks are
    drawn, optionally displayed and saved, written with ``write_results``
    after every processed frame, and timing is logged.
    """
    results = []
    idx_frame = 0
    while self.vdo.grab():
        idx_frame += 1
        if idx_frame % self.args.frame_interval:
            continue

        start = time.time()
        _, ori_im = self.vdo.retrieve()
        im = cv2.cvtColor(ori_im, cv2.COLOR_BGR2RGB)  # (height, width, 3)

        # With --gt, the first three frames are fed from the ground-truth
        # file instead of the detector, so the detector is not run for them.
        if self.args.gt and idx_frame in (1, 2, 3):
            # Rows of the (already filtered) ground truth for this frame.
            gt_curr_frame = self.gt[self.gt[:, 0] == idx_frame].astype(
                'float64')
            # Keep the box columns (tlwh).
            gt_curr_frame = gt_curr_frame[:, 2:6]

            # Ground-truth boxes stand in for the detector boxes.
            bbox_xywh = self.my_tlwh_to_xywh(im, gt_curr_frame)
            # All confidences are 1 in place of detector scores.
            cls_conf = np.ones((bbox_xywh.shape[0], ), dtype=int)
            # Every box gets class id 0, i.e. person.
            cls_ids = np.zeros(bbox_xywh.shape[0])

            # Draw the ground-truth boxes.
            ori_im = draw_boxes(ori_im,
                                self.my_tlwh_to_xyxy(im, gt_curr_frame))
            print("yolo yerine gt kullanıldı, frame: ", idx_frame)
        else:
            # do detection: boxes, confidences, labels
            bbox_xywh, cls_conf, cls_ids = self.detector(im)

        # select person class (0 = person; per the original note,
        # 20 = elephant and 22 = zebra in the same label list)
        mask = cls_ids == 0

        bbox_xywh = bbox_xywh[mask]
        # bbox dilation just in case bbox too small, delete this line if
        # using a better pedestrian detector
        bbox_xywh[:, 3:] *= 1.2
        cls_conf = cls_conf[mask]

        # do tracking; bbox_xywh is (number_of_detections, 4) and cls_conf
        # is (number_of_detections,) per the original notes
        outputs = self.deepsort.update(bbox_xywh, cls_conf, im)

        # draw boxes for visualization
        if len(outputs) > 0:
            bbox_xyxy = outputs[:, :4]
            identities = outputs[:, -1]
            # Draw the matched tracks with their identities.
            ori_im = draw_boxes(ori_im, bbox_xyxy, identities)
            bbox_tlwh = [
                self.deepsort._xyxy_to_tlwh(bb_xyxy) for bb_xyxy in bbox_xyxy
            ]
            results.append((idx_frame - 1, bbox_tlwh, identities))

        end = time.time()

        if self.args.display:
            cv2.imshow("test", ori_im)
            cv2.waitKey(1)

        if self.args.save_path:
            self.writer.write(ori_im)

        # save results
        write_results(self.save_results_path, results, 'mot')

        # logging
        self.logger.info("time: {:.03f}s, fps: {:.03f}, detection numbers: {}, tracking numbers: {}" \
                         .format(end - start, 1 / (end - start), bbox_xywh.shape[0], len(outputs)))