def __init__(self, args, data_set, ENABLE_TRACKING=None):
    self.tracker = Sort()
    self.args = args
    self.nms_thres = args.nms
    self.triplet_nms_thres = args.triplet_nms
    self.obj_thres = args.obj_thres
    self.triplet_thres = args.triplet_thres
    self.tobefiltered_objects = [26, 53, 134, 247, 179, 74, 226, 135,
                                 145, 300, 253, 95, 11, 102, 87]
    # 26: wheel, 53: backpack, 134: light, 247: camera, 179: board,
    # 74: shoe, 226: chair, 135: shelf, 145: button, 300: cake,
    # 253: knob, 95: wall, 11: door, 102: mirror, 87: ceiling
    if ENABLE_TRACKING is None:
        self.ENABLE_TRACKING = False if self.args.dataset == 'visual_genome' else True
    else:
        self.ENABLE_TRACKING = ENABLE_TRACKING
    if self.ENABLE_TRACKING and self.args.path_opt.split('/')[-1] == 'VG-DR-Net.yaml':
        self.tobefiltered_predicates = [0, 6, 10, 18, 19, 20, 22, 23, 24]
        # 0: background, 6: eat, 10: wear, 18: ride, 19: watch,
        # 20: play, 22: enjoy, 23: read, 24: cut
    elif self.ENABLE_TRACKING and self.args.path_opt.split('/')[-1] == 'VG-MSDN.yaml':
        self.tobefiltered_predicates = [12, 18, 27, 28, 30, 31, 32, 35]
    else:
        self.tobefiltered_predicates = []

    # Params for statistics-based scene graph inference
    self.relation_statistics = relation_prior.load_obj("model/prior/preprocessed/relation_prior_prob")
    self.joint_probability = relation_prior.load_obj("model/prior/preprocessed/object_prior_prob")
    self.spurious_rel_thres = 0.07
    self.rel_infer_thres = 0.9
    self.obj_infer_thres = 0.001
    self.data_set = data_set
    self.detected_obj_set = set()
    self.fasttext = torchtext.vocab.FastText()
    self.word_vecs, self.word_itos, self.word_stoi = self.prepare_wordvecs(
        num_vocabs=400, ignores=VG_DR_NET_OBJ_IGNORES)
    self.pred_stoi = {self.data_set.predicate_classes[i]: i
                      for i in range(len(self.data_set.predicate_classes))}

def __init__(self, classes, tracker='sort'):
    self.ttype = tracker
    self.classes = classes
    if tracker == 'deep_sort':
        from deep_sort import generate_detections
        from deep_sort.deep_sort import nn_matching
        from deep_sort.deep_sort.tracker import Tracker
        metric = nn_matching.NearestNeighborDistanceMetric("cosine", 0.2, 100)  # param
        self.nms_max_overlap = 0.1  # param
        model_path = os.path.join(WORK_DIR, MODEL_DIR, "mars-small128.ckpt-68577")
        self.encoder = generate_detections.create_box_encoder(model_path)
        self.tracker = Tracker(metric)
        from deep_sort.application_util import preprocessing as prep
        from deep_sort.deep_sort.detection import Detection
        self.prep = prep
        self.Detection = Detection
    elif tracker == 'sort':
        from sort.sort import Sort
        self.tracker = Sort()
    self.trackers = {}

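# A hedged usage sketch for the wrapper above; the class name TrackerWrapper
# and the update() call pattern are assumptions (the snippet only shows
# __init__). SORT consumes an (N, 5) array of [x1, y1, x2, y2, score] rows:
#
#     import numpy as np
#     wrapper = TrackerWrapper(classes=['person', 'car'], tracker='sort')
#     tracks = wrapper.tracker.update(np.empty((0, 5)))  # an empty frame is valid input
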
def initTrack(self):
    if (self.track):
        self.progressBar.setValue(20)
    if self.options["track"]:
        if self.options["tracker"] == "deep_sort":
            from deep_sort import generate_detections
            from deep_sort.deep_sort import nn_matching
            from deep_sort.deep_sort.tracker import Tracker
            self.progressBar.setValue(50)
            metric = nn_matching.NearestNeighborDistanceMetric("cosine", 0.2, 100)
            self.tracker = Tracker(metric)
            self.encoder = generate_detections.create_box_encoder(
                os.path.abspath("deep_sort/resources/networks/mars-small128.ckpt-68577"))
        elif self.options["tracker"] == "sort":
            from sort.sort import Sort
            self.encoder = None
            self.tracker = Sort()
            self.progressBar.setValue(50)
    if self.options["BK_MOG"] and self.options["track"]:
        fgbg = cv2.bgsegm.createBackgroundSubtractorMOG()
        self.progressBar.setValue(60)
        self.initTFNet()
    else:
        self.initTFNet()

def gui(self):
    source = self.FLAGS.demo
    SaveVideo = self.FLAGS.saveVideo
    if self.FLAGS.track:
        if self.FLAGS.tracker == "deep_sort":
            from deep_sort import generate_detections
            from deep_sort.deep_sort import nn_matching
            from deep_sort.deep_sort.tracker import Tracker
            metric = nn_matching.NearestNeighborDistanceMetric("cosine", 0.2, 100)
            tracker = Tracker(metric)
            encoder = generate_detections.create_box_encoder(
                os.path.abspath("deep_sort/resources/networks/mars-small128.ckpt-68577"))
        elif self.FLAGS.tracker == "sort":
            from sort.sort import Sort
            encoder = None
            tracker = Sort()
    if self.FLAGS.BK_MOG and self.FLAGS.track:
        fgbg = cv2.bgsegm.createBackgroundSubtractorMOG()
    if self.FLAGS.csv:
        f = open('{}.csv'.format(source), 'w')  # 'source' is the demo input path
        writer = csv.writer(f, delimiter=',')
        writer.writerow(['frame_id', 'track_id', 'x', 'y', 'w', 'h'])
        f.flush()
    else:
        f = None
        writer = None
    App(tkinter.Tk(), "Tkinter and OpenCV", 0, tracker, encoder)

def mergeSort(self):
    # note: this Sort is a sorting-algorithms helper class, not the SORT tracker
    test = [23, 2, 1, 4213, 43, 1, 21, 4, 1, 213, 43, 23423, 53, 21, 221, 342312123]
    sortObj = Sort()
    sortObj.mergeSort2(test, 0, len(test) - 1, 'main')
    print(test)

def insertionSort(self):
    # note: this Sort is a sorting-algorithms helper class, not the SORT tracker
    test = [23, 2, 1, 4213, 43, 1, 21, 4, 1, 213, 43, 23423, 53, 21, 221, 342312123]
    sortObj = Sort()
    sortObj.insertionSort_While(test)
    print(test)

def input_track(self):
    """
    Utility function to initialize the SORT tracking algorithm.

    :return: a (tracker, encoder) tuple; the encoder is None for plain SORT
    """
    from sort.sort import Sort
    Tracker = Sort()
    return Tracker, None

def input_track(self):
    """
    Utility function to initialize the SORT tracking algorithm.

    :return: a (tracker, encoder) tuple; the encoder is None for plain SORT
    """
    if self.options.tracker == 'sort':
        from sort.sort import Sort
        encoder = None
        Tracker = Sort()
    return Tracker, encoder

def assign_ids(detections):
    """
    Run SORT over a sequence of per-frame detections and assign track IDs.

    :param detections: iterable of per-frame arrays with rows [x1, y1, x2, y2, score]
    :return: list of per-frame arrays with rows [x1, y1, x2, y2, track_id]
    """
    mot_tracker = Sort()
    tracked_detections = []
    for detections_frame_bboxes in detections:
        if len(detections_frame_bboxes) == 0:
            detections_frame_bboxes = np.zeros((0, 5))
        tracked_detections.append(
            mot_tracker.update(np.array(detections_frame_bboxes)))
    return tracked_detections

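# A minimal, self-contained sketch of the update() contract assumed above,
# based on the reference SORT implementation (https://github.com/abewley/sort);
# the box values are made up. update() consumes an (N, 5) array of
# [x1, y1, x2, y2, score] rows and returns rows of [x1, y1, x2, y2, track_id].
import numpy as np
from sort.sort import Sort

tracker = Sort()
frame1 = np.array([[10., 10., 50., 60., 0.9]])  # one detection
frame2 = np.array([[12., 11., 52., 61., 0.8]])  # same object, slightly moved
for dets in (frame1, frame2):
    print(tracker.update(dets))                 # -> rows of [x1, y1, x2, y2, id]
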
def video_detect(model, path_to_video, threshold=0.6, track=True):
    mot_tracker = Sort()
    cap = cv2.VideoCapture(path_to_video)
    out = cv2.VideoWriter(path_to_video + '-detections.avi',
                          cv2.VideoWriter_fourcc(*'XVID'), 30.0, (640, 480))
    device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
    with torch.no_grad():
        model.eval()
        model.to(device)
        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                print('No more frames')
                break
            pil_img = Image.fromarray(frame)
            tensor_img = to_tensor(pil_img).unsqueeze_(0)
            dets = model(tensor_img.to(device))
            if track:
                tracked_dets = None
                for box, score in zip(dets[0]['boxes'], dets[0]['scores']):
                    if score.item() >= threshold:
                        tracked_det = np.array([
                            torch.cat((box, score.reshape(1))).detach().cpu().numpy()])
                        tracked_dets = (np.concatenate((tracked_dets, tracked_det))
                                        if tracked_dets is not None else tracked_det)
                tracked_dets = mot_tracker.update(
                    tracked_dets if tracked_dets is not None else np.empty((0, 5)))
                out.write(np.array(draw_object_id(tracked_dets, pil_img)))
            else:
                out.write(np.array(
                    draw_class_labels(dets, tensor_img, get_coco_classes(),
                                      threshold=threshold)[0]))
    cap.release()
    out.release()
    cv2.destroyAllWindows()

def track(video_path, use_gpu=False):
    video = cv2.VideoCapture(video_path)
    ret, frame = video.read()
    if ret:
        frame = cv2.resize(frame, (input_width, input_height))
    if use_gpu:
        caffe.set_mode_gpu()
    tracker = Sort(max_age=10)
    detector = Detector()
    classes = detector.get_classes()
    while ret:
        frame_disp = np.copy(frame)
        bounding_boxes, counting = detector.infer(frame)
        class_counting = zip(classes, counting)
        for pair in class_counting:
            print('{:s} {:03d}'.format(*pair))
        print('')
        if len(bounding_boxes) > 0:
            bounding_boxes = np.array(bounding_boxes, np.int32)
            # convert (x, y, w, h) to (x1, y1, x2, y2)
            bounding_boxes[:, 2:4] += bounding_boxes[:, 0:2]
            bounding_boxes[:, 2:4] -= 1
            track_results = tracker.update(bounding_boxes)
            draw_tracking_results(track_results, frame_disp)
        cv2.imshow('tracking', frame_disp)
        key = cv2.waitKey(1)
        if key == 27:
            return
        ret, frame = video.read()
        if ret:
            frame = cv2.resize(frame, (input_width, input_height))

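# The in-place corner conversion used above, isolated on a toy array
# (illustrative values only): SORT expects (x1, y1, x2, y2) corners, while
# this detector emits (x, y, w, h) boxes.
import numpy as np

boxes = np.array([[10, 20, 30, 40]], dtype=np.int32)  # (x, y, w, h)
boxes[:, 2:4] += boxes[:, 0:2]  # -> (x1, y1, x2, y2), exclusive corner
boxes[:, 2:4] -= 1              # make the bottom-right corner inclusive
print(boxes)                    # [[10 20 39 59]]
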
def __init__(self, **kwargs):
    print(kwargs)
    self._state = {}
    self._statistics = []
    self._class_count = dict(
        zip(kwargs['class_ids'], np.zeros(len(kwargs['class_ids']))))
    # if the distance between the centers of two bboxes is less than
    # _max_distance, the object is considered to be staying
    self._max_distance = kwargs['max_distance'] if 'max_distance' in kwargs \
        else DEFAULT_MAX_DISTANCE_BETWEEN_POINTS
    # after _warmup_frames we start to compare bbox centers for one tracked object
    self._warmup_frames = kwargs['warmup_frames'] if 'warmup_frames' in kwargs \
        else DEFAULT_WARMUP_FRAMES
    self._line_y = kwargs['line_y'] if 'line_y' in kwargs else 0
    min_hits = kwargs['min_hits'] if 'min_hits' in kwargs else DEFAUTL_MIN_HITS
    max_age = kwargs['max_age'] if 'max_age' in kwargs else DEFAULT_MAX_AGE
    # self.display_config()
    self._mot_tracker = Sort(max_age, min_hits)

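# A sketch of the centre-distance "staying" test that _max_distance implies;
# the helper name and box format are assumptions, not part of the original class.
import numpy as np

def is_staying(prev_box, cur_box, max_distance):
    # boxes as [x1, y1, x2, y2]; an object is "staying" when its centre
    # moved less than max_distance pixels between the two frames
    prev_c = np.array([(prev_box[0] + prev_box[2]) / 2.0, (prev_box[1] + prev_box[3]) / 2.0])
    cur_c = np.array([(cur_box[0] + cur_box[2]) / 2.0, (cur_box[1] + cur_box[3]) / 2.0])
    return np.linalg.norm(cur_c - prev_c) < max_distance
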
def __init__(
    self,
    cap,
    mask_filename,
    warp_filename,
    threshold,
    prefix="",
    output_path="crop_images",
):
    """CaptureProcessor starts a thread for processing ROIs defined in a mask file.

    The processor does the following tasks:
    - Crops the images to match masks
    - Warps ROI images to remove perspective distortion (if necessary)
    - Saves ROI images to the file system (encrypted if necessary)
    - Detects vehicles in ROIs using Yolo object detection
    - Tracks vehicles using the SORT algorithm
    - Saves metadata to a JSON file

    Args:
        cap (cv2.VideoCapture): OpenCV's VideoCapture object for either camera or video stream
        mask_filename (str): Filename of mask file in PNG format
        warp_filename (str): Filename of warp file in JSON format
        threshold (int): Threshold for perceptual hash to detect motion in ROI
        prefix (str, optional): Prefix for image and metadata files. Defaults to "".
        output_path (str, optional): Folder to save images and metadata. Defaults to "crop_images".
    """
    self.keep_processing = False
    self.cap = cap
    self.threshold = threshold
    self.prefix = prefix
    self.output_path = output_path
    self.mask_filename = mask_filename
    self.warp_filename = warp_filename
    self.image_cache = []
    self.keep_sending_after_phash_diff = 2.5  # seconds
    self.yolo = Yolov5()
    self.tracker = Sort(max_age=5, min_hits=3, iou_threshold=0.3)

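# The "perceptual hash to detect motion" check, sketched with the imagehash
# package; this is an assumption about the mechanism, not the class's actual code.
from PIL import Image
import imagehash

def roi_changed(prev_roi, cur_roi, threshold):
    # Hamming distance between the 64-bit perceptual hashes of the two ROI crops
    diff = imagehash.phash(Image.fromarray(prev_roi)) - imagehash.phash(Image.fromarray(cur_roi))
    return diff > threshold
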
def camera(self):
    file = self.FLAGS.demo
    SaveVideo = self.FLAGS.saveVideo
    if self.FLAGS.track:
        if self.FLAGS.tracker == "deep_sort":
            from deep_sort import generate_detections
            from deep_sort.deep_sort import nn_matching
            from deep_sort.deep_sort.tracker import Tracker
            metric = nn_matching.NearestNeighborDistanceMetric("cosine", 0.2, 100)
            tracker = Tracker(metric)
            encoder = generate_detections.create_box_encoder(
                os.path.abspath("deep_sort/resources/networks/mars-small128.ckpt-68577"))
        elif self.FLAGS.tracker == "sort":
            from sort.sort import Sort
            encoder = None
            tracker = Sort()
    if self.FLAGS.BK_MOG and self.FLAGS.track:
        fgbg = cv2.bgsegm.createBackgroundSubtractorMOG()
    camera = cv2.VideoCapture(file[0])
    camera1 = cv2.VideoCapture(file[1])
    if file == 0:
        self.say('Press [ESC] to quit video')
    assert camera.isOpened(), \
        'Cannot capture source'
    if self.FLAGS.csv:
        f = open('{}.csv'.format(file), 'w')
        writer = csv.writer(f, delimiter=',')
        writer.writerow(['frame_id', 'track_id', 'x', 'y', 'w', 'h'])
        f.flush()
    else:
        f = None
        writer = None
    if file == 0:  # camera window
        cv2.namedWindow('', 0)
        _, frame = camera.read()
        height, width, _ = frame.shape
        cv2.resizeWindow('', width, height)
    else:
        ret, frame = camera.read()
        ret1, frame1 = camera1.read()
        height, width, _ = (frame.shape[0], (frame.shape[1] + frame1.shape[1]), 3)
    if SaveVideo:
        fourcc = cv2.VideoWriter_fourcc(*'XVID')
        if file == 0:  # camera window
            fps = 1 / self._get_fps(frame)
            if fps < 1:
                fps = 1
        else:
            fps = round(camera.get(cv2.CAP_PROP_FPS))
        # note: 'filepath' is assumed to be defined in the enclosing scope
        videoWriter = cv2.VideoWriter(
            "/".join(filepath.split("/")[:-1]) + '/output_{}'.format(filepath.split("/")[-1]),
            fourcc, fps, (width, height))
    # buffers for demo in batch
    buffer_inp = list()
    buffer_pre = list()
    elapsed = 0
    start = timer()
    self.say('Press [ESC] to quit demo')
    # postprocessed = []
    # Loop through frames
    n = 0
    while camera.isOpened() and camera1.isOpened():
        elapsed += 1
        _, frame = camera.read()
        _, frame1 = camera1.read()
        if frame is None:
            print('\nEnd of Video')
            break
        if self.FLAGS.skip != n:
            n += 1
            continue
        n = 0
        if self.FLAGS.BK_MOG and self.FLAGS.track:
            fgmask = fgbg.apply(frame)
        else:
            fgmask = None
        vis = np.concatenate((frame, frame1), axis=1)
        preprocessed = self.framework.preprocess(vis)
        buffer_inp.append(vis)
        buffer_pre.append(preprocessed)
        # Only process and imshow when queue is full
        if elapsed % self.FLAGS.queue == 0:
            feed_dict = {self.inp: buffer_pre}
            net_out = self.sess.run(self.out, feed_dict)
            for img, single_out in zip(buffer_inp, net_out):
                if not self.FLAGS.track:
                    postprocessed = self.framework.postprocess(single_out, img)
                else:
                    postprocessed = self.framework.postprocess(
                        single_out, img, frame_id=elapsed,
                        csv_file=f, csv=writer, mask=fgmask,
                        encoder=encoder, tracker=tracker)
                    # postprocessed1 = self.framework.postprocess(
                    #     single_out, img[:height, int(width / 2):width, :3], frame_id=elapsed,
                    #     csv_file=f, csv=writer, mask=fgmask,
                    #     encoder=encoder, tracker=tracker)
                if SaveVideo:
                    videoWriter.write(postprocessed)
                if self.FLAGS.display:
                    cv2.imshow('', postprocessed)
            # Clear Buffers
            buffer_inp = list()
            buffer_pre = list()
        if elapsed % 5 == 0:
            sys.stdout.write('\r')
            sys.stdout.write('{0:3.3f} FPS'.format(elapsed / (timer() - start)))
            sys.stdout.flush()
        if self.FLAGS.display:
            choice = cv2.waitKey(1)
            if choice == 27:
                break
    sys.stdout.write('\n')
    if SaveVideo:
        videoWriter.release()
    if self.FLAGS.csv:
        f.close()
    camera.release()
    if self.FLAGS.display:
        cv2.destroyAllWindows()

if len(sys.argv) == 1:
    # display help message when no args are passed.
    parser.print_help()
    sys.exit(1)
if args.date is None or args.campose is None:
    raise argparse.ArgumentTypeError('Please specify the date and camera pose for video clips first!')
else:
    date = args.date
    cam_pose = args.campose
total_pcount_each_minute = np.zeros((12, 60), dtype=np.int32)  # 12 hours, from 10:00 to 22:00
# prepare the ID tracker
mot_tracker = Sort(max_age=10, min_hits=3)
for hour in np.arange(10, 22):
    for minute in np.arange(60):
        print("loading ../datasets/TongYing/{}/{}/{:02d}/{:02d}.mp4".format(cam_pose, date, hour, minute))
        cap = cv2.VideoCapture('../datasets/TongYing/{}/{}/{:02d}/{:02d}.mp4'.format(cam_pose, date, hour, minute))
        mot_tracker.update([])  # just in case the first file does not exist
        while cap.isOpened():
            ret, frame = cap.read()
            if ret:
                # resize
                img = cv2.resize(frame, net_shape[::-1], interpolation=cv2.INTER_CUBIC)
                # start = time.time()
                rclasses, rscores, rbboxes = process_image(img, net_shape=net_shape)

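# The (12, 60) grid above is indexed by (hour - 10, minute). A hedged sketch of
# the accumulation the truncated loop presumably performs; the variable
# person_count_this_minute is hypothetical:
#
#     total_pcount_each_minute[hour - 10, minute] += person_count_this_minute
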
def track(data_file, reverse=False, verbose=0):
    if verbose == 1:
        print("Opening File...")
    f = h5py.File(data_file, "r+")
    mot_tracker = Sort()
    tracks_n = f["tracks_n"].value[0]
    start_count = find_start_count(list(f.keys()))
    if not reverse:
        frame_indices = range(start_count, f['frame_number'].value[0])
    else:
        frame_indices = reversed(range(start_count, f['frame_number'].value[0]))
    if verbose == 1:
        print("Starting loop...")
    for i in frame_indices:
        frame = "frame{}".format(i)
        bbox_handle = f[frame]['rois']
        detection = bbox_handle.value
        scores = f[frame]['scores'].value
        number_of_masks = scores.shape[0]
        detection_with_scores = np.hstack((detection, np.reshape(scores, (-1, 1))))
        if verbose == 1:
            print("detections with scores:")
            print(detection_with_scores)
        track_bbs_ids = mot_tracker.update(detection_with_scores)
        if verbose == 1:
            print("tracked bbs:")
            print(track_bbs_ids)
        # Associate the track BBs with the original BBs: for each track BB,
        # find the nearest neighbour in the original detections and associate
        # the track ID with the index of that original detection.
        index_array = np.zeros(number_of_masks)
        if verbose == 1:
            print("number of masks {}".format(number_of_masks))
        for track in track_bbs_ids:
            nn_index = find_nn(track[:-1], detection)
            index_array[nn_index] = track[-1]
        if verbose == 1:
            print("The index array is")
            print(index_array)
        max_idx = np.amax(index_array) if number_of_masks > 0 else 0
        if max_idx > tracks_n:
            tracks_n = max_idx
        ID_dataset_key = "{}/IDs".format(frame)
        if ID_dataset_key in f:
            f[ID_dataset_key][:, 1] = index_array
        else:
            f.create_dataset(ID_dataset_key, (index_array.shape[0], 2))
            f[ID_dataset_key][:, 0] = index_array
    f["tracks_n"][0] = tracks_n
    KalmanBoxTracker.count = 0
    f.close()

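# find_nn() is defined elsewhere in that codebase; a plausible stand-in
# (hypothetical, not the author's code) returns the index of the detection
# closest to the tracked box by L2 distance over the corner coordinates:
import numpy as np

def find_nn(box, detections):
    # box: [x1, y1, x2, y2]; detections: (N, 4) array in the same format
    distances = np.linalg.norm(detections - np.asarray(box), axis=1)
    return int(np.argmin(distances))
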
def run(detector, number, cam, mask_name, date, name):
    print('Processing video number ', number)
    video_capture = cv2.VideoCapture('/media/aioz-trung-intern/data/sml/' + cam + '/' + name)
    w = 640
    h = 480
    # Save video
    fourcc = cv2.VideoWriter_fourcc(*'MP4V')
    out = cv2.VideoWriter('data_res/res_' + cam + '/o_' + name, fourcc, 30, (w, h))
    # Draw mask region on video frame
    fps = 0.0
    mask = cv2.imread(mask_name, 0)
    contours, _ = cv2.findContours(np.expand_dims(mask, axis=2),
                                   cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE)
    cont_sorted = sorted(contours, key=cv2.contourArea, reverse=True)[:5]
    x, y, wi, he = cv2.boundingRect(cont_sorted[0])
    # Init tracker
    tracker = Sort(use_dlib=True)  # create instance of the SORT tracker
    bbox_stack = []
    avg_people = 0
    count = 0
    video_mask_frame = np.zeros(shape=[480, 640], dtype=np.float64)
    nop_list = []
    x_mask = []
    y_mask = []
    # Processing video
    while video_capture.isOpened():
        ret, frame = video_capture.read()  # frame shape 640*480*3
        if not ret:
            break
        count += 1
        org_frame = cv2.resize(frame, (w, h), interpolation=cv2.INTER_AREA)
        mask_frame = np.zeros(shape=[480, 640], dtype=np.uint8)
        # Save first frame
        # frame = org_frame.copy()
        # frame[mask == 0] = [0, 0, 0]
        # cv2.imwrite('frame.jpg', frame)
        # break
        # if count == 200:
        #     break
        t1 = time.time()
        # Detect
        boxs, _, _ = process(detector, org_frame)
        if boxs.shape[0] != 0:
            # convert (x1, y1, x2, y2) corners to (x, y, w, h)
            boxs[:, 2] = boxs[:, 2] - boxs[:, 0]
            boxs[:, 3] = boxs[:, 3] - boxs[:, 1]
        # Draw bbox
        # Bounding box rectify
        num_of_person = 0
        filtered_bbox = []
        for bbox in boxs:
            if check_intersect(bbox, mask):
                avg_people += 1
                filtered_bbox.append(bbox)
        if len(bbox_stack) != stack_num:
            bbox_stack.append(filtered_bbox)
            for bbox in filtered_bbox:
                cv2.rectangle(org_frame, (int(bbox[0]), int(bbox[1])),
                              (int(bbox[0] + bbox[2]), int(bbox[1] + bbox[3])),
                              (255, 0, 0), 2)
                cv2.circle(mask_frame,
                           (int((2 * bbox[0] + bbox[2]) / 2), int((2 * bbox[1] + bbox[3]) / 2)),
                           20, (255), -1)
                x_mask.append(int((2 * bbox[0] + bbox[2]) / 2))
                y_mask.append(int((2 * bbox[1] + bbox[3]) / 2))
            num_of_person = len(filtered_bbox)
        else:
            bbox_stack_len = [len(x) for x in bbox_stack]
            list_counter = Counter(bbox_stack_len)
            argmax = np.argmax(list(list_counter.values()))
            key = list(list_counter.keys())[argmax]
            index = [i for i, e in enumerate(bbox_stack_len) if e == key]
            if key != len(filtered_bbox):
                for bbox in bbox_stack[index[-1]]:
                    cv2.rectangle(org_frame, (int(bbox[0]), int(bbox[1])),
                                  (int(bbox[0] + bbox[2]), int(bbox[1] + bbox[3])),
                                  (255, 0, 0), 2)
                    cv2.circle(mask_frame,
                               (int((2 * bbox[0] + bbox[2]) / 2), int((2 * bbox[1] + bbox[3]) / 2)),
                               20, (255), -1)
                    x_mask.append(int((2 * bbox[0] + bbox[2]) / 2))
                    y_mask.append(int((2 * bbox[1] + bbox[3]) / 2))
                num_of_person = len(bbox_stack[index[-1]])
            else:
                for bbox in filtered_bbox:
                    cv2.rectangle(org_frame, (int(bbox[0]), int(bbox[1])),
                                  (int(bbox[0] + bbox[2]), int(bbox[1] + bbox[3])),
                                  (255, 0, 0), 2)
                    cv2.circle(mask_frame,
                               (int((2 * bbox[0] + bbox[2]) / 2), int((2 * bbox[1] + bbox[3]) / 2)),
                               20, (255), -1)
                    x_mask.append(int((2 * bbox[0] + bbox[2]) / 2))
                    y_mask.append(int((2 * bbox[1] + bbox[3]) / 2))
                num_of_person = len(filtered_bbox)
            bbox_stack.append(filtered_bbox)
            del bbox_stack[0]
        cv2.imshow('mask' + cam, cv2.threshold(mask_frame, 1, 255, cv2.THRESH_BINARY)[1])
        nop_list.append(int(num_of_person))
        video_mask_frame += cv2.threshold(mask_frame, 1, 255, cv2.THRESH_BINARY)[1] / 255.0
        # Put number and fps
        fps = (fps + (1. / (time.time() - t1))) / 2
        cv2.putText(org_frame, 'FPS: ' + str(int(fps)), (10, 35),
                    cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2, cv2.LINE_AA)
        cv2.putText(org_frame, 'Pps: ' + str(num_of_person), (10, 65),
                    cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2, cv2.LINE_AA)
        # Apply transparent mask to frame
        tmp_mask = org_frame.copy()
        tmp_mask[mask == 255] = 255
        alpha = 0.6
        cv2.addWeighted(org_frame, alpha, tmp_mask, 1 - alpha, 0, org_frame)
        # Write video
        out.write(org_frame)
        cv2.imshow('original' + cam, org_frame)
        # cv2.imshow('masking', frame)
        # Press Q to stop!
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
    video_capture.release()
    out.release()
    cv2.destroyAllWindows()
    np.savetxt('data_res/result/' + str(number) + '.txt', [avg_people / count])
    np.savetxt('data_res/res_' + cam + '/' + name.replace('.mp4', '') + '.txt', nop_list, fmt='%d')
    np.savetxt('data_res/cor_res_' + cam + '/x' + name.replace('.mp4', '') + '.txt', x_mask, fmt='%d')
    np.savetxt('data_res/cor_res_' + cam + '/y' + name.replace('.mp4', '') + '.txt', y_mask, fmt='%d')
    return video_mask_frame

help="path to the image mask. Default: mask.png", default="mask.png", type=str) args = parser.parse_args() if __name__ == "__main__": register_coco_instances("my_dataset", {'thing_classes': CLASS_NAMES}, "", "") dataset_metadata = MetadataCatalog.get("my_dataset") cfg = get_cfg() cfg.merge_from_file( model_zoo.get_config_file( "LVISv0.5-InstanceSegmentation/mask_rcnn_R_50_FPN_1x.yaml") ) # получение используемой модели cfg.MODEL.WEIGHTS = "model_final.pth" # путь к найденным лучшим весам модели cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.5 # установить порог распознавания объекта в 50% (объекты, распознанные с меньшей вероятностью не будут учитываться) cfg.MODEL.ROI_HEADS.NUM_CLASSES = len( CLASS_NAMES) # число классов для распознавания detector = DefaultPredictor(cfg) tracker = Sort(max_age=40) detect_on_video(args.video_file, args.save_to, detector, tracker, mask_file=args.image_mask, to_mp4=True)
def camera(self):
    file = self.FLAGS.demo
    SaveVideo = self.FLAGS.saveVideo
    if self.FLAGS.track:
        if self.FLAGS.tracker == "deep_sort":
            from deep_sort import generate_detections
            from deep_sort.deep_sort import nn_matching
            from deep_sort.deep_sort.tracker import Tracker
            metric = nn_matching.NearestNeighborDistanceMetric("cosine", 0.2, 100)
            tracker = Tracker(metric)
            encoder = generate_detections.create_box_encoder(
                os.path.abspath("deep_sort/resources/networks/mars-small128.ckpt-68577"))
        elif self.FLAGS.tracker == "sort":
            from sort.sort import Sort
            encoder = None
            tracker = Sort()
    if self.FLAGS.BK_MOG and self.FLAGS.track:
        fgbg = cv2.bgsegm.createBackgroundSubtractorMOG()
    if file == 'camera':
        file = 0
    else:
        assert os.path.isfile(file), \
            'file {} does not exist'.format(file)
    camera = skvideo.io.VideoCapture(file)
    if file == 0:
        self.say('Press [ESC] to quit video')
    assert camera.isOpened(), \
        'Cannot capture source'
    if self.FLAGS.csv:
        f = open('{}.csv'.format(file), 'w')
        writer = csv.writer(f, delimiter=',')
        writer.writerow(['frame_id', 'track_id', 'x', 'y', 'w', 'h'])
        f.flush()
    else:
        f = None
        writer = None
    if file == 0:  # camera window
        cv2.namedWindow('', 0)
        _, frame = camera.read()
        height, width, _ = frame.shape
        cv2.resizeWindow('', width, height)
    else:
        _, frame = camera.read()
        height, width, _ = frame.shape
    if SaveVideo:
        if file == 0:  # camera window
            fps = 1 / self._get_fps(frame)
            if fps < 1:
                fps = 1
        else:
            fps = get_fps_rate(file)
        output_file = 'output_{}'.format(file)
        if os.path.exists(output_file):
            os.remove(output_file)
        videoWriter = skvideo.io.VideoWriter(output_file, fps=fps,
                                             frameSize=(width, height))
        videoWriter.open()
    # buffers for demo in batch
    buffer_inp = list()
    buffer_pre = list()
    elapsed = 0
    start = timer()
    self.say('Press [ESC] to quit demo')
    # postprocessed = []
    # Loop through frames
    n = 0
    while camera.isOpened():
        elapsed += 1
        _, frame = camera.read()
        if frame is None:
            print('\nEnd of Video')
            break
        if self.FLAGS.skip != n:
            n += 1
            continue
        n = 0
        if self.FLAGS.BK_MOG and self.FLAGS.track:
            fgmask = fgbg.apply(frame)
        else:
            fgmask = None
        preprocessed = self.framework.preprocess(frame)
        buffer_inp.append(frame)
        buffer_pre.append(preprocessed)
        # Only process and imshow when queue is full
        if elapsed % self.FLAGS.queue == 0:
            feed_dict = {self.inp: buffer_pre}
            net_out = self.sess.run(self.out, feed_dict)
            for img, single_out in zip(buffer_inp, net_out):
                if not self.FLAGS.track:
                    postprocessed = self.framework.postprocess(single_out, img, save=False)
                else:
                    postprocessed = self.framework.postprocess(
                        single_out, img, frame_id=elapsed,
                        csv_file=f, csv=writer, mask=fgmask,
                        encoder=encoder, tracker=tracker, save=False)
                if SaveVideo:
                    videoWriter.write(postprocessed)
            # Clear Buffers
            buffer_inp = list()
            buffer_pre = list()
        if elapsed % 5 == 0:
            sys.stdout.write('\r')
            sys.stdout.write('{0:3.3f} FPS'.format(elapsed / (timer() - start)))
            sys.stdout.flush()
    sys.stdout.write('\n')
    if SaveVideo:
        videoWriter.release()
    if self.FLAGS.csv:
        f.close()
    camera.release()

def __init__(self, flags, logger):
    self._logger = logger
    self.tracker = Sort(max_age=flags.obstacle_track_max_age,
                        min_hits=1,
                        min_iou=flags.min_matching_iou)

def camera(self):
    file = self.FLAGS.demo
    SaveVideo = self.FLAGS.saveVideo
    if self.FLAGS.track:
        if self.FLAGS.tracker == "deep_sort":
            from deep_sort import generate_detections
            from deep_sort.deep_sort import nn_matching
            from deep_sort.deep_sort.tracker import Tracker
            metric = nn_matching.NearestNeighborDistanceMetric("cosine", 0.2, 100)
            tracker = Tracker(metric)
            encoder = generate_detections.create_box_encoder(
                os.path.abspath("deep_sort/resources/networks/mars-small128.ckpt-68577"))
        elif self.FLAGS.tracker == "sort":
            from sort.sort import Sort
            encoder = None
            tracker = Sort()
    if self.FLAGS.BK_MOG and self.FLAGS.track:
        fgbg = cv2.bgsegm.createBackgroundSubtractorMOG()
    if file == 'camera':
        file = 0
    else:
        assert os.path.isfile(file), \
            'file {} does not exist'.format(file)
    camera = cv2.VideoCapture(file)
    if file == 0:
        self.say('Press [ESC] to quit video')
    assert camera.isOpened(), \
        'Cannot capture source'
    if self.FLAGS.csv:
        f = open('{}.csv'.format(file), 'w')
        writer = csv.writer(f, delimiter=',')
        writer.writerow(['frame_id', 'track_id', 'x', 'y', 'w', 'h'])
        f.flush()
    else:
        f = None
        writer = None
    if file == 0:  # camera window
        cv2.namedWindow(self.FLAGS.object_id, 0)
        _, frame = camera.read()
        height, width, _ = frame.shape
        cv2.resizeWindow(self.FLAGS.object_id, int(width * 0.5), int(height * 0.5))
    else:
        _, frame = camera.read()
        height, width, _ = frame.shape
    if self.FLAGS.push_stream:
        ffmpeg_pipe(self, file, width, height)
    if SaveVideo:
        fourcc = cv2.VideoWriter_fourcc(*'XVID')
        if file == 0:  # camera window
            fps = 1 / self._get_fps(frame)
            if fps < 1:
                fps = 1
        else:
            fps = round(camera.get(cv2.CAP_PROP_FPS))
        videoWriter = cv2.VideoWriter('output_{}'.format(file), fourcc, fps,
                                      (width, height))
    # buffers for demo in batch
    buffer_inp = list()
    buffer_pre = list()
    elapsed = 0
    start = timer()
    self.say('Press [ESC] to quit demo')
    # postprocessed = []
    # Loop through frames
    n = 0
    while camera.isOpened():
        if self.FLAGS.process_status == 1:  # esc
            print("gongjia: Stopped!")
            break
        if self.FLAGS.process_status == 2:
            # print("gongjia: Paused!")
            continue
        elapsed += 1
        _, frame = camera.read()
        if frame is None:
            print('\nEnd of Video')
            break
        if self.FLAGS.skip != n:
            n += 1
            continue
        n = 0
        if self.FLAGS.BK_MOG and self.FLAGS.track:
            fgmask = fgbg.apply(frame)
        else:
            fgmask = None
        preprocessed = self.framework.preprocess(frame)
        buffer_inp.append(frame)
        buffer_pre.append(preprocessed)
        # Only process and imshow when queue is full
        if elapsed % self.FLAGS.queue == 0:
            feed_dict = {self.inp: buffer_pre}
            net_out = self.sess.run(self.out, feed_dict)
            for img, single_out in zip(buffer_inp, net_out):
                if not self.FLAGS.track:
                    postprocessed = self.framework.postprocess(single_out, img)
                else:
                    postprocessed = self.framework.postprocess(
                        single_out, img, frame_id=elapsed,
                        csv_file=f, csv=writer, mask=fgmask,
                        encoder=encoder, tracker=tracker)
                if SaveVideo:
                    videoWriter.write(postprocessed)
                if self.FLAGS.display:
                    cv2.imshow(self.FLAGS.object_id, postprocessed)
                if self.FLAGS.push_stream:
                    # im = Image.fromarray(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
                    self.pipe.stdin.write(postprocessed.tobytes())
            # Clear Buffers
            buffer_inp = list()
            buffer_pre = list()
        if elapsed % 5 == 0:
            sys.stdout.write('\r')
            sys.stdout.write(' {0:3.3f} FPS '.format(elapsed / (timer() - start)))
            sys.stdout.flush()
        if self.FLAGS.display:
            choice = cv2.waitKey(1)
            if choice == 27:
                break
    cv2.imwrite('{}_{}_counter.jpg'.format(self.FLAGS.demo, self.FLAGS.object_id),
                postprocessed)
    sys.stdout.write('\n')
    if SaveVideo:
        videoWriter.release()
    if self.FLAGS.csv:
        f.close()
    camera.release()
    if self.FLAGS.display:
        cv2.destroyAllWindows()
    if self.FLAGS.push_stream:
        self.pipe.stdin.close()
        self.pipe.wait()

w = videoFile.get(cv2.CAP_PROP_FRAME_WIDTH)
h = videoFile.get(cv2.CAP_PROP_FRAME_HEIGHT)
fourcc = cv2.VideoWriter_fourcc(*'mp4v')
out = cv2.VideoWriter('cut_4_output.mp4', fourcc, 15.0, (int(w), int(h)))
# fgbg = cv2.bgsegm.createBackgroundSubtractorMOG()

# store the position of each bounding box
f = open('{}.csv'.format(videoFilePath), 'w')
writer = csv.writer(f, delimiter=',')
writer.writerow(['frame_id', 'track_id', 'x', 'y', 'w', 'h'])
f.flush()

# load the deep_sort/sort tracker
encoder = None
tracker = Sort()
# metric = nn_matching.NearestNeighborDistanceMetric("cosine", 0.2, 100)
# tracker = Tracker(metric)
# encoder = generate_detections.create_box_encoder("/Users/deanzhang/Desktop/learnable.ai_project/tf-faster-rcnn/tools/deep_sort/resources/networks/mars-small128.ckpt-68577")

frame_id = 0
while True:
    frame_id += 1
    ret, image = videoFile.read()
    if not ret:  # stop at the end of the stream
        break
    im = demo_video(sess, net, image, f, writer, frame_id, encoder, tracker)
    out.write(im)
videoFile.release()
out.release()
cv2.destroyAllWindows()

def camera(self):
    file = self.FLAGS.demo
    SaveVideo = self.FLAGS.saveVideo
    if self.FLAGS.track:
        if self.FLAGS.tracker == "deep_sort":
            from deep_sort import generate_detections
            from deep_sort.deep_sort import nn_matching
            from deep_sort.deep_sort.tracker import Tracker
            metric = nn_matching.NearestNeighborDistanceMetric("cosine", 0.2, 100)
            tracker = Tracker(metric)
            encoder = generate_detections.create_box_encoder(
                os.path.abspath("deep_sort/resources/networks/mars-small128.ckpt-68577"))
        elif self.FLAGS.tracker == "sort":
            from sort.sort import Sort
            encoder = None
            tracker = Sort()
    if self.FLAGS.BK_MOG and self.FLAGS.track:
        fgbg = cv2.bgsegm.createBackgroundSubtractorMOG()
    # if file == 'camera':
    #     file = 0
    # else:
    #     assert os.path.isfile(file), \
    #         'file {} does not exist'.format(file)
    camera1 = cv2.VideoCapture(file[0])
    camera2 = cv2.VideoCapture(file[1])
    camera3 = cv2.VideoCapture(file[2])
    # if file == 0:
    #     self.say('Press [ESC] to quit video')
    # assert camera.isOpened(), \
    #     'Cannot capture source'
    if self.FLAGS.csv:
        f = open('{}.csv'.format(file), 'w')
        writer = csv.writer(f, delimiter=',')
        writer.writerow(['frame_id', 'track_id', 'x', 'y', 'w', 'h'])
        f.flush()
    else:
        f = None
        writer = None
    # window and VideoWriter setup left commented out in the original:
    # if file == 0:  # camera window
    #     cv2.namedWindow('', 0)
    #     _, frame = camera.read()
    #     height, width, _ = frame.shape
    #     cv2.resizeWindow('', width, height)
    # else:
    #     _, frame = camera.read()
    #     height, width, _ = frame.shape
    # if SaveVideo:
    #     fourcc = cv2.VideoWriter_fourcc(*'XVID')
    #     if file == 0:  # camera window
    #         fps = 1 / self._get_fps(frame)
    #         if fps < 1:
    #             fps = 1
    #     else:
    #         fps = round(camera1.get(cv2.CAP_PROP_FPS))
    #     videoWriter = cv2.VideoWriter(
    #         'output_{}'.format(file), fourcc, fps, (width, height))
    # buffers for demo in batch
    buffer_inp = list()
    buffer_pre = list()
    elapsed = 0
    start = timer()
    self.say('Press [ESC] to quit demo')
    # postprocessed = []
    # Loop through frames
    n = 0
    while camera1.isOpened() and camera2.isOpened() and camera3.isOpened():
        elapsed += 1
        ret1, frame1 = camera1.read()
        ret2, frame2 = camera2.read()
        ret3, frame3 = camera3.read()
        # if ret1 and ret2 and ret3:
        h1, w1 = frame1.shape[:2]
        vis = np.concatenate((frame2, frame1, frame3), axis=1)
        if self.FLAGS.skip != n:
            n += 1
            continue
        n = 0
        # single-camera version of this loop, left commented out in the original:
        # while camera.isOpened():
        #     elapsed += 1
        #     _, frame = camera.read()
        #     if frame is None:
        #         print('\nEnd of Video')
        #         break
        #     if self.FLAGS.skip != n:
        #         n += 1
        #         continue
        #     n = 0
        if self.FLAGS.BK_MOG and self.FLAGS.track:
            fgmask = fgbg.apply(vis)
        else:
            fgmask = None
        preprocessed = self.framework.preprocess(vis)
        buffer_inp.append(vis)
        buffer_pre.append(preprocessed)
        # Only process and imshow when queue is full
        if elapsed % self.FLAGS.queue == 0:
            feed_dict = {self.inp: buffer_pre}
            net_out = self.sess.run(self.out, feed_dict)
            for img, single_out in zip(buffer_inp, net_out):
                if not self.FLAGS.track:
                    postprocessed = self.framework.postprocess(single_out, img)
                else:
                    # print("else hi")
                    postprocessed = self.framework.postprocess(
                        single_out, img, frame_id=elapsed,
                        csv_file=f, csv=writer, mask=fgmask,
                        encoder=encoder, tracker=tracker)
                if SaveVideo:
                    videoWriter.write(postprocessed)
                if self.FLAGS.display:
                    cv2.imshow('This is postprocessed', postprocessed)
            # Clear Buffers
            buffer_inp = list()
            buffer_pre = list()
        if elapsed % 5 == 0:
            sys.stdout.write('\r')
            sys.stdout.write('{0:3.3f} FPS'.format(elapsed / (timer() - start)))
            sys.stdout.flush()
        if self.FLAGS.display:
            choice = cv2.waitKey(1)
            if choice == 27:
                break
    sys.stdout.write('\n')
    if SaveVideo:
        videoWriter.release()
    if self.FLAGS.csv:
        f.close()
    camera1.release()
    camera2.release()
    camera3.release()
    if self.FLAGS.display:
        cv2.destroyAllWindows()

def run(detector, number, cam, mask_name, date, name):
    print('Processing video number ', number)
    video_capture = cv2.VideoCapture('/media/aioz-trung-intern/data/sml/' + cam + '/' + name)
    w = 640
    h = 480
    # if writeVideo_flag:
    #     # Define the codec and create VideoWriter object
    #     w = int(video_capture.get(3))
    #     h = int(video_capture.get(4))
    fourcc = cv2.VideoWriter_fourcc(*'MP4V')
    out = cv2.VideoWriter('data_res/res_' + cam + '/o_' + name, fourcc, 30, (w, h))
    fps = 0.0
    mask = cv2.imread(mask_name, 0)
    contours, _ = cv2.findContours(np.expand_dims(mask, axis=2),
                                   cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE)
    cont_sorted = sorted(contours, key=cv2.contourArea, reverse=True)[:5]
    x, y, wi, he = cv2.boundingRect(cont_sorted[0])
    # init tracker
    tracker = Sort(use_dlib=True)  # create instance of the SORT tracker
    # Display init
    # colours = np.random.rand(32, 3)  # used only for display
    # plt.ion()
    # fig = plt.figure()
    bbox_stack = []
    avg_people = 0
    count = 0
    video_mask_frame = np.zeros(shape=[480, 640], dtype=np.float64)
    nop_list = []
    x_mask = []
    y_mask = []
    while video_capture.isOpened():
        ret, frame = video_capture.read()  # frame shape 640*480*3
        if not ret:
            break
        count += 1
        org_frame = cv2.resize(frame, (w, h), interpolation=cv2.INTER_AREA)
        mask_frame = np.zeros(shape=[480, 640], dtype=np.uint8)
        # frame = org_frame.copy()
        # frame[mask == 0] = [0, 0, 0]
        # cv2.imwrite('frame.jpg', frame)
        # break
        # if count == 200:
        #     break
        t1 = time.time()
        boxs, _, _ = process(detector, org_frame)
        if boxs.shape[0] != 0:
            # convert (x1, y1, x2, y2) corners to (x, y, w, h)
            boxs[:, 2] = boxs[:, 2] - boxs[:, 0]
            boxs[:, 3] = boxs[:, 3] - boxs[:, 1]
        # boxs = yolo.detect_image(image)
        # print("box ", np.asarray(boxs).shape, " box: ", boxs)
        # break
        # Draw bbox
        # print(len(boxs))
        num_of_person = 0
        filtered_bbox = []
        for bbox in boxs:
            if check_intersect(bbox, mask):
                avg_people += 1
                filtered_bbox.append(bbox)
                # cv2.rectangle(org_frame, (int(bbox[0]), int(bbox[1])),
                #               (int(bbox[0] + bbox[2]), int(bbox[1] + bbox[3])), (255, 0, 0), 2)
        if len(bbox_stack) != stack_num:
            bbox_stack.append(filtered_bbox)
            for bbox in filtered_bbox:
                cv2.rectangle(org_frame, (int(bbox[0]), int(bbox[1])),
                              (int(bbox[0] + bbox[2]), int(bbox[1] + bbox[3])),
                              (255, 0, 0), 2)
                cv2.circle(mask_frame,
                           (int((2 * bbox[0] + bbox[2]) / 2), int((2 * bbox[1] + bbox[3]) / 2)),
                           20, (255), -1)
                x_mask.append(int((2 * bbox[0] + bbox[2]) / 2))
                y_mask.append(int((2 * bbox[1] + bbox[3]) / 2))
            num_of_person = len(filtered_bbox)
        else:
            bbox_stack_len = [len(x) for x in bbox_stack]
            list_counter = Counter(bbox_stack_len)
            argmax = np.argmax(list(list_counter.values()))
            key = list(list_counter.keys())[argmax]
            index = [i for i, e in enumerate(bbox_stack_len) if e == key]
            if key != len(filtered_bbox):
                for bbox in bbox_stack[index[-1]]:
                    cv2.rectangle(org_frame, (int(bbox[0]), int(bbox[1])),
                                  (int(bbox[0] + bbox[2]), int(bbox[1] + bbox[3])),
                                  (255, 0, 0), 2)
                    cv2.circle(mask_frame,
                               (int((2 * bbox[0] + bbox[2]) / 2), int((2 * bbox[1] + bbox[3]) / 2)),
                               20, (255), -1)
                    x_mask.append(int((2 * bbox[0] + bbox[2]) / 2))
                    y_mask.append(int((2 * bbox[1] + bbox[3]) / 2))
                num_of_person = len(bbox_stack[index[-1]])
            else:
                for bbox in filtered_bbox:
                    cv2.rectangle(org_frame, (int(bbox[0]), int(bbox[1])),
                                  (int(bbox[0] + bbox[2]), int(bbox[1] + bbox[3])),
                                  (255, 0, 0), 2)
                    cv2.circle(mask_frame,
                               (int((2 * bbox[0] + bbox[2]) / 2), int((2 * bbox[1] + bbox[3]) / 2)),
                               20, (255), -1)
                    x_mask.append(int((2 * bbox[0] + bbox[2]) / 2))
                    y_mask.append(int((2 * bbox[1] + bbox[3]) / 2))
                num_of_person = len(filtered_bbox)
            bbox_stack.append(filtered_bbox)
            # # index = bbox_stack_len.index(key)
            # index = [i for i, e in enumerate(bbox_stack_len) if e == key]
            del bbox_stack[0]
            # if index[-1] == len(filtered_bbox):
            #     del bbox_stack[0]
            # else:
            #     del bbox_stack[index[0]]
            # else:
            #     for bbox in filtered_bbox:
            #         cv2.rectangle(org_frame, (int(bbox[0]), int(bbox[1])),
            #                       (int(bbox[0] + bbox[2]), int(bbox[1] + bbox[3])), (255, 0, 0), 2)
        cv2.imshow('mask' + cam, cv2.threshold(mask_frame, 1, 255, cv2.THRESH_BINARY)[1])
        nop_list.append(int(num_of_person))
        video_mask_frame += cv2.threshold(mask_frame, 1, 255, cv2.THRESH_BINARY)[1] / 255.0
        # cv2.rectangle(org_frame, (x, y), (x + wi, y + he), (0, 0, 255), 2)
        # cv2.drawContours(org_frame, contours, 0, (0, 255, 0), 1)
        # Update tracker
        # print(org_frame.shape, boxs)
        # detections = np.array(boxs)
        # if detections.shape[0] != 0:
        #     detections[:, 2] = detections[:, 2] + detections[:, 0]
        #     detections[:, 3] = detections[:, 3] + detections[:, 1]
        # trackers = tracker.update(detections, frame)
        # Put number and fps
        fps = (fps + (1. / (time.time() - t1))) / 2
        cv2.putText(org_frame, 'FPS: ' + str(int(fps)), (10, 35),
                    cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2, cv2.LINE_AA)
        cv2.putText(org_frame, 'Pps: ' + str(num_of_person), (10, 65),
                    cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2, cv2.LINE_AA)
        # matplotlib display block, left commented out in the original:
        # ax1 = fig.add_subplot(111, aspect='equal')
        # ax1.imshow(org_frame)
        # for d in trackers:
        #     # f_out.write('%d,%d,%d,%d,x,x,x,x,%.3f,%.3f,%.3f,%.3f\n' % (d[4], frame, 1, 1, d[0], d[1], d[2], d[3]))
        #     d = d.astype(np.int32)
        #     ax1.add_patch(patches.Rectangle((d[0], d[1]), d[2] - d[0], d[3] - d[1],
        #                                     fill=False, lw=3, ec=colours[d[4] % 32, :]))
        #     ax1.set_adjustable('box')
        #     # label
        #     ax1.annotate('id = %d' % (d[4]), xy=(d[0], d[1]), xytext=(d[0], d[1]))
        # if detections != []:  # detector is active in this frame
        #     ax1.annotate(" DETECTOR", xy=(5, 45), xytext=(5, 45))
        # plt.axis('off')
        # fig.canvas.flush_events()
        # plt.draw()
        # fig.tight_layout()
        # # save the frame with tracking boxes
        # ax1.cla()
        # Apply transparent mask to frame
        tmp_mask = org_frame.copy()
        tmp_mask[mask == 255] = 255
        alpha = 0.6
        cv2.addWeighted(org_frame, alpha, tmp_mask, 1 - alpha, 0, org_frame)
        # Write video
        out.write(org_frame)
        cv2.imshow('original' + cam, org_frame)
        # cv2.imshow('masking', frame)
        # Press Q to stop!
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
    video_capture.release()
    out.release()
    cv2.destroyAllWindows()
    np.savetxt('data_res/result/' + str(number) + '.txt', [avg_people / count])
    np.savetxt('data_res/res_' + cam + '/' + name.replace('.mp4', '') + '.txt', nop_list, fmt='%d')
    np.savetxt('data_res/cor_res_' + cam + '/x' + name.replace('.mp4', '') + '.txt', x_mask, fmt='%d')
    np.savetxt('data_res/cor_res_' + cam + '/y' + name.replace('.mp4', '') + '.txt', y_mask, fmt='%d')
    # Normalize and save
    # video_mask_frame = np.array(255 * (video_mask_frame - min(video_mask_frame.flatten())) /
    #                             (max(video_mask_frame.flatten()) - min(video_mask_frame.flatten())), dtype=np.uint8)
    # print(video_mask_frame)
    # ax = sns.heatmap(video_mask_frame, vmin=0, vmax=1, cmap='jet')
    # plt.savefig('data_res/heat_res_' + cam + '/' + 'heat_' + cam + '_' + str(number) + '.png')
    # imC = cv2.applyColorMap(video_mask_frame, cv2.COLORMAP_JET)
    # cv2.imwrite('data_res/heat_res_' + cam + '/' + 'heat_' + cam + '_' + str(number) + '.jpg', imC)
    return video_mask_frame

# Camera variables
CAMERA_INFO = None
CAMERA_EXTRINSICS = None
CAMERA_PROJECTION_MATRIX = None

# Frames
RADAR_FRAME = 'ti_mmwave'
EGO_VEHICLE_FRAME = 'rviz'
CAMERA_FRAME = 'rc_car/camera'

# Perception models
yolov3 = YOLO(configPath='cfg/yolov3-rc.cfg',
              weightPath='weights/yolov3-rc.weights',
              metaPath='cfg/rc-car_shoes.data')
ipm = InversePerspectiveMapping()
tracker = Sort(max_age=200, min_hits=1, use_dlib=False)

# FPS loggers
FRAME_COUNT = 0
all_fps = FPSLogger('Pipeline')
yolo_fps = FPSLogger('YOLOv3')
sort_fps = FPSLogger('Tracker')


########################### Functions ###########################

def camera_info_callback(camera_info):
    global CAMERA_INFO, CAMERA_PROJECTION_MATRIX
    if CAMERA_INFO is None:
        CAMERA_INFO = camera_info
        CAMERA_PROJECTION_MATRIX = np.matmul(

def detect_and_track(file_path, save_path, detection_mode="SSD"):
    # output video size and fps, used when saving is enabled
    size = (640, 480)
    save_fps = 24
    # assume at most 300 targets in a frame; generate 300 random colours
    colours = np.random.rand(300, 3) * 255
    # set True to save the annotated video
    write_video_flag = True
    video_capture = cv2.VideoCapture(file_path)
    mot_tracker = Sort()
    if write_video_flag:
        output_video = cv2.VideoWriter(
            save_path + 'output.mp4',
            cv2.VideoWriter_fourcc('m', 'p', '4', 'v'), save_fps, size)
        object_list_file = open(save_path + 'detection.txt', 'w')
        frame_index = -1
    if detection_mode == "SSD":
        ssd = SSD()
    elif detection_mode == "YOLO3":
        yolo = YOLO()
    elif detection_mode == "CENTERNET":
        centernet = CenterNet()
    # 'appear' records how many frames each seen target persists;
    # 'number' counts all distinct targets seen so far
    appear = {}
    number = 0
    while True:
        ret, frame = video_capture.read()
        if not ret:
            break
        frame = cv2.resize(frame, size)
        # record the time at which this frame starts processing
        start_time = time.time()
        if detection_mode == "SSD":
            image = frame
            classes, scores, bboxes = ssd.process_image(image)
            # get top-left and bottom-right corner coordinates of each detected target
            result = np.array(
                detect_and_visualization_image.plt_bboxes(image, classes, scores, bboxes))
            rbboxes = []
            for object in result:
                rbboxes.append([object[0], object[1], object[2], object[3]])
        elif detection_mode == "YOLO3":
            image = Image.fromarray(frame[..., ::-1])
            # bboxes are in [x, y, w, h] form, scores are confidences,
            # rbboxes use top-left + bottom-right corner form
            bboxes, scores, rbboxes = yolo.detect_image(image)
            result = []
            for box, score in zip(rbboxes, scores):
                # use top-left and bottom-right corners for tracking; the image
                # origin is the top-left corner, x grows rightward, y downward
                ymin, xmin, ymax, xmax = box
                xmin, ymin = max(0, np.floor(xmin + 0.5).astype('int32')), \
                             max(0, np.floor(ymin + 0.5).astype('int32'))
                xmax, ymax = min(image.size[0], np.floor(xmax + 0.5).astype('int32')), \
                             min(image.size[1], np.floor(ymax + 0.5).astype('int32'))
                result.append([xmin, ymin, xmax, ymax, score])
            result = np.array(result)
        elif detection_mode == "CENTERNET":
            image = frame
            # these boxes are already top-left and bottom-right corner coordinates
            rbboxes, scores, classes = centernet.detect_image(image)
            result = []
            for i in range(len(rbboxes)):
                result.append([rbboxes[i][0], rbboxes[i][1], rbboxes[i][2],
                               rbboxes[i][3], scores[i]])
            result = np.array(result)
        if len(result) != 0:
            # take the detector output as [x1, y1, x2, y2, score]
            det = result[:, 0:5]
        else:
            det = result
        # run SORT data association and tracking
        trackers = mot_tracker.update(det)
        for object in trackers:
            xmin, ymin, xmax, ymax, index = int(object[0]), int(object[1]), \
                int(object[2]), int(object[3]), int(object[4])
            color = (int(colours[index % 300, 0]), int(colours[index % 300, 1]),
                     int(colours[index % 300, 2]))
            cv2.rectangle(frame, (xmin, ymin), (xmax, ymax), color, 2)
            cv2.putText(frame, str(index), (xmin, ymin), 0, 5e-3 * 200, color, 2)
            if index in appear.keys():
                appear[index] += 1
            else:
                number += 1
                appear[index] = 1
        show_fps = 1. / (time.time() - start_time)
        cv2.putText(frame, text="FPS: " + str(int(show_fps)), org=(3, 15),
                    fontFace=cv2.FONT_HERSHEY_SIMPLEX, fontScale=0.50,
                    color=(0, 255, 0), thickness=2)
        cv2.putText(frame, text="number: " + str(number), org=(3, 30),
                    fontFace=cv2.FONT_HERSHEY_SIMPLEX, fontScale=0.50,
                    color=(0, 255, 0), thickness=2)
        cv2.imshow('result', frame)
        if write_video_flag:
            # save this frame of the video
            output_video.write(frame)
            # advance the frame index
            frame_index = frame_index + 1
            # write the frame index to detection.txt
            object_list_file.write(str(frame_index) + ' ')
            # write every detected box for the frame, as top-left and
            # bottom-right corner coordinates
            if len(rbboxes) != 0:
                for i in range(0, len(rbboxes)):
                    object_list_file.write(
                        str(rbboxes[i][0]) + ' ' + str(rbboxes[i][1]) + ' ' +
                        str(rbboxes[i][2]) + ' ' + str(rbboxes[i][3]) + ' ')
            object_list_file.write('\n')
        # press q to quit
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
    video_capture.release()
    if write_video_flag:
        output_video.release()
        object_list_file.close()
    cv2.destroyAllWindows()

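# A hedged invocation example for the function above; the paths are
# placeholders, and the output directory is assumed to already exist:
if __name__ == '__main__':
    detect_and_track('input.mp4', 'results/', detection_mode="YOLO3")
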
def sort(yolo, args):
    images_input = True if os.path.isdir(args.input) else False
    if images_input:
        # get images list
        jpeg_files = glob.glob(os.path.join(args.input, '*.jpeg'))
        jpg_files = glob.glob(os.path.join(args.input, '*.jpg'))
        frame_capture = jpeg_files + jpg_files
        frame_capture.sort()
    else:
        # create video capture stream
        frame_capture = cv2.VideoCapture(0 if args.input == '0' else args.input)
        if not frame_capture.isOpened():
            raise IOError("Couldn't open webcam or video")
    # create video save stream if needed
    save_output = True if args.output != "" else False
    if save_output:
        if images_input:
            raise ValueError("image folder input cannot be saved to video file")
        # here we encode the video to MPEG-4 for better compatibility; you can use
        # ffmpeg later to convert it to x264 to reduce file size:
        # ffmpeg -i test.mp4 -vcodec libx264 -f mp4 test_264.mp4
        #
        # video_FourCC = cv2.VideoWriter_fourcc(*'XVID') if args.input == '0' else int(frame_capture.get(cv2.CAP_PROP_FOURCC))
        video_FourCC = cv2.VideoWriter_fourcc(*'XVID') if args.input == '0' \
            else cv2.VideoWriter_fourcc(*"mp4v")
        video_fps = frame_capture.get(cv2.CAP_PROP_FPS)
        video_size = (int(frame_capture.get(cv2.CAP_PROP_FRAME_WIDTH)),
                      int(frame_capture.get(cv2.CAP_PROP_FRAME_HEIGHT)))
        out = cv2.VideoWriter(args.output, video_FourCC,
                              (5. if args.input == '0' else video_fps), video_size)
    if args.tracking_classes_path:
        # load the object classes used in tracking if given; other classes
        # from the detector will be ignored
        tracking_class_names = get_classes(args.tracking_classes_path)
    else:
        tracking_class_names = None
    # create instance of the SORT tracker
    tracker = Sort(max_age=5, min_hits=3, iou_threshold=0.3)
    # alloc a set of queues to record the motion trace for each track id
    motion_traces = [deque(maxlen=30) for _ in range(9999)]
    total_obj_counter = []
    # initialize a list of colors to represent each possible class label
    np.random.seed(100)
    COLORS = np.random.randint(0, 255, size=(200, 3), dtype="uint8")
    i = 0
    fps = 0.0
    while True:
        ret, frame = get_frame(frame_capture, i, images_input)
        if not ret:
            break
        # time.sleep(0.2)
        i += 1
        start_time = time.time()
        image = Image.fromarray(frame[..., ::-1])  # bgr to rgb
        # detect objects in the image
        _, out_boxes, out_classnames, out_scores = yolo.detect_image(image)
        # get tracking objects
        boxes, class_names, scores = get_tracking_object(
            out_boxes, out_classnames, out_scores, tracking_class_names, convert_box=False)
        # form up detection records
        if len(boxes) != 0:
            detections = np.array([bbox + [score] for bbox, score, class_name
                                   in zip(boxes, scores, class_names)])
        else:
            detections = np.empty((0, 5))
        # Call the tracker
        tracks = tracker.update(detections)
        # show all detection results as white boxes
        for j, bbox in enumerate(boxes):
            cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])),
                          (int(bbox[2]), int(bbox[3])), (255, 255, 255), 2)
            cv2.putText(frame, class_names[j], (int(bbox[0]), int(bbox[1] - 20)),
                        0, 5e-3 * 150, (255, 255, 255), 2)
        track_indexes = []
        track_count = 0
        for track in tracks:
            bbox = track[:4]
            track_id = int(track[4])
            # record tracking info and get bbox
            track_indexes.append(int(track_id))
            total_obj_counter.append(int(track_id))
            # show all tracking results as colored boxes
            color = [int(c) for c in COLORS[track_id % len(COLORS)]]
            cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])),
                          (int(bbox[2]), int(bbox[3])), (color), 3)
            cv2.putText(frame, str(track_id), (int(bbox[0]), int(bbox[1] - 20)),
                        0, 5e-3 * 150, (color), 2)
            # if track.class_name:
            #     cv2.putText(frame, str(track.class_name),
            #                 (int(bbox[0] + 30), int(bbox[1] - 20)), 0, 5e-3 * 150, (color), 2)
            track_count += 1
            # get the center point (x, y) of the current track bbox and record it in the queue
            center = (int((bbox[0] + bbox[2]) / 2), int((bbox[1] + bbox[3]) / 2))
            motion_traces[track_id].append(center)
            # draw the current center point
            thickness = 5
            cv2.circle(frame, (center), 1, color, thickness)
            # draw the motion trace
            motion_trace = motion_traces[track_id]
            for j in range(1, len(motion_trace)):
                if motion_trace[j - 1] is None or motion_trace[j] is None:
                    continue
                thickness = int(np.sqrt(64 / float(j + 1)) * 2)
                cv2.line(frame, (motion_trace[j - 1]), (motion_trace[j]), (color), thickness)
        # show tracking statistics
        total_obj_num = len(set(total_obj_counter))
        cv2.putText(frame, "Total Object Counter: " + str(total_obj_num),
                    (int(20), int(120)), 0, 5e-3 * 200, (0, 255, 0), 2)
        cv2.putText(frame, "Current Object Counter: " + str(track_count),
                    (int(20), int(80)), 0, 5e-3 * 200, (0, 255, 0), 2)
        cv2.putText(frame, "FPS: %f" % (fps), (int(20), int(40)),
                    0, 5e-3 * 200, (0, 255, 0), 3)
        # refresh window
        cv2.namedWindow("SORT", 0)
        cv2.resizeWindow('SORT', 1024, 768)
        # cv2.imshow('SORT', frame)  # Xander commented out
        if save_output:
            # save a frame
            out.write(frame)
        end_time = time.time()
        fps = (fps + (1. / (end_time - start_time))) / 2
        # Press q to stop video
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
    # Release everything when the job is finished
    if not images_input:
        frame_capture.release()
    if save_output:
        out.release()
    cv2.destroyAllWindows()

def camera(self):
    file = self.FLAGS.demo
    SaveVideo = self.FLAGS.saveVideo
    if self.FLAGS.track:
        if self.FLAGS.tracker == "deep_sort":
            from deep_sort import generate_detections
            from deep_sort.deep_sort import nn_matching
            from deep_sort.deep_sort.tracker import Tracker
            metric = nn_matching.NearestNeighborDistanceMetric("cosine", 0.2, 100)
            tracker = Tracker(metric)
            encoder = generate_detections.create_box_encoder(
                os.path.abspath("deep_sort/resources/networks/mars-small128.ckpt-68577"))
        elif self.FLAGS.tracker == "sort":
            from sort.sort import Sort
            encoder = None
            tracker = Sort()
    if self.FLAGS.BK_MOG and self.FLAGS.track:
        fgbg = cv2.createBackgroundSubtractorMOG2()
    if file == 'camera':
        file = 0
    else:
        assert os.path.isfile(file), \
            'file {} does not exist'.format(file)
    vid = imageio.get_reader(file, 'ffmpeg')  # cv2.VideoCapture(file)
    if file == 0:
        self.say('Press [ESC] to quit video')
    # assert camera.isOpened(), \
    #     'Cannot capture source'
    if self.FLAGS.csv:
        f = open('{}.csv'.format(file), 'w')
        writer = csv.writer(f, delimiter=',')
        writer.writerow(['frame_id', 'track_id', 'x', 'y', 'w', 'h'])
        f.flush()
    else:
        f = None
        writer = None
    # buffers for demo in batch
    buffer_inp = list()
    buffer_pre = list()
    elapsed = 0
    start = timer()
    self.say('Press [ESC] to quit demo')
    # postprocessed = []
    # Loop through frames
    n = 0
    plt.ion()
    fig = plt.figure()
    ax = plt.gca()
    frame = vid.get_data(0)
    img_artist = ax.imshow(frame)
    for num in range(1, 20000):
        try:
            frame = vid.get_data(num)
            print(num)
        except:
            break
        elapsed += 1
        # _, frame = camera.read()
        if frame is None:
            print('\nEnd of Video')
            break
        if self.FLAGS.skip != n:
            n += 1
            continue
        n = 0
        if self.FLAGS.BK_MOG and self.FLAGS.track:
            fgmask = fgbg.apply(frame)
        else:
            fgmask = None
        preprocessed = self.framework.preprocess(frame)
        buffer_inp.append(frame)
        buffer_pre.append(preprocessed)
        # Only process and imshow when queue is full
        if elapsed % self.FLAGS.queue == 0:
            feed_dict = {self.inp: buffer_pre}
            net_out = self.sess.run(self.out, feed_dict)
            for img, single_out in zip(buffer_inp, net_out):
                if not self.FLAGS.track:
                    postprocessed = self.framework.postprocess(single_out, img)
                else:
                    postprocessed = self.framework.postprocess(
                        single_out, img, frame_id=elapsed,
                        csv_file=f, csv=writer, mask=fgmask,
                        encoder=encoder, tracker=tracker)
                if self.FLAGS.display:
                    # cv2.imshow('', postprocessed)
                    img_artist.set_data(postprocessed)
                    plt.show()
                    plt.pause(0.00001)
            # Clear Buffers
            buffer_inp = list()
            buffer_pre = list()
        if elapsed % 5 == 0:
            sys.stdout.write('\r')
            sys.stdout.write('{0:3.3f} FPS'.format(elapsed / (timer() - start)))
            sys.stdout.flush()
    sys.stdout.write('\n')
    if self.FLAGS.csv:
        f.close()

def camera(self):
    file = self.FLAGS.demo
    SaveVideo = self.FLAGS.saveVideo
    detectedObjects = []
    if self.FLAGS.track:
        if self.FLAGS.tracker == "deep_sort":
            from deep_sort import generate_detections
            from deep_sort.deep_sort import nn_matching
            from deep_sort.deep_sort.tracker import Tracker
            metric = nn_matching.NearestNeighborDistanceMetric("cosine", 0.2, 100)
            tracker = Tracker(metric)
            encoder = generate_detections.create_box_encoder(
                os.path.abspath("deep_sort/resources/networks/mars-small128.ckpt-68577"))
        elif self.FLAGS.tracker == "sort":
            from sort.sort import Sort
            encoder = None
            tracker = Sort()
    if self.FLAGS.BK_MOG and self.FLAGS.track:
        fgbg = cv2.bgsegm.createBackgroundSubtractorMOG()
    if file == 'camera':
        file = 0
    else:
        assert os.path.isfile(file), \
            'file {} does not exist'.format(file)
    camera = cv2.VideoCapture(file)
    # if file == 0:
    #     self.say('Press [ESC] to quit video')
    assert camera.isOpened(), \
        'Cannot capture source'
    savedPath = 'result/output_{}'.format(os.path.basename(file))
    if self.FLAGS.csv:
        f = open('{}.csv'.format(file), 'w')
        writer = csv.writer(f, delimiter=',')
        writer.writerow(['frame_id', 'track_id', 'x', 'y', 'w', 'h'])
        f.flush()
    else:
        f = None
        writer = None
    if file == 0:  # camera window
        cv2.namedWindow('', 0)
        _, frame = camera.read()
        height, width, _ = frame.shape
        cv2.resizeWindow('', width, height)
    else:
        _, frame = camera.read()
        height, width, _ = frame.shape
    if SaveVideo:
        fourcc = cv2.VideoWriter_fourcc(*'XVID')
        if file == 0:  # camera window
            fps = 1 / self._get_fps(frame)
            if fps < 1:
                fps = 1
        else:
            fps = round(camera.get(cv2.CAP_PROP_FPS))
        videoWriter = cv2.VideoWriter(savedPath, fourcc, fps, (width, height))
    # buffers for demo in batch
    buffer_inp = list()
    buffer_pre = list()
    elapsed = 0
    start = timer()
    # self.say('Press [ESC] to quit demo')
    # postprocessed = []
    # Loop through frames
    n = 0
    while camera.isOpened():
        elapsed += 1
        _, frame = camera.read()
        if frame is None:
            # print('\nEnd of Video')
            break
        if self.FLAGS.skip != n:
            n += 1
            continue
        n = 0
        if self.FLAGS.BK_MOG and self.FLAGS.track:
            fgmask = fgbg.apply(frame)
        else:
            fgmask = None
        preprocessed = self.framework.preprocess(frame)
        buffer_inp.append(frame)
        buffer_pre.append(preprocessed)
        # Only process and imshow when queue is full
        if elapsed % self.FLAGS.queue == 0:
            feed_dict = {self.inp: buffer_pre}
            net_out = self.sess.run(self.out, feed_dict)
            for img, single_out in zip(buffer_inp, net_out):
                if not self.FLAGS.track:
                    postprocessed = self.framework.postprocess(single_out, img)
                else:
                    postprocessedTuple = self.framework.postprocess(
                        single_out, img, frame_id=elapsed,
                        csv_file=f, csv=writer, mask=fgmask,
                        encoder=encoder, tracker=tracker)
                    postprocessed = postprocessedTuple[0]
                    detectedObjects.append(postprocessedTuple[1])
                if SaveVideo:
                    videoWriter.write(postprocessed)
                if self.FLAGS.display:
                    cv2.imshow('', postprocessed)
            # Clear Buffers
            buffer_inp = list()
            buffer_pre = list()
        if elapsed % 5 == 0:
            # sys.stdout.write('\r')
            # sys.stdout.write('{0:3.3f} FPS'.format(
            #     elapsed / (timer() - start)))
            sys.stdout.flush()
        if self.FLAGS.display:
            choice = cv2.waitKey(1)
            if choice == 27:
                break
    flattenObjects = sum(detectedObjects, [])
    frameDictionary = dict()
    numDictionary = dict()
    totalFrame = int(camera.get(7))  # CAP_PROP_FRAME_COUNT
    fps = int(camera.get(5))  # CAP_PROP_FPS
    illegalSecond = 30
    stopFrameCountThreshold = fps * illegalSecond
    for object in flattenObjects:
        frameDictionary \
            .setdefault(object.frame, []).append(object.num)
        numDictionary \
            .setdefault(object.num,
                        Car(object.num, Segment(object.frame, object.position),
                            stopFrameCountThreshold)) \
            .update(Segment(object.frame, object.position))
    resultJSON = {
        "frames": list(map(lambda key: {"id": key, "carNums": frameDictionary[key]},
                           frameDictionary)),
        "cars": list(map(lambda value: json.loads(
            json.dumps(value, default=lambda o: o.__dict__)), numDictionary.values())),
        "resultVideoPath": savedPath
    }
    print(json.dumps(resultJSON, indent=2))
    if SaveVideo:
        videoWriter.release()
    if self.FLAGS.csv:
        f.close()
    camera.release()
    if self.FLAGS.display:
        cv2.destroyAllWindows()