def get_video_reader(args, video_name):
    """Open a MEVA clip and return (frame-batch generator, num_batches).

    Args:
        args: namespace with video_path, use_lijun_video_loader,
            use_moviepy flags (plus whatever VideoEnqueuer reads).
        video_name: MEVA clip name; parsed into (date, hr_slot, camera)
            to locate the .avi under args.video_path.

    Returns:
        (get_frame_batches, num_batches) where get_frame_batches yields
        batches of size 1 with no frame skipping.

    Raises:
        Exception: if the video cannot be opened.
    """
    date, hr_slot, camera = parse_meva_clip_name(video_name)
    video_path = os.path.join(
        args.video_path, date, hr_slot, video_name + ".avi")

    if args.use_lijun_video_loader:
        vcap = VideoReader(video_path)
        frame_count = int(vcap.length)
    elif args.use_moviepy:
        vcap = VideoFileClip(video_path, audio=False)
        # fps * duration is only an estimate of the frame count
        frame_count = int(vcap.fps * vcap.duration)
        vcap = vcap.iter_frames()
    else:
        try:
            vcap = cv2.VideoCapture(video_path)
            if not vcap.isOpened():
                raise Exception("cannot open %s" % video_path)
        except Exception as e:
            # chain the original cause so the cv2 failure isn't lost
            raise Exception("warning, cannot open %s" % video_path) from e
        # opencv 3/4
        frame_count = vcap.get(cv2.CAP_PROP_FRAME_COUNT)

    # start reading frames into queues now
    video_queuer = VideoEnqueuer(
        args, vcap, frame_count,
        frame_gap=1,  # no skipping frames
        prefetch=100,
        start=True,
        is_moviepy=args.use_moviepy,
        batch_size=1)
    get_frame_batches = video_queuer.get()
    return get_frame_batches, video_queuer.num_batches
frame_count = int(vcap.fps * vcap.duration) # uh vcap = vcap.iter_frames() else: try: vcap = cv2.VideoCapture(videofile) if not vcap.isOpened(): raise Exception("cannot open %s" % videofile) except Exception as e: # raise e # just move on to the next video print("warning, cannot open %s" % videofile) continue # opencv 2 if cv2.__version__.split(".")[0] == "2": frame_count = vcap.get(cv2.cv.CV_CAP_PROP_FRAME_COUNT) else: # opencv 3/4 frame_count = vcap.get(cv2.CAP_PROP_FRAME_COUNT) # initialize tracking module if args.get_tracking: tracking_objs = args.tracking_objs.split(",") tracker_dict = {} tracking_results_dict = {} tmp_tracking_results_dict = {} for tracking_obj in tracking_objs: metric = metric = nn_matching.NearestNeighborDistanceMetric( "cosine", args.max_cosine_distance, args.nn_budget) tracker_dict[tracking_obj] = Tracker( metric, max_iou_distance=args.max_iou_distance)
def load_track_and_features(args, video_name, p_file, v_file, p_extractor,
                            v_extractor, hs):
    """Load person/vehicle tracks for a clip and extract appearance features.

    Reads the two tracking files, projects each box's foot point to
    top-down coordinates via the camera homography, crops a subsample of
    boxes per track from the video, and runs the extractors on the crops.

    Args:
        args: namespace with video_path, loader flags, feature_box_gap,
            feature_box_num, use_avg_pool.
        video_name: MEVA clip name; parsed into (date, hr_slot, camera).
        p_file: person tracking file (MOT-style CSV).
        v_file: vehicle tracking file (MOT-style CSV).
        p_extractor: callable mapping a list of RGB crops to a feature
            tensor with .cpu() — presumably torch-based; verify caller.
        v_extractor: same, for vehicles.
        hs: dict camera -> homography matrix.

    Returns:
        {"Person": {track_id: (track_array [K, 9], features)},
         "Vehicle": {...}} — tracks with no valid crops are dropped.

    Raises:
        Exception: if the video cannot be opened.
    """
    date, hr_slot, camera = parse_meva_clip_name(video_name)

    # start loading video_frames first
    video_path = os.path.join(
        args.video_path, date, hr_slot, video_name + ".avi")
    if args.use_lijun_video_loader:
        vcap = VideoReader(video_path)
        frame_count = int(vcap.length)
    elif args.use_moviepy:
        vcap = VideoFileClip(video_path, audio=False)
        # fps * duration is only an estimate of the frame count
        frame_count = int(vcap.fps * vcap.duration)
        vcap = vcap.iter_frames()
    else:
        try:
            vcap = cv2.VideoCapture(video_path)
            if not vcap.isOpened():
                raise Exception("cannot open %s" % video_path)
        except Exception as e:
            # chain the original cause so the cv2 failure isn't lost
            raise Exception("warning, cannot open %s" % video_path) from e
        # opencv 3/4
        frame_count = vcap.get(cv2.CAP_PROP_FRAME_COUNT)

    # start reading frames into queues now
    video_queuer = VideoEnqueuer(
        args, vcap, frame_count,
        frame_gap=1,  # no skipping frames
        prefetch=100,
        start=True,
        is_moviepy=args.use_moviepy,
        batch_size=1)
    get_frame_batches = video_queuer.get()

    def load_track_file(file_path, homography):
        """load a tracking file into dict of numpy arrays."""
        # assuming sorted by frameid
        data = []
        with open(file_path, "r") as f:
            for line in f:
                (frame_idx, track_id, left, top, width, height,
                 conf, _, _, _) = line.strip().split(",")
                data.append(
                    [frame_idx, track_id, left, top, width, height, conf])

        if not data:
            return {}
        data = np.array(data, dtype="float32")  # [N, 7]

        # compute topdown points: bottom-center of each box
        foot_points_x = data[:, 2] + data[:, 4] / 2.  # [N]
        foot_points_y = data[:, 3] + data[:, 5]
        foot_points = np.stack([foot_points_x, foot_points_y], axis=0)  # [2, N]
        top_down_points = warp_points(foot_points, homography)  # [2, N]
        top_down_points = np.transpose(top_down_points, [1, 0])  # [N, 2]

        data = np.concatenate([data, top_down_points], axis=1)  # [N, 9]

        track_ids = np.unique(data[:, 1]).tolist()
        track_data = {}  # track_id -> [K, 9]
        for track_id in track_ids:
            track_data[track_id] = data[data[:, 1] == track_id, :]
        return track_data

    # track_id -> data
    p_tracks = load_track_file(p_file, hs[camera])
    v_tracks = load_track_file(v_file, hs[camera])

    # 1. get each frame's boxes to extract
    frame_data = {}  # frame_idx -> a list of boxes

    def get_track_boxes(tracks, cat_name):
        """Subsample up to feature_box_num boxes per track, bucketed by frame."""
        for track_id in tracks:
            idxs = list(range(0, len(tracks[track_id]), args.feature_box_gap))
            idxs = idxs[:args.feature_box_num]
            boxes = tracks[track_id][idxs, :]  # [k, 9]
            for box_idx, box in enumerate(boxes):
                frame_idx = box[0]
                tlwh = box[2:6]
                if frame_idx not in frame_data:
                    frame_data[frame_idx] = []
                frame_data[frame_idx].append((tlwh, track_id, box_idx, cat_name))

    get_track_boxes(p_tracks, "Person")
    get_track_boxes(v_tracks, "Vehicle")

    # 2. go through the video once and crop all the images to extract features
    # assuming not conflict between person/vehicle track_id
    p_track_to_feat = {}  # "track_id" => features
    v_track_to_feat = {}  # "track_id" => features
    for batch in tqdm(get_frame_batches, total=video_queuer.num_batches):
        image, scale, frame_idx = batch[0]
        image = image.astype("uint8")  # need uint8 type
        if frame_idx in frame_data:
            for tlwh, track_id, box_idx, cat_name in frame_data[frame_idx]:
                # check box valid
                if valid_box(tlwh, image):
                    x, y, w, h = tlwh
                    x, y, w, h = int(x), int(y), int(w), int(h)
                    # extractors expect RGB; frames arrive as BGR
                    box_img = cv2.cvtColor(
                        image[y:y+h, x:x+w], cv2.COLOR_BGR2RGB)
                    if cat_name == "Person":
                        if track_id not in p_track_to_feat:
                            p_track_to_feat[track_id] = []
                        p_track_to_feat[track_id].append(box_img)
                    elif cat_name == "Vehicle":
                        if track_id not in v_track_to_feat:
                            v_track_to_feat[track_id] = []
                        v_track_to_feat[track_id].append(box_img)

    # 3. extract features
    def get_features(track_to_imgs, extractor):
        """Replace each track's crop list with its feature array, in place."""
        for track_id in track_to_imgs:
            box_imgs = track_to_imgs[track_id]
            track_to_imgs[track_id] = extractor(box_imgs).cpu().numpy()  # [K, 512]
            if args.use_avg_pool:
                # [1, 512]
                track_to_imgs[track_id] = np.mean(
                    track_to_imgs[track_id], axis=0, keepdims=True)

    get_features(p_track_to_feat, p_extractor)
    get_features(v_track_to_feat, v_extractor)

    data = {}

    def gather_data(track_data, track_features, cat_name):
        """Pair each track's boxes with its features under data[cat_name]."""
        data[cat_name] = {}
        for track_id in track_data:
            # ignore track with no valid boxes
            if track_id in track_features:
                data[cat_name][track_id] = (
                    track_data[track_id], track_features[track_id])

    gather_data(p_tracks, p_track_to_feat, "Person")
    gather_data(v_tracks, v_track_to_feat, "Vehicle")

    return data