def get_video_reader(args, video_name):
  date, hr_slot, camera = parse_meva_clip_name(video_name)
  video_path = os.path.join(
      args.video_path, date, hr_slot, video_name + ".avi")

  if args.use_lijun_video_loader:
    vcap = VideoReader(video_path)
    frame_count = int(vcap.length)
  elif args.use_moviepy:
    vcap = VideoFileClip(video_path, audio=False)
    # fps * duration is only an estimate of the frame count
    frame_count = int(vcap.fps * vcap.duration)
    vcap = vcap.iter_frames()
  else:
    vcap = cv2.VideoCapture(video_path)
    if not vcap.isOpened():
      raise Exception("cannot open %s" % video_path)
    # opencv 3/4
    frame_count = vcap.get(cv2.CAP_PROP_FRAME_COUNT)

  # start reading frames into queues now
  video_queuer = VideoEnqueuer(
      args, vcap, frame_count,
      frame_gap=1,  # no skipping frames
      prefetch=100,
      start=True,  # start the dequeuing thread right away
      is_moviepy=args.use_moviepy,
      batch_size=1)
  get_frame_batches = video_queuer.get()

  return get_frame_batches, video_queuer.num_batches
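
# A minimal usage sketch of get_video_reader. The clip name below is
# hypothetical; it just has to follow the MEVA naming that
# parse_meva_clip_name expects (date, hour slot, camera):
#
#   get_frame_batches, num_batches = get_video_reader(
#       args, "2018-03-11.11-25-01.11-30-01.school.G421")
#   for batch in tqdm(get_frame_batches, total=num_batches):
#     image, scale, frame_idx = batch[0]  # batch_size=1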
        metric, max_iou_distance=args.max_iou_distance)
    tracking_results_dict[tracking_obj] = []
    tmp_tracking_results_dict[tracking_obj] = {}

  # videoname = os.path.splitext(os.path.basename(videofile))[0]
  videoname = os.path.basename(videofile)

  video_obj_out_path = None
  if args.obj_out_dir is not None:  # not saving box json to save time
    video_obj_out_path = os.path.join(args.obj_out_dir, videoname)
    if not os.path.exists(video_obj_out_path):
      os.makedirs(video_obj_out_path)

  video_queuer = VideoEnqueuer(
      args, vcap, frame_count,
      frame_gap=args.frame_gap,
      prefetch=args.prefetch,
      start=True,  # start the dequeuing thread right away
      is_moviepy=args.use_moviepy,
      batch_size=args.im_batch_size)
  get_batches = video_queuer.get()

  for batch in tqdm(get_batches, total=video_queuer.num_batches):
    # batch is a list of (resized_image, scale, frame_count)
    valid_frame_num = len(batch)
    if len(batch) < args.im_batch_size:
      # pad the last, smaller batch by repeating its final frame
      batch += [batch[-1]] * (args.im_batch_size - len(batch))
    run_detect_and_track(args, batch, sess, model,
      if not vcap.isOpened():
        raise Exception("cannot open %s" % video_path)
    except Exception as e:
      # raise e
      # just move on to the next video
      print("warning, cannot open %s" % video_path)
      continue

    # opencv 3/4
    frame_count = vcap.get(cv2.CAP_PROP_FRAME_COUNT)

    # start reading frames into queues now
    video_queuer = VideoEnqueuer(
        args, vcap, frame_count,
        frame_gap=1,  # no skipping frames
        prefetch=100,
        start=True,  # start the dequeuing thread right away
        is_moviepy=args.use_moviepy,
        batch_size=1)
    get_frame_batches = video_queuer.get()

    # 1. read the tracklets and identify potential matching tracklets
    # for each query tracklet
    person_track_file = os.path.join(
        args.filepath, videoname, "Person",
        "%s.txt" % os.path.splitext(videoname)[0])
    vehicle_track_file = os.path.join(
        args.filepath, videoname, "Vehicle",
        "%s.txt" % os.path.splitext(videoname)[0])
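    # Expected layout, inferred from the two paths above:
    #   {args.filepath}/{videoname}/Person/{clip_id}.txt
    #   {args.filepath}/{videoname}/Vehicle/{clip_id}.txt
    # Each line is assumed to be MOT-style CSV with ten comma-separated
    # fields, of which the loader keeps the first seven:
    #   frame_idx,track_id,left,top,width,height,conf,<3 unused fields>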
def load_track_and_features(args, video_name, p_file, v_file, p_extractor,
                            v_extractor, hs):
  date, hr_slot, camera = parse_meva_clip_name(video_name)

  # start loading video frames first
  video_path = os.path.join(
      args.video_path, date, hr_slot, video_name + ".avi")

  if args.use_lijun_video_loader:
    vcap = VideoReader(video_path)
    frame_count = int(vcap.length)
  elif args.use_moviepy:
    vcap = VideoFileClip(video_path, audio=False)
    # fps * duration is only an estimate of the frame count
    frame_count = int(vcap.fps * vcap.duration)
    vcap = vcap.iter_frames()
  else:
    vcap = cv2.VideoCapture(video_path)
    if not vcap.isOpened():
      raise Exception("cannot open %s" % video_path)
    # opencv 3/4
    frame_count = vcap.get(cv2.CAP_PROP_FRAME_COUNT)

  # start reading frames into queues now
  video_queuer = VideoEnqueuer(
      args, vcap, frame_count,
      frame_gap=1,  # no skipping frames
      prefetch=100,
      start=True,  # start the dequeuing thread right away
      is_moviepy=args.use_moviepy,
      batch_size=1)
  get_frame_batches = video_queuer.get()

  def load_track_file(file_path, homography):
    """Load a tracking file into a dict of per-track numpy arrays."""
    # assuming the lines are sorted by frame_idx
    data = []
    with open(file_path, "r") as f:
      for line in f:
        frame_idx, track_id, left, top, width, height, conf, _, _, _ = \
            line.strip().split(",")
        data.append([frame_idx, track_id, left, top, width, height, conf])

    if not data:
      return {}

    data = np.array(data, dtype="float32")  # [N, 7]

    # compute top-down points from each box's bottom-center ("foot point")
    foot_points_x = data[:, 2] + data[:, 4] / 2.  # [N]
    foot_points_y = data[:, 3] + data[:, 5]  # [N]
    foot_points = np.stack([foot_points_x, foot_points_y], axis=0)  # [2, N]
    top_down_points = warp_points(foot_points, homography)  # [2, N]
    top_down_points = np.transpose(top_down_points, [1, 0])  # [N, 2]

    # [N, 9]
    data = np.concatenate([data, top_down_points], axis=1)

    track_ids = np.unique(data[:, 1]).tolist()
    track_data = {}  # track_id -> [K, 9]
    for track_id in track_ids:
      track_data[track_id] = data[data[:, 1] == track_id, :]
    return track_data

  # track_id -> data
  p_tracks = load_track_file(p_file, hs[camera])
  v_tracks = load_track_file(v_file, hs[camera])

  # get each frame's boxes to extract
  frame_data = {}  # frame_idx -> a list of boxes

  def get_track_boxes(tracks, cat_name):
    # sample every args.feature_box_gap-th box, up to args.feature_box_num
    for track_id in tracks:
      idxs = list(range(0, len(tracks[track_id]), args.feature_box_gap))
      idxs = idxs[:args.feature_box_num]
      boxes = tracks[track_id][idxs, :]  # [k, 9]
      for box_idx, box in enumerate(boxes):
        frame_idx = box[0]
        tlwh = box[2:6]
        if frame_idx not in frame_data:
          frame_data[frame_idx] = []
        frame_data[frame_idx].append((tlwh, track_id, box_idx, cat_name))

  get_track_boxes(p_tracks, "Person")
  get_track_boxes(v_tracks, "Vehicle")
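  # warp_points (used in load_track_file above) is assumed to apply the 3x3
  # image-to-ground homography to [2, N] pixel points; a minimal sketch of
  # the assumed behavior, not necessarily the actual implementation:
  #
  #   def warp_points(points, h):  # points: [2, N], h: [3, 3]
  #     ones = np.ones([1, points.shape[1]], dtype="float32")
  #     homo = np.dot(h, np.concatenate([points, ones], axis=0))  # [3, N]
  #     return homo[:2] / homo[2]  # perspective divide -> [2, N]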
  # 2. go through the video once and crop all the boxes to extract features
  # assuming no track_id conflicts between Person/Vehicle tracks
  p_track_to_feat = {}  # track_id -> features
  v_track_to_feat = {}  # track_id -> features

  for batch in tqdm(get_frame_batches, total=video_queuer.num_batches):
    image, scale, frame_idx = batch[0]
    image = image.astype("uint8")  # need uint8 type
    if frame_idx in frame_data:
      for tlwh, track_id, box_idx, cat_name in frame_data[frame_idx]:
        # check that the box is valid
        if valid_box(tlwh, image):
          x, y, w, h = tlwh
          x, y, w, h = int(x), int(y), int(w), int(h)
          box_img = cv2.cvtColor(
              image[y:y+h, x:x+w], cv2.COLOR_BGR2RGB)
          if cat_name == "Person":
            if track_id not in p_track_to_feat:
              p_track_to_feat[track_id] = []
            p_track_to_feat[track_id].append(box_img)
          elif cat_name == "Vehicle":
            if track_id not in v_track_to_feat:
              v_track_to_feat[track_id] = []
            v_track_to_feat[track_id].append(box_img)

  # extract features
  def get_features(track_to_imgs, extractor):
    for track_id in track_to_imgs:
      box_imgs = track_to_imgs[track_id]
      track_to_imgs[track_id] = extractor(box_imgs).cpu().numpy()  # [K, 512]
      if args.use_avg_pool:
        # average the K box features into one -> [1, 512]
        track_to_imgs[track_id] = np.mean(
            track_to_imgs[track_id], axis=0, keepdims=True)

  get_features(p_track_to_feat, p_extractor)
  get_features(v_track_to_feat, v_extractor)

  data = {}

  def gather_data(track_data, track_features, cat_name):
    data[cat_name] = {}
    for track_id in track_data:
      # ignore tracks with no valid boxes
      if track_id in track_features:
        data[cat_name][track_id] = (
            track_data[track_id], track_features[track_id])

  gather_data(p_tracks, p_track_to_feat, "Person")
  gather_data(v_tracks, v_track_to_feat, "Vehicle")

  return data
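
# A minimal usage sketch of load_track_and_features. Variable names are
# hypothetical; assumes hs maps each camera name to its 3x3 homography and
# each extractor maps a list of RGB crops to a torch tensor of features:
#
#   data = load_track_and_features(
#       args, video_name, person_track_file, vehicle_track_file,
#       p_extractor, v_extractor, hs)
#   boxes, feats = data["Person"][track_id]  # [K, 9] boxes, [K or 1, 512]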