Example #1
def make_dataset(video_names, video_root):
    dataset = []
    num_frames_collect = []

    for vid in video_names:
        video_path = os.path.join(video_root, vid)
        if not os.path.exists(video_path):
            print('Warning: %s does not exist!' % video_path)
            continue

        try:
            cap = VideoReader(video_path)
        except Exception:
            print('Error in reading %s' % video_path)
            continue

        num_frames = cap.length
        if num_frames < cfg.DATASET.CLIP_LEN:
            print('Skipping %s due to the short length %d.' %
                  (video_path, num_frames))
            continue

        dataset.append(video_path)
        num_frames_collect.append(num_frames)

    return dataset, num_frames_collect
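
A minimal usage sketch for the function above (file names and the root directory are placeholders; assumes VideoReader and a cfg object with DATASET.CLIP_LEN are available as in the snippet):

# Hypothetical call: collect readable, sufficiently long videos under a root dir.
video_names = ['clip_0001.avi', 'clip_0002.avi']  # placeholder names
dataset, num_frames_collect = make_dataset(video_names, '/data/videos')
for path, n in zip(dataset, num_frames_collect):
    print(path, n)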
Example #2
def gen_mask_gt(video_list,
                video_root_path,
                annot_path,
                annot_out_path,
                video_suffix='avi'):
    mask_annot = annot.MaskAnnotation(annot_path)

    with open(video_list, 'r') as f:
        lines = f.readlines()
        vids = [line.strip() for line in lines]

    height, width = -1, -1
    event_bbox = mask_annot.event_bbox
    if vids[0].split(".")[-1] in SUFFIX:
        with_suffix = True
    else:
        with_suffix = False

    for vid in vids:
        vname = vid + '.' + video_suffix if not with_suffix else vid
        video_path = os.path.join(video_root_path, vname)
        cap = VideoReader(video_path)
        if vname not in event_bbox:
            continue

        # read a single frame just to get the spatial size of the video
        for frame in cap.get_iter(1):
            img = frame.numpy()
            assert (len(img.shape) > 1)
            height, width, _ = img.shape

        cur_bboxes = event_bbox[vname]
        for fid in cur_bboxes:
            img = np.zeros([height, width], dtype=np.uint8)  # blank 8-bit mask canvas
            bboxes = cur_bboxes[fid]
            for bbox in bboxes:
                print(vid, fid, bbox, height, width)
                x1, y1, w, h = bbox
                img[y1:y1 + h, x1:x1 + w] = 255

            save_path = os.path.join(annot_out_path, vid)
            if not os.path.isdir(save_path):
                os.makedirs(save_path)
            cv2.imwrite(os.path.join(save_path, "frame_%d_mask.png" % fid),
                        img)
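
A hedged usage sketch (all paths are placeholders; assumes annot.MaskAnnotation accepts the annotation file format used by the project):

# Write one binary mask PNG per annotated frame of every listed video.
gen_mask_gt(video_list='train_videos.txt',
            video_root_path='/data/videos',
            annot_path='/data/annotations/mask_annot.txt',
            annot_out_path='/data/mask_gt',
            video_suffix='avi')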
Example #3
def get_video_reader(args, video_name):
    date, hr_slot, camera = parse_meva_clip_name(video_name)
    video_path = os.path.join(args.video_path, date, hr_slot,
                              video_name + ".avi")
    if args.use_lijun_video_loader:
        vcap = VideoReader(video_path)
        frame_count = int(vcap.length)
    elif args.use_moviepy:
        vcap = VideoFileClip(video_path, audio=False)
        # approximate frame count from fps * duration
        frame_count = int(vcap.fps * vcap.duration)
        vcap = vcap.iter_frames()
    else:
        vcap = cv2.VideoCapture(video_path)
        if not vcap.isOpened():
            raise Exception("cannot open %s" % video_path)
        # opencv 3/4
        frame_count = int(vcap.get(cv2.CAP_PROP_FRAME_COUNT))

    # start reading frames into queues now
    video_queuer = VideoEnqueuer(
        args,
        vcap,
        frame_count,
        frame_gap=1,  # no skipping frames
        prefetch=100,
        start=True,
        is_moviepy=args.use_moviepy,
        batch_size=1)
    get_frame_batches = video_queuer.get()
    return get_frame_batches, video_queuer.num_batches
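
A sketch of consuming the returned generator (the clip name is a placeholder; assumes args carries the loader flags above, tqdm is imported, and each batch holds (image, scale, frame_idx) tuples as in Example #10):

frames_gen, num_batches = get_video_reader(args, 'placeholder_clip_name')
for batch in tqdm(frames_gen, total=num_batches):
    image, scale, frame_idx = batch[0]  # batch_size=1, so one frame per batch
    # ... process the frame here ...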
Example #4
def load_frames_from_video(video_path, start, num, stride=1):
    frames = []

    cap = VideoReader(video_path)
    start_frame_id = start * stride
    video_len = cap.length

    length = num * stride
    if length > video_len - start_frame_id:
        # shift the window back so the requested clip still fits in the video
        start_frame_id = max(0, video_len - length)

    cap.seek(start_frame_id)

    count = 0
    for frame in cap.get_iter(length):
        if count % stride:
            count += 1
            continue

        img = frame.numpy()

        assert (len(img.shape) > 1)
        img = img[:, :, [2, 1, 0]]  # swap the channel order (RGB <-> BGR)
        h, w, c = img.shape
        #print('shape: w: %d, h: %d, c: %d' % (w, h, c))

        # upscale so that the shorter side is at least 226 pixels
        if w < 226 or h < 226:
            d = 226. - min(w, h)
            sc = 1 + d / min(w, h)
            img = cv2.resize(img, dsize=(0, 0), fx=sc, fy=sc)

        img = (img / 255.) * 2 - 1  # scale pixel values to [-1, 1]
        frames.append(img)
        count += 1

    return np.asarray(frames, dtype=np.float32), start_frame_id
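
A small usage sketch (the path is a placeholder; the returned frames are channel-swapped and scaled to [-1, 1] as above):

# Load 16 frames starting at clip index 4 with a temporal stride of 2.
frames, actual_start = load_frames_from_video('/data/videos/example.avi',
                                              start=4, num=16, stride=2)
print(frames.shape, actual_start)  # (16, H, W, 3); start may be shifted for short videos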
Example #5
def test_and_save_mask(net, test_dataloader):
    clip_len = cfg.DATASET.CLIP_LEN
    clip_stride = cfg.DATASET.CLIP_STRIDE

    for sample in iter(test_dataloader):
        if cfg.TEST.WITH_MASK:
            vid, start_f, clips, vmask = sample
        else:
            vid, start_f, clips = sample
        # skip clips that are too short along the temporal dimension
        if clips.size(2) < 8:
            continue
        # forward and get the prediction result
        vpred = net(to_cuda(clips))
        # N x D x H x W
        probs = F.softmax(vpred, dim=1)
        pos_probs = probs[:, 1, :, :, :]

        start_f = start_f.numpy()
        N = len(vid)
        assert(N == len(start_f))
        for i in range(N):
            cur_vid = vid[i]
            cur_video_path = os.path.join(
                cfg.DATASET.DATAROOT, 'videos',
                '%s.%s' % (cur_vid, cfg.DATASET.VIDEO_FORMAT))
            print('video: %s, start_f: %d' % (cur_video_path, start_f[i]))
            # TODO: for debugging
            #if start_f[i] < 5344:
            #    continue
            cur_video = VideoReader(cur_video_path)
            #cur_video.seek(int(start_f[i]))

            frame_count = 0
            proposals = []
            clip_imgs = []
            #masks = []
            #for frame in cur_video.get_iter(clip_len * clip_stride):
            #    if frame_count % clip_stride:
            #        frame_count += 1
            #        continue

            #    # read the image
            #    img = frame.numpy()
            #    assert(len(img.shape) > 1)
            #    clip_imgs += [img]
            #    #img = img[:, :, [2, 1, 0]]
            #    #img = (img / 255.) * 2 - 1

            for fid in range(clip_len * clip_stride):
                if frame_count % clip_stride:
                    frame_count += 1
                    continue

                count = frame_count // clip_stride
                if not cfg.TEST.WITH_DENSE_CRF:
                    cur_pos_probs = pos_probs[i, count, :, :].cpu().numpy()
                else:
                    cur_probs = probs[i, :, count, :, :].cpu().numpy()
                    # TODO: need normalize or not?
                    resized_img = clips[i, :, count, :, :].cpu().numpy()
                    resized_img = np.uint8(255 * (resized_img + 1.) / 2.0)
                    cur_pos_probs = 1.0 * dense_crf(cur_probs, resized_img)

                smoothing(cur_pos_probs)
                labels, num_regions = regional_growing(cur_pos_probs, pixel_val_thres=0.3)
                cur_pos_probs, bboxes = filtering(cur_pos_probs, labels, num_regions, 5)
                #masks.append(cur_pos_probs)
                if len(proposals) == 0:
                    proposals = [[(count, bbox)] for bbox in bboxes]
                else:
                    associate_bboxes(count, bboxes, proposals)

                frame_count += 1

            #heatmaps = draw_heatmaps(clip_imgs, masks)
            #save_visualizations(heatmaps, 'heatmaps', cur_vid, start_f[i])

            #h, w, _ = clip_imgs[0].shape
            h, w = cur_video.height, cur_video.width
            new_proposals = [prop for prop in proposals if len(prop) >= 7]
            print('Number of proposals before and after filtering: %d, %d' % (len(proposals), len(new_proposals)))
            if len(new_proposals) == 0:
                continue
            #for prop in new_proposals:
            #    print(prop)

            stride_x = 1.0 * w / cfg.DATA_TRANSFORM.FINESIZE
            stride_y = 1.0 * h / cfg.DATA_TRANSFORM.FINESIZE
            new_proposals = resize_proposals(new_proposals, stride_x, stride_y, w, h)
            save_proposals(new_proposals, 'proposals', cur_vid, start_f[i])
Example #6
    tfconfig = tf.ConfigProto(allow_soft_placement=True)
    if not args.use_all_mem:
        tfconfig.gpu_options.allow_growth = True
    tfconfig.gpu_options.visible_device_list = "%s" % (",".join([
        "%s" % i for i in range(args.gpuid_start, args.gpuid_start + args.gpu)
    ]))

    with tf.Session(config=tfconfig) as sess:

        if not args.is_load_from_pb:
            initialize(config=args, sess=sess)

        for videofile in tqdm(videolst, ascii=True):
            # 2. read the video file
            if args.use_lijun_video_loader:
                vcap = VideoReader(videofile)
                frame_count = int(vcap.length)
            elif args.use_moviepy:
                vcap = VideoFileClip(videofile, audio=False)
                # approximate frame count from fps * duration
                frame_count = int(vcap.fps * vcap.duration)
                vcap = vcap.iter_frames()
            else:
                try:
                    vcap = cv2.VideoCapture(videofile)
                    if not vcap.isOpened():
                        raise Exception("cannot open %s" % videofile)
                except Exception as e:
                    # raise e
                    # just move on to the next video
                    print("warning, cannot open %s" % videofile)
                    continue
Example #7
def get_vid_meta(vid, vid_meta):
    video_path = os.path.join(cfg.DATASET.DATAROOT, 'videos',
                              '%s.%s' % (vid, cfg.DATASET.VIDEO_FORMAT))
    video = VideoReader(video_path)
    h, w = video.height, video.width
    vid_meta[vid] = (h, w)
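
A minimal sketch of populating a metadata dict with the helper above (video ids are placeholders):

vid_meta = {}
for vid in ['video_0001', 'video_0002']:
    get_vid_meta(vid, vid_meta)
# vid_meta now maps each video id to its (height, width)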
Example #8
        if args.use_2level:
            targetpath = os.path.join(args.despath, videoname)
            if not os.path.exists(targetpath):
                os.makedirs(targetpath)

        if args.name_level is not None:
            foldernames = video.split("/")
            prefixes = foldernames[-1 - args.name_level:-1]
            videoname = "__".join(prefixes + [videoname])

        if args.use_moviepy:
            vcap = VideoFileClip(video, audio=False)
            # approximate frame count from fps * duration
            frame_count = int(vcap.fps * vcap.duration)
            vcap_iter = vcap.iter_frames()
        elif args.use_lijun:
            vcap = VideoReader(video)
            frame_count = int(vcap.length)
        else:
            try:
                vcap = cv2.VideoCapture(video)
                if not vcap.isOpened():
                    raise Exception("cannot open %s" % video)
            except Exception as e:
                raise e

            if cv2.__version__.split(".")[0] != "2":
                frame_width = vcap.get(cv2.CAP_PROP_FRAME_WIDTH)
                frame_height = vcap.get(cv2.CAP_PROP_FRAME_HEIGHT)

                fps = vcap.get(cv2.CAP_PROP_FPS)
                frame_count = vcap.get(cv2.CAP_PROP_FRAME_COUNT)
Example #9
def test_and_save_mask(cfg, net, vids, test_transforms):
    clip_len = cfg.DATASET.CLIP_LEN
    clip_stride = cfg.DATASET.CLIP_STRIDE

    net.eval()
    for vid in vids:
        cur_video_path = vid
        cur_vid = os.path.split(cur_video_path)[-1]

        prop_id = 0
        cur_video = VideoReader(cur_video_path)

        video_len = cur_video.length
        last_start_f = (video_len //
                        (clip_len * clip_stride)) * (clip_len * clip_stride)
        if last_start_f == video_len:
            last_start_f = -1
            last_clip_len = -1
        else:
            last_clip_len = video_len - last_start_f
        print('video_len: %d, last_start_f: %d, last_clip_len: %d' %
              (video_len, last_start_f, last_clip_len))

        f_count = 0
        clip_imgs = []
        start_f = 0

        for frame in cur_video:
            f_count += 1
            clip_imgs.append(frame.numpy())
            if len(clip_imgs) < clip_len * clip_stride:
                continue

            start_f = f_count - (clip_len * clip_stride)
            print('video: %s, start_f: %d' % (cur_video_path, start_f))

            clips = processing_frames(clip_imgs[0::clip_stride],
                                      test_transforms)
            clips = video_to_tensor(clips).unsqueeze(0)
            assert (len(clips.size()) == 5), clips.size()
            assert (clips.size(1) == 3), clips.size(1)
            assert (clips.size(2) == clip_len), clips.size(2)

            # forward and get the prediction result
            vpred = net(to_cuda(clips))
            probs = F.softmax(vpred, dim=1)
            pos_probs = probs[:, 1, :, :, :]

            # generate and save proposals
            proposals = []
            for count in range(clip_len):
                cur_pos_probs = pos_probs[0, count, :, :].cpu().numpy()

                # to speedup, first downsample the probability map
                resized_cur_pos_probs = cv2.resize(
                    cur_pos_probs,
                    dsize=(0, 0),
                    fx=0.5,
                    fy=0.5,
                    interpolation=cv2.INTER_LINEAR)
                #smoothing(resized_cur_pos_probs, len_thres=2)
                labels, num_regions = regional_growing(resized_cur_pos_probs,
                                                       pixel_val_thres=0.3)
                # upsample the label map
                ori_h, ori_w = cur_pos_probs.shape
                labels = cv2.resize(labels,
                                    dsize=(ori_w, ori_h),
                                    interpolation=cv2.INTER_NEAREST)

                cur_pos_probs, bboxes = filtering(cur_pos_probs, labels,
                                                  num_regions, 5)
                if len(proposals) == 0:
                    proposals = [[(count, bbox)] for bbox in bboxes]
                else:
                    associate_bboxes(count, bboxes, proposals)

            h, w = cur_video.height, cur_video.width
            new_proposals = [prop for prop in proposals if len(prop) >= 7]
            print('Number of proposals before and after filtering: %d, %d' %
                  (len(proposals), len(new_proposals)))

            if len(new_proposals) == 0:
                save_path = os.path.join(cfg.SAVE_DIR, 'proposals', cur_vid)
                if not os.path.exists(save_path):
                    os.makedirs(save_path)

                # just create an empty proposals file for this video
                with open(os.path.join(save_path, 'props.txt'), 'a'):
                    pass
            else:
                stride_x = 1.0 * w / cfg.DATA_TRANSFORM.FINESIZE
                stride_y = 1.0 * h / cfg.DATA_TRANSFORM.FINESIZE
                new_proposals = resize_proposals(new_proposals, stride_x,
                                                 stride_y, w, h)
                prop_id = save_proposals(cfg, clip_imgs, new_proposals,
                                         'proposals', cur_vid, start_f,
                                         prop_id)

            if start_f == (last_start_f - clip_len * clip_stride):
                clip_imgs = clip_imgs[last_clip_len:]
            else:
                clip_imgs = []
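
A hedged invocation sketch for the routine above (paths and the transform pipeline are placeholders; assumes glob, os, and torch are imported and the network is already on the GPU):

vids = glob.glob(os.path.join(cfg.DATASET.DATAROOT, 'videos',
                              '*.%s' % cfg.DATASET.VIDEO_FORMAT))
with torch.no_grad():
    test_and_save_mask(cfg, net, vids, test_transforms)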
Example #10
def load_track_and_features(args, video_name, p_file, v_file, p_extractor,
                            v_extractor, hs):
  date, hr_slot, camera = parse_meva_clip_name(video_name)
  # start loading video_frames first
  video_path = os.path.join(args.video_path, date, hr_slot, video_name + ".avi")
  if args.use_lijun_video_loader:
    vcap = VideoReader(video_path)
    frame_count = int(vcap.length)
  elif args.use_moviepy:
    vcap = VideoFileClip(video_path, audio=False)
    # approximate frame count from fps * duration
    frame_count = int(vcap.fps * vcap.duration)
    vcap = vcap.iter_frames()
  else:
    vcap = cv2.VideoCapture(video_path)
    if not vcap.isOpened():
      raise Exception("cannot open %s" % video_path)
    # opencv 3/4
    frame_count = int(vcap.get(cv2.CAP_PROP_FRAME_COUNT))

  # start reading frames into queues now
  video_queuer = VideoEnqueuer(
      args, vcap, frame_count, frame_gap=1,  # no skipping frames
      prefetch=100,
      start=True, is_moviepy=args.use_moviepy,
      batch_size=1)
  get_frame_batches = video_queuer.get()

  def load_track_file(file_path, homography):
    """load a tracking file into dict of numpy arrays."""
    # assuming sorted by frameid
    data = []
    with open(file_path, "r") as f:
      for line in f:
        (frame_idx, track_id, left, top, width, height,
         conf, _, _, _) = line.strip().split(",")
        data.append([frame_idx, track_id, left, top, width, height, conf])

    if not data:
      return {}

    data = np.array(data, dtype="float32")  # [N, 7]

    # compute topdown points
    foot_points_x = data[:, 2] + data[:, 4] / 2.  # [N]
    foot_points_y = data[:, 3] + data[:, 5]
    foot_points = np.stack([foot_points_x, foot_points_y], axis=0)  # [2, N]
    # [2, N]
    top_down_points = warp_points(foot_points, homography)
    top_down_points = np.transpose(top_down_points, [1, 0])  # [N, 2]

    # [N, 9]
    data = np.concatenate([data, top_down_points], axis=1)

    track_ids = np.unique(data[:, 1]).tolist()
    track_data = {}  # [num_track, K, 9]
    for track_id in track_ids:
      track_data[track_id] = data[data[:, 1] == track_id, :]
    return track_data

  # track_id -> data
  p_tracks = load_track_file(p_file, hs[camera])
  v_tracks = load_track_file(v_file, hs[camera])

  # get each frame's boxes to extract
  frame_data = {}  # frame_idx -> a list of boxes,
  def get_track_boxes(tracks, cat_name):
    for track_id in tracks:
      idxs = list(range(0, len(tracks[track_id]), args.feature_box_gap))
      idxs = idxs[:args.feature_box_num]
      boxes = tracks[track_id][idxs, :]  # [k, 7]

      for box_idx, box in enumerate(boxes):
        frame_idx = box[0]
        tlwh = box[2:6]
        if frame_idx not in frame_data:
          frame_data[frame_idx] = []
        frame_data[frame_idx].append((tlwh, track_id, box_idx, cat_name))
  get_track_boxes(p_tracks, "Person")
  get_track_boxes(v_tracks, "Vehicle")

  # 2. go through the video once and crop all the images to extract features
  # assuming no conflict between person/vehicle track ids
  p_track_to_feat = {}  # "track_id" => features
  v_track_to_feat = {}  # "track_id" => features

  for batch in tqdm(get_frame_batches, total=video_queuer.num_batches):
    image, scale, frame_idx = batch[0]
    image = image.astype("uint8")  # need uint8 type
    if frame_idx in frame_data:
      for tlwh, track_id, box_idx, cat_name in frame_data[frame_idx]:

        # check box valid
        if valid_box(tlwh, image):

          x, y, w, h = tlwh
          x, y, w, h = int(x), int(y), int(w), int(h)
          #print(x, y, w, h)
          #print(image[y:y+h, x:x+w])
          box_img = cv2.cvtColor(
              image[y:y+h, x:x+w], cv2.COLOR_BGR2RGB)
          if cat_name == "Person":
            if track_id not in p_track_to_feat:
              p_track_to_feat[track_id] = []
            p_track_to_feat[track_id].append(box_img)
          elif cat_name == "Vehicle":
            if track_id not in v_track_to_feat:
              v_track_to_feat[track_id] = []
            v_track_to_feat[track_id].append(box_img)
  # extract features
  def get_features(track_to_imgs, extractor):
    for track_id in track_to_imgs:
      box_imgs = track_to_imgs[track_id]
      track_to_imgs[track_id] = extractor(box_imgs).cpu().numpy()  # [K, 512]
      if args.use_avg_pool:
        # [1, 512]
        track_to_imgs[track_id] = np.mean(
            track_to_imgs[track_id], axis=0, keepdims=True)

  get_features(p_track_to_feat, p_extractor)
  get_features(v_track_to_feat, v_extractor)

  data = {}
  def gather_data(track_data, track_features, cat_name):
    data[cat_name] = {}
    for track_id in track_data:
      # ignore track with no valid boxes
      if track_id in track_features:
        data[cat_name][track_id] = (
            track_data[track_id], track_features[track_id])
  gather_data(p_tracks, p_track_to_feat, "Person")
  gather_data(v_tracks, v_track_to_feat, "Vehicle")

  return data
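
A sketch of reading the returned structure (assumes the layout built by gather_data above: category -> track_id -> (track array, feature array); the call arguments are assumed to exist in the caller's scope):

data = load_track_and_features(args, video_name, p_file, v_file,
                               p_extractor, v_extractor, hs)
for track_id, (track, feat) in data["Person"].items():
    # track: [K, 9] rows of (frame, id, left, top, width, height, conf, x_td, y_td)
    # feat: [K, 512], or [1, 512] when args.use_avg_pool is set
    print(track_id, track.shape, feat.shape)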