Example #1
 def __init__(self,
              video_file,
              img_size=(416, 416),
              gpu=None,
              num_threads=8,
              offset=0,
              is_torch=True):
     self.is_torch = is_torch
     if is_torch:
         decord.bridge.set_bridge('torch')
     if isinstance(img_size, int):  # accept a bare int as a square (width, height)
         img_size = (img_size, img_size)
     self.img_size = img_size
     self.offset = offset
     if gpu is None:
         ctx = decord.cpu()
     else:
         ctx = decord.gpu(gpu)
     self._vr = VideoReader(video_file,
                            ctx=ctx,
                            width=img_size[0],
                            height=img_size[1],
                            num_threads=num_threads)
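A minimal usage sketch for this constructor, assuming it belongs to a wrapper class (called VideoDataset here, a made-up name) and that decord and VideoReader are imported; with the torch bridge set, indexing the reader yields torch tensors.

dataset = VideoDataset('clip.mp4', img_size=416, gpu=None)  # hypothetical wrapper class
frame = dataset._vr[0]           # with the torch bridge, a torch.Tensor
print(frame.shape, frame.dtype)  # e.g. torch.Size([416, 416, 3]) torch.uint8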
Example #2
def decord_sequential_cpu_benchmark(config):
    """Benchmarking decord library with seqeuential read"""
    device = "cpu"
    if device == "gpu":
        ctx = decord.gpu(0)
    else:
        ctx = decord.cpu()

    video_reader = decord.VideoReader(config["video_path"], ctx)
    assert config["resize_shape"] is False, "TODO: implement tranformation of image size for " \
                                            "decord_sequential_cpu_benchmark; note it has inbuilt" \
                                            "support for this. "
    assert config["downsample"] == 1, "TODO: implement downsampling," \
                                      " note that decord has options " \
                                      "to sample frames every N frames" \
                                      " https://github.com/dmlc/decord#videoloader" \
                                      "Also the video reader has " \
                                      "video_reader.skip_frames(N) function"
    # video_reader = decord.VideoReader(config["video_path"], ctx,
    #                        width=resize_width,
    #                        height=resize_height)

    for timer in tqdm(
            _TIME.measure_many(inspect.currentframe().f_code.co_name,
                               samples=config["repeats"])):
        frames_read = 0
        with tqdm(total=config["n_frames"]) as pbar:
            while frames_read < config["n_frames"]:
                try:
                    img = video_reader.next()
                except StopIteration:
                    break

                img = cv2.cvtColor(img.asnumpy(), cv2.COLOR_RGB2BGR)  # decord decodes RGB; OpenCV expects BGR

                if config["show_img"]:
                    cv2.imshow("img", img)
                    k = cv2.waitKey(1)
                    if ord("q") == k:
                        break

                blocking_call(config["consumer_blocking_config"]["io_limited"],
                              config["consumer_blocking_config"]["duration"])

                frames_read += 1
                pbar.update()
        assert frames_read == config["n_frames"]
        timer.stop()
        del img
        del video_reader
        video_reader = decord.VideoReader(config["video_path"], ctx)
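The resize TODO above could be addressed with decord's built-in resizing, which the commented-out lines hint at. A minimal sketch (file name and target size are arbitrary):

import decord

# decord resizes while decoding, so no per-frame cv2.resize is needed
ctx = decord.cpu()
video_reader = decord.VideoReader("video.mp4", ctx, width=320, height=240)
img = video_reader.next()  # frames arrive already resized to 240x320x3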
Example #3
def get_frames(video_path: Path, num_frames: int, resize_coeff: Tuple[int, int],
               transform: albu.Compose, decode_gpu: bool) -> Dict[str, Any]:
    try:
        if decode_gpu:
            video = VideoReader(str(video_path), ctx=gpu(0))
        else:
            video = VideoReader(str(video_path), ctx=cpu(0))

        len_video = len(video)

        if num_frames is None:
            frame_ids = list(range(len_video))
        else:
            if len_video < num_frames:
                step = 1
            else:
                step = int(len_video / num_frames)

            frame_ids = list(range(0, len_video, step))[:num_frames]

        frames = video.get_batch(frame_ids).asnumpy()

        torched_frames, resize_factor = prepare_frames(frames, resize_coeff,
                                                       transform)

        result = {
            "torched_frames": torched_frames,
            "resize_factor": resize_factor,
            "video_path": video_path,
            "frame_ids": np.array(frame_ids),
            "frames": frames,
        }
    except DECORDError:
        print(f"{video_path} is broken")
        result = {}

    return result
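The sampling arithmetic above picks roughly evenly spaced frame ids; a standalone check of that logic with made-up numbers:

len_video, num_frames = 300, 10
step = 1 if len_video < num_frames else int(len_video / num_frames)
frame_ids = list(range(0, len_video, step))[:num_frames]
print(frame_ids)  # [0, 30, 60, 90, 120, 150, 180, 210, 240, 270]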
Example #4
sys.path.insert(0, os.path.join(os.path.dirname(__file__), '../../python'))
import decord as de

parser = argparse.ArgumentParser("Decord benchmark")
parser.add_argument('--gpu', type=int, default=-1, help='context to run, use --gpu=-1 to use cpu only')
parser.add_argument('--file', type=str, default='/tmp/testsrc_h264_100s_default.mp4', help='Test video')
parser.add_argument('--seed', type=int, default=666, help='numpy random seed for random access indices')
parser.add_argument('--random-frames', type=int, default=300, help='number of random frames to run')
parser.add_argument('--width', type=int, default=320, help='resize frame width')
parser.add_argument('--height', type=int, default=240, help='resize frame height')

args = parser.parse_args()

test_video = args.file
if args.gpu > -1:
    ctx = de.gpu(args.gpu)
else:
    ctx = de.cpu()

vr = de.VideoReader(test_video, ctx, width=args.width, height=args.height)
cnt = 0
tic = time.time()
while True:
    try:
        frame = vr.next()
    except StopIteration:
        break
    cnt += 1
print(cnt, ' frames, elapsed time for sequential read: ', time.time() - tic)

np.random.seed(args.seed)  # fix seed for all random tests
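The snippet stops right after seeding, before the random-access benchmark the seed is for. A plausible continuation in the spirit of the sequential loop above (the upstream script may differ):

indices = np.random.randint(0, len(vr), size=args.random_frames)
tic = time.time()
for idx in indices:
    frame = vr[idx]  # random access; decord handles the seeking
print(args.random_frames, ' frames, elapsed time for random read: ', time.time() - tic)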
def get_decord_gpu(path):
    images_av = []
    vr = VideoReader(path, ctx=gpu(0))
    for i in range(len(vr)):
        # the video reader will handle seeking and skipping in the most efficient manner
        images_av.append(vr[i])
    return images_av
loaders = []
times = []
times_random_seek = []
video = []
num_frames = []
lib_version = []

for i in range(args.n):
    for file in os.listdir("../videos"):
        if file in ["README", ".ipynb_checkpoints", "avadl.py"]:
            print(f"Skipping {file}")
            continue

        path = os.path.join("../videos/", file)
        images_av = []
        vr = VideoReader(path, ctx=gpu(0))
        for frame_idx in range(len(vr)):  # avoid shadowing the outer loop index
            # the video reader will handle seeking and skipping in the most efficient manner
            images_av.append(vr[frame_idx])

        nframes = len(images_av)

        print(path, nframes)

        video.append(file)
        loaders.append("decord_gpu")
        num_frames.append(nframes)
        lib_version.append(decord.__version__)

        times.append(
            timeit.timeit(
Example #7
def decord_batch_cpu_benchmark(config, buffer_size):
    """Benchmarking decord library with a batched implementation for reaching sequentially"""
    device = "cpu"
    if device == "gpu":
        ctx = decord.gpu(0)
    else:
        ctx = decord.cpu()

    np_arr_shape = get_video_shape(config["video_path"])

    video_loader = decord.VideoLoader([config["video_path"]],
                                      ctx,
                                      shape=(buffer_size, *np_arr_shape),
                                      interval=1,
                                      skip=1,
                                      shuffle=0)

    assert config["resize_shape"] is False, "TODO: implement tranformation of image size for " \
                                            "decord_sequential_cpu_benchmark; note it has inbuilt" \
                                            "support for this. "
    assert config["downsample"] == 1, "TODO: implement downsampling, " \
                                      "note that decord has options " \
                                      "to sample frames every N frames" \
                                      " https://github.com/dmlc/decord#videoloader" \
                                      "Also the video reader has" \
                                      " video_reader.skip_frames(N) function"

    for timer in tqdm(
            _TIME.measure_many(inspect.currentframe().f_code.co_name,
                               samples=config["repeats"])):
        frames_read = 0
        with tqdm(total=config["n_frames"]) as pbar:
            for batch in video_loader:
                if frames_read >= config["n_frames"]:
                    break

                data = batch[0].asnumpy()
                for img in data:
                    # img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

                    if config["show_img"]:
                        cv2.imshow("img", img)
                        k = cv2.waitKey(1)
                        if ord("q") == k:
                            break

                    blocking_call(
                        config["consumer_blocking_config"]["io_limited"],
                        config["consumer_blocking_config"]["duration"])

                    frames_read += 1
                    pbar.update()
                    if frames_read >= config["n_frames"]:
                        break

        assert frames_read == config["n_frames"]
        timer.stop()
        video_loader.reset()
        try:
            del img  # pylint: disable = undefined-loop-variable
        except NameError:
            pass

        del video_loader
        video_loader = decord.VideoLoader([config["video_path"]],
                                          ctx,
                                          shape=(buffer_size, *np_arr_shape),
                                          interval=1,
                                          skip=1,
                                          shuffle=0)
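The downsampling TODO might be served by VideoLoader's own sampling knobs rather than manual skipping. A sketch following the decord README (values are illustrative; the exact interval/skip semantics are documented at the link in the assert message):

import decord

video_loader = decord.VideoLoader(["video.mp4"],
                                  decord.cpu(),
                                  shape=(8, 240, 320, 3),  # batches of 8 frames at 240x320
                                  interval=1,
                                  skip=4,
                                  shuffle=0)
for batch in video_loader:
    frames = batch[0].asnumpy()  # (8, 240, 320, 3) uint8 batch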
Example #8
def main():
    args = get_args()
    torch.set_grad_enabled(False)

    if args.network == "mobile0.25":
        cfg = cfg_mnet
    elif args.network == "resnet50":
        cfg = cfg_re50
    else:
        raise NotImplementedError(f"Only mobile0.25 and resnet50 are suppoted.")

    # net and model
    net = RetinaFace(cfg=cfg, phase="test")
    net = load_model(net, args.trained_model, args.cpu)
    net.eval()
    if args.fp16:
        net = net.half()

    print("Finished loading model!")
    cudnn.benchmark = True
    device = torch.device("cpu" if args.cpu else "cuda")
    net = net.to(device)

    file_paths = sorted(args.input_path.rglob("*.mp4"))[: args.num_videos]

    if args.num_gpu is not None:
        start, end = split_array(len(file_paths), args.num_gpu, args.gpu_id)
        file_paths = file_paths[start:end]

    output_path = args.output_path

    if args.save_boxes:
        output_label_path = output_path / "labels"
        output_label_path.mkdir(exist_ok=True, parents=True)

    if args.save_crops:
        output_image_path = output_path / "images"
        output_image_path.mkdir(exist_ok=True, parents=True)

    if args.video_decoder == "cpu":
        decode_device = cpu(0)
    elif args.video_decoder == "gpu":
        decode_device = gpu(0)
    else:
        raise NotImplementedError(f"Only CPU and GPU devices are supported by decard, but got {args.video_decoder}")

    transform = albu.Compose([albu.Normalize(p=1, mean=(104, 117, 123), std=(1.0, 1.0, 1.0), max_pixel_value=1)], p=1)

    with torch.no_grad():
        for video_path in tqdm(file_paths):
            labels = []
            video_id = video_path.stem

            with video_reader(str(video_path), ctx=decode_device) as video:
                len_video = len(video)

                if args.num_frames is None:
                    frame_ids = list(range(len_video))  # read every frame
                elif args.num_frames == 1:
                    frame_ids = [0]
                elif args.num_frames > 1:
                    if len_video < args.num_frames:
                        step = 1
                    else:
                        step = int(len_video / args.num_frames)

                    frame_ids = list(range(0, len_video, step))[: args.num_frames]
                else:
                    raise ValueError(f"Expect None or integer >= 1 for args.num_frames, but got {args.num_frames}")

                frames = video.get_batch(frame_ids)

                if args.video_decoder == "cpu":
                    frames = frames.asnumpy()
                elif args.video_decoder == "gpu":
                    frames = dlpack.from_dlpack(frames.to_dlpack())

                if args.video_decoder == "gpu":
                    del video
                    torch.cuda.empty_cache()

                    gc.collect()

            num_frames = len(frames)

            image_height = frames.shape[1]
            image_width = frames.shape[2]

            # x/y scaling for the 5 landmark (x, y) pairs
            scale1 = torch.Tensor([image_width, image_height] * 5)

            scale1 = scale1.to(device)

            scale = torch.Tensor([image_width, image_height, image_width, image_height])
            scale = scale.to(device)

            priorbox = PriorBox(cfg, image_size=(image_height, image_width))
            priors = priorbox.forward()
            priors = priors.to(device)
            prior_data = priors.data

            if args.resize_coeff is not None:
                target_size = min(args.resize_coeff)
                max_size = max(args.resize_coeff)

                image_height = frames.shape[1]
                image_width = frames.shape[2]

                image_size_min = min([image_width, image_height])
                image_size_max = max([image_width, image_height])

                resize = float(target_size) / float(image_size_min)
                if np.round(resize * image_size_max) > max_size:
                    resize = float(max_size) / float(image_size_max)
            else:
                resize = 1

            for pred_id in range(num_frames):
                frame = frames[pred_id]

                torched_image = prepare_image(frame, transform, args.video_decoder).to(device)

                if args.fp16:
                    torched_image = torched_image.half()

                loc, conf, land = net(torched_image)  # forward pass

                frame_id = frame_ids[pred_id]

                boxes = decode(loc.data[0], prior_data, cfg["variance"])

                boxes *= scale / resize

                boxes = boxes.cpu().numpy()
                scores = conf[0].data.cpu().numpy()[:, 1]

                landmarks = decode_landm(land.data[0], prior_data, cfg["variance"])

                landmarks *= scale1 / resize
                landmarks = landmarks.cpu().numpy()

                # ignore low scores
                valid_index = np.where(scores > args.confidence_threshold)[0]
                boxes = boxes[valid_index]
                landmarks = landmarks[valid_index]
                scores = scores[valid_index]

                # keep top-K before NMS
                order = scores.argsort()[::-1]
                # order = scores.argsort()[::-1][:args.top_k]
                boxes = boxes[order]
                landmarks = landmarks[order]
                scores = scores[order]

                # do NMS
                detection = np.hstack((boxes, scores[:, np.newaxis])).astype(np.float32, copy=False)
                keep = py_cpu_nms(detection, args.nms_threshold)
                # keep = nms(detection, args.nms_threshold,force_cpu=args.cpu)

                # x_min, y_min, x_max, y_max, score
                detection = detection[keep, :]

                landmarks = landmarks[keep].astype(int)

                if detection.shape[0] == 0:
                    continue

                bboxes = detection[:, :4].astype(int)
                confidence = detection[:, 4].astype(np.float64)

                for crop_id in range(len(detection)):

                    bbox = bboxes[crop_id]

                    labels += [
                        {
                            "frame_id": int(frame_id),
                            "crop_id": crop_id,
                            "bbox": bbox.tolist(),
                            "score": confidence[crop_id],
                            "landmarks": landmarks[crop_id].tolist(),
                        }
                    ]

                    if args.save_crops:
                        x_min, y_min, x_max, y_max = bbox

                        x_min = max(0, x_min)
                        y_min = max(0, y_min)

                        crop = frame[y_min:y_max, x_min:x_max]

                        target_folder = output_image_path / f"{video_id}"
                        target_folder.mkdir(exist_ok=True, parents=True)

                        crop_file_path = target_folder / f"{frame_id}_{crop_id}.jpg"

                        if crop_file_path.exists():
                            continue

                        cv2.imwrite(
                            str(crop_file_path),
                            cv2.cvtColor(crop, cv2.COLOR_BGR2RGB),
                            [int(cv2.IMWRITE_JPEG_QUALITY), 90],
                        )

                if args.save_boxes:
                    result = {
                        "file_path": str(video_path),
                        "file_id": video_id,
                        "bboxes": labels,
                    }

                    with open(output_label_path / f"{video_id}.json", "w") as f:
                        json.dump(result, f, indent=2)
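One detail worth noting in the GPU branch above: a decord NDArray can be handed to PyTorch through DLPack without a host round-trip. A minimal sketch of just that conversion (file name and frame count are illustrative):

from torch.utils import dlpack
from decord import VideoReader, gpu

video = VideoReader("clip.mp4", ctx=gpu(0))
nd_frames = video.get_batch(range(4))               # decord NDArray on the GPU
frames = dlpack.from_dlpack(nd_frames.to_dlpack())  # torch.Tensor on CUDA, no copy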
Example #9
File: utils.py Project: nepython/meet2pdf
def extract_frames(video_path,
                   frames_dir,
                   custom_coordinates,
                   start=-1,
                   end=-1,
                   seconds=0.1,
                   meet=True):
    """
    Extract frames from a video using decord's VideoReader
        :param video_path: path of the video
        :param frames_dir: the directory to save the frames
        :param overwrite: to overwrite frames that already exist?
        :param start: start frame
        :param end: end frame
        :param seconds: frame spacing
        :return: count of images saved
    """

    video_path = os.path.normpath(video_path)  # make the path OS (Windows) compatible
    frames_dir = os.path.normpath(frames_dir)  # make the path OS (Windows) compatible

    video_dir, video_filename = os.path.split(video_path)  # get the video directory and filename

    assert os.path.exists(video_path)  # assert the video file exists

    vidcap = cv2.VideoCapture(video_path)
    fps = int(vidcap.get(cv2.CAP_PROP_FPS))
    if fps == 0:
        return False
    seconds = max(1, int(seconds * fps))  # convert the spacing into a frame step of at least 1
    frameToStore = None
    same = False  # tracks whether the last compared pair of frames matched

    try:
        vr = VideoReader(video_path, ctx=gpu(0))  # prefer GPU decoding when available
    except Exception:
        vr = VideoReader(video_path, ctx=cpu(0))  # fall back to CPU decoding

    if meet:
        shareScreenCoverage = {"h": float(0.75), "w": float(0.75)}
    else:
        shareScreenCoverage = {"h": float(1), "w": float(1)}
    if start < 0:  # if start isn't specified, assume frame 0
        start = 0
    if end < 0:  # if end isn't specified, assume the end of the video
        end = len(vr)

    frames_list = list(range(start, end, seconds))
    saved_count = 0
    frames = vr.get_batch(frames_list).asnumpy()
    os.makedirs(os.path.join(frames_dir, video_filename), exist_ok=True)  # ensure the output directory exists

    for frame in frames:  # loop through the sampled frames
        save_path = os.path.join(
            frames_dir, video_filename,
            f"frame{saved_count}.jpg")  # create the save path
        newFrame = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)

        h, w, dimension = newFrame.shape
        if meet:
            croppedImageAttributes = {
                "top": int(0.125 * h),
                "bottom": int(0.875 * h),
                "left": int(0),
                "right": int(0.75 * w),
            }
        else:
            croppedImageAttributes = {
                "top": int(custom_coordinates["top"] * h),
                "bottom": int((1 - custom_coordinates["bottom"]) * h),
                "left": int(custom_coordinates["left"] * w),
                "right": int((1 - custom_coordinates["right"]) * w),
            }
        # to crop Google meet slides frame only and ignore the speaker part of screen
        newFrame = newFrame[
            croppedImageAttributes["top"]:croppedImageAttributes["bottom"],
            croppedImageAttributes["left"]:croppedImageAttributes["right"], ]

        # we have seen at least one frame before
        if frameToStore is not None:
            # compare the new frame with the last stored frame
            same = CheckSimilarity(frameToStore, newFrame)
            # save the last frame if it is not the same as the new frame
            if not same:
                cv2.imwrite(save_path,
                            frameToStore)  # save the extracted image
                saved_count += 1  # increment our counter by one
        frameToStore = newFrame

    # save the last frame too if it differed from the previous one
    if frameToStore is not None and not same:
        cv2.imwrite(save_path, frameToStore)  # save the extracted image
        saved_count += 1
    return True
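A hypothetical call, assuming a Google Meet recording (the paths are illustrative):

extract_frames("recordings/lecture.mp4",
               "frames",
               custom_coordinates=None,  # ignored when meet=True
               seconds=0.5,              # sample a frame every half second
               meet=True)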