frames_per_second = video.get(cv2.CAP_PROP_FPS)
num_frames = int(video.get(cv2.CAP_PROP_FRAME_COUNT))
basename = os.path.basename(args.video_input)

if os.path.isfile(args.output_text_file):
    print("Output text file already exists, replacing it.")  # Does bob.txt exist? Is it a file, or a directory?
elif os.path.isdir(args.output_text_file):
    print("Saving output in: ", os.path.join(args.output_text_file, 'det_demo.txt'))
# assert (os.path.isfile(args.output_text_file) or os.path.isdir(args.output_text_file)), "Please specify --output-text-file"

if args.output_text_file:
    output_text_file = np.array([])
    output_is_empty = True

for vis_frame in tqdm.tqdm(demo.run_on_video(video), total=num_frames):
    if args.output_text_file:
        if len(vis_frame) == 0:
            continue
        if vis_frame.ndim == 1:
            vis_frame = np.array([vis_frame])  # wrap into a new axis
        if output_is_empty:
            output_text_file = vis_frame
            output_is_empty = False
        else:
            output_text_file = np.concatenate([output_text_file, vis_frame])

# release input video file
video.release()

if args.output_text_file:
    assert (len(output_text_file)
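# A minimal sketch of how the accumulated detection array could be written out
# once the loop above finishes, assuming --output-text-file follows the same
# isfile/isdir convention checked above and that the array is numeric. The
# helper name and the '%g' format are illustrative assumptions.
import os

import numpy as np


def save_detections(detections, output_text_file_arg):
    """Write the accumulated (N, M) detection array to a text file.

    If the argument points to a directory, write 'det_demo.txt' inside it
    (mirroring the isdir branch above); otherwise treat the argument as the
    target file path and overwrite it.
    """
    if os.path.isdir(output_text_file_arg):
        out_path = os.path.join(output_text_file_arg, "det_demo.txt")
    else:
        out_path = output_text_file_arg
    np.savetxt(out_path, detections, fmt="%g")  # one detection per row
    return out_path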
def object_d2(files):
    # mp.set_start_method("spawn", force=True)
    args, unknown = get_parser().parse_known_args()
    setup_logger(name="fvcore")
    logger = setup_logger()
    logger.info("Arguments: " + str(args))
    cfg = setup_cfg(args)
    demo = VisualizationDemo(cfg)

    for video_id in tqdm(files):
        try:
            # Load video with CV2
            video = cv2.VideoCapture(f'{vid_folder}/{video_id}.mp4')
            print(f'Video name {"<"*10} {video_id}.mp4 >{">"*10} Loaded')
            width = int(video.get(cv2.CAP_PROP_FRAME_WIDTH))
            height = int(video.get(cv2.CAP_PROP_FRAME_HEIGHT))
            frames_per_second = video.get(cv2.CAP_PROP_FPS)
            img_pixels = height * width
            num_frames = int(video.get(cv2.CAP_PROP_FRAME_COUNT))
            print(f'Image height, width, num_frames, frames_per_second and img_pixels is {">"*10}{(height, width, num_frames, frames_per_second, img_pixels)}')
            if frames_per_second == 0:
                pass
            else:
                print(f'video.get(cv2.CAP_PROP_FRAME_COUNT) >> {video.get(cv2.CAP_PROP_FRAME_COUNT)}')
                duration = num_frames / frames_per_second
                print('Total frames are ', num_frames)
                frames = []
                # list of predictions for each frame and object
                all_preds = list(demo.run_on_video(video))
                # while num_frames != 0:
                #     semantic_predictions = next(all_preds)
                #     semantic_predictions = item
                for num_frame, semantic_predictions in enumerate(all_preds):
                    objs = []
                    for s in semantic_predictions:
                        obj = {}
                        obj["label"] = s["text"]
                        obj['area_percentage'] = float("{0:.2f}".format(s['area'] / img_pixels * 100))
                        obj["score"] = float("{0:.2f}".format(s["score"] if "score" in s else 1))
                        objs.append(obj)
                    obj_set = {}
                    for s in semantic_predictions:
                        k = s["text"]
                        score = s["score"] if "score" in s else 1
                        if k not in obj_set:
                            obj_set[k] = {
                                "scores": [score],
                                "areas": [s["area"]],
                                "label": k
                            }
                        else:
                            obj_set[k]["scores"].append(score)
                            obj_set[k]["areas"].append(s["area"])
                    u_objs = []
                    for k in obj_set:
                        u = obj_set[k]
                        n = len(u["scores"])
                        score_ave = reduce((lambda x, y: x + y), u["scores"]) / n
                        area_sum = reduce((lambda x, y: x + y), u["areas"])
                        obj = {}
                        obj["label"] = u["label"]
                        obj['area_percentage'] = float("{0:.2f}".format(area_sum / img_pixels * 100))
                        obj["score"] = float("{0:.2f}".format(score_ave))
                        obj["count"] = n
                        u_objs.append(obj)
                    frame = {
                        "frame": num_frame + 1,
                        "instances": objs,
                        "objects": u_objs,
                    }
                    frames.append(frame)
                cv2.destroyAllWindows()
                data = {
                    "video": {
                        "meta": {},
                        "base_uri": "https://videobank.blob.core.windows.net/videobank",
                        "folder": video_id,
                        "output-frame-path": ""
                    },
                    "ml-data": {
                        "object-detection": {
                            "meta": {'duration': duration, 'fps': frames_per_second, 'len_frames': len(frames)},
                            "video": {},
                            "frames": frames
                        }
                    }
                }
                print(f'writing OD output inside {">"*10} {json_folder}/{video_id}.json')
                with open(f'{json_folder}/{video_id}.json', 'w') as f:
                    json.dump(data, f)
        except Exception as e:
            print(f'Caught exception during inference, error is {">"*10} {e}')
            with open(f'{basepath}/err_vidsod.txt', 'a') as f:
                f.write(str(e))
            pass
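# A hypothetical driver for object_d2; vid_folder, json_folder and basepath are
# referenced inside the function body but defined elsewhere, so the paths below
# are placeholder assumptions rather than the project's real layout.
import os

vid_folder = '/data/videos'      # where <video_id>.mp4 files are read from
json_folder = '/data/od_json'    # where per-video JSON outputs are written
basepath = '/data'               # err_vidsod.txt is appended here on failure

if __name__ == "__main__":
    # Video ids are the .mp4 filenames without their extension.
    video_ids = [os.path.splitext(f)[0]
                 for f in os.listdir(vid_folder) if f.endswith('.mp4')]
    object_d2(video_ids)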
                    assert len(
                        args.input
                    ) == 1, "Please specify a directory with args.output"
                    out_filename = args.output
                visualized_output.save(out_filename)
            else:
                cv2.namedWindow(WINDOW_NAME, cv2.WINDOW_NORMAL)
                cv2.imshow(WINDOW_NAME, visualized_output.get_image()[:, :, ::-1])
                if cv2.waitKey(0) == 27:
                    break  # esc to quit
    elif args.webcam:
        assert args.input is None, "Cannot have both --input and --webcam!"
        assert args.output is None, "output not yet supported with --webcam!"
        cam = cv2.VideoCapture(0)
        for vis in tqdm.tqdm(demo.run_on_video(cam, args.confidence_threshold)):
            cv2.namedWindow(WINDOW_NAME, cv2.WINDOW_NORMAL)
            cv2.imshow(WINDOW_NAME, vis)
            if cv2.waitKey(1) == 27:
                break  # esc to quit
        cam.release()
        cv2.destroyAllWindows()
    elif args.video_input:
        video = cv2.VideoCapture(args.video_input)
        width = int(video.get(cv2.CAP_PROP_FRAME_WIDTH))
        height = int(video.get(cv2.CAP_PROP_FRAME_HEIGHT))
        frames_per_second = video.get(cv2.CAP_PROP_FPS)
        num_frames = int(video.get(cv2.CAP_PROP_FRAME_COUNT))
        basename = os.path.basename(args.video_input)

        if args.output:
            else:
                cv2.namedWindow(WINDOW_NAME, cv2.WINDOW_NORMAL)
                cv2.imshow(WINDOW_NAME, visualized_output.get_image()[:, :, ::-1])
                num_instances = len(predictions["instances"])
                # call .numpy() to get the array (the bare attribute would bind the method)
                classes = predictions["instances"].pred_classes.numpy()
                if num_instances > 0:
                    labels = _create_text_labels(
                        classes, scores, ppl.metadata.get("thing_classes", None))
                if cv2.waitKey(0) == 27:
                    break  # esc to quit
    elif args.webcam:
        assert args.input is None, "Cannot have both --input and --webcam!"
        cam = cv2.VideoCapture(0)
        for vis in tqdm.tqdm(ppl.run_on_video(cam)):
            cv2.namedWindow(WINDOW_NAME, cv2.WINDOW_NORMAL)
            cv2.imshow(WINDOW_NAME, vis)
            if cv2.waitKey(1) == 27:
                break  # esc to quit
        cv2.destroyAllWindows()
    elif args.video_input:
        video = cv2.VideoCapture(args.video_input)
        width = int(video.get(cv2.CAP_PROP_FRAME_WIDTH))
        height = int(video.get(cv2.CAP_PROP_FRAME_HEIGHT))
        frames_per_second = video.get(cv2.CAP_PROP_FPS)
        num_frames = int(video.get(cv2.CAP_PROP_FRAME_COUNT))
        basename = os.path.basename(args.video_input)

        if args.output:
            if os.path.isdir(args.output):
        basename = os.path.basename(args.video_input)
        output_folder_name = os.path.join("../output/", os.path.splitext(basename)[0])
        os.makedirs(output_folder_name)
        if args.output:
            json_output_fname = os.path.join(args.output)[:-4]
        num_frames = int(video.get(cv2.CAP_PROP_FRAME_COUNT))
        demo = VisualizationDemo(cfg=cfg, parallel=args.parallel)
        window_size = 5

        # make a video with bounding boxes and collect per-frame instance information
        data_array = []
        idx = 0
        for frame_instances in tqdm.tqdm(demo.run_on_video(video), total=num_frames):
            idx += 1
            data_array.append(frame_instances)
        video.release()
        num_frames = idx

        pose_refinement = PoseRefinement(
            width, height, num_frames, basename, window_size
        )

        # correct keypoints using information from adjacent frames
        for i, frame_instances in tqdm.tqdm(enumerate(data_array), total=len(data_array)):
            boxes = [inst.bbox.tolist() for inst in frame_instances]
            keypoints = [inst.keypoint.tolist() for inst in frame_instances]
            indices = [inst.index for inst in frame_instances]
            if i >= window_size and i < len(data_array) - window_size:
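# PoseRefinement itself is not shown in this snippet; the sketch below only
# illustrates the kind of symmetric sliding-window smoothing that window_size
# and the i >= window_size / i < len(data_array) - window_size bounds suggest.
# The function name and the plain mean are assumptions, not the real class.
import numpy as np


def smooth_keypoints(keypoints_by_frame, window_size=5):
    """Average one person's keypoints over a symmetric window of neighbouring frames.

    keypoints_by_frame: list of (K, 3) arrays (x, y, score) for a single track.
    Frames closer than window_size to either end keep their raw values.
    """
    kps = np.asarray(keypoints_by_frame, dtype=float)   # (T, K, 3)
    smoothed = kps.copy()
    for t in range(window_size, len(kps) - window_size):
        window = kps[t - window_size:t + window_size + 1]
        smoothed[t, :, :2] = window[:, :, :2].mean(axis=0)  # smooth x, y only
    return smoothed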
                    assert len(
                        args.input
                    ) == 1, "Please specify a directory with args.output"
                    out_filename = args.output
                visualized_output.save(out_filename)
            else:
                cv2.namedWindow(WINDOW_NAME, cv2.WINDOW_NORMAL)
                cv2.imshow(WINDOW_NAME, visualized_output.get_image()[:, :, ::-1])
                if cv2.waitKey(0) == 27:
                    break  # esc to quit
    elif args.webcam:
        assert args.input is None, "Cannot have both --input and --webcam!"
        assert args.output is None, "output not yet supported with --webcam!"
        # cam = cv2.VideoCapture(0)
        for vis in tqdm.tqdm(demo.run_on_video(pipeline, pc)):
            cv2.namedWindow(WINDOW_NAME, cv2.WINDOW_NORMAL)
            cv2.imshow(WINDOW_NAME, vis)
            if cv2.waitKey(1) == 27:
                break  # esc to quit
        # cam.release()
        cv2.destroyAllWindows()
    elif args.video_input:
        video = cv2.VideoCapture(args.video_input)
        width = int(video.get(cv2.CAP_PROP_FRAME_WIDTH))
        height = int(video.get(cv2.CAP_PROP_FRAME_HEIGHT))
        frames_per_second = video.get(cv2.CAP_PROP_FPS)
        num_frames = int(video.get(cv2.CAP_PROP_FRAME_COUNT))
        basename = os.path.basename(args.video_input)

        if args.output:
                    assert len(
                        args.input
                    ) == 1, "Please specify a directory with args.output"
                    out_filename = args.output
                visualized_output.save(out_filename)
            else:
                cv2.namedWindow(WINDOW_NAME, cv2.WINDOW_NORMAL)
                cv2.imshow(WINDOW_NAME, visualized_output.get_image()[:, :, ::-1])
                if cv2.waitKey(0) == 27:
                    break  # esc to quit
    elif args.webcam:
        assert args.input is None, "Cannot have both --input and --webcam!"
        assert args.output is None, "output not yet supported with --webcam!"
        cam = cv2.VideoCapture(0)
        for vis in tqdm.tqdm(demo.run_on_video(cam)):
            cv2.namedWindow(WINDOW_NAME, cv2.WINDOW_NORMAL)
            cv2.imshow(WINDOW_NAME, vis)
            if cv2.waitKey(1) == 27:
                break  # esc to quit
        cam.release()
        cv2.destroyAllWindows()
    elif args.video_input:
        video = cv2.VideoCapture(args.video_input)
        width = int(video.get(cv2.CAP_PROP_FRAME_WIDTH))
        height = int(video.get(cv2.CAP_PROP_FRAME_HEIGHT))
        frames_per_second = video.get(cv2.CAP_PROP_FPS)
        num_frames = int(video.get(cv2.CAP_PROP_FRAME_COUNT))
        basename = os.path.basename(args.video_input)
        # "pred_boxes", "pred_classes", "scores", "pred_masks" (or "pred_masks_rle").
        observations = [{
                output_fname = os.path.splitext(output_fname)[0] + ".mkv"
            else:
                output_fname = args.output
            assert not os.path.isfile(output_fname), output_fname
            output_file = cv2.VideoWriter(
                filename=output_fname,
                # some installations of OpenCV may not support x264 (due to its license);
                # you can try another format (e.g. MPEG)
                fourcc=cv2.VideoWriter_fourcc(*"x264"),
                # fourcc=cv2.VideoWriter_fourcc(*"avc1"),
                fps=float(frames_per_second),
                frameSize=(width, height),
                isColor=True,
            )
        assert os.path.isfile(args.video_input)
        for predictions, vis_frame in tqdm.tqdm(demo.run_on_video(video), total=num_frames):
            print(predictions.pred_classes)
            print(predictions.pred_boxes)
            print(predictions.scores)
            if args.output:
                output_file.write(vis_frame)
            else:
                cv2.namedWindow(basename, cv2.WINDOW_NORMAL)
                cv2.imshow(basename, vis_frame)
                if cv2.waitKey(1) == 27:
                    break  # esc to quit
        video.release()
        if args.output:
            output_file.release()
        else:
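# The comment above notes that x264 may be unavailable in some OpenCV builds.
# Below is a hedged sketch of a codec fallback; the helper name and the codec
# order are assumptions, so adjust them to whatever your build supports.
import cv2


def open_writer(output_fname, fps, width, height):
    """Return an opened cv2.VideoWriter, trying x264 first and then fallbacks."""
    for codec in ("x264", "avc1", "mp4v", "MJPG"):
        writer = cv2.VideoWriter(
            filename=output_fname,
            fourcc=cv2.VideoWriter_fourcc(*codec),
            fps=float(fps),
            frameSize=(width, height),
            isColor=True,
        )
        if writer.isOpened():
            return writer
        writer.release()
    raise RuntimeError(f"No usable codec found for {output_fname}")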
                    assert len(
                        args.input
                    ) == 1, "Please specify a directory with args.output"
                    out_filename = args.output
                visualized_output.save(out_filename)
            else:
                cv2.namedWindow(WINDOW_NAME, cv2.WINDOW_NORMAL)
                cv2.imshow(WINDOW_NAME, visualized_output.get_image()[:, :, ::-1])
                if cv2.waitKey(0) == 27:
                    break  # esc to quit
    elif args.webcam:
        assert args.input is None, "Cannot have both --input and --webcam!"
        assert args.output is None, "output not yet supported with --webcam!"
        cam = cv2.VideoCapture(0)
        for vis in tqdm.tqdm(demo.run_on_video(cam)):
            cv2.namedWindow(WINDOW_NAME, cv2.WINDOW_NORMAL)
            cv2.imshow(WINDOW_NAME, vis)
            if cv2.waitKey(1) == 27:
                break  # esc to quit
        cam.release()
        cv2.destroyAllWindows()
    elif args.video_input:
        video = cv2.VideoCapture(args.video_input)
        width = int(video.get(cv2.CAP_PROP_FRAME_WIDTH))
        height = int(video.get(cv2.CAP_PROP_FRAME_HEIGHT))
        frames_per_second = video.get(cv2.CAP_PROP_FPS)
        num_frames = int(video.get(cv2.CAP_PROP_FRAME_COUNT))
        basename = os.path.basename(args.video_input)

        if args.output:
                    assert len(
                        args.input
                    ) == 1, "Please specify a directory with args.output"
                    out_filename = args.output
                visualized_output.save(out_filename)
            else:
                cv2.namedWindow(WINDOW_NAME, cv2.WINDOW_NORMAL)
                cv2.imshow(WINDOW_NAME, visualized_output.get_image()[:, :, ::-1])
                # cv2.imshow(WINDOW_NAME, cv2.bitwise_and(visualized_output.get_image()[:, :, ::-1], visualized_output.img))
                if cv2.waitKey(0) == 27:
                    break  # esc to quit
    elif args.webcam:
        assert args.input is None, "Cannot have both --input and --webcam!"
        cam = cv2.VideoCapture(0)
        for vis in tqdm.tqdm(demo.run_on_video(cam)):
            cv2.namedWindow(WINDOW_NAME, cv2.WINDOW_NORMAL)
            cv2.imshow(WINDOW_NAME, vis)
            if cv2.waitKey(1) == 27:
                break  # esc to quit
        cv2.destroyAllWindows()
    elif args.video_input:
        video = cv2.VideoCapture(args.video_input)
        width = int(video.get(cv2.CAP_PROP_FRAME_WIDTH))
        height = int(video.get(cv2.CAP_PROP_FRAME_HEIGHT))
        frames_per_second = video.get(cv2.CAP_PROP_FPS)
        num_frames = int(video.get(cv2.CAP_PROP_FRAME_COUNT))
        basename = os.path.basename(args.video_input)

        if args.output:
            if os.path.isdir(args.output):
def main_predict():
    try:
        mp.set_start_method("spawn", force=True)
        # args = get_parser().parse_args()
        args, unknown = get_parser().parse_known_args()
        setup_logger(name="fvcore")
        logger = setup_logger()
        logger.info("Arguments: " + str(args))
        cfg = setup_cfg(args)
        print("A ...................")
        demo = VisualizationDemo(cfg)
        print("B ...................")
        st = time.time()
        if args.video_input:
            # assert os.path.isfile(args.video_input)
            # vid_path = args.video_input
            # vid_name = vid_path.split('.mp4')[0]
            # stream = ffmpeg.input(vid_path)
            # stream = ffmpeg.filter(stream, 'fps', fps=args.fps, round='up')
            # stream = ffmpeg.output(stream, f'{vid_name}_resized.mp4')
            # ffmpeg.run(stream)
            # video = cv2.VideoCapture(f'{vid_name}_resized.mp4')
            # print("A")
            # message = request.get_json(force=True)
            # print("B")
            # encoded = message['image']
            # print("C")
            # decoded = base64.b64decode(encoded)
            # print("D")
            # video_decoded = np.asarray(bytearray(decoded), dtype="uint8")
            # print(f"decoded and shape is >>>>>>>>>> {video_decoded.shape}")
            # video = cv2.VideoCapture(video_decoded)
            print("E>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>")
            # video = cv2.VideoCapture('/data/work/colab_d2_copy/colab_d2/docker_files/detectron2/demo/new_clip_resized.mp4')
            video = cv2.VideoCapture(
                '/app/docker_files/detectron2/demo/new_clip_resized.mp4')
            width = int(video.get(cv2.CAP_PROP_FRAME_WIDTH))
            height = int(video.get(cv2.CAP_PROP_FRAME_HEIGHT))
            frames_per_second = video.get(cv2.CAP_PROP_FPS)
            num_frames = int(video.get(cv2.CAP_PROP_FRAME_COUNT))
            print(
                f'<<<<<<<<<<<<<<<<<<<<< {video.get(cv2.CAP_PROP_FRAME_COUNT)}')
            basename = os.path.basename(args.video_input)
            #######################
            duration = num_frames / frames_per_second
            print(f'num_frames is {num_frames}')
            print(f'duration is {duration} and fps is {frames_per_second}')
            counter = 0
            frames = []
            all_preds = demo.run_on_video(video)
            i = 1
            while i <= num_frames:
                semantic_predictions = next(all_preds)
                # semantic_predictions = item
                objs = []
                for s in semantic_predictions:
                    obj = {}
                    obj["label"] = s["text"]
                    obj['area_percentage'] = float("{0:.2f}".format(s['area'] / 1000000))
                    obj["score"] = float(
                        "{0:.2f}".format(s["score"] if "score" in s else 1))
                    objs.append(obj)
                obj_set = {}
                for s in semantic_predictions:
                    k = s["text"]
                    score = s["score"] if "score" in s else 1
                    if k not in obj_set:
                        obj_set[k] = {
                            "scores": [score],
                            "areas": [s["area"]],
                            "label": k
                        }
                    else:
                        obj_set[k]["scores"].append(score)
                        obj_set[k]["areas"].append(s["area"])
                u_objs = []
                for k in obj_set:
                    u = obj_set[k]
                    n = len(u["scores"])
                    score_ave = reduce((lambda x, y: x + y), u["scores"]) / n
                    area_sum = reduce((lambda x, y: x + y), u["areas"])
                    obj = {}
                    obj["label"] = u["label"]
                    obj['area_percentage'] = float("{0:.2f}".format(area_sum / 1000000))
                    obj["score"] = float("{0:.2f}".format(score_ave))
                    obj["count"] = n
                    u_objs.append(obj)
                frame = {
                    "frame": i,
                    "instances": objs,
                    "objects": u_objs,
                }
                print(f'num_frame is {i}')
                counter += 1
                i += 1
                frames.append(frame)
            cv2.destroyAllWindows()
            data = {
                "video": {
                    "meta": {},
                    "base_uri": "https://videobank.blob.core.windows.net/videobank",
                    "folder": args.video_input,
                    "output-frame-path": "pipeline/detectron2"
                },
                "ml-data": {
                    "object-detection": {
                        "meta": {
                            'duration': duration,
                            'fps': frames_per_second,
                            'len_frames': len(frames)
                        },
                        "video": {},
                        "frames": frames
                    }
                }
            }
            # print(f'data is {data}')
            return jsonify(data)
    except Exception as e:
        print(f"error is >>>>>>>> {e}")
        return "Wrong input format"
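# main_predict returns jsonify(...), which only works inside a Flask request
# context; a minimal sketch of how it might be exposed as a route. The app
# object, route path, HTTP method and port are assumptions, not the original
# service definition.
from flask import Flask

app = Flask(__name__)


@app.route("/predict", methods=["POST"])
def predict_route():
    # Delegates to main_predict, which reads its video path internally and
    # returns either a JSON response or the plain string "Wrong input format".
    return main_predict()


if __name__ == "__main__":
    app.run(host="0.0.0.0", port=5000)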
def object_d2(files):
    # mp.set_start_method("spawn", force=True)
    args, unknown = get_parser().parse_known_args()
    setup_logger(name="fvcore")
    logger = setup_logger()
    logger.info("Arguments: " + str(args))
    cfg = setup_cfg(args)
    print("A ...................")
    demo = VisualizationDemo(cfg)
    print("B ...................")
    basepath = '/data1/code_base/mnt_data/kubenetra/integration/vids'
    for video_id in tqdm(files):  # integration/vids/14686_.mp4
        try:
            print(f'this will be loaded >>>>>>> /mnt/az/kubenetra/blob_vid/{video_id}.mp4')
            video = cv2.VideoCapture(f'{basepath}/{video_id}.mp4')
            print("E>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>")
            # video = cv2.VideoCapture('/app/new_clip_resized_resized.mp4')
            width = int(video.get(cv2.CAP_PROP_FRAME_WIDTH))
            # print(f'width is {width}')
            height = int(video.get(cv2.CAP_PROP_FRAME_HEIGHT))
            frames_per_second = video.get(cv2.CAP_PROP_FPS)
            img_pixels = height * width
            if frames_per_second == 0:
                pass
            else:
                num_frames = int(video.get(cv2.CAP_PROP_FRAME_COUNT))
                print('<<<<<<<<<<<<<<<<<<<<< ', video.get(cv2.CAP_PROP_FRAME_COUNT))
                #######################
                duration = num_frames / frames_per_second
                print('num_frames is ', num_frames)
                # print(f'duration is {duration} and fps is {frames_per_second}')
                counter = 0
                frames = []
                all_preds = list(demo.run_on_video(video))
                i = 1
                total_frames = num_frames
                # while num_frames != 0:
                for num_frame, semantic_predictions in enumerate(all_preds):
                    # semantic_predictions = next(all_preds)
                    # semantic_predictions = item
                    objs = []
                    for s in semantic_predictions:
                        obj = {}
                        obj["label"] = s["text"]
                        obj['area_percentage'] = float("{0:.2f}".format(s['area'] / img_pixels))
                        obj["score"] = float("{0:.2f}".format(s["score"] if "score" in s else 1))
                        objs.append(obj)
                    obj_set = {}
                    for s in semantic_predictions:
                        k = s["text"]
                        score = s["score"] if "score" in s else 1
                        if k not in obj_set:
                            obj_set[k] = {
                                "scores": [score],
                                "areas": [s["area"]],
                                "label": k
                            }
                        else:
                            obj_set[k]["scores"].append(score)
                            obj_set[k]["areas"].append(s["area"])
                    u_objs = []
                    for k in obj_set:
                        u = obj_set[k]
                        n = len(u["scores"])
                        score_ave = reduce((lambda x, y: x + y), u["scores"]) / n
                        area_sum = reduce((lambda x, y: x + y), u["areas"])
                        obj = {}
                        obj["label"] = u["label"]
                        # NOTE: uses a fixed 1e6 denominator here, unlike the per-instance img_pixels above
                        obj['area_percentage'] = float("{0:.2f}".format(area_sum / 1000000))
                        obj["score"] = float("{0:.2f}".format(score_ave))
                        obj["count"] = n
                        u_objs.append(obj)
                    frame = {
                        "frame": num_frame + 1,  # 1-based frame index
                        "instances": objs,
                        "objects": u_objs,
                    }
                    # print('num_frame is ', total_frames - num_frames + 1)
                    print('num_frame is ', num_frame + 1)
                    # counter += 1
                    # num_frames -= 1
                    # i += 1
                    frames.append(frame)
                cv2.destroyAllWindows()
                data = {
                    "video": {
                        "meta": {},
                        "base_uri": "https://videobank.blob.core.windows.net/videobank",
                        "folder": args.video_input,
                        "output-frame-path": "pipeline/detectron2"
                    },
                    "ml-data": {
                        "object-detection": {
                            "meta": {'duration': duration, 'fps': frames_per_second, 'len_frames': len(frames)},
                            "video": {},
                            "frames": frames
                        }
                    }
                }
                # print(f'data is {data}')
                # try:
                #     os.remove(f'./blob_vid/{video_id}.txt')
                # except OSError:
                #     pass
                # return data
                print(f'writing OD outs inside >>> {basepath}/{video_id}.json')
                with open(f'{basepath}/{video_id}.json', 'w') as f:
                    json.dump(data, f)
        except Exception as e:
            print(e)
            with open('./err_vids.txt', 'a') as f:
                f.write(str(e))
            pass