frames_per_second = video.get(cv2.CAP_PROP_FPS)
num_frames = int(video.get(cv2.CAP_PROP_FRAME_COUNT))
basename = os.path.basename(args.video_input)

if os.path.isfile(args.output_text_file):
    print("Output text file already exists, replacing it.")  # Does bob.txt exist? Is it a file, or a directory?
elif os.path.isdir(args.output_text_file):
    print("Saving output in: ", os.path.join(args.output_text_file, 'det_demo.txt'))
# assert (os.path.isfile(args.output_text_file) or os.path.isdir(args.output_text_file)), "Please specify --output-text-file"

if args.output_text_file:
    output_text_file = np.array([])
    output_is_empty = True

for vis_frame in tqdm.tqdm(demo.run_on_video(video), total=num_frames):
    if args.output_text_file:
        if len(vis_frame) == 0:
            continue
        if vis_frame.ndim == 1:
            vis_frame = np.array([vis_frame])  # wrap into a new axis
        if output_is_empty:
            output_text_file = vis_frame
            output_is_empty = False
        else:
            output_text_file = np.concatenate([output_text_file, vis_frame])

# release input video file
video.release()

if args.output_text_file:
    assert (len(output_text_file)
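# A minimal sketch of how the accumulated detection array could be written out
# once the loop above finishes, assuming --output-text-file follows the same
# isfile/isdir convention checked above and that the array is numeric. The
# helper name and the '%g' format are illustrative assumptions.
import os

import numpy as np


def save_detections(detections, output_text_file_arg):
    """Write the accumulated (N, M) detection array to a text file.

    If the argument points to a directory, write 'det_demo.txt' inside it
    (mirroring the isdir branch above); otherwise treat the argument as the
    target file path and overwrite it.
    """
    if os.path.isdir(output_text_file_arg):
        out_path = os.path.join(output_text_file_arg, "det_demo.txt")
    else:
        out_path = output_text_file_arg
    np.savetxt(out_path, detections, fmt="%g")  # one detection per row
    return out_path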
def object_d2(files):
    # mp.set_start_method("spawn", force=True)
    args, unknown = get_parser().parse_known_args()
    setup_logger(name="fvcore")
    logger = setup_logger()
    logger.info("Arguments: " + str(args))
    cfg = setup_cfg(args)
    demo = VisualizationDemo(cfg)

    for video_id in tqdm(files):
        try:
            # Load video with CV2
            video = cv2.VideoCapture(f'{vid_folder}/{video_id}.mp4')
            print(f'Video name {"<"*10} {video_id}.mp4 >{">"*10} Loaded')
            width = int(video.get(cv2.CAP_PROP_FRAME_WIDTH))
            height = int(video.get(cv2.CAP_PROP_FRAME_HEIGHT))
            frames_per_second = video.get(cv2.CAP_PROP_FPS)
            img_pixels = height * width
            num_frames = int(video.get(cv2.CAP_PROP_FRAME_COUNT))
            print(f'Image height, width, num_frames, frames_per_second and img_pixels is {">"*10}{(height, width, num_frames, frames_per_second, img_pixels)}')
            if frames_per_second == 0:
                pass
            else:
                print(f'video.get(cv2.CAP_PROP_FRAME_COUNT) >> {video.get(cv2.CAP_PROP_FRAME_COUNT)}')
                duration = num_frames / frames_per_second
                print('Total frames are ', num_frames)
                frames = []
                # list of predictions for each frame and object
                all_preds = list(demo.run_on_video(video))
                # while num_frames != 0:
                #     semantic_predictions = next(all_preds)
                #     semantic_predictions = item
                for num_frame, semantic_predictions in enumerate(all_preds):
                    objs = []
                    for s in semantic_predictions:
                        obj = {}
                        obj["label"] = s["text"]
                        obj['area_percentage'] = float("{0:.2f}".format(s['area'] / img_pixels * 100))
                        obj["score"] = float("{0:.2f}".format(s["score"] if "score" in s else 1))
                        objs.append(obj)
                    obj_set = {}
                    for s in semantic_predictions:
                        k = s["text"]
                        score = s["score"] if "score" in s else 1
                        if k not in obj_set:
                            obj_set[k] = {
                                "scores": [score],
                                "areas": [s["area"]],
                                "label": k
                            }
                        else:
                            obj_set[k]["scores"].append(score)
                            obj_set[k]["areas"].append(s["area"])
                    u_objs = []
                    for k in obj_set:
                        u = obj_set[k]
                        n = len(u["scores"])
                        score_ave = reduce((lambda x, y: x + y), u["scores"]) / n
                        area_sum = reduce((lambda x, y: x + y), u["areas"])
                        obj = {}
                        obj["label"] = u["label"]
                        obj['area_percentage'] = float("{0:.2f}".format(area_sum / img_pixels * 100))
                        obj["score"] = float("{0:.2f}".format(score_ave))
                        obj["count"] = n
                        u_objs.append(obj)
                    frame = {
                        "frame": num_frame + 1,
                        "instances": objs,
                        "objects": u_objs,
                    }
                    frames.append(frame)
                cv2.destroyAllWindows()
                data = {
                    "video": {
                        "meta": {},
                        "base_uri": "https://videobank.blob.core.windows.net/videobank",
                        "folder": video_id,
                        "output-frame-path": ""
                    },
                    "ml-data": {
                        "object-detection": {
                            "meta": {'duration': duration, 'fps': frames_per_second, 'len_frames': len(frames)},
                            "video": {},
                            "frames": frames
                        }
                    }
                }
                print(f'writing OD output inside {">"*10} {json_folder}/{video_id}.json')
                with open(f'{json_folder}/{video_id}.json', 'w') as f:
                    json.dump(data, f)
        except Exception as e:
            print(f'Caught exception during inference, error is {">"*10} {e}')
            with open(f'{basepath}/err_vidsod.txt', 'a') as f:
                f.write(str(e))
            pass
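# A hypothetical driver for object_d2; vid_folder, json_folder and basepath are
# referenced inside the function body but defined elsewhere, so the paths below
# are placeholder assumptions rather than the project's real layout.
import os

vid_folder = '/data/videos'      # where <video_id>.mp4 files are read from
json_folder = '/data/od_json'    # where per-video JSON outputs are written
basepath = '/data'               # err_vidsod.txt is appended here on failure

if __name__ == "__main__":
    # Video ids are the .mp4 filenames without their extension.
    video_ids = [os.path.splitext(f)[0]
                 for f in os.listdir(vid_folder) if f.endswith('.mp4')]
    object_d2(video_ids)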
                    assert len(
                        args.input
                    ) == 1, "Please specify a directory with args.output"
                    out_filename = args.output
                visualized_output.save(out_filename)
            else:
                cv2.namedWindow(WINDOW_NAME, cv2.WINDOW_NORMAL)
                cv2.imshow(WINDOW_NAME, visualized_output.get_image()[:, :, ::-1])
                if cv2.waitKey(0) == 27:
                    break  # esc to quit
    elif args.webcam:
        assert args.input is None, "Cannot have both --input and --webcam!"
        assert args.output is None, "output not yet supported with --webcam!"
        cam = cv2.VideoCapture(0)
        for vis in tqdm.tqdm(demo.run_on_video(cam, args.confidence_threshold)):
            cv2.namedWindow(WINDOW_NAME, cv2.WINDOW_NORMAL)
            cv2.imshow(WINDOW_NAME, vis)
            if cv2.waitKey(1) == 27:
                break  # esc to quit
        cam.release()
        cv2.destroyAllWindows()
    elif args.video_input:
        video = cv2.VideoCapture(args.video_input)
        width = int(video.get(cv2.CAP_PROP_FRAME_WIDTH))
        height = int(video.get(cv2.CAP_PROP_FRAME_HEIGHT))
        frames_per_second = video.get(cv2.CAP_PROP_FPS)
        num_frames = int(video.get(cv2.CAP_PROP_FRAME_COUNT))
        basename = os.path.basename(args.video_input)

        if args.output:
            else:
                cv2.namedWindow(WINDOW_NAME, cv2.WINDOW_NORMAL)
                cv2.imshow(WINDOW_NAME, visualized_output.get_image()[:, :, ::-1])
                num_instances = len(predictions["instances"])
                # call .numpy() to get the array (the bare attribute would bind the method)
                classes = predictions["instances"].pred_classes.numpy()
                if num_instances > 0:
                    labels = _create_text_labels(
                        classes, scores, ppl.metadata.get("thing_classes", None))
                if cv2.waitKey(0) == 27:
                    break  # esc to quit
    elif args.webcam:
        assert args.input is None, "Cannot have both --input and --webcam!"
        cam = cv2.VideoCapture(0)
        for vis in tqdm.tqdm(ppl.run_on_video(cam)):
            cv2.namedWindow(WINDOW_NAME, cv2.WINDOW_NORMAL)
            cv2.imshow(WINDOW_NAME, vis)
            if cv2.waitKey(1) == 27:
                break  # esc to quit
        cv2.destroyAllWindows()
    elif args.video_input:
        video = cv2.VideoCapture(args.video_input)
        width = int(video.get(cv2.CAP_PROP_FRAME_WIDTH))
        height = int(video.get(cv2.CAP_PROP_FRAME_HEIGHT))
        frames_per_second = video.get(cv2.CAP_PROP_FPS)
        num_frames = int(video.get(cv2.CAP_PROP_FRAME_COUNT))
        basename = os.path.basename(args.video_input)

        if args.output:
            if os.path.isdir(args.output):
        basename = os.path.basename(args.video_input)
        output_folder_name = os.path.join("../output/", os.path.splitext(basename)[0])
        os.makedirs(output_folder_name)
        if args.output:
            json_output_fname = os.path.join(args.output)[:-4]
        num_frames = int(video.get(cv2.CAP_PROP_FRAME_COUNT))
        demo = VisualizationDemo(cfg=cfg, parallel=args.parallel)
        window_size = 5

        # make a video with bounding boxes and collect per-frame instance information
        data_array = []
        idx = 0
        for frame_instances in tqdm.tqdm(demo.run_on_video(video), total=num_frames):
            idx += 1
            data_array.append(frame_instances)
        video.release()
        num_frames = idx

        pose_refinement = PoseRefinement(
            width, height, num_frames, basename, window_size
        )

        # correct keypoints using information from adjacent frames
        for i, frame_instances in tqdm.tqdm(enumerate(data_array), total=len(data_array)):
            boxes = [inst.bbox.tolist() for inst in frame_instances]
            keypoints = [inst.keypoint.tolist() for inst in frame_instances]
            indices = [inst.index for inst in frame_instances]
            if i >= window_size and i < len(data_array) - window_size:
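# PoseRefinement itself is not shown in this snippet; the sketch below only
# illustrates the kind of symmetric sliding-window smoothing that window_size
# and the i >= window_size / i < len(data_array) - window_size bounds suggest.
# The function name and the plain mean are assumptions, not the real class.
import numpy as np


def smooth_keypoints(keypoints_by_frame, window_size=5):
    """Average one person's keypoints over a symmetric window of neighbouring frames.

    keypoints_by_frame: list of (K, 3) arrays (x, y, score) for a single track.
    Frames closer than window_size to either end keep their raw values.
    """
    kps = np.asarray(keypoints_by_frame, dtype=float)   # (T, K, 3)
    smoothed = kps.copy()
    for t in range(window_size, len(kps) - window_size):
        window = kps[t - window_size:t + window_size + 1]
        smoothed[t, :, :2] = window[:, :, :2].mean(axis=0)  # smooth x, y only
    return smoothed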
                    assert len(
                        args.input
                    ) == 1, "Please specify a directory with args.output"
                    out_filename = args.output
                visualized_output.save(out_filename)
            else:
                cv2.namedWindow(WINDOW_NAME, cv2.WINDOW_NORMAL)
                cv2.imshow(WINDOW_NAME, visualized_output.get_image()[:, :, ::-1])
                if cv2.waitKey(0) == 27:
                    break  # esc to quit
    elif args.webcam:
        assert args.input is None, "Cannot have both --input and --webcam!"
        assert args.output is None, "output not yet supported with --webcam!"
        # cam = cv2.VideoCapture(0)
        for vis in tqdm.tqdm(demo.run_on_video(pipeline, pc)):
            cv2.namedWindow(WINDOW_NAME, cv2.WINDOW_NORMAL)
            cv2.imshow(WINDOW_NAME, vis)
            if cv2.waitKey(1) == 27:
                break  # esc to quit
        # cam.release()
        cv2.destroyAllWindows()
    elif args.video_input:
        video = cv2.VideoCapture(args.video_input)
        width = int(video.get(cv2.CAP_PROP_FRAME_WIDTH))
        height = int(video.get(cv2.CAP_PROP_FRAME_HEIGHT))
        frames_per_second = video.get(cv2.CAP_PROP_FPS)
        num_frames = int(video.get(cv2.CAP_PROP_FRAME_COUNT))
        basename = os.path.basename(args.video_input)

        if args.output:
                    assert len(
                        args.input
                    ) == 1, "Please specify a directory with args.output"
                    out_filename = args.output
                visualized_output.save(out_filename)
            else:
                cv2.namedWindow(WINDOW_NAME, cv2.WINDOW_NORMAL)
                cv2.imshow(WINDOW_NAME, visualized_output.get_image()[:, :, ::-1])
                if cv2.waitKey(0) == 27:
                    break  # esc to quit
    elif args.webcam:
        assert args.input is None, "Cannot have both --input and --webcam!"
        assert args.output is None, "output not yet supported with --webcam!"
        cam = cv2.VideoCapture(0)
        for vis in tqdm.tqdm(demo.run_on_video(cam)):
            cv2.namedWindow(WINDOW_NAME, cv2.WINDOW_NORMAL)
            cv2.imshow(WINDOW_NAME, vis)
            if cv2.waitKey(1) == 27:
                break  # esc to quit
        cam.release()
        cv2.destroyAllWindows()
    elif args.video_input:
        video = cv2.VideoCapture(args.video_input)
        width = int(video.get(cv2.CAP_PROP_FRAME_WIDTH))
        height = int(video.get(cv2.CAP_PROP_FRAME_HEIGHT))
        frames_per_second = video.get(cv2.CAP_PROP_FPS)
        num_frames = int(video.get(cv2.CAP_PROP_FRAME_COUNT))
        basename = os.path.basename(args.video_input)
        # "pred_boxes", "pred_classes", "scores", "pred_masks" (or "pred_masks_rle").
        observations = [{
                output_fname = os.path.splitext(output_fname)[0] + ".mkv"
            else:
                output_fname = args.output
            assert not os.path.isfile(output_fname), output_fname
            output_file = cv2.VideoWriter(
                filename=output_fname,
                # some installations of OpenCV may not support x264 (due to its license);
                # you can try another format (e.g. MPEG)
                fourcc=cv2.VideoWriter_fourcc(*"x264"),
                # fourcc=cv2.VideoWriter_fourcc(*"avc1"),
                fps=float(frames_per_second),
                frameSize=(width, height),
                isColor=True,
            )
        assert os.path.isfile(args.video_input)
        for predictions, vis_frame in tqdm.tqdm(demo.run_on_video(video), total=num_frames):
            print(predictions.pred_classes)
            print(predictions.pred_boxes)
            print(predictions.scores)
            if args.output:
                output_file.write(vis_frame)
            else:
                cv2.namedWindow(basename, cv2.WINDOW_NORMAL)
                cv2.imshow(basename, vis_frame)
                if cv2.waitKey(1) == 27:
                    break  # esc to quit
        video.release()
        if args.output:
            output_file.release()
        else:
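# The comment above notes that x264 may be unavailable in some OpenCV builds.
# Below is a hedged sketch of a codec fallback; the helper name and the codec
# order are assumptions, so adjust them to whatever your build supports.
import cv2


def open_writer(output_fname, fps, width, height):
    """Return an opened cv2.VideoWriter, trying x264 first and then fallbacks."""
    for codec in ("x264", "avc1", "mp4v", "MJPG"):
        writer = cv2.VideoWriter(
            filename=output_fname,
            fourcc=cv2.VideoWriter_fourcc(*codec),
            fps=float(fps),
            frameSize=(width, height),
            isColor=True,
        )
        if writer.isOpened():
            return writer
        writer.release()
    raise RuntimeError(f"No usable codec found for {output_fname}")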
                    assert len(
                        args.input
                    ) == 1, "Please specify a directory with args.output"
                    out_filename = args.output
                visualized_output.save(out_filename)
            else:
                cv2.namedWindow(WINDOW_NAME, cv2.WINDOW_NORMAL)
                cv2.imshow(WINDOW_NAME, visualized_output.get_image()[:, :, ::-1])
                if cv2.waitKey(0) == 27:
                    break  # esc to quit
    elif args.webcam:
        assert args.input is None, "Cannot have both --input and --webcam!"
        assert args.output is None, "output not yet supported with --webcam!"
        cam = cv2.VideoCapture(0)
        for vis in tqdm.tqdm(demo.run_on_video(cam)):
            cv2.namedWindow(WINDOW_NAME, cv2.WINDOW_NORMAL)
            cv2.imshow(WINDOW_NAME, vis)
            if cv2.waitKey(1) == 27:
                break  # esc to quit
        cam.release()
        cv2.destroyAllWindows()
    elif args.video_input:
        video = cv2.VideoCapture(args.video_input)
        width = int(video.get(cv2.CAP_PROP_FRAME_WIDTH))
        height = int(video.get(cv2.CAP_PROP_FRAME_HEIGHT))
        frames_per_second = video.get(cv2.CAP_PROP_FPS)
        num_frames = int(video.get(cv2.CAP_PROP_FRAME_COUNT))
        basename = os.path.basename(args.video_input)

        if args.output:
                    assert len(
                        args.input
                    ) == 1, "Please specify a directory with args.output"
                    out_filename = args.output
                visualized_output.save(out_filename)
            else:
                cv2.namedWindow(WINDOW_NAME, cv2.WINDOW_NORMAL)
                cv2.imshow(WINDOW_NAME, visualized_output.get_image()[:, :, ::-1])
                # cv2.imshow(WINDOW_NAME, cv2.bitwise_and(visualized_output.get_image()[:, :, ::-1], visualized_output.img))
                if cv2.waitKey(0) == 27:
                    break  # esc to quit
    elif args.webcam:
        assert args.input is None, "Cannot have both --input and --webcam!"
        cam = cv2.VideoCapture(0)
        for vis in tqdm.tqdm(demo.run_on_video(cam)):
            cv2.namedWindow(WINDOW_NAME, cv2.WINDOW_NORMAL)
            cv2.imshow(WINDOW_NAME, vis)
            if cv2.waitKey(1) == 27:
                break  # esc to quit
        cv2.destroyAllWindows()
    elif args.video_input:
        video = cv2.VideoCapture(args.video_input)
        width = int(video.get(cv2.CAP_PROP_FRAME_WIDTH))
        height = int(video.get(cv2.CAP_PROP_FRAME_HEIGHT))
        frames_per_second = video.get(cv2.CAP_PROP_FPS)
        num_frames = int(video.get(cv2.CAP_PROP_FRAME_COUNT))
        basename = os.path.basename(args.video_input)

        if args.output:
            if os.path.isdir(args.output):
def main_predict():
    try:
        mp.set_start_method("spawn", force=True)
        # args = get_parser().parse_args()
        args, unknown = get_parser().parse_known_args()
        setup_logger(name="fvcore")
        logger = setup_logger()
        logger.info("Arguments: " + str(args))
        cfg = setup_cfg(args)
        print("A ...................")
        demo = VisualizationDemo(cfg)
        print("B ...................")
        st = time.time()
        if args.video_input:
            # assert os.path.isfile(args.video_input)
            # vid_path = args.video_input
            # vid_name = vid_path.split('.mp4')[0]
            # stream = ffmpeg.input(vid_path)
            # stream = ffmpeg.filter(stream, 'fps', fps=args.fps, round='up')
            # stream = ffmpeg.output(stream, f'{vid_name}_resized.mp4')
            # ffmpeg.run(stream)
            # video = cv2.VideoCapture(f'{vid_name}_resized.mp4')
            # print("A")
            # message = request.get_json(force=True)
            # print("B")
            # encoded = message['image']
            # print("C")
            # decoded = base64.b64decode(encoded)
            # print("D")
            # video_decoded = np.asarray(bytearray(decoded), dtype="uint8")
            # print(f"decoded and shape is >>>>>>>>>> {video_decoded.shape}")
            # video = cv2.VideoCapture(video_decoded)
            print("E>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>")
            # video = cv2.VideoCapture('/data/work/colab_d2_copy/colab_d2/docker_files/detectron2/demo/new_clip_resized.mp4')
            video = cv2.VideoCapture(
                '/app/docker_files/detectron2/demo/new_clip_resized.mp4')
            width = int(video.get(cv2.CAP_PROP_FRAME_WIDTH))
            height = int(video.get(cv2.CAP_PROP_FRAME_HEIGHT))
            frames_per_second = video.get(cv2.CAP_PROP_FPS)
            num_frames = int(video.get(cv2.CAP_PROP_FRAME_COUNT))
            print(
                f'<<<<<<<<<<<<<<<<<<<<< {video.get(cv2.CAP_PROP_FRAME_COUNT)}')
            basename = os.path.basename(args.video_input)
            #######################
            duration = num_frames / frames_per_second
            print(f'num_frames is {num_frames}')
            print(f'duration is {duration} and fps is {frames_per_second}')
            counter = 0
            frames = []
            all_preds = demo.run_on_video(video)
            i = 1
            while i <= num_frames:
                semantic_predictions = next(all_preds)
                # semantic_predictions = item
                objs = []
                for s in semantic_predictions:
                    obj = {}
                    obj["label"] = s["text"]
                    obj['area_percentage'] = float("{0:.2f}".format(s['area'] / 1000000))
                    obj["score"] = float(
                        "{0:.2f}".format(s["score"] if "score" in s else 1))
                    objs.append(obj)
                obj_set = {}
                for s in semantic_predictions:
                    k = s["text"]
                    score = s["score"] if "score" in s else 1
                    if k not in obj_set:
                        obj_set[k] = {
                            "scores": [score],
                            "areas": [s["area"]],
                            "label": k
                        }
                    else:
                        obj_set[k]["scores"].append(score)
                        obj_set[k]["areas"].append(s["area"])
                u_objs = []
                for k in obj_set:
                    u = obj_set[k]
                    n = len(u["scores"])
                    score_ave = reduce((lambda x, y: x + y), u["scores"]) / n
                    area_sum = reduce((lambda x, y: x + y), u["areas"])
                    obj = {}
                    obj["label"] = u["label"]
                    obj['area_percentage'] = float("{0:.2f}".format(area_sum / 1000000))
                    obj["score"] = float("{0:.2f}".format(score_ave))
                    obj["count"] = n
                    u_objs.append(obj)
                frame = {
                    "frame": i,
                    "instances": objs,
                    "objects": u_objs,
                }
                print(f'num_frame is {i}')
                counter += 1
                i += 1
                frames.append(frame)
            cv2.destroyAllWindows()
            data = {
                "video": {
                    "meta": {},
                    "base_uri": "https://videobank.blob.core.windows.net/videobank",
                    "folder": args.video_input,
                    "output-frame-path": "pipeline/detectron2"
                },
                "ml-data": {
                    "object-detection": {
                        "meta": {
                            'duration': duration,
                            'fps': frames_per_second,
                            'len_frames': len(frames)
                        },
                        "video": {},
                        "frames": frames
                    }
                }
            }
            # print(f'data is {data}')
            return jsonify(data)
    except Exception as e:
        print(f"error is >>>>>>>> {e}")
        return "Wrong input format"
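# main_predict returns jsonify(...), which only works inside a Flask request
# context; a minimal sketch of how it might be exposed as a route. The app
# object, route path, HTTP method and port are assumptions, not the original
# service definition.
from flask import Flask

app = Flask(__name__)


@app.route("/predict", methods=["POST"])
def predict_route():
    # Delegates to main_predict, which reads its video path internally and
    # returns either a JSON response or the plain string "Wrong input format".
    return main_predict()


if __name__ == "__main__":
    app.run(host="0.0.0.0", port=5000)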
def object_d2(files):
    # mp.set_start_method("spawn", force=True)
    args, unknown = get_parser().parse_known_args()
    setup_logger(name="fvcore")
    logger = setup_logger()
    logger.info("Arguments: " + str(args))
    cfg = setup_cfg(args)
    print("A ...................")
    demo = VisualizationDemo(cfg)
    print("B ...................")
    basepath = '/data1/code_base/mnt_data/kubenetra/integration/vids'
    for video_id in tqdm(files):  # integration/vids/14686_.mp4
        try:
            print(f'this will be loaded >>>>>>> /mnt/az/kubenetra/blob_vid/{video_id}.mp4')
            video = cv2.VideoCapture(f'{basepath}/{video_id}.mp4')
            print("E>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>")
            # video = cv2.VideoCapture('/app/new_clip_resized_resized.mp4')
            width = int(video.get(cv2.CAP_PROP_FRAME_WIDTH))
            # print(f'width is {width}')
            height = int(video.get(cv2.CAP_PROP_FRAME_HEIGHT))
            frames_per_second = video.get(cv2.CAP_PROP_FPS)
            img_pixels = height * width
            if frames_per_second == 0:
                pass
            else:
                num_frames = int(video.get(cv2.CAP_PROP_FRAME_COUNT))
                print('<<<<<<<<<<<<<<<<<<<<< ', video.get(cv2.CAP_PROP_FRAME_COUNT))
                #######################
                duration = num_frames / frames_per_second
                print('num_frames is ', num_frames)
                # print(f'duration is {duration} and fps is {frames_per_second}')
                counter = 0
                frames = []
                all_preds = list(demo.run_on_video(video))
                i = 1
                total_frames = num_frames
                # while num_frames != 0:
                for num_frame, semantic_predictions in enumerate(all_preds):
                    # semantic_predictions = next(all_preds)
                    # semantic_predictions = item
                    objs = []
                    for s in semantic_predictions:
                        obj = {}
                        obj["label"] = s["text"]
                        obj['area_percentage'] = float("{0:.2f}".format(s['area'] / img_pixels))
                        obj["score"] = float("{0:.2f}".format(s["score"] if "score" in s else 1))
                        objs.append(obj)
                    obj_set = {}
                    for s in semantic_predictions:
                        k = s["text"]
                        score = s["score"] if "score" in s else 1
                        if k not in obj_set:
                            obj_set[k] = {
                                "scores": [score],
                                "areas": [s["area"]],
                                "label": k
                            }
                        else:
                            obj_set[k]["scores"].append(score)
                            obj_set[k]["areas"].append(s["area"])
                    u_objs = []
                    for k in obj_set:
                        u = obj_set[k]
                        n = len(u["scores"])
                        score_ave = reduce((lambda x, y: x + y), u["scores"]) / n
                        area_sum = reduce((lambda x, y: x + y), u["areas"])
                        obj = {}
                        obj["label"] = u["label"]
                        # NOTE: uses a fixed 1e6 denominator here, unlike the per-instance img_pixels above
                        obj['area_percentage'] = float("{0:.2f}".format(area_sum / 1000000))
                        obj["score"] = float("{0:.2f}".format(score_ave))
                        obj["count"] = n
                        u_objs.append(obj)
                    frame = {
                        "frame": num_frame + 1,  # 1-based frame index
                        "instances": objs,
                        "objects": u_objs,
                    }
                    # print('num_frame is ', total_frames - num_frames + 1)
                    print('num_frame is ', num_frame + 1)
                    # counter += 1
                    # num_frames -= 1
                    # i += 1
                    frames.append(frame)
                cv2.destroyAllWindows()
                data = {
                    "video": {
                        "meta": {},
                        "base_uri": "https://videobank.blob.core.windows.net/videobank",
                        "folder": args.video_input,
                        "output-frame-path": "pipeline/detectron2"
                    },
                    "ml-data": {
                        "object-detection": {
                            "meta": {'duration': duration, 'fps': frames_per_second, 'len_frames': len(frames)},
                            "video": {},
                            "frames": frames
                        }
                    }
                }
                # print(f'data is {data}')
                # try:
                #     os.remove(f'./blob_vid/{video_id}.txt')
                # except OSError:
                #     pass
                # return data
                print(f'writing OD outs inside >>> {basepath}/{video_id}.json')
                with open(f'{basepath}/{video_id}.json', 'w') as f:
                    json.dump(data, f)
        except Exception as e:
            print(e)
            with open('./err_vids.txt', 'a') as f:
                f.write(str(e))
            pass