def main():
    # model files check and download
    check_and_download_models(WEIGHT_PATH, MODEL_PATH, REMOTE_PATH)

    # net initialize
    if args.detector:
        detector = ailia.Detector(
            MODEL_PATH,
            WEIGHT_PATH,
            len(COCO_CATEGORY),
            format=ailia.NETWORK_IMAGE_FORMAT_RGB,
            channel=ailia.NETWORK_IMAGE_CHANNEL_FIRST,
            range=ailia.NETWORK_IMAGE_RANGE_U_FP32,
            algorithm=ailia.DETECTOR_ALGORITHM_YOLOV4,
            env_id=args.env_id,
        )
    else:
        detector = ailia.Net(MODEL_PATH, WEIGHT_PATH, env_id=args.env_id)
        detector.set_input_shape((1, 3, IMAGE_HEIGHT, IMAGE_WIDTH))

    if args.video is not None:
        # video mode
        recognize_from_video(detector)
    else:
        # image mode
        recognize_from_image(detector)
def main():
    # model files check and download
    logger.info('Check vehicle-attributes-recognition model...')
    check_and_download_models(WEIGHT_PATH, MODEL_PATH, REMOTE_PATH)
    if args.video or args.detection:
        logger.info('Check object detection model...')
        check_and_download_models(DT_WEIGHT_PATH, DT_MODEL_PATH, DT_REMOTE_PATH)

    env_id = args.env_id

    # net initialize
    net = ailia.Net(MODEL_PATH, WEIGHT_PATH, env_id=env_id)
    if args.video or args.detection:
        detector = ailia.Detector(
            DT_MODEL_PATH,
            DT_WEIGHT_PATH,
            len(COCO_CATEGORY),
            format=ailia.NETWORK_IMAGE_FORMAT_RGB,
            channel=ailia.NETWORK_IMAGE_CHANNEL_FIRST,
            range=ailia.NETWORK_IMAGE_RANGE_U_FP32,
            algorithm=ailia.DETECTOR_ALGORITHM_YOLOV3,
            env_id=env_id,
        )
    else:
        detector = None

    if args.video:
        # video mode
        recognize_from_video(net, detector)
    else:
        # image mode
        recognize_from_image(net, detector)
def main():
    # model files check and download
    logger.info('=== GMM model ===')
    check_and_download_models(WEIGHT_GMM_PATH, MODEL_GMM_PATH, REMOTE_PATH)
    logger.info('=== TOM model ===')
    check_and_download_models(WEIGHT_TOM_PATH, MODEL_TOM_PATH, REMOTE_PATH)
    if args.video or not args.keypoints:
        logger.info('=== detector model ===')
        check_and_download_models(
            WEIGHT_YOLOV3_PATH, MODEL_YOLOV3_PATH, REMOTE_YOLOV3_PATH
        )
        logger.info('=== pose model ===')
        check_and_download_models(WEIGHT_POSE_PATH, MODEL_POSE_PATH, REMOTE_POSE_PATH)
    if args.video or not args.parse:
        logger.info('=== human segmentation model ===')
        check_and_download_models(WEIGHT_SEG_PATH, MODEL_SEG_PATH, REMOTE_SEG_PATH)

    # initialize
    if args.onnx:
        import onnxruntime
        GMM_net = onnxruntime.InferenceSession(WEIGHT_GMM_PATH)
        TOM_net = onnxruntime.InferenceSession(WEIGHT_TOM_PATH)
    else:
        GMM_net = ailia.Net(MODEL_GMM_PATH, WEIGHT_GMM_PATH, env_id=args.env_id)
        TOM_net = ailia.Net(MODEL_TOM_PATH, WEIGHT_TOM_PATH, env_id=args.env_id)

    if args.video or not args.keypoints:
        det_net = ailia.Detector(
            MODEL_YOLOV3_PATH,
            WEIGHT_YOLOV3_PATH,
            80,
            format=ailia.NETWORK_IMAGE_FORMAT_RGB,
            channel=ailia.NETWORK_IMAGE_CHANNEL_FIRST,
            range=ailia.NETWORK_IMAGE_RANGE_U_FP32,
            algorithm=ailia.DETECTOR_ALGORITHM_YOLOV3,
            env_id=args.env_id,
        )
        pose_net = ailia.Net(MODEL_POSE_PATH, WEIGHT_POSE_PATH, env_id=args.env_id)
    else:
        det_net = pose_net = None
    if args.video or not args.parse:
        seg_net = ailia.Net(MODEL_SEG_PATH, WEIGHT_SEG_PATH, env_id=args.env_id)
    else:
        seg_net = None

    if args.video is not None:
        # video mode
        recognize_from_video(GMM_net, TOM_net, det_net, pose_net, seg_net)
    else:
        # image mode
        recognize_from_image(GMM_net, TOM_net, det_net, pose_net, seg_net)
def recognize_from_image():
    # prepare input data
    img = load_image(args.input)
    print(f'input image shape: {img.shape}')

    # net initialize
    env_id = ailia.get_gpu_environment_id()
    print(f'env_id: {env_id}')
    detector = ailia.Detector(
        MODEL_PATH,
        WEIGHT_PATH,
        len(COCO_CATEGORY),
        format=ailia.NETWORK_IMAGE_FORMAT_RGB,
        channel=ailia.NETWORK_IMAGE_CHANNEL_FIRST,
        range=ailia.NETWORK_IMAGE_RANGE_S_FP32,
        algorithm=ailia.DETECTOR_ALGORITHM_YOLOV2,
        env_id=env_id,
    )
    detector.set_anchors(ANCHORS)

    # compute execution time
    for i in range(5):
        start = int(round(time.time() * 1000))
        detector.compute(img, THRESHOLD, IOU)
        end = int(round(time.time() * 1000))
        print(f'ailia processing time {end - start} ms')

    # plot result
    res_img = plot_results(detector, img, COCO_CATEGORY)
    cv2.imwrite(args.savepath, res_img)
    print('Script finished successfully.')
def recognize_from_image():
    # prepare input data
    img = load_image(args.input)
    print(f'input image shape: {img.shape}')

    # net initialize
    env_id = ailia.get_gpu_environment_id()
    print(f'env_id: {env_id}')
    detector = ailia.Detector(
        MODEL_PATH,
        WEIGHT_PATH,
        len(FACE_CATEGORY),
        format=ailia.NETWORK_IMAGE_FORMAT_RGB,
        channel=ailia.NETWORK_IMAGE_CHANNEL_FIRST,
        range=ailia.NETWORK_IMAGE_RANGE_U_FP32,
        algorithm=ailia.DETECTOR_ALGORITHM_YOLOV3,
        env_id=env_id,
    )

    # inference
    print('Start inference...')
    if args.benchmark:
        print('BENCHMARK mode')
        for i in range(5):
            start = int(round(time.time() * 1000))
            detector.compute(img, THRESHOLD, IOU)
            end = int(round(time.time() * 1000))
            print(f'\tailia processing time {end - start} ms')
    else:
        detector.compute(img, THRESHOLD, IOU)

    # plot result
    res_img = plot_results(detector, img, FACE_CATEGORY)
    cv2.imwrite(args.savepath, res_img)
    print('Script finished successfully.')
def recognize_from_image():
    # prepare input data
    img = load_image(args.input)
    print(f'input image shape: {img.shape}')

    # net initialize
    detector = ailia.Detector(
        MODEL_PATH,
        WEIGHT_PATH,
        len(COCO_CATEGORY),
        format=ailia.NETWORK_IMAGE_FORMAT_RGB,
        channel=ailia.NETWORK_IMAGE_CHANNEL_FIRST,
        range=ailia.NETWORK_IMAGE_RANGE_U_FP32,
        algorithm=ailia.DETECTOR_ALGORITHM_YOLOV3,
        env_id=args.env_id,
    )
    if args.detection_width != DETECTION_SIZE or args.detection_height != DETECTION_SIZE:
        detector.set_input_shape(args.detection_width, args.detection_height)

    # inference
    print('Start inference...')
    if args.benchmark:
        print('BENCHMARK mode')
        for i in range(5):
            start = int(round(time.time() * 1000))
            detector.compute(img, args.threshold, args.iou)
            end = int(round(time.time() * 1000))
            print(f'\tailia processing time {end - start} ms')
    else:
        detector.compute(img, args.threshold, args.iou)

    # plot result
    res_img = plot_results(detector, img, COCO_CATEGORY)
    cv2.imwrite(args.savepath, res_img)
    print('Script finished successfully.')
def main():
    # model files check and download
    check_and_download_models(WEIGHT_PATH, MODEL_PATH, REMOTE_PATH)

    env_id = args.env_id

    if args.detector:
        detector = ailia.Detector(
            MODEL_PATH,
            WEIGHT_PATH,
            len(COCO_CATEGORY),
            format=ailia.NETWORK_IMAGE_FORMAT_BGR,
            channel=ailia.NETWORK_IMAGE_CHANNEL_FIRST,
            range=ailia.NETWORK_IMAGE_RANGE_U_INT8,
            algorithm=ailia.DETECTOR_ALGORITHM_YOLOX,
            env_id=env_id,
        )
        if args.detection_width != -1 or args.detection_height != -1:
            detector.set_input_shape(args.detection_width, args.detection_height)
    else:
        detector = ailia.Net(MODEL_PATH, WEIGHT_PATH, env_id=env_id)
        if args.detection_width != -1 or args.detection_height != -1:
            global WIDTH, HEIGHT
            WIDTH = args.detection_width
            HEIGHT = args.detection_height
            detector.set_input_shape((1, 3, HEIGHT, WIDTH))

    if args.video is not None:
        # video mode
        recognize_from_video(detector)
    else:
        # image mode
        recognize_from_image(detector)
def recognize_from_video():
    # net initialize
    detector = ailia.Detector(
        MODEL_PATH,
        WEIGHT_PATH,
        len(COCO_CATEGORY),
        format=ailia.NETWORK_IMAGE_FORMAT_RGB,
        channel=ailia.NETWORK_IMAGE_CHANNEL_FIRST,
        range=ailia.NETWORK_IMAGE_RANGE_U_FP32,
        algorithm=ailia.DETECTOR_ALGORITHM_YOLOV3,
        env_id=args.env_id,
    )
    if args.detection_width != DETECTION_SIZE or args.detection_height != DETECTION_SIZE:
        detector.set_input_shape(args.detection_width, args.detection_height)

    capture = webcamera_utils.get_capture(args.video)

    # create video writer if savepath is specified as video format
    if args.savepath != SAVE_IMAGE_PATH:
        f_h = int(capture.get(cv2.CAP_PROP_FRAME_HEIGHT))
        f_w = int(capture.get(cv2.CAP_PROP_FRAME_WIDTH))
        writer = webcamera_utils.get_writer(args.savepath, f_h, f_w)
    else:
        writer = None

    if args.write_prediction:
        frame_count = 0
        frame_digit = int(math.log10(capture.get(cv2.CAP_PROP_FRAME_COUNT)) + 1)
        video_name = os.path.splitext(os.path.basename(args.video))[0]

    while True:
        ret, frame = capture.read()
        if (cv2.waitKey(1) & 0xFF == ord('q')) or not ret:
            break

        img = cv2.cvtColor(frame, cv2.COLOR_BGR2BGRA)
        detector.compute(img, args.threshold, args.iou)
        res_img = plot_results(detector, frame, COCO_CATEGORY, False)
        cv2.imshow('frame', res_img)

        # save results
        if writer is not None:
            writer.write(res_img)

        # write prediction
        if args.write_prediction:
            savepath = get_savepath(
                args.savepath,
                video_name,
                post_fix='_%s' % (str(frame_count).zfill(frame_digit) + '_res'),
                ext='.png',
            )
            pred_file = '%s.txt' % savepath.rsplit('.', 1)[0]
            write_predictions(pred_file, detector, frame, COCO_CATEGORY)
            frame_count += 1

    capture.release()
    cv2.destroyAllWindows()
    if writer is not None:
        writer.release()
    logger.info('Script finished successfully.')
def recognize_from_image():
    # net initialize
    detector = ailia.Detector(
        MODEL_PATH,
        WEIGHT_PATH,
        len(COCO_CATEGORY),
        format=ailia.NETWORK_IMAGE_FORMAT_RGB,
        channel=ailia.NETWORK_IMAGE_CHANNEL_FIRST,
        range=ailia.NETWORK_IMAGE_RANGE_U_FP32,
        algorithm=ailia.DETECTOR_ALGORITHM_YOLOV3,
        env_id=args.env_id,
    )
    if args.detection_width != DETECTION_SIZE or args.detection_height != DETECTION_SIZE:
        detector.set_input_shape(args.detection_width, args.detection_height)

    if args.profile:
        detector.set_profile_mode(True)

    # input image loop
    for image_path in args.input:
        # prepare input data
        logger.info(image_path)
        img = load_image(image_path)
        logger.debug(f'input image shape: {img.shape}')

        # inference
        logger.info('Start inference...')
        if args.benchmark:
            logger.info('BENCHMARK mode')
            total_time = 0
            for i in range(args.benchmark_count):
                start = int(round(time.time() * 1000))
                detector.compute(img, args.threshold, args.iou)
                end = int(round(time.time() * 1000))
                if i != 0:
                    total_time = total_time + (end - start)
                logger.info(f'\tailia processing time {end - start} ms')
            logger.info(f'\taverage time {total_time / (args.benchmark_count - 1)} ms')
        else:
            detector.compute(img, args.threshold, args.iou)

        # plot result
        res_img = plot_results(detector, img, COCO_CATEGORY)
        savepath = get_savepath(args.savepath, image_path)
        logger.info(f'saved at : {savepath}')
        cv2.imwrite(savepath, res_img)

        # write prediction
        if args.write_prediction:
            pred_file = '%s.txt' % savepath.rsplit('.', 1)[0]
            write_predictions(pred_file, detector, img, COCO_CATEGORY)

    if args.profile:
        print(detector.get_summary())

    logger.info('Script finished successfully.')
def recognize_from_video():
    # net initialize
    if args.detector:
        detector = ailia.Detector(
            MODEL_PATH,
            WEIGHT_PATH,
            len(CATEGORY),
            format=ailia.NETWORK_IMAGE_FORMAT_RGB,
            channel=ailia.NETWORK_IMAGE_CHANNEL_FIRST,
            range=ailia.NETWORK_IMAGE_RANGE_S_FP32,
            algorithm=ailia.DETECTOR_ALGORITHM_YOLOV2,
            env_id=args.env_id,
        )
        detector.set_anchors(ANCHORS)
    else:
        net = ailia.Net(None, WEIGHT_PATH)

    capture = webcamera_utils.get_capture(args.video)

    # create video writer if savepath is specified as video format
    if args.savepath != SAVE_IMAGE_PATH:
        f_h = int(capture.get(cv2.CAP_PROP_FRAME_HEIGHT))
        f_w = int(capture.get(cv2.CAP_PROP_FRAME_WIDTH))
        writer = webcamera_utils.get_writer(args.savepath, f_h, f_w)
    else:
        writer = None

    while True:
        ret, frame = capture.read()
        if (cv2.waitKey(1) & 0xFF == ord('q')) or not ret:
            break

        if args.detector:
            img = cv2.cvtColor(frame, cv2.COLOR_BGR2BGRA)
            detector.compute(img, THRESHOLD, IOU)
            res_img = plot_results(detector, frame, CATEGORY, False)
        else:
            img_PIL = Image.fromarray(frame)
            img = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)
            img = cv2.resize(img, (416, 416))
            img = img.transpose((2, 0, 1)) / 255
            img = img[np.newaxis, :, :, :].astype(np.float32)
            results = net.run([img])
            results = torch.FloatTensor(results[0])
            output_img = detect(img_PIL, results, video=True)
            res_img = np.array(output_img, dtype=np.uint8)

        cv2.imshow('frame', res_img)

        # save results
        if writer is not None:
            writer.write(res_img)

    capture.release()
    cv2.destroyAllWindows()
    if writer is not None:
        writer.release()
    logger.info('Script finished successfully.')
def recognize_from_video():
    # net initialize
    detector = ailia.Detector(
        MODEL_PATH,
        WEIGHT_PATH,
        len(FACE_CATEGORY),
        format=ailia.NETWORK_IMAGE_FORMAT_RGB,
        channel=ailia.NETWORK_IMAGE_CHANNEL_FIRST,
        range=RANGE,
        algorithm=ALGORITHM,
        env_id=args.env_id,
    )

    capture = webcamera_utils.get_capture(args.video)

    if args.savepath != SAVE_IMAGE_PATH:
        writer = webcamera_utils.get_writer(
            args.savepath,
            IMAGE_HEIGHT,
            IMAGE_WIDTH,
            fps=capture.get(cv2.CAP_PROP_FPS),
        )
    else:
        writer = None

    while True:
        ret, frame = capture.read()
        if (cv2.waitKey(1) & 0xFF == ord('q')) or not ret:
            break

        _, resized_img = webcamera_utils.adjust_frame_size(
            frame, IMAGE_HEIGHT, IMAGE_WIDTH
        )
        img = cv2.cvtColor(resized_img, cv2.COLOR_BGR2BGRA)
        detector.compute(img, THRESHOLD, IOU)

        detections = []
        for idx in range(detector.get_object_count()):
            obj = detector.get_object(idx)
            detections.append(obj)
        detections = nms_between_categories(
            detections,
            frame.shape[1],
            frame.shape[0],
            categories=[0, 1],
            iou_threshold=IOU,
        )

        res_img = plot_results(detections, resized_img, FACE_CATEGORY, False)
        cv2.imshow('frame', res_img)

        # save results
        if writer is not None:
            writer.write(res_img)

    capture.release()
    cv2.destroyAllWindows()
    if writer is not None:
        writer.release()
    logger.info('Script finished successfully.')
def init_detector(env_id):
    detector = ailia.Detector(
        DT_MODEL_PATH,
        DT_WEIGHT_PATH,
        len(COCO_CATEGORY),
        format=ailia.NETWORK_IMAGE_FORMAT_RGB,
        channel=ailia.NETWORK_IMAGE_CHANNEL_FIRST,
        range=ailia.NETWORK_IMAGE_RANGE_U_FP32,
        algorithm=ailia.DETECTOR_ALGORITHM_YOLOV3,
        env_id=env_id,
    )
    return detector
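# Illustrative usage sketch (not part of the original script): the detector
# returned by init_detector() is driven with the same compute/get_object API
# used by the other samples in this file. `img` is assumed to be a BGRA image,
# and THRESHOLD / IOU are assumed module-level constants.
detector = init_detector(args.env_id)
detector.compute(img, THRESHOLD, IOU)
for idx in range(detector.get_object_count()):
    obj = detector.get_object(idx)  # DetectorObject with category, prob, x, y, w, h
    print(obj.category, obj.prob)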
def recognize_from_image():
    # net initialize
    categories = 80
    threshold = 0.4
    iou = 0.45
    detector = ailia.Detector(
        MODEL_PATH,
        WEIGHT_PATH,
        categories,
        format=ailia.NETWORK_IMAGE_FORMAT_RGB,
        channel=ailia.NETWORK_IMAGE_CHANNEL_FIRST,
        range=ailia.NETWORK_IMAGE_RANGE_U_FP32,
        algorithm=ailia.DETECTOR_ALGORITHM_SSD,
        env_id=args.env_id,
    )
    if args.profile:
        detector.set_profile_mode(True)

    # input image loop
    for image_path in args.input:
        # prepare input data
        logger.info(image_path)
        org_img = load_image(
            image_path,
            (IMAGE_HEIGHT, IMAGE_WIDTH),
            normalize_type='None',
        )
        if org_img.shape[2] == 3:
            org_img = cv2.cvtColor(org_img, cv2.COLOR_RGB2BGRA)

        # inference
        logger.info('Start inference...')
        if args.benchmark:
            logger.info('BENCHMARK mode')
            for i in range(5):
                start = int(round(time.time() * 1000))
                detector.compute(org_img, threshold, iou)
                end = int(round(time.time() * 1000))
                logger.info(f'\tailia processing time {end - start} ms')
        else:
            detector.compute(org_img, threshold, iou)

        # postprocessing
        res_img = plot_results(detector, org_img, VOC_CATEGORY)
        savepath = get_savepath(args.savepath, image_path)
        logger.info(f'saved at : {savepath}')
        cv2.imwrite(savepath, res_img)

    if args.profile:
        print(detector.get_summary())

    logger.info('Script finished successfully.')
def main():
    # model files check and download
    check_and_download_models(WEIGHT_PATH, MODEL_PATH, REMOTE_PATH)

    # check that the input/output folders exist
    if not os.path.exists(args.input):
        print("error: directory not found " + args.input)
        sys.exit(1)
    if not os.path.exists(args.output):
        os.mkdir(args.output)

    # net initialize
    env_id = ailia.get_gpu_environment_id()
    print(f'env_id: {env_id}')
    if args.arch == 'blazeface':
        detector = ailia.Net(MODEL_PATH, WEIGHT_PATH, env_id=env_id)
    else:
        detector = ailia.Detector(
            MODEL_PATH,
            WEIGHT_PATH,
            len(FACE_CATEGORY),
            format=ailia.NETWORK_IMAGE_FORMAT_RGB,
            channel=ailia.NETWORK_IMAGE_CHANNEL_FIRST,
            range=ailia.NETWORK_IMAGE_RANGE_U_FP32,
            algorithm=ailia.DETECTOR_ALGORITHM_YOLOV3,
            env_id=env_id,
        )

    # process images
    no = 0
    for src_dir, dirs, files in os.walk(args.input):
        files = sorted(files)
        for file_ in files:
            root, ext = os.path.splitext(file_)
            if file_ == ".DS_Store":
                continue
            if file_ == "Thumbs.db":
                continue
            if ext not in (".jpg", ".png", ".bmp"):
                continue
            print(src_dir + "/" + file_)
            folders = src_dir.split("/")
            folder = folders[len(folders) - 1]
            dst_dir = args.output + "/" + folder
            if not os.path.exists(dst_dir):
                os.mkdir(dst_dir)
            dst_path = dst_dir + "/" + str(no) + ".jpg"
            recognize_from_image(detector, dst_path, src_dir, file_)
            no = no + 1
def recognize_from_image():
    # net initialize
    detector = ailia.Detector(
        MODEL_PATH,
        WEIGHT_PATH,
        len(COCO_CATEGORY),
        format=ailia.NETWORK_IMAGE_FORMAT_RGB,
        channel=ailia.NETWORK_IMAGE_CHANNEL_FIRST,
        range=ailia.NETWORK_IMAGE_RANGE_U_FP32,
        algorithm=ailia.DETECTOR_ALGORITHM_YOLOV3,
        env_id=args.env_id,
    )
    pose = ailia.Net(POSE_MODEL_PATH, POSE_WEIGHT_PATH, env_id=args.env_id)

    # input image loop
    for image_path in args.input:
        # prepare input data
        logger.info(image_path)
        img = load_image(image_path)
        logger.debug(f'input image shape: {img.shape}')

        # inference
        logger.info('Start inference...')
        detector.compute(img, THRESHOLD, IOU)

        # pose estimation
        if args.benchmark:
            logger.info('BENCHMARK mode')
            total_time = 0
            for i in range(args.benchmark_count):
                start = int(round(time.time() * 1000))
                pose_detections = pose_estimation(detector, pose, img)
                end = int(round(time.time() * 1000))
                logger.info(f'\tailia processing detection time {end - start} ms')
                if i != 0:
                    total_time = total_time + (end - start)
            logger.info(
                f'\taverage detection time {total_time / (args.benchmark_count - 1)} ms'
            )
        else:
            pose_detections = pose_estimation(detector, pose, img)

        # plot result
        res_img = plot_results(detector, pose, img, COCO_CATEGORY, pose_detections)
        savepath = get_savepath(args.savepath, image_path)
        logger.info(f'saved at : {savepath}')
        cv2.imwrite(savepath, res_img)

    logger.info('Script finished successfully.')
def recognize_from_video():
    # net initialize
    env_id = ailia.get_gpu_environment_id()
    print(f'env_id: {env_id}')
    detector = ailia.Detector(
        MODEL_PATH,
        WEIGHT_PATH,
        len(FACE_CATEGORY),
        format=ailia.NETWORK_IMAGE_FORMAT_RGB,
        channel=ailia.NETWORK_IMAGE_CHANNEL_FIRST,
        range=RANGE,
        algorithm=ALGORITHM,
        env_id=env_id,
    )

    if args.video == '0':
        print('[INFO] Webcam mode is activated')
        capture = cv2.VideoCapture(0)
        if not capture.isOpened():
            print("[ERROR] webcamera not found")
            sys.exit(1)
    else:
        if check_file_existance(args.video):
            capture = cv2.VideoCapture(args.video)

    while True:
        ret, frame = capture.read()
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
        if not ret:
            continue

        _, resized_img = adjust_frame_size(frame, IMAGE_HEIGHT, IMAGE_WIDTH)
        img = cv2.cvtColor(resized_img, cv2.COLOR_BGR2BGRA)
        detector.compute(img, THRESHOLD, IOU)

        detections = []
        for idx in range(detector.get_object_count()):
            obj = detector.get_object(idx)
            detections.append(obj)
        detections = nms_between_categories(
            detections,
            frame.shape[1],
            frame.shape[0],
            categories=[0, 1],
            iou_threshold=IOU,
        )

        res_img = plot_results(detections, resized_img, FACE_CATEGORY, False)
        cv2.imshow('frame', res_img)

    capture.release()
    cv2.destroyAllWindows()
    print('Script finished successfully.')
def recognize_from_image():
    # net initialize
    detector = ailia.Detector(
        MODEL_PATH,
        WEIGHT_PATH,
        len(FACE_CATEGORY),
        format=ailia.NETWORK_IMAGE_FORMAT_RGB,
        channel=ailia.NETWORK_IMAGE_CHANNEL_FIRST,
        range=RANGE,
        algorithm=ALGORITHM,
        env_id=args.env_id,
    )

    # input image loop
    for image_path in args.input:
        # prepare input data
        logger.info(image_path)
        img = load_image(image_path)
        logger.debug(f'input image shape: {img.shape}')

        # inference
        logger.info('Start inference...')
        if args.benchmark:
            logger.info('BENCHMARK mode')
            for i in range(5):
                start = int(round(time.time() * 1000))
                detector.compute(img, THRESHOLD, IOU)
                end = int(round(time.time() * 1000))
                logger.info(f'\tailia processing time {end - start} ms')
        else:
            detector.compute(img, THRESHOLD, IOU)

        # nms
        detections = []
        for idx in range(detector.get_object_count()):
            obj = detector.get_object(idx)
            detections.append(obj)
        detections = nms_between_categories(
            detections,
            img.shape[1],
            img.shape[0],
            categories=[0, 1],
            iou_threshold=IOU,
        )

        # plot result
        res_img = plot_results(detections, img, FACE_CATEGORY)
        savepath = get_savepath(args.savepath, image_path)
        logger.info(f'saved at : {savepath}')
        cv2.imwrite(savepath, res_img)

    logger.info('Script finished successfully.')
def __init__(self):
    check_and_download_models(WEIGHT_PATH_YOLO, MODEL_PATH_YOLO, REMOTE_PATH_YOLO)

    # net initialize
    env_id = ailia.get_gpu_environment_id()
    self.detector = ailia.Detector(
        MODEL_PATH_YOLO,
        WEIGHT_PATH_YOLO,
        len(FACE_CATEGORY),
        format=ailia.NETWORK_IMAGE_FORMAT_RGB,
        channel=ailia.NETWORK_IMAGE_CHANNEL_FIRST,
        range=ailia.NETWORK_IMAGE_RANGE_U_FP32,
        algorithm=ailia.DETECTOR_ALGORITHM_YOLOV3,
        env_id=env_id,
    )
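# Hedged usage sketch: the wrapper class above is shown without its name, so
# `FaceDetectorWrapper` here is hypothetical. Its detector is driven with the
# compute/get_object pattern used throughout these samples; `frame` is an
# assumed BGR frame, and THRESHOLD / IOU are assumed constants.
wrapper = FaceDetectorWrapper()
img = cv2.cvtColor(frame, cv2.COLOR_BGR2BGRA)
wrapper.detector.compute(img, THRESHOLD, IOU)
faces = [
    wrapper.detector.get_object(i)
    for i in range(wrapper.detector.get_object_count())
]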
def recognize_from_video():
    # net initialize
    categories = 80
    threshold = 0.4
    iou = 0.45
    detector = ailia.Detector(
        MODEL_PATH,
        WEIGHT_PATH,
        categories,
        format=ailia.NETWORK_IMAGE_FORMAT_RGB,
        channel=ailia.NETWORK_IMAGE_CHANNEL_FIRST,
        range=ailia.NETWORK_IMAGE_RANGE_U_FP32,
        algorithm=ailia.DETECTOR_ALGORITHM_SSD,
        env_id=args.env_id,
    )

    capture = webcamera_utils.get_capture(args.video)

    # create video writer if savepath is specified as video format
    if args.savepath != SAVE_IMAGE_PATH:
        writer = webcamera_utils.get_writer(args.savepath, IMAGE_HEIGHT, IMAGE_WIDTH)
    else:
        writer = None

    while True:
        ret, frame = capture.read()
        if (cv2.waitKey(1) & 0xFF == ord('q')) or not ret:
            break

        _, resized_img = webcamera_utils.adjust_frame_size(
            frame, IMAGE_HEIGHT, IMAGE_WIDTH
        )
        img = cv2.cvtColor(resized_img, cv2.COLOR_BGR2BGRA)
        detector.compute(img, threshold, iou)
        res_img = plot_results(detector, resized_img, VOC_CATEGORY, False)
        cv2.imshow('frame', res_img)

        # save results
        if writer is not None:
            writer.write(res_img)

    capture.release()
    cv2.destroyAllWindows()
    if writer is not None:
        writer.release()
    logger.info('Script finished successfully.')
def recognize_from_video():
    # net initialize
    detector = ailia.Detector(
        MODEL_PATH,
        WEIGHT_PATH,
        len(COCO_CATEGORY),
        format=ailia.NETWORK_IMAGE_FORMAT_RGB,
        channel=ailia.NETWORK_IMAGE_CHANNEL_FIRST,
        range=ailia.NETWORK_IMAGE_RANGE_U_FP32,
        algorithm=ailia.DETECTOR_ALGORITHM_YOLOV3,
        env_id=args.env_id,
    )
    if args.detection_width != DETECTION_SIZE or args.detection_height != DETECTION_SIZE:
        detector.set_input_shape(args.detection_width, args.detection_height)

    capture = webcamera_utils.get_capture(args.video)

    # create video writer if savepath is specified as video format
    if args.savepath != SAVE_IMAGE_PATH:
        f_h = int(capture.get(cv2.CAP_PROP_FRAME_HEIGHT))
        f_w = int(capture.get(cv2.CAP_PROP_FRAME_WIDTH))
        writer = webcamera_utils.get_writer(args.savepath, f_h, f_w)
    else:
        writer = None

    while True:
        ret, frame = capture.read()
        if (cv2.waitKey(1) & 0xFF == ord('q')) or not ret:
            break

        img = cv2.cvtColor(frame, cv2.COLOR_BGR2BGRA)
        detector.compute(img, args.threshold, args.iou)
        res_img = plot_results(detector, frame, COCO_CATEGORY, False)
        cv2.imshow('frame', res_img)

        # save results
        if writer is not None:
            writer.write(res_img)

    capture.release()
    cv2.destroyAllWindows()
    if writer is not None:
        writer.release()
    logger.info('Script finished successfully.')
def recognize_from_image():
    # net initialize
    detector = ailia.Detector(
        MODEL_PATH,
        WEIGHT_PATH,
        len(COCO_CATEGORY),
        format=ailia.NETWORK_IMAGE_FORMAT_RGB,
        channel=ailia.NETWORK_IMAGE_CHANNEL_FIRST,
        range=ailia.NETWORK_IMAGE_RANGE_S_FP32,
        algorithm=ailia.DETECTOR_ALGORITHM_YOLOV2,
        env_id=args.env_id,
    )
    detector.set_anchors(ANCHORS)
    if args.profile:
        detector.set_profile_mode(True)

    # input image loop
    for image_path in args.input:
        # prepare input data
        logger.info(image_path)
        img = load_image(image_path)
        logger.debug(f'input image shape: {img.shape}')

        # inference
        logger.info('Start inference...')
        if args.benchmark:
            logger.info('BENCHMARK mode')
            for i in range(5):
                start = int(round(time.time() * 1000))
                detector.compute(img, THRESHOLD, IOU)
                end = int(round(time.time() * 1000))
                logger.info(f'\tailia processing time {end - start} ms')
        else:
            detector.compute(img, THRESHOLD, IOU)

        # plot result
        res_img = plot_results(detector, img, COCO_CATEGORY)
        savepath = get_savepath(args.savepath, image_path)
        logger.info(f'saved at : {savepath}')
        cv2.imwrite(savepath, res_img)

    if args.profile:
        print(detector.get_summary())

    logger.info('Script finished successfully.')
def recognize_from_video():
    # net initialize
    env_id = ailia.get_gpu_environment_id()
    print(f'env_id: {env_id}')
    detector = ailia.Detector(
        MODEL_PATH,
        WEIGHT_PATH,
        len(COCO_CATEGORY),
        format=ailia.NETWORK_IMAGE_FORMAT_RGB,
        channel=ailia.NETWORK_IMAGE_CHANNEL_FIRST,
        range=ailia.NETWORK_IMAGE_RANGE_S_FP32,
        algorithm=ailia.DETECTOR_ALGORITHM_YOLOV2,
        env_id=env_id,
    )
    detector.set_anchors(ANCHORS)

    if args.video == '0':
        print('[INFO] Webcam mode is activated')
        capture = cv2.VideoCapture(0)
        if not capture.isOpened():
            print("[ERROR] webcamera not found")
            sys.exit(1)
    else:
        if check_file_existance(args.video):
            capture = cv2.VideoCapture(args.video)

    while True:
        ret, frame = capture.read()
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
        if not ret:
            continue

        _, resized_img = adjust_frame_size(frame, IMAGE_HEIGHT, IMAGE_WIDTH)
        # OpenCV frames are BGR, so convert BGR -> BGRA for the detector
        img = cv2.cvtColor(resized_img, cv2.COLOR_BGR2BGRA)
        detector.compute(img, THRESHOLD, IOU)
        res_img = plot_results(detector, resized_img, COCO_CATEGORY, False)
        cv2.imshow('frame', res_img)

    capture.release()
    cv2.destroyAllWindows()
    print('Script finished successfully.')
def main():
    # model files check and download
    detector = True
    if detector:
        logger.info('=== detector model ===')
        check_and_download_models(
            WEIGHT_YOLOV3_PATH, MODEL_YOLOV3_PATH, REMOTE_YOLOV3_PATH
        )
    logger.info('=== animalpose model ===')
    info = {
        'hrnet32': (WEIGHT_HRNET_W32_PATH, MODEL_HRNET_W32_PATH),
        'hrnet48': (WEIGHT_HRNET_W48_PATH, MODEL_HRNET_W48_PATH),
        'res50': (WEIGHT_RESNET_50_PATH, MODEL_RESNET_50_PATH),
        'res101': (WEIGHT_RESNET_101_PATH, MODEL_RESNET_101_PATH),
        'res152': (WEIGHT_RESNET_152_PATH, MODEL_RESNET_152_PATH),
    }
    weight_path, model_path = info[args.model]
    check_and_download_models(weight_path, model_path, REMOTE_PATH)

    env_id = args.env_id

    # initialize
    if detector:
        det_net = ailia.Detector(
            MODEL_YOLOV3_PATH,
            WEIGHT_YOLOV3_PATH,
            80,
            format=ailia.NETWORK_IMAGE_FORMAT_RGB,
            channel=ailia.NETWORK_IMAGE_CHANNEL_FIRST,
            range=ailia.NETWORK_IMAGE_RANGE_U_FP32,
            algorithm=ailia.DETECTOR_ALGORITHM_YOLOV3,
            env_id=env_id,
        )
    else:
        det_net = None
    net = ailia.Net(model_path, weight_path, env_id=env_id)

    if args.video is not None:
        # video mode
        recognize_from_video(net, det_net)
    else:
        # image mode
        recognize_from_image(net, det_net)
def recognize_from_image():
    # prepare input data
    org_img = load_image(
        args.input,
        (IMAGE_HEIGHT, IMAGE_WIDTH),
        normalize_type='None',
    )
    if org_img.shape[2] == 3:
        org_img = cv2.cvtColor(org_img, cv2.COLOR_BGR2BGRA)

    # net initialize
    env_id = ailia.get_gpu_environment_id()
    print(f'env_id: {env_id}')
    categories = 80
    threshold = 0.4
    iou = 0.45
    detector = ailia.Detector(
        MODEL_PATH,
        WEIGHT_PATH,
        categories,
        format=ailia.NETWORK_IMAGE_FORMAT_RGB,
        channel=ailia.NETWORK_IMAGE_CHANNEL_FIRST,
        range=ailia.NETWORK_IMAGE_RANGE_U_FP32,
        algorithm=ailia.DETECTOR_ALGORITHM_SSD,
        env_id=env_id,
    )

    # inference
    print('Start inference...')
    if args.benchmark:
        print('BENCHMARK mode')
        for i in range(5):
            start = int(round(time.time() * 1000))
            detector.compute(org_img, threshold, iou)
            end = int(round(time.time() * 1000))
            print(f'\tailia processing time {end - start} ms')
    else:
        detector.compute(org_img, threshold, iou)

    # postprocessing
    res_img = plot_results(detector, org_img, VOC_CATEGORY)
    cv2.imwrite(args.savepath, res_img)
    print('Script finished successfully.')
def main(): # model files check and download logger.info("=== YOLOv3 model ===") check_and_download_models(WEIGHT_YOLOV3_PATH, MODEL_YOLOV3_PATH, REMOTE_YOLOV3_PATH) logger.info("=== HRNet model ===") check_and_download_models(WEIGHT_POSE_PATH, MODEL_POSE_PATH, REMOTE_PATH) logger.info("=== GAST model ===") check_and_download_models(WEIGHT_27FRAME_17JOINT_PATH, MODEL_27FRAME_17JOINT_PATH, REMOTE_PATH) num_person = args.num_person # net initialize detector = ailia.Detector( MODEL_YOLOV3_PATH, WEIGHT_YOLOV3_PATH, 80, format=ailia.NETWORK_IMAGE_FORMAT_RGB, channel=ailia.NETWORK_IMAGE_CHANNEL_FIRST, range=ailia.NETWORK_IMAGE_RANGE_U_FP32, algorithm=ailia.DETECTOR_ALGORITHM_YOLOV3, env_id=args.env_id, ) pose_net = ailia.Net(MODEL_POSE_PATH, WEIGHT_POSE_PATH, env_id=args.env_id) if not args.onnx: net = ailia.Net(MODEL_27FRAME_17JOINT_PATH, WEIGHT_27FRAME_17JOINT_PATH, env_id=args.env_id) else: import onnxruntime net = onnxruntime.InferenceSession(WEIGHT_27FRAME_17JOINT_PATH) info = { "yolo_model": detector, "pose_model": pose_net, "num_person": num_person, } recognize_from_video(net, info)
def compare_video():
    # prepare base image
    tracks = []

    # net initialize
    net = ailia.Net(MODEL_PATH, WEIGHT_PATH, env_id=args.env_id)

    # detector initialize
    if args.face == "blazeface":
        detector = ailia.Net(FACE_MODEL_PATH, FACE_WEIGHT_PATH, env_id=args.env_id)
    else:
        detector = ailia.Detector(
            FACE_MODEL_PATH,
            FACE_WEIGHT_PATH,
            len(FACE_CATEGORY),
            format=ailia.NETWORK_IMAGE_FORMAT_RGB,
            channel=ailia.NETWORK_IMAGE_CHANNEL_FIRST,
            range=FACE_RANGE,
            algorithm=FACE_ALGORITHM,
            env_id=args.env_id,
        )

    # web camera
    capture = webcamera_utils.get_capture(args.video)

    # ui buffer
    ui_width = capture.get(cv2.CAP_PROP_FRAME_WIDTH) + IMAGE_WIDTH / 4 * FACE_TRACK_T
    ui_height = max(capture.get(cv2.CAP_PROP_FRAME_HEIGHT), IMAGE_HEIGHT / 4 * 2 * 8)
    ui = np.zeros((int(ui_height), int(ui_width), 3), np.uint8)
    frame_no = 0

    # writer
    writer = None
    if args.savepath is not None:
        writer = webcamera_utils.get_writer(
            args.savepath,
            ui.shape[0],
            ui.shape[1],
            fps=capture.get(cv2.CAP_PROP_FPS),
        )

    # inference loop
    while True:
        ret, frame = capture.read()
        if (cv2.waitKey(1) & 0xFF == ord('q')) or not ret:
            break

        # get frame size
        h, w = frame.shape[0], frame.shape[1]

        # get faces from image
        detections = get_faces(detector, frame, w, h)

        # track face
        face_identification(tracks, net, detections, frame_no)
        frame_no = frame_no + 1

        # display result
        ui[:, :, :] = 0
        ui[0:h, 0:w, :] = frame[:, :, :]
        display_detections(ui, w, h, detections)
        display_tracks(ui, w, h, tracks)

        # show
        cv2.imshow('arcface', ui)
        if writer is not None:
            writer.write(ui)

    if writer is not None:
        writer.release()
    capture.release()
    cv2.destroyAllWindows()
    logger.info('Script finished successfully.')
def recognize_from_video():
    try:
        print('[INFO] Webcam mode is activated')
        RECORD_TIME = 80
        capture = cv2.VideoCapture(int(args.video))
        if not capture.isOpened():
            print("[ERROR] webcamera not found")
            sys.exit(1)
    except ValueError:
        if check_file_existance(args.video):
            capture = cv2.VideoCapture(args.video)

    frame_rate = capture.get(cv2.CAP_PROP_FPS)
    if FRAME_SKIP:
        action_recognize_fps = int(args.fps)
    else:
        action_recognize_fps = frame_rate

    if args.savepath != "":
        size = (
            int(capture.get(cv2.CAP_PROP_FRAME_WIDTH)),
            int(capture.get(cv2.CAP_PROP_FRAME_HEIGHT)),
        )
        fmt = cv2.VideoWriter_fourcc('m', 'p', '4', 'v')
        writer = cv2.VideoWriter(args.savepath, fmt, action_recognize_fps, size)
    else:
        writer = None

    # pose estimation
    env_id = ailia.get_gpu_environment_id()
    print(f'env_id: {env_id}')
    if args.arch == "lw_human_pose":
        pose = ailia.PoseEstimator(
            MODEL_PATH, WEIGHT_PATH, env_id=env_id, algorithm=ALGORITHM
        )
        detector = None
    else:
        detector = ailia.Detector(
            DETECTOR_MODEL_PATH,
            DETECTOR_WEIGHT_PATH,
            len(COCO_CATEGORY),
            format=ailia.NETWORK_IMAGE_FORMAT_RGB,
            channel=ailia.NETWORK_IMAGE_CHANNEL_FIRST,
            range=ailia.NETWORK_IMAGE_RANGE_U_FP32,
            algorithm=ailia.DETECTOR_ALGORITHM_YOLOV3,
            env_id=env_id,
        )
        pose = ailia.Net(POSE_MODEL_PATH, POSE_WEIGHT_PATH, env_id=env_id)

    # tracker class instance
    extractor = ailia.Net(EX_MODEL_PATH, EX_WEIGHT_PATH, env_id=env_id)
    metric = NearestNeighborDistanceMetric("cosine", MAX_COSINE_DISTANCE, NN_BUDGET)
    tracker = Tracker(metric, max_iou_distance=0.7, max_age=70, n_init=3)

    # action recognition
    model = ailia.Net(ACTION_MODEL_PATH, ACTION_WEIGHT_PATH, env_id=env_id)
    action_data = {}

    frame_nb = int(capture.get(cv2.CAP_PROP_FRAME_COUNT))
    idx_frame = 0
    time_start = time.time()
    while True:
        time_curr = time.time()
        if args.video == '0' and time_curr - time_start > RECORD_TIME:
            break
        ret, frame = capture.read()
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
        if (not ret) or (frame_nb >= 1 and idx_frame >= frame_nb):
            break

        if FRAME_SKIP:
            mod = round(frame_rate / action_recognize_fps)
            if mod >= 1:
                if idx_frame % mod != 0:
                    idx_frame = idx_frame + 1
                    continue

        input_image, input_data = adjust_frame_size(
            frame, frame.shape[0], frame.shape[1],
        )
        input_data = cv2.cvtColor(input_data, cv2.COLOR_BGR2BGRA)

        # inference
        if args.arch == "lw_human_pose":
            _ = pose.compute(input_data)
        else:
            detector.compute(input_data, THRESHOLD, IOU)

        # deepsort format
        h, w = input_image.shape[0], input_image.shape[1]
        if args.arch == "lw_human_pose":
            bbox_xywh, cls_conf, cls_ids = get_detector_result_lw_human_pose(pose, h, w)
        else:
            bbox_xywh, cls_conf, cls_ids = get_detector_result(detector, h, w)
        mask = cls_ids == 0
        bbox_xywh = bbox_xywh[mask]
        # bbox dilation just in case bbox too small,
        # delete this line if using a better pedestrian detector
        if args.arch == "pose_resnet":
            # bbox_xywh[:, 3:] *= 1.2  # may need to be removed in the future
            pass
        cls_conf = cls_conf[mask]

        # do tracking
        img_crops = []
        for box in bbox_xywh:
            x1, y1, x2, y2 = xywh_to_xyxy(box, h, w)
            img_crops.append(input_image[y1:y2, x1:x2])

        if img_crops:
            # preprocess
            img_batch = np.concatenate([
                normalize_image(resize(img), 'ImageNet')[np.newaxis, :, :, :]
                for img in img_crops
            ], axis=0).transpose(0, 3, 1, 2)

            # TODO better to pass a batch at once
            # features = extractor.predict(img_batch)
            features = []
            for img in img_batch:
                features.append(extractor.predict(img[np.newaxis, :, :, :])[0])
            features = np.array(features)
        else:
            features = np.array([])

        bbox_tlwh = xywh_to_tlwh(bbox_xywh)
        detections = [
            Detection(bbox_tlwh[i], conf, features[i])
            for i, conf in enumerate(cls_conf)
            if conf > MIN_CONFIDENCE
        ]

        # run non-maximum suppression
        boxes = np.array([d.tlwh for d in detections])
        scores = np.array([d.confidence for d in detections])
        nms_max_overlap = 1.0
        indices = non_max_suppression(boxes, nms_max_overlap, scores)
        detections = [detections[i] for i in indices]

        # update tracker
        tracker.predict()
        tracker.update(detections)

        # update bbox identities
        outputs = []
        for track in tracker.tracks:
            if not track.is_confirmed() or track.time_since_update > 1:
                continue
            box = track.to_tlwh()
            x1, y1, x2, y2 = tlwh_to_xyxy(box, h, w)
            track_id = track.track_id
            outputs.append(np.array([x1, y1, x2, y2, track_id], dtype=int))
        if len(outputs) > 0:
            outputs = np.stack(outputs, axis=0)

        # action detection
        actions = []
        persons = []
        if len(outputs) > 0:
            bbox_xyxy = outputs[:, :4]
            identities = outputs[:, -1]
            for i, box in enumerate(bbox_xyxy):
                id = identities[i]
                if id not in action_data:
                    action_data[id] = np.zeros(
                        (ailia.POSE_KEYPOINT_CNT - 1, TIME_RANGE, 3)
                    )
                # action recognition
                action, person = action_recognition(
                    box, input_image, pose, detector, model, action_data[id]
                )
                actions.append(action)
                persons.append(person)

        # draw box for visualization
        if len(outputs) > 0:
            bbox_tlwh = []
            bbox_xyxy = outputs[:, :4]
            identities = outputs[:, -1]
            frame = draw_boxes(
                input_image, bbox_xyxy, identities, actions, action_data, (0, 0)
            )
            for bb_xyxy in bbox_xyxy:
                bbox_tlwh.append(xyxy_to_tlwh(bb_xyxy))

        # draw skeleton
        for person in persons:
            if person is not None:
                display_result(input_image, person)

        if writer is not None:
            writer.write(input_image)

        # show progress
        if idx_frame == 0:
            print()
        print("\r" + str(idx_frame + 1) + " / " + str(frame_nb), end="")
        if idx_frame == frame_nb - 1:
            print()
        cv2.imshow('frame', input_image)
        idx_frame = idx_frame + 1

    if writer is not None:
        writer.release()
    capture.release()
    cv2.destroyAllWindows()
    print('Script finished successfully.')
def recognize_from_video():
    # net initialize
    env_id = ailia.get_gpu_environment_id()
    if args.env_id is not None:
        count = ailia.get_environment_count()
        if count > args.env_id:
            env_id = args.env_id
        else:
            print(f'specified env_id: {args.env_id} cannot be found')
    print(f'env_id: {env_id}')
    detector = ailia.Detector(
        MODEL_PATH,
        WEIGHT_PATH,
        len(HAND_CATEGORY),
        format=ailia.NETWORK_IMAGE_FORMAT_RGB,
        channel=ailia.NETWORK_IMAGE_CHANNEL_FIRST,
        range=ailia.NETWORK_IMAGE_RANGE_U_FP32,
        algorithm=ailia.DETECTOR_ALGORITHM_YOLOV3,
        env_id=env_id,
    )
    hand = ailia.PoseEstimator(
        HAND_MODEL_PATH, HAND_WEIGHT_PATH, env_id=env_id, algorithm=HAND_ALGORITHM
    )
    hand.set_threshold(0.1)

    ailia_input_w = detector.get_input_shape()[3]
    ailia_input_h = detector.get_input_shape()[2]

    capture = get_capture(args.video)

    # create video writer if savepath is specified as video format
    if args.savepath != SAVE_PATH:
        f_h = int(capture.get(cv2.CAP_PROP_FRAME_HEIGHT))
        f_w = int(capture.get(cv2.CAP_PROP_FRAME_WIDTH))
        save_h, save_w = calc_adjust_fsize(f_h, f_w, ailia_input_h, ailia_input_w)
        writer = get_writer(args.savepath, save_h, save_w)
    else:
        writer = None

    while True:
        ret, frame = capture.read()
        if (cv2.waitKey(1) & 0xFF == ord('q')) or not ret:
            break

        img = cv2.cvtColor(frame, cv2.COLOR_BGR2BGRA)
        detector.compute(img, THRESHOLD, IOU)
        h, w = img.shape[0], img.shape[1]
        count = detector.get_object_count()
        for idx in range(count):
            # get detected hand
            obj = detector.get_object(idx)
            margin = 1.0
            cx = (obj.x + obj.w / 2) * w
            cy = (obj.y + obj.h / 2) * h
            cw = max(obj.w * w, obj.h * h) * margin
            fx = max(cx - cw / 2, 0)
            fy = max(cy - cw / 2, 0)
            fw = min(cw, w - fx)
            fh = min(cw, h - fy)
            top_left = (int(fx), int(fy))
            bottom_right = (int(fx + fw), int(fy + fh))

            # display detected hand
            color = hsv_to_rgb(0, 255, 255)
            cv2.rectangle(frame, top_left, bottom_right, color, 4)

            # crop detected hand region
            crop_img = img[
                top_left[1]:bottom_right[1], top_left[0]:bottom_right[0], 0:4
            ]
            if crop_img.shape[0] <= 0 or crop_img.shape[1] <= 0:
                continue

            # inference
            _ = hand.compute(crop_img.astype(np.uint8, order='C'))

            # postprocessing
            display_result(frame, hand, top_left, bottom_right)

        cv2.imshow('frame', frame)

        # save results
        if writer is not None:
            writer.write(frame)

    capture.release()
    cv2.destroyAllWindows()
    if writer is not None:
        writer.release()
    print('Script finished successfully.')
def recognize_from_video():
    # net initialize
    env_id = ailia.get_gpu_environment_id()
    print(f'env_id: {env_id}')
    detector = ailia.Detector(
        MODEL_PATH,
        WEIGHT_PATH,
        len(HAND_CATEGORY),
        format=ailia.NETWORK_IMAGE_FORMAT_RGB,
        channel=ailia.NETWORK_IMAGE_CHANNEL_FIRST,
        range=ailia.NETWORK_IMAGE_RANGE_U_FP32,
        algorithm=ailia.DETECTOR_ALGORITHM_YOLOV3,
        env_id=env_id,
    )
    hand = ailia.PoseEstimator(
        HAND_MODEL_PATH, HAND_WEIGHT_PATH, env_id=env_id, algorithm=HAND_ALGORITHM
    )
    hand.set_threshold(0.1)

    if args.video == '0':
        print('[INFO] Webcam mode is activated')
        capture = cv2.VideoCapture(0)
        if not capture.isOpened():
            print("[ERROR] webcamera not found")
            sys.exit(1)
    else:
        if check_file_existance(args.video):
            capture = cv2.VideoCapture(args.video)

    while True:
        ret, frame = capture.read()
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
        if not ret:
            continue

        img = cv2.cvtColor(frame, cv2.COLOR_BGR2BGRA)
        detector.compute(img, THRESHOLD, IOU)
        h, w = img.shape[0], img.shape[1]
        count = detector.get_object_count()
        for idx in range(count):
            # get detected hand
            obj = detector.get_object(idx)
            margin = 1.0
            cx = (obj.x + obj.w / 2) * w
            cy = (obj.y + obj.h / 2) * h
            cw = max(obj.w * w, obj.h * h) * margin
            fx = max(cx - cw / 2, 0)
            fy = max(cy - cw / 2, 0)
            fw = min(cw, w - fx)
            fh = min(cw, h - fy)
            top_left = (int(fx), int(fy))
            bottom_right = (int(fx + fw), int(fy + fh))

            # display detected hand
            color = hsv_to_rgb(0, 255, 255)
            cv2.rectangle(frame, top_left, bottom_right, color, 4)

            # crop detected hand region
            crop_img = img[
                top_left[1]:bottom_right[1], top_left[0]:bottom_right[0], 0:4
            ]
            if crop_img.shape[0] <= 0 or crop_img.shape[1] <= 0:
                continue

            # inference
            _ = hand.compute(crop_img.astype(np.uint8, order='C'))

            # postprocessing
            display_result(frame, hand, top_left, bottom_right)

        cv2.imshow('frame', frame)

    capture.release()
    cv2.destroyAllWindows()
    print('Script finished successfully.')
model_path = sys.argv[1] + ".prototxt"
weight_path = sys.argv[1]
classes_path = sys.argv[2]
img_path = sys.argv[3]

with open(classes_path) as f:
    class_names = f.readlines()
class_names = [c.strip() for c in class_names]

print("loading ...")

# detector initialize
env_id = ailia.get_gpu_environment_id()
categories = len(class_names)
detector = ailia.Detector(
    model_path,
    weight_path,
    categories,
    format=ailia.NETWORK_IMAGE_FORMAT_RGB,
    channel=ailia.NETWORK_IMAGE_CHANNEL_FIRST,
    range=ailia.NETWORK_IMAGE_RANGE_U_FP32,
    algorithm=ailia.DETECTOR_ALGORITHM_YOLOV3,
    env_id=env_id,
)

# load input image and convert to BGRA
img = cv2.imread(img_path, cv2.IMREAD_UNCHANGED)
if img.shape[2] == 3:
    img = cv2.cvtColor(img, cv2.COLOR_BGR2BGRA)
elif img.shape[2] == 1:
    img = cv2.cvtColor(img, cv2.COLOR_GRAY2BGRA)
print("img.shape=" + str(img.shape))

work = img
w = img.shape[1]
h = img.shape[0]

print("inferencing ...")
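# Hedged continuation sketch: the script above stops right after printing
# "inferencing ...". A plausible next step, following the compute/get_object
# pattern of the other YOLOv3 samples in this file, is shown below. The
# THRESHOLD and IOU values are assumptions, not taken from the original script.
THRESHOLD = 0.4
IOU = 0.45
detector.compute(work, THRESHOLD, IOU)
for idx in range(detector.get_object_count()):
    obj = detector.get_object(idx)
    print(class_names[obj.category], obj.prob)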