logger = getLogger(__name__)

# ======================
# Parameters 1
# ======================
IMAGE_PATH = 'unity_chan.png'
SAVE_IMAGE_PATH = 'output.png'
IMAGE_HEIGHT = 128
IMAGE_WIDTH = 128

# ======================
# Argument Parser Config
# ======================
parser = get_base_parser(
    'Yet-Another-Anime-Segmenter, anime character segmentation.',
    IMAGE_PATH,
    SAVE_IMAGE_PATH,
)
parser.add_argument(
    '--onnx',
    action='store_true',
    help='By default, the ailia SDK is used, but with this option, ' +
         'you can switch to using ONNX Runtime'
)
args = update_parser(parser, large_model=True)

# ======================
# Parameters 2
# ======================
MODEL_NAME = 'yaas_solov2'
WEIGHT_PATH = f'{MODEL_NAME}.onnx'
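# --- hedged sketch (not from the original file) ----------------------------
# How the `--onnx` flag typically selects the backend in these demos;
# `MODEL_PATH` (defined past this excerpt) and the `onnxruntime` import are
# assumptions here, so the sketch is left commented out.
# if args.onnx:
#     import onnxruntime
#     net = onnxruntime.InferenceSession(WEIGHT_PATH)
# else:
#     net = ailia.Net(MODEL_PATH, WEIGHT_PATH, env_id=args.env_id)
# ----------------------------------------------------------------------------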
"hot dog", "pizza", "donut", "cake", "chair", "couch", "potted plant", "bed", "dining table", "toilet", "tv", "laptop", "mouse", "remote", "keyboard", "cell phone", "microwave", "oven", "toaster", "sink", "refrigerator", "book", "clock", "vase", "scissors", "teddy bear", "hair drier", "toothbrush" ] THRESHOLD = 0.4 IOU = 0.45 POSE_THRESHOLD = 0.1 # ====================== # Arguemnt Parser Config # ====================== parser = get_base_parser( 'Simple Baseline for Pose Estimation', IMAGE_PATH, SAVE_IMAGE_PATH, ) args = update_parser(parser) # ====================== # Display result # ====================== def hsv_to_rgb(h, s, v): bgr = cv2.cvtColor(np.array([[[h, s, v]]], dtype=np.uint8), cv2.COLOR_HSV2BGR)[0][0] return (int(bgr[0]), int(bgr[1]), int(bgr[2]), 255) def line(input_img, person, point1, point2):
IMAGE_PATH = 'aflw-test.jpg'
SAVE_IMAGE_PATH = 'output.png'
IMAGE_HEIGHT = 256
IMAGE_WIDTH = 256
THRESHOLD = 0.1

FACE_WEIGHT_PATH = 'blazeface.onnx'
FACE_MODEL_PATH = 'blazeface.onnx.prototxt'
FACE_REMOTE_PATH = "https://storage.googleapis.com/ailia-models/blazeface/"
FACE_MARGIN = 1.2

# ======================
# Argument Parser Config
# ======================
parser = get_base_parser('Face alignment model', IMAGE_PATH, SAVE_IMAGE_PATH)
parser.add_argument('-3', '--active_3d', action='store_true',
                    help='Activate 3D face alignment mode')
args = update_parser(parser)

# ======================
# Parameters 2
# ======================
WEIGHT_PATH = '3DFAN-4.onnx' if args.active_3d else '2DFAN-4.onnx'
MODEL_PATH = WEIGHT_PATH + '.prototxt'
DEPTH_WEIGHT_PATH = 'depth_estimation.onnx'
DEPTH_MODEL_PATH = DEPTH_WEIGHT_PATH + '.prototxt'

PRED_TYPE = collections.namedtuple('prediction_type', ['slice', 'color'])
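# Hedged illustration (this concrete entry is an assumption, not part of the
# file): PRED_TYPE pairs a slice over the 68 landmark indices with a plot
# color, e.g. the jaw line covers the first 17 points.
face_contour = PRED_TYPE(slice=slice(0, 17), color=(0.68, 0.58, 0.60, 1.0))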
"oven", "toaster", "sink", "refrigerator", "book", "clock", "vase", "scissors", "teddy bear", "hair drier", "toothbrush" ] THRESHOLD = 0.1 # 0.4 MIN_CONFIDENCE = 0.3 IOU = 0.45 # Metric parameters MAX_COSINE_DISTANCE = 0.2 # threshold of matching object NN_BUDGET = 100 # ====================== # Arguemnt Parser Config # ====================== parser = get_base_parser('Deep SORT', IMAGE_PATH, SAVE_IMAGE_PATH,) parser.add_argument( '-p', '--pairimage', metavar='IMAGE', nargs=2, default=[None, None], help=('If this option is specified, the model is set to determine ' 'if the person in two images is the same person or not.') ) args = update_parser(parser) # ====================== # Utils # ====================== def resize(img, size=(INPUT_WIDTH, INPUT_HEIGHT)): return cv2.resize(img.astype(np.float32), size)
SAVE_IMAGE_PATH = 'output.png'
CATEGORY = ('Background', 'Hat', 'Hair', 'Glove', 'Sunglasses',
            'Upper-clothes', 'Dress', 'Coat', 'Socks', 'Pants',
            'Jumpsuits', 'Scarf', 'Skirt', 'Face',
            'Left-arm', 'Right-arm', 'Left-leg', 'Right-leg',
            'Left-shoe', 'Right-shoe')
IMAGE_HEIGHT = 473
IMAGE_WIDTH = 473
NORM_MEAN = [0.406, 0.456, 0.485]
NORM_STD = [0.225, 0.224, 0.229]

# ======================
# Argument Parser Config
# ======================
parser = get_base_parser('Human-Part-Segmentation model', IMAGE_PATH, SAVE_IMAGE_PATH)
args = update_parser(parser)

# ======================
# Secondary Functions
# ======================
def preprocess(img):
    h, w, _ = img.shape

    # get person center and scale
    person_center, s = xywh2cs(0, 0, w - 1, h - 1)
    r = 0
    trans = get_affine_transform(person_center, s, r, [IMAGE_HEIGHT, IMAGE_WIDTH])
    img = cv2.warpAffine(
        img,
# ======================
MODEL_PATH = 'etl_BINARY_squeezenet128_20.prototxt'
WEIGHT_PATH = 'etl_BINARY_squeezenet128_20.caffemodel'
REMOTE_PATH = 'https://storage.googleapis.com/ailia-models/etl/'

IMAGE_PATH = 'font.png'
IMAGE_HEIGHT = 28
IMAGE_WIDTH = 28

ETL_PATH = 'etl_BINARY_squeezenet128_20.txt'
MAX_CLASS_COUNT = 3
SLEEP_TIME = 0  # for webcam mode

# ======================
# Argument Parser Config
# ======================
parser = get_base_parser('Japanese character classification model.',
                         IMAGE_PATH, None)
args = update_parser(parser)

# ======================
# Utils
# ======================
def preprocess_image(img):
    # ensure a 4-channel BGRA image, then invert the intensities
    if img.shape[2] == 3:
        img = cv2.cvtColor(img, cv2.COLOR_BGR2BGRA)
    elif img.shape[2] == 1:
        img = cv2.cvtColor(img, cv2.COLOR_GRAY2BGRA)
    img = cv2.bitwise_not(img)
    return img
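# Hedged usage example (not part of the original file): the classifier expects
# an inverted BGRA glyph, so dark characters on a light page come out bright.
glyph = cv2.imread(IMAGE_PATH)      # H x W x 3, BGR
glyph = preprocess_image(glyph)     # H x W x 4, BGRA with inverted values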
NORM_MEAN = [123.675, 116.28, 103.53]
NORM_STD = [58.395, 57.12, 57.375]
RCNN_MASK_THRE = 0.5

U2NET_MODEL_LIST = ['small', 'large']
WEIGHT_U2NET_LARGE_PATH = 'u2net_opset11.onnx'
MODEL_U2NET_LARGE_PATH = 'u2net_opset11.onnx.prototxt'
WEIGHT_U2NET_SMALL_PATH = 'u2netp_opset11.onnx'
MODEL_U2NET_SMALL_PATH = 'u2netp_opset11.onnx.prototxt'
REMOTE_U2NET_PATH = 'https://storage.googleapis.com/ailia-models/u2net/'
U2NET_IMAGE_SIZE = 320

# ======================
# Argument Parser Config
# ======================
parser = get_base_parser('MMFashion model', IMAGE_PATH, SAVE_IMAGE_PATH)
parser.add_argument(
    '-th', '--threshold',
    default=THRESHOLD, type=float,
    help='The detection threshold for yolo. (default: ' + str(THRESHOLD) + ')')
parser.add_argument(
    '-pp', '--preprocess', metavar='ARCH',
    default=None, choices=U2NET_MODEL_LIST,
    help='preprocess model (U square net) architecture: ' +
         ' | '.join(U2NET_MODEL_LIST))
args = update_parser(parser)
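# A hedged sketch (an assumption, not from this file; numpy assumed imported
# as np): NORM_MEAN/NORM_STD are the ImageNet statistics on the 0-255 scale
# (RGB channel order), so normalization applies per channel to an RGB image.
def normalize(img):
    return (img.astype(np.float32) - np.asarray(NORM_MEAN)) / np.asarray(NORM_STD)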
# the threshold was calculated by the `test_performance` function in `test.py`
# of the original repository
THRESHOLD = 0.25572845
# THRESHOLD = 0.45  # for mixed model

# face detection
FACE_MODEL_LISTS = ['yolov3', 'blazeface', 'yolov3-mask']
FACE_THRESHOLD = 0.4
FACE_IOU = 0.45

# ======================
# Argument Parser Config
# ======================
parser = get_base_parser(
    'Determine if the person is the same from two facial images.',
    None,
    None,
)
# overwrite default config
# NOTE: arcface uses `--input` differently from the other models
parser.add_argument(
    '-i', '--inputs', metavar='IMAGE',
    nargs=2, default='',
    help='Two image paths for calculating the face match.')
parser.add_argument(
    '-a', '--arch', metavar='ARCH', default='arcface',
    choices=MODEL_LISTS,
from model_utils import check_and_download_models  # noqa: E402

# ======================
# Parameters 1
# ======================
IMAGE_PATH = 'person_hand.jpg'
SAVE_IMAGE_PATH = 'output.png'
IMAGE_HEIGHT = 256
IMAGE_WIDTH = 256

# ======================
# Argument Parser Config
# ======================
parser = get_base_parser(
    'BlazeHand, an on-device real-time hand tracking.',
    IMAGE_PATH,
    SAVE_IMAGE_PATH,
)
args = update_parser(parser)

# ======================
# Parameters 2
# ======================
DETECTION_MODEL_NAME = 'blazepalm'
LANDMARK_MODEL_NAME = 'blazehand'
# if args.normal:
DETECTION_WEIGHT_PATH = f'{DETECTION_MODEL_NAME}.onnx'
DETECTION_MODEL_PATH = f'{DETECTION_MODEL_NAME}.onnx.prototxt'
LANDMARK_WEIGHT_PATH = f'{LANDMARK_MODEL_NAME}.onnx'
LANDMARK_MODEL_PATH = f'{LANDMARK_MODEL_NAME}.onnx.prototxt'
# else:
WEIGHT_PATH = 'pedestrian_detection.opt.onnx'
MODEL_PATH = 'pedestrian_detection.opt.onnx.prototxt'
REMOTE_PATH = 'https://storage.googleapis.com/ailia-models/pedestrian_detection/'

IMAGE_PATH = 'input.jpg'
SAVE_IMAGE_PATH = 'output.png'

COCO_CATEGORY = ["person"]
THRESHOLD = 0.4
IOU = 0.45
DETECTION_SIZE = 416

# ======================
# Argument Parser Config
# ======================
parser = get_base_parser('Pedestrian detection model', IMAGE_PATH, SAVE_IMAGE_PATH)
parser.add_argument(
    '-th', '--threshold',
    default=THRESHOLD, type=float,
    help='The detection threshold for yolo. (default: ' + str(THRESHOLD) + ')')
parser.add_argument(
    '-iou', '--iou',
    default=IOU, type=float,
    help='The detection iou for yolo. (default: ' + str(IOU) + ')')
parser.add_argument(
    '-dw', '--detection_width',
MODEL_PATH = 'pose_hg_3d.onnx.prototxt'
REMOTE_PATH = 'https://storage.googleapis.com/ailia-models/pose_hg_3d/'

IMAGE_PATH = 'input.jpg'
SAVE_IMAGE_PATH = 'output.png'
IMAGE_HEIGHT = 256
IMAGE_WIDTH = 256
CHANNELS = 3

mean = np.array([0.485, 0.456, 0.406], np.float32).reshape(1, 1, 3)
std = np.array([0.229, 0.224, 0.225], np.float32).reshape(1, 1, 3)

# ======================
# Argument Parser Config
# ======================
parser = get_base_parser('pose_hg_3d model', IMAGE_PATH, SAVE_IMAGE_PATH)
args = update_parser(parser)

# ======================
# Main functions
# ======================
def recognize_from_image():
    # net initialize
    net = ailia.Net(MODEL_PATH, WEIGHT_PATH, env_id=args.env_id)

    # input image loop
    for image_path in args.input:
        logger.info(image_path)
        # bug fix: read the current loop image, not the fixed IMAGE_PATH
        img = cv2.imread(image_path)
UV_FACE_PATH = 'uv-data/uv_face.png'
UV_FACE_EYES_PATH = 'uv-data/uv_face_eyes.png'

# NOTE: in the original repository, the input and output image resolutions
# can be specified separately (though both are fixed at 256)
IMAGE_SIZE = 256

# ntri x 3
TRIANGLES = np.loadtxt('uv-data/triangles.txt').astype(np.int32)
UV_COORDS = generate_uv_coords(IMAGE_SIZE)

# ======================
# Argument Parser Config
# ======================
parser = get_base_parser('PR-Net', IMAGE_PATH, SAVE_FOLDER)
# texture editing mode configuration
parser.add_argument(
    '-t', '--texture', metavar='MODE', type=int, default=-1,
    help='Ways to edit texture: 0 for modifying parts (eyes in this example), ' +
         '1 for changing the whole texture, -1 for normal recognition mode')
parser.add_argument(
    '-r', '--refpath', metavar='IMAGE', default=REF_IMAGE_PATH,
    help='The path to the texture reference image. ' +
REMOTE_PATH = 'https://storage.googleapis.com/ailia-models/lightweight-human-pose-estimation-3d/'

IMAGE_PATH = 'input.png'
SAVE_IMAGE_PATH = 'output.png'
FILE_PATH = 'extrinsics.json'

IMAGE_HEIGHT = 256
IMAGE_WIDTH = 448
STRIDE = 8

# ======================
# Argument Parser Config
# ======================
parser = get_base_parser(
    ('Lightweight 3D human pose estimation demo. '
     'Press esc to exit, "p" to (un)pause video or process next image.'),
    IMAGE_PATH,
    SAVE_IMAGE_PATH,
)
parser.add_argument(
    '--rotate3d', action='store_true', default=False,
    help='allow 3D canvas rotation while paused',
)
args = update_parser(parser)

# ======================
# Utils
# ======================
def rotate_poses(poses_3d, R, t):
# ======================
WEIGHT_PATH = 'mask_rcnn_R_50_FPN_1x.onnx'
MODEL_PATH = 'mask_rcnn_R_50_FPN_1x.onnx.prototxt'
REMOTE_PATH = 'https://storage.googleapis.com/ailia-models/mask_rcnn/'

IMAGE_PATH = 'demo.jpg'
SAVE_IMAGE_PATH = 'output.png'

CLASSES = [line.rstrip('\n') for line in open('coco_classes.txt')]

# ======================
# Argument Parser Config
# ======================
parser = get_base_parser(
    'Real-time NN for object instance segmentation by Mask R-CNN',
    IMAGE_PATH,
    SAVE_IMAGE_PATH,
)
args = update_parser(parser, large_model=True)

# ======================
# Utils
# ======================
def preprocess(image):
    # resize so the short side becomes 800px, capping the long side at 1280px
    ratio = 800.0 / min(image.size[0], image.size[1])
    resize_w = int(ratio * image.size[0])
    resize_h = int(ratio * image.size[1])
    if max(resize_w, resize_h) > 1280.0:
        ratio = 1280.0 / max(image.size[0], image.size[1])
# ======================
# Parameters 1
# ======================
MODEL_NAMES = ['resnet50.opt', 'resnet50', 'resnet50_pytorch']
IMAGE_PATH = 'pizza.jpg'
IMAGE_HEIGHT = 224
IMAGE_WIDTH = 224
IMAGE_RANGE = ailia.NETWORK_IMAGE_RANGE_S_INT8
MAX_CLASS_COUNT = 3
SLEEP_TIME = 0

# ======================
# Argument Parser Config
# ======================
parser = get_base_parser('Resnet50 ImageNet classification model',
                         IMAGE_PATH, None)
parser.add_argument(
    '--arch', '-a', metavar='ARCH', default='resnet50.opt',
    choices=MODEL_NAMES,
    help='model architecture: ' + ' | '.join(MODEL_NAMES) +
         ' (default: resnet50.opt)')
args = update_parser(parser)

# ======================
# Parameters 2
# ======================
WEIGHT_PATH = args.arch + '.onnx'
MODEL_PATH = args.arch + '.onnx.prototxt'
REMOTE_PATH = 'https://storage.googleapis.com/ailia-models/resnet50/'
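# Hedged sketch (the `probs` variable is an assumption, not from this file;
# numpy assumed imported as np): MAX_CLASS_COUNT keeps only the top-k
# predictions from the classifier output.
probs = np.random.rand(1000)                      # stand-in for model scores
top_k = probs.argsort()[::-1][:MAX_CLASS_COUNT]   # indices of the 3 best classes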
"https://storage.googleapis.com/ailia-models/resnet_facial_feature/" IMAGE_PATH = 'test.png' SAVE_IMAGE_PATH = 'output.png' IMAGE_HEIGHT = 226 IMAGE_WIDTH = 226 FACE_WEIGHT_PATH = 'blazeface.onnx' FACE_MODEL_PATH = 'blazeface.onnx.prototxt' FACE_REMOTE_PATH = "https://storage.googleapis.com/ailia-models/blazeface/" FACE_MARGIN = 1.0 # ====================== # Arguemnt Parser Config # ====================== parser = get_base_parser('kaggle facial keypoints.', IMAGE_PATH, SAVE_IMAGE_PATH) args = update_parser(parser) # ====================== # Utils # ====================== def gen_img_from_predsailia(input_data, preds_ailia): fig = plt.figure(figsize=(3, 3)) ax = fig.add_axes([0, 0, 1, 1]) ax.imshow(input_data.reshape(IMAGE_HEIGHT, IMAGE_WIDTH)) points = np.vstack(np.split(preds_ailia, 15)).T * 113 + 113 ax.plot(points[0], points[1], 'o', color='red') return fig
logger = getLogger(__name__)

# ======================
# Parameters 1
# ======================
IMAGE_PATH = 'clock.jpg'
IMAGE_HEIGHT = 224
IMAGE_WIDTH = 224
MODEL_LISTS = ['small', 'large']
SLEEP_TIME = 0

# ======================
# Argument Parser Config
# ======================
parser = get_base_parser('ImageNet classification Model', IMAGE_PATH, None)
parser.add_argument(
    '-a', '--arch', metavar='ARCH', default='small',
    choices=MODEL_LISTS,
    help='model lists: ' + ' | '.join(MODEL_LISTS) + ' (default: small)')
args = update_parser(parser)

# ======================
# Parameters 2
# ======================
WEIGHT_PATH = f'mobilenetv3_{args.arch}.onnx'
MODEL_PATH = WEIGHT_PATH + '.prototxt'
REMOTE_PATH = 'https://storage.googleapis.com/ailia-models/mobilenetv3/'
WEIGHT_PATH = 'u2net-human-seg.onnx'
MODEL_PATH = 'u2net-human-seg.onnx.prototxt'
REMOTE_PATH = 'https://storage.googleapis.com/ailia-models/u2net-human-seg/'

IMAGE_PATH = 'input.jpg'
SAVE_IMAGE_PATH = 'output.png'

IMAGE_SIZE = 320

# ======================
# Argument Parser Config
# ======================
parser = get_base_parser(
    'U^2-Net - human segmentation',
    IMAGE_PATH,
    SAVE_IMAGE_PATH,
)
parser.add_argument(
    '-c', '--composite', action='store_true',
    help='Composite input image and predicted alpha value')
args = update_parser(parser)

# ======================
# Utils
# ======================
def preprocess(img):
    img = transform.resize(img, (IMAGE_SIZE, IMAGE_SIZE), mode='constant')
REMOTE_PATH = 'https://storage.googleapis.com/ailia-models/u2net_portrait/'

IMAGE_PATH = 'your_portrait_im/kid1.jpg'
SAVE_IMAGE_PATH = 'your_portrait_results/kid1.jpg'
FACE_CASCADE_MODEL_PATH = 'haarcascade_frontalface_default.xml'

IMAGE_WIDTH = 512
IMAGE_HEIGHT = 512

# ======================
# Argument Parser Config
# ======================
parser = get_base_parser(
    'U^2-Net: Going Deeper with Nested U-Structure for Salient Object Detection',
    IMAGE_PATH,
    SAVE_IMAGE_PATH,
)
args = update_parser(parser)

# ======================
# Utils
# ======================
def detect_single_face(face_cascade, img):
    # convert into grayscale
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

    # detect faces
    faces = face_cascade.detectMultiScale(gray, 1.1, 4)
    if len(faces) == 0:
# Parameters
# ======================
WEIGHT_PATH = 'pixellink-vgg16-4s.onnx'
MODEL_PATH = 'pixellink-vgg16-4s.onnx.prototxt'
REMOTE_PATH = 'https://storage.googleapis.com/ailia-models/pixel_link/'

IMAGE_PATH = 'img_249.jpg'
SAVE_IMAGE_PATH = 'output.png'

# ======================
# Argument Parser Config
# ======================
parser = get_base_parser('Pixel-Link model', IMAGE_PATH, SAVE_IMAGE_PATH)
args = update_parser(parser)

# ======================
# Secondary Functions
# ======================
def post_processing(pixel_pos_scores, link_pos_scores, image_shape):
    # decode pixel/link score maps into a mask, then extract bounding boxes
    mask = decode_batch(pixel_pos_scores, link_pos_scores)[0, ...]
    bboxes = mask_to_bboxes(mask, image_shape)
    return bboxes

# ======================
"tennis racket", "bottle", "wine glass", "cup", "fork", "knife", "spoon", "bowl", "banana", "apple", "sandwich", "orange", "broccoli", "carrot", "hot dog", "pizza", "donut", "cake", "chair", "couch", "potted plant", "bed", "dining table", "toilet", "tv", "laptop", "mouse", "remote", "keyboard", "cell phone", "microwave", "oven", "toaster", "sink", "refrigerator", "book", "clock", "vase", "scissors", "teddy bear", "hair drier", "toothbrush" ] THRESHOLD = 0.4 IOU = 0.45 DETECTION_SIZE = 416 # ====================== # Arguemnt Parser Config # ====================== parser = get_base_parser('Yolov3 model', IMAGE_PATH, SAVE_IMAGE_PATH) parser.add_argument('-th', '--threshold', default=THRESHOLD, type=float, help='The detection threshold for yolo. (default: ' + str(THRESHOLD) + ')') parser.add_argument('-iou', '--iou', default=IOU, type=float, help='The detection iou for yolo. (default: ' + str(IOU) + ')') parser.add_argument('-w', '--write_prediction', action='store_true',
# ======================
# Parameters 1
# ======================
IMAGE_PATH = 'girl-5204299_640.jpg'
SAVE_IMAGE_PATH = 'output.png'
IMAGE_HEIGHT = 240
IMAGE_WIDTH = 320

ALGORITHM = ailia.POSE_ALGORITHM_LW_HUMAN_POSE

# ======================
# Argument Parser Config
# ======================
parser = get_base_parser(
    'Fast and accurate human pose 2D-estimation.',
    IMAGE_PATH,
    SAVE_IMAGE_PATH,
)
parser.add_argument(
    '-n', '--normal',
    action='store_true',
    help='By default, the optimized model is used, but with this option, ' +
         'you can switch to the normal (not optimized) model'
)
args = update_parser(parser)

# ======================
# Parameters 2
# ======================
MODEL_NAME = 'lightweight-human-pose-estimation'
if args.normal:
IMAGE_PATH = 'demo.jpg'
SAVE_IMAGE_PATH = 'output.png'

VOCAB_FILE_PATH = 'vocab.json'

NORM_MEAN = [0.485, 0.456, 0.406]
NORM_STD = [0.229, 0.224, 0.225]

SLEEP_TIME = 0
INPUT_WIDTH = 640

# ======================
# Argument Parser Config
# ======================
parser = get_base_parser('ImageCaptioning.pytorch model', IMAGE_PATH, SAVE_IMAGE_PATH)
parser.add_argument(
    '--model', type=str, default='fc_nsc',
    choices=('fc', 'fc_rl', 'fc_nsc'),
    help='captioning model (fc | fc_rl | fc_nsc)')
args = update_parser(parser)

# ======================
# Secondary Functions
# ======================
def preprocess(img):
    h, w, _ = img.shape
    # downscale wide images to INPUT_WIDTH, keeping the aspect ratio
    if w >= INPUT_WIDTH:
        img = cv2.resize(img, (INPUT_WIDTH, int(h * INPUT_WIDTH / w)))
WEIGHT_DETECTOR_PATH = 'pose_detection.onnx'
MODEL_DETECTOR_PATH = 'pose_detection.onnx.prototxt'
REMOTE_PATH = 'https://storage.googleapis.com/ailia-models/blazepose-fullbody/'

IMAGE_PATH = 'girl-5204299_640.jpg'
SAVE_IMAGE_PATH = 'output.png'
IMAGE_SIZE = 256

# ======================
# Argument Parser Config
# ======================
parser = get_base_parser(
    'BlazePose, an on-device real-time body pose tracking.',
    IMAGE_PATH,
    SAVE_IMAGE_PATH,
)
parser.add_argument(
    '-m', '--model', metavar='ARCH',
    default='heavy', choices=MODEL_LIST,
    help='Set model architecture: ' + ' | '.join(MODEL_LIST)
)
parser.add_argument(
    '-th', '--threshold',
    default=0.5, type=float,
    help='The detection threshold'
)
args = update_parser(parser)
# ======================
# Parameters
# ======================
WEIGHT_PATH = 'illnet.onnx'
MODEL_PATH = 'illnet.onnx.prototxt'
REMOTE_PATH = 'https://storage.googleapis.com/ailia-models/illnet/'

IMAGE_PATH = 'input.png'
SAVE_IMAGE_PATH = 'output.png'
PATCH_RES = 128

# ======================
# Argument Parser Config
# ======================
parser = get_base_parser('Illumination Correction Model', IMAGE_PATH, SAVE_IMAGE_PATH)
args = update_parser(parser)

# ======================
# Main functions
# ======================
def recognize_from_image():
    # net initialize
    net = ailia.Net(MODEL_PATH, WEIGHT_PATH, env_id=args.env_id)

    # input image loop
    for image_path in args.input:
        # prepare input data
        logger.info(image_path)
        img = io.imread(image_path)
from logging import getLogger

logger = getLogger(__name__)

os.environ['KMP_DUPLICATE_LIB_OK'] = 'TRUE'

# ======================
# Parameters
# ======================
REMOTE_PATH = 'https://storage.googleapis.com/ailia-models/codes-for-lane-detection/'
IMAGE_PATH = 'input.jpg'
SAVE_IMAGE_PATH = 'output.jpg'

# ======================
# Argument Parser Config
# ======================
parser = get_base_parser('erfnet model', IMAGE_PATH, SAVE_IMAGE_PATH)
args = update_parser(parser)

WEIGHT_PATH = 'erfnet.opt.onnx'
MODEL_PATH = 'erfnet.opt.onnx.prototxt'

HEIGHT = 208
WIDTH = 976
INPUT_MEAN = [103.939, 116.779, 123.68]
INPUT_STD = [1, 1, 1]

# ======================
# Main functions
# ======================
# ======================
WEIGHT_PATH = "crowdcount.onnx"
MODEL_PATH = "crowdcount.onnx.prototxt"
REMOTE_PATH = "https://storage.googleapis.com/ailia-models/crowd_count/"

IMAGE_PATH = 'test.jpeg'
SAVE_IMAGE_PATH = 'result.png'
IMAGE_WIDTH = 640
IMAGE_HEIGHT = 480

# ======================
# Argument Parser Config
# ======================
parser = get_base_parser(
    'Single image crowd counting.',
    IMAGE_PATH,
    SAVE_IMAGE_PATH,
)
args = update_parser(parser)

# ======================
# Main functions
# ======================
def estimate_from_image():
    # net initialize
    net = ailia.Net(MODEL_PATH, WEIGHT_PATH, env_id=args.env_id)

    # input image loop
    for image_path in args.input:
        # prepare input data
# ======================
PATH_SUFFIX = ['councilGAN-glasses', 'councilGAN-m2f_256', 'councilGAN-anime']
MODEL = 0
REMOTE_PATH = "https://storage.googleapis.com/ailia-models/council-gan/"

IMAGE_PATH = 'sample.jpg'
SAVE_IMAGE_PATH = 'output.png'

# ======================
# Argument Parser Config
# ======================
parser = get_base_parser(
    'Glasses removal, m2f and anime transformation GAN based on SimGAN',
    IMAGE_PATH,
    SAVE_IMAGE_PATH,
)
parser.add_argument(
    '-f', '--face_recognition', action='store_true',
    help='Run face recognition with yolo v3 (only for glasses removal mode)')
parser.add_argument(
    '-d', '--dilation', metavar='DILATION', default=1,
    help='Dilation value for the face recognition image size')
parser.add_argument(
    '-g', '--glasses', action='store_true',
IMAGE_PATH = 'test.png'
SAVE_IMAGE_PATH = 'output.png'

WC_IMG_HEIGHT = 256
WC_IMG_WIDTH = 256
BM_IMG_HEIGHT = 128
BM_IMG_WIDTH = 128

# ======================
# Argument Parser Config
# ======================
parser = get_base_parser(
    'DewarpNet is a model for document image unwarping.',
    IMAGE_PATH,
    SAVE_IMAGE_PATH,
)
args = update_parser(parser)

# ======================
# Utils
# ======================
def grid_sample(img, grid):
    height, width, c = img.shape
    output = np.zeros_like(img)
    # map normalized grid coordinates from [-1, 1] to pixel coordinates
    grid[:, :, 0] = (grid[:, :, 0] + 1) * (width - 1) / 2
    grid[:, :, 1] = (grid[:, :, 1] + 1) * (height - 1) / 2
    # TODO speed up here
    for h in range(height):
logger = getLogger(__name__)

# ======================
# Parameters 1
# ======================
IMAGE_PATH = 'person_with_hands.jpg'
SAVE_IMAGE_PATH = 'output.png'
IMAGE_HEIGHT = 256
IMAGE_WIDTH = 256

# ======================
# Argument Parser Config
# ======================
parser = get_base_parser(
    'BlazePalm, on-device real-time palm detection.',
    IMAGE_PATH,
    SAVE_IMAGE_PATH,
)
args = update_parser(parser)

# ======================
# Parameters 2
# ======================
MODEL_NAME = 'blazepalm'
# if args.normal:
WEIGHT_PATH = f'{MODEL_NAME}.onnx'
MODEL_PATH = f'{MODEL_NAME}.onnx.prototxt'
# else:
#     WEIGHT_PATH = f'{MODEL_NAME}.opt.onnx'
#     MODEL_PATH = f'{MODEL_NAME}.opt.onnx.prototxt'
REMOTE_PATH = f'https://storage.googleapis.com/ailia-models/{MODEL_NAME}/'
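# A typical next step in these demos (hedged: `check_and_download_models` is
# the shared `model_utils` helper imported in the BlazeHand snippet above, and
# its use here follows the repo-wide pattern rather than this exact file):
# check_and_download_models(WEIGHT_PATH, MODEL_PATH, REMOTE_PATH)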