def preprocess(img):
    h, w = img.shape[:2]
    resize = 256
    if h > w:
        h = resize * h // w
        w = resize
    else:
        w = resize * w // h
        h = resize
    img = np.array(Image.fromarray(img).resize((w, h), Image.BILINEAR))

    if h > IMAGE_SIZE:
        pad = (h - IMAGE_SIZE) // 2
        img = img[pad:pad + IMAGE_SIZE, :]
    if w > IMAGE_SIZE:
        pad = (w - IMAGE_SIZE) // 2
        img = img[:, pad:pad + IMAGE_SIZE]

    img = normalize_image(img, normalize_type='ImageNet')
    img = img.transpose((2, 0, 1))  # HWC -> CHW
    img = np.expand_dims(img, axis=0)

    return img
def preprocess(img, mask=False):
    h, w = img.shape[:2]
    size = IMAGE_RESIZE
    crop_size = IMAGE_SIZE

    # resize
    if h > w:
        size = (size, int(size * h / w))
    else:
        size = (int(size * w / h), size)
    img = np.array(
        Image.fromarray(img).resize(
            size,
            resample=Image.ANTIALIAS if not mask else Image.NEAREST))

    # center crop
    h, w = img.shape[:2]
    pad_h = (h - crop_size) // 2
    pad_w = (w - crop_size) // 2
    img = img[pad_h:pad_h + crop_size, pad_w:pad_w + crop_size, :]

    # normalize
    if not mask:
        img = normalize_image(img.astype(np.float32), 'ImageNet')
    else:
        img = img / 255

    img = img.transpose(2, 0, 1)  # HWC -> CHW
    img = np.expand_dims(img, axis=0)

    return img
def preprocess(img):
    h, w = (IMAGE_HEIGHT, IMAGE_WIDTH)
    im_h, im_w, _ = img.shape

    max_orig_size = max(im_h, im_w)
    min_orig_size = min(im_h, im_w)
    if max_orig_size / min_orig_size * h > w:
        size = int(round(w * min_orig_size / max_orig_size))
    else:
        size = h

    if im_h > im_w:
        scale = size / im_w
        ow = size
        oh = (size * im_h) // im_w
    else:
        scale = size / im_h
        oh = size
        ow = (size * im_w) // im_h

    if ow != im_w or oh != im_h:
        img = np.array(Image.fromarray(img).resize((ow, oh), Image.BILINEAR))

    img = normalize_image(img, normalize_type='ImageNet')

    # padding
    new_img = np.zeros((h, w, 3))
    x = (w - ow) // 2
    y = (h - oh) // 2
    new_img[y:y + oh, x:x + ow, :] = img
    img = new_img

    img = img.transpose(2, 0, 1)  # HWC -> CHW
    img = img.astype(np.float32)

    return img, (x, y), scale
def preprocess(img):
    img = img.astype(np.float32)
    img = normalize_image(img, normalize_type='ImageNet')
    img = img.transpose((2, 0, 1))  # HWC -> CHW
    img = np.expand_dims(img, axis=0)
    return img
def predict(landmark_detector, face_detector, img):
    if face_detector is not None:
        bboxes = detect_faces(img, face_detector)
    else:
        h, w = img.shape[:2]
        bboxes = [np.array([0, 0, w - 1, h - 1, 1])]
    bboxes = np.array(bboxes)

    pose_results = []
    if len(bboxes) == 0:
        return pose_results

    bboxes_xywh = xyxy2xywh(bboxes)

    img_size = (256, 256)
    batch_data = []
    img_metas = []
    for bbox in bboxes_xywh:
        c, s = box2cs(bbox)
        r = 0
        img_metas.append({
            "center": c,
            "scale": s,
        })
        trans = get_affine_transform(c, s, r, img_size)
        _img = cv2.warpAffine(
            img, trans, (img_size[0], img_size[1]),
            flags=cv2.INTER_LINEAR)
        _img = normalize_image(_img[:, :, ::-1], 'ImageNet')
        batch_data.append(_img)

    batch_data = np.asarray(batch_data)
    batch_data = batch_data.transpose((0, 3, 1, 2))

    output = landmark_detector.predict([batch_data])
    heatmap = output[0]

    if 1:  # do flip
        batch_data = batch_data[:, :, :, ::-1]  # horizontal flip
        output = landmark_detector.predict([batch_data])
        flipped_heatmap = output[0]

        flip_pairs = [[0, 4], [1, 3], [5, 10], [6, 9], [7, 8],
                      [11, 19], [12, 18], [13, 17], [14, 22],
                      [15, 21], [16, 20], [24, 26]]
        flipped_heatmap = flip_back(flipped_heatmap, flip_pairs)

        # feature is not aligned, shift flipped heatmap for higher accuracy
        flipped_heatmap[:, :, :, 1:] = flipped_heatmap[:, :, :, :-1]

        heatmap = (heatmap + flipped_heatmap) * 0.5

    keypoint_result = keypoint_decode(heatmap, img_metas)

    return keypoint_result, bboxes
def midas_imread(image_path):
    if not os.path.isfile(image_path):
        print(f'[ERROR] {image_path} not found.')
        sys.exit()
    image = cv2.imread(image_path)
    if image.ndim == 2:
        image = cv2.cvtColor(image, cv2.COLOR_GRAY2BGR)
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    image = normalize_image(image, 'ImageNet')
    return midas_resize(image, IMAGE_HEIGHT, IMAGE_WIDTH)
def midas_imread(image_path):
    if not os.path.isfile(image_path):
        logger.error(f'{image_path} not found.')
        sys.exit()
    image = cv2.imread(image_path)
    if image.ndim == 2:
        image = cv2.cvtColor(image, cv2.COLOR_GRAY2BGR)
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    image = normalize_image(image, 'ImageNet')
    h, w = (IMAGE_HEIGHT, IMAGE_WIDTH) if not args.v21 or args.model_type == 'large' \
        else (IMAGE_HEIGHT_SMALL, IMAGE_WIDTH_SMALL)
    return midas_resize(image, h, w)
def preprocess(img, gray=False):
    if gray:
        img = img / 255
        img = (img - 0.5) / 0.5
        img = img[:, :, None]
    else:
        img = normalize_image(img, normalize_type='ImageNet')

    img = img.transpose(2, 0, 1)  # HWC -> CHW
    img = np.expand_dims(img, axis=0)
    img = img.astype(np.float32)

    return img
def preprocess_aug(img, mask=False, angle_range=[-10, 10], return_refs=False):
    h, w = img.shape[:2]
    size = IMAGE_RESIZE
    crop_size = IMAGE_SIZE

    # resize
    if h > w:
        size = (size, int(size * h / w))
    else:
        size = (int(size * w / h), size)
    img = np.array(
        Image.fromarray(img).resize(
            size,
            resample=Image.ANTIALIAS if not mask else Image.NEAREST))

    # for visualize
    img_resized = img.copy()

    # random rotate
    if not mask:
        h, w = img.shape[:2]
        angle = np.random.randint(angle_range[0], angle_range[1] + 1)
        rot_mat = cv2.getRotationMatrix2D((w / 2, h / 2), angle, 1)
        img = cv2.warpAffine(
            src=img, M=rot_mat, dsize=(w, h),
            borderMode=cv2.BORDER_REPLICATE, flags=cv2.INTER_LINEAR)

    # random crop
    if not mask:
        h, w = img.shape[:2]
        pad_h = np.random.randint(0, (h - crop_size))
        pad_w = np.random.randint(0, (w - crop_size))
        img = img[pad_h:pad_h + crop_size, pad_w:pad_w + crop_size, :]

    # normalize
    if not mask:
        img = normalize_image(img.astype(np.float32), 'ImageNet')
    else:
        img = img / 255

    img = img.transpose(2, 0, 1)  # HWC -> CHW
    img = np.expand_dims(img, axis=0)

    if return_refs:
        return img, img_resized, angle, pad_h, pad_w
    else:
        return img
def preprocess_frame(
        frame, input_height, input_width, data_rgb=True, normalize_type='255'):
    """
    Pre-process the frames taken from the webcam to input to ailia.

    Parameters
    ----------
    frame: numpy array
    input_height: int
        ailia model input height
    input_width: int
        ailia model input width
    data_rgb: bool (default: True)
        Convert as rgb image when True, as gray scale image when False.
        Only `data` will be influenced by this configuration.
    normalize_type: string (default: 255)
        Normalize type should be chosen from the type below.
        - '255': simply dividing by 255.0
        - '127.5': output range : -1 and 1
        - 'ImageNet': normalize by mean and std of ImageNet
        - 'None': no normalization

    Returns
    -------
    img: numpy array
        Image with the proportions of height and width adjusted by padding
        for ailia model input.
    data: numpy array
        Input data for ailia
    """
    img, resized_img = adjust_frame_size(frame, input_height, input_width)

    if data_rgb:
        resized_img = cv2.cvtColor(resized_img, cv2.COLOR_BGR2RGB)

    data = normalize_image(resized_img, normalize_type)

    if data_rgb:
        data = np.rollaxis(data, 2, 0)
        data = np.expand_dims(data, axis=0).astype(np.float32)
    else:
        data = cv2.cvtColor(data.astype(np.float32), cv2.COLOR_BGR2GRAY)
        data = data[np.newaxis, np.newaxis, :, :]
    return img, data
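# --- Usage sketch (not from the original script) ---
# A minimal sketch of how preprocess_frame could be driven from a webcam loop,
# assuming `net` is an already-created ailia.Net expecting an RGB input of
# input_height x input_width; the loop below and the function name
# run_webcam_demo are illustrative assumptions only.
import cv2


def run_webcam_demo(net, input_height, input_width):
    capture = cv2.VideoCapture(0)  # default webcam
    while True:
        ret, frame = capture.read()
        if not ret or (cv2.waitKey(1) & 0xFF == ord('q')):
            break
        # img: padded frame for display, data: NCHW float32 tensor for the model
        img, data = preprocess_frame(
            frame, input_height, input_width,
            data_rgb=True, normalize_type='ImageNet')
        preds = net.predict(data)  # assumed ailia-style predict call
        cv2.imshow('frame', img)
    capture.release()
    cv2.destroyAllWindows()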
def face_detect(img, face_net):
    IMAGE_BLAZE_SIZE = 128

    img_0 = img
    img = normalize_image(img, normalize_type='127.5')
    img = cv2.resize(img, (IMAGE_BLAZE_SIZE, IMAGE_BLAZE_SIZE))
    img = img.transpose((2, 0, 1))  # HWC -> CHW
    img = np.expand_dims(img, axis=0)

    output = face_net.predict([img])
    detections = but.postprocess(output)
    detections = detections[0]

    # sort by confidence
    detections = sorted(detections, key=lambda x: x[16], reverse=True)

    if len(detections) == 0:
        return None, (0, 0)

    detection = detections[0]
    h, w = img_0.shape[:2]
    ymin = int(detection[0] * h)
    xmin = int(detection[1] * w)
    ymax = int(detection[2] * h)
    xmax = int(detection[3] * w)

    h = ymax - ymin
    w = xmax - xmin
    if h > w:
        p = (h - w) // 2
        w = h
        xmin -= p
    else:
        p = (w - h) // 2
        h = w
        ymin -= p

    img = img_0[ymin:ymin + h, xmin:xmin + w]

    h2, w2 = img.shape[:2]
    if h != h2 or w != w2:
        return None, (0, 0)

    return img, (ymin, xmin)
def preprocess(img, image_shape):
    h, w = image_shape
    im_h, im_w, _ = img.shape

    # keep_aspect
    scale = min(h / im_h, w / im_w)
    ow, oh = int(im_w * scale + 0.5), int(im_h * scale + 0.5)
    if ow != im_w or oh != im_h:
        img = cv2.resize(img, (ow, oh), interpolation=cv2.INTER_LINEAR)

    img = normalize_image(img, normalize_type='ImageNet')

    pad_img = np.zeros((h, w, 3), dtype=img.dtype)
    pad_img[:oh, :ow, ...] = img
    img = pad_img

    img = img.transpose(2, 0, 1)  # HWC -> CHW
    img = np.expand_dims(img, axis=0)
    img = img.astype(np.float32)

    return img, scale
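# --- Usage sketch (not from the original script) ---
# A minimal sketch of how the scale returned by the keep-aspect preprocess
# above could be used to map outputs back to original-image coordinates,
# assuming `net` is an ailia-style model whose first output contains boxes
# in the resized/padded coordinate space; the names detect_and_rescale and
# the output layout are illustrative assumptions only.
def detect_and_rescale(net, img, image_shape=(480, 640)):
    inp, scale = preprocess(img, image_shape)
    output = net.predict([inp])
    boxes = output[0]      # boxes in resized-image coordinates (assumed)
    boxes = boxes / scale  # undo the keep-aspect resize
    return boxes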
def transform(img, pp_net):
    img_0 = img

    img = cv2.resize(img, (U2NET_IMAGE_SIZE, U2NET_IMAGE_SIZE))

    # ToTensorLab part in original repo
    img = img / np.max(img) * 255
    img = normalize_image(img, normalize_type='ImageNet')
    input_data = img.transpose((2, 0, 1))[np.newaxis, :, :, :]

    output = pp_net.predict(input_data)
    pred = output[0, 0, :, :]

    h, w = img_0.shape[:2]
    mask = cv2.resize(pred, (w, h))
    mask = np.clip(mask, 0, 1)
    mask = np.expand_dims(mask, axis=2)

    back = np.ones((h, w, 3)) * 255
    img = img_0 * mask + back * (1 - mask)

    return img
def preprocess(img, bbox):
    image_size = (IMAGE_SIZE, IMAGE_SIZE)

    c, s = _box2cs(bbox)
    r = 0
    trans = get_affine_transform(c, s, r, image_size)
    img = cv2.warpAffine(
        img, trans,
        (int(image_size[0]), int(image_size[1])),
        flags=cv2.INTER_LINEAR)

    # normalize
    img = normalize_image(img, normalize_type='ImageNet')

    img = img.transpose(2, 0, 1)  # HWC -> CHW
    img = np.expand_dims(img, axis=0)

    img_metas = [{
        'center': c,
        'scale': s,
    }]

    return img, img_metas
def preprocess(img, bboxs, num_pos=2):
    IMAGE_SIZE = (288, 384)

    inputs = []
    centers = []
    scales = []
    for bbox in bboxs[:num_pos]:
        c, s = box_to_center_scale(bbox, img.shape[0], img.shape[1])
        centers.append(c)
        scales.append(s)
        r = 0
        trans = get_affine_transform(c, s, r, IMAGE_SIZE)
        input = cv2.warpAffine(
            img, trans,
            (IMAGE_SIZE[0], IMAGE_SIZE[1]),
            flags=cv2.INTER_LINEAR)
        input = normalize_image(input.astype(np.float32), 'ImageNet')
        input = input.transpose(2, 0, 1)  # HWC -> CHW
        input = np.expand_dims(input, axis=0)
        inputs.append(input)

    inputs = np.vstack(inputs)

    return inputs, img, centers, scales
def preprocess(img, image_shape):
    h, w = image_shape
    im_h, im_w, _ = img.shape

    r = min(h / im_h, w / im_w)
    oh, ow = int(im_h * r), int(im_w * r)

    resized_img = cv2.resize(
        img, (ow, oh),
        interpolation=cv2.INTER_LINEAR,
    )

    data = np.zeros((h, w, 3), dtype=np.uint8)
    ph, pw = (h - oh) // 2, (w - ow) // 2
    data[ph:ph + oh, pw:pw + ow] = resized_img

    data = normalize_image(data, '127.5')
    data = data.transpose((2, 0, 1))  # HWC -> CHW
    data = np.expand_dims(data, axis=0)
    data = data.astype(np.float32)

    return data, (ph, pw), (oh, ow)
def preprocess(img):
    im_h, im_w, _ = img.shape

    ow, oh = im_w, im_h
    if im_w % (1 << 7) != 0:
        ow = (((im_w >> 7) + 1) << 7)
    if im_h % (1 << 7) != 0:
        oh = (((im_h >> 7) + 1) << 7)

    pad = np.zeros((oh, ow, 3))
    pad_h = (oh - im_h) // 2
    pad_w = (ow - im_w) // 2

    # reflection padding
    pad[pad_h:pad_h + im_h, pad_w:pad_w + im_w, :] = img
    if 0 < pad_w:
        ref = img[:, ::-1, :]
        pad[pad_h:pad_h + im_h, :pad_w, :] = ref[:, -pad_w:, :]
        rem = ow - pad_w - im_w
        pad[pad_h:pad_h + im_h, -rem:, :] = ref[:, :rem, :]
    if 0 < pad_h:
        ref = pad[pad_h:pad_h + im_h, :, :][::-1]
        pad[:pad_h, ...] = ref[-pad_h:, ...]
        rem = oh - pad_h - im_h
        pad[-rem:, ...] = ref[:rem, ...]
    img = pad

    img = normalize_image(img, normalize_type='255')
    img = img[:, :, ::-1]  # BGR -> RGB
    img = img.transpose(2, 0, 1)  # HWC -> CHW
    img = np.expand_dims(img, axis=0)
    img = img.astype(np.float32)

    return img, (pad_h, pad_w)
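# --- Usage sketch (not from the original script) ---
# A minimal sketch of how the (pad_h, pad_w) offsets returned above could be
# used to crop the model output back to the original resolution, assuming
# `net` is an ailia-style model that returns a full-resolution map with the
# same padded height/width as its input; the function name and output shape
# are illustrative assumptions only.
def run(net, img):
    im_h, im_w = img.shape[:2]
    inp, (pad_h, pad_w) = preprocess(img)
    out = net.predict([inp])[0]          # assumed shape: (1, C, oh, ow)
    out = out[0].transpose(1, 2, 0)      # CHW -> HWC
    out = out[pad_h:pad_h + im_h, pad_w:pad_w + im_w]  # remove padding
    return out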
def run_training(continue_run):
    logging.info('EXPERIMENT NAME: %s' % config.experiment_name)

    init_step = 0

    if continue_run:
        logging.info(
            '!!!!!!!!!!!!!!!!!!!!!!!!!!!! Continuing previous run !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!'
        )
        try:
            init_checkpoint_path = utils.get_latest_model_checkpoint_path(
                log_dir, 'model.ckpt')
            logging.info('Checkpoint path: %s' % init_checkpoint_path)
            init_step = int(
                init_checkpoint_path.split('/')[-1].split('-')[-1]
            ) + 1  # plus 1 b/c otherwise starts with eval
            logging.info('Latest step was: %d' % init_step)
        except:
            logging.warning(
                "!!! Didn't find init checkpoint. Maybe first run failed. Disabling continue mode..."
            )
            continue_run = False
            init_step = 0
        logging.info(
            '!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!'
        )

    train_on_all_data = config.train_on_all_data

    # Load data
    data = acdc_data.load_and_maybe_process_data(
        input_folder=config.input_folder,
        preprocessing_folder=config.preprocessing_folder,
        mode=config.data_mode,
        size=config.image_size,
        target_resolution=config.target_resolution,
        force_overwrite=False,
        split_test_train=config.split_test_train)

    # the following are HDF5 datasets, not numpy arrays
    images_train = data['images_train']
    labels_train = data['masks_train']
    id_train = data['id_images_train']

    if not train_on_all_data:
        images_val = data['images_test']
        labels_val = data['masks_test']
        id_val = data['id_images_test']

    if config.use_data_fraction:
        num_images = images_train.shape[0]
        new_last_index = int(float(num_images) * config.use_data_fraction)

        logging.warning('USING ONLY FRACTION OF DATA!')
        logging.warning(' - Number of imgs orig: %d, Number of imgs new: %d' %
                        (num_images, new_last_index))
        images_train = images_train[0:new_last_index, ...]
        labels_train = labels_train[0:new_last_index, ...]

    logging.info('Data summary:')
    logging.info(' - Images:')
    logging.info(images_train.shape)
    logging.info(images_train.dtype)
    logging.info(' - Labels:')
    logging.info(labels_train.shape)
    logging.info(labels_train.dtype)

    # pre-process
    for img in images_train:
        if config.equalize:
            img = image_utils.equalization_image(img)
        if config.clahe:
            img = image_utils.CLAHE(img)
        if config.standardize:
            img = image_utils.standardize_image(img)
        if config.normalize:
            img = image_utils.normalize_image(img)

    if not train_on_all_data:
        for img in images_val:
            if config.equalize:
                img = image_utils.equalization_image(img)
            if config.clahe:
                img = image_utils.CLAHE(img)
            if config.standardize:
                img = image_utils.standardize_image(img)
            if config.normalize:
                img = image_utils.normalize_image(img)

    if config.prob:  # if prob is not 0
        logging.info(
            'Before data_augmentation the number of training images is:')
        logging.info(images_train.shape[0])

        # augmentation
        image_aug, label_aug = aug.augmentation_function(
            images_train, labels_train)

        # num_aug = image_aug.shape[0]
        # id images augmented will be b'0.0'
        # id_aug = np.zeros([num_aug,]).astype('|S9')
        # concatenate
        # id_train = np.concatenate((id__train, id_aug))
        images_train = np.concatenate((images_train, image_aug))
        labels_train = np.concatenate((labels_train, label_aug))

        logging.info(
            'After data_augmentation the number of training images is:')
        logging.info(images_train.shape[0])
    else:
        logging.info('No data_augmentation. Number of training images is:')
        logging.info(images_train.shape[0])

    # Tell TensorFlow that the model will be built into the default Graph.
    with tf.Graph().as_default():

        # Generate placeholders for the images and labels.
        image_tensor_shape = [config.batch_size] + list(config.image_size) + [1]
        mask_tensor_shape = [config.batch_size] + list(config.image_size)

        images_pl = tf.placeholder(tf.float32,
                                   shape=image_tensor_shape,
                                   name='images')
        labels_pl = tf.placeholder(tf.uint8,
                                   shape=mask_tensor_shape,
                                   name='labels')

        learning_rate_pl = tf.placeholder(tf.float32, shape=[])
        training_pl = tf.placeholder(tf.bool, shape=[])

        tf.summary.scalar('learning_rate', learning_rate_pl)

        # Build a Graph that computes predictions from the inference model.
        if (config.experiment_name == 'unet2D_valid'
                or config.experiment_name == 'unet2D_same'
                or config.experiment_name == 'unet2D_same_mod'):
            logits = model.inference(images_pl, config, training=training_pl)
        elif config.experiment_name == 'ENet':
            with slim.arg_scope(
                    model_structure.ENet_arg_scope(weight_decay=2e-4)):
                logits = model_structure.ENet(
                    images_pl,
                    num_classes=config.nlabels,
                    batch_size=config.batch_size,
                    is_training=True,
                    reuse=None,
                    num_initial_blocks=1,
                    stage_two_repeat=2,
                    skip_connections=config.skip_connections)
        else:
            logging.warning('invalid experiment_name!')

        logging.info('images_pl shape')
        logging.info(images_pl.shape)
        logging.info('labels_pl shape')
        logging.info(labels_pl.shape)
        logging.info('logits shape:')
        logging.info(logits.shape)

        # Add to the Graph the Ops for loss calculation.
        [loss, _, weights_norm] = model.loss(
            logits,
            labels_pl,
            nlabels=config.nlabels,
            loss_type=config.loss_type,
            weight_decay=config.weight_decay)  # second output is unregularised loss

        # record how Total loss and weight decay change over time
        tf.summary.scalar('loss', loss)
        tf.summary.scalar('weights_norm_term', weights_norm)

        # Add to the Graph the Ops that calculate and apply gradients.
        if config.momentum is not None:
            train_op = model.training_step(loss,
                                           config.optimizer_handle,
                                           learning_rate_pl,
                                           momentum=config.momentum)
        else:
            train_op = model.training_step(loss,
                                           config.optimizer_handle,
                                           learning_rate_pl)

        # Add the Op to compare the logits to the labels during evaluation.
        # loss and dice on a minibatch
        eval_loss = model.evaluation(logits,
                                     labels_pl,
                                     images_pl,
                                     nlabels=config.nlabels,
                                     loss_type=config.loss_type)

        # Build the summary Tensor based on the TF collection of Summaries.
        summary = tf.summary.merge_all()

        # Add the variable initializer Op.
        init = tf.global_variables_initializer()

        # Create a saver for writing training checkpoints.
        if train_on_all_data:
            max_to_keep = None
        else:
            max_to_keep = 5

        saver = tf.train.Saver(max_to_keep=max_to_keep)
        saver_best_dice = tf.train.Saver()
        saver_best_xent = tf.train.Saver()

        # Create a session for running Ops on the Graph.
        configP = tf.ConfigProto()
        configP.gpu_options.allow_growth = True  # Do not assign whole gpu memory, just use it on the go
        configP.allow_soft_placement = True  # If an operation is not defined on the default device, let it execute on another
        sess = tf.Session(config=configP)

        # Instantiate a SummaryWriter to output summaries and the Graph.
        summary_writer = tf.summary.FileWriter(log_dir, sess.graph)

        # with tf.name_scope('monitoring'):
        val_error_ = tf.placeholder(tf.float32, shape=[], name='val_error')
        val_error_summary = tf.summary.scalar('validation_loss', val_error_)

        val_dice_ = tf.placeholder(tf.float32, shape=[], name='val_dice')
        val_dice_summary = tf.summary.scalar('validation_dice', val_dice_)

        val_summary = tf.summary.merge([val_error_summary, val_dice_summary])

        train_error_ = tf.placeholder(tf.float32, shape=[], name='train_error')
        train_error_summary = tf.summary.scalar('training_loss', train_error_)

        train_dice_ = tf.placeholder(tf.float32, shape=[], name='train_dice')
        train_dice_summary = tf.summary.scalar('training_dice', train_dice_)

        train_summary = tf.summary.merge(
            [train_error_summary, train_dice_summary])

        # Run the Op to initialize the variables.
        sess.run(init)

        if continue_run:
            # Restore session
            saver.restore(sess, init_checkpoint_path)

        step = init_step
        curr_lr = config.learning_rate

        no_improvement_counter = 0
        best_val = np.inf
        last_train = np.inf
        loss_history = []
        loss_gradient = np.inf
        best_dice = 0

        for epoch in range(config.max_epochs):

            logging.info('EPOCH %d' % epoch)

            for batch in iterate_minibatches(images_train,
                                             labels_train,
                                             batch_size=config.batch_size):

                start_time = time.time()

                # batch = bgn_train.retrieve()
                x, y = batch

                # TEMPORARY HACK (to avoid incomplete batches)
                if y.shape[0] < config.batch_size:
                    step += 1
                    continue

                feed_dict = {
                    images_pl: x,
                    labels_pl: y,
                    learning_rate_pl: curr_lr,
                    training_pl: True
                }

                _, loss_value = sess.run([train_op, loss], feed_dict=feed_dict)

                duration = time.time() - start_time

                # Write the summaries and print an overview fairly often.
                if step % 10 == 0:
                    # Print status to stdout.
                    logging.info('Step %d: loss = %.2f (%.3f sec)' %
                                 (step, loss_value, duration))
                    # Update the events file.
                    summary_str = sess.run(summary, feed_dict=feed_dict)
                    summary_writer.add_summary(summary_str, step)
                    summary_writer.flush()

                step += 1

            # end epoch
            logging.info('Training Data Eval:')
            [train_loss, train_dice] = do_eval(sess, eval_loss, images_pl,
                                               labels_pl, training_pl,
                                               images_train, labels_train,
                                               config.batch_size)

            train_summary_msg = sess.run(train_summary,
                                         feed_dict={
                                             train_error_: train_loss,
                                             train_dice_: train_dice
                                         })
            summary_writer.add_summary(train_summary_msg, step)

            loss_history.append(train_loss)
            if len(loss_history) > 5:
                loss_history.pop(0)
                loss_gradient = (loss_history[-5] - loss_history[-1]) / 2
                logging.info('loss gradient is currently %f' % loss_gradient)

            if train_loss <= last_train:  # best_train:
                no_improvement_counter = 0
                logging.info('Decrease in training error!')
            else:
                no_improvement_counter = no_improvement_counter + 1
                logging.info('No improvement in training error for %d steps' %
                             no_improvement_counter)

            last_train = train_loss

            # Save a checkpoint and evaluate the model periodically.
            checkpoint_file = os.path.join(log_dir, 'model.ckpt')
            saver.save(sess, checkpoint_file, global_step=step)
            # Evaluate against the training set.

            if not train_on_all_data:
                # Evaluate against the validation set.
                logging.info('Validation Data Eval:')
                [val_loss, val_dice] = do_eval(sess, eval_loss, images_pl,
                                               labels_pl, training_pl,
                                               images_val, labels_val,
                                               config.batch_size)

                val_summary_msg = sess.run(val_summary,
                                           feed_dict={
                                               val_error_: val_loss,
                                               val_dice_: val_dice
                                           })
                summary_writer.add_summary(val_summary_msg, step)

                if val_dice > best_dice:
                    best_dice = val_dice
                    best_file = os.path.join(log_dir, 'model_best_dice.ckpt')
                    saver_best_dice.save(sess, best_file, global_step=step)
                    logging.info(
                        'Found new best dice on validation set! - %f - Saving model_best_dice.ckpt'
                        % val_dice)

                if val_loss < best_val:
                    best_val = val_loss
                    best_file = os.path.join(log_dir, 'model_best_xent.ckpt')
                    saver_best_xent.save(sess, best_file, global_step=step)
                    logging.info(
                        'Found new best crossentropy on validation set! - %f - Saving model_best_xent.ckpt'
                        % val_loss)

        sess.close()
    data.close()
def pred(dataURL):
    """
    Render prediction result.
    """
    # decode base64  '._-' -> '+/='
    dataURL = dataURL.replace('.', '+')
    dataURL = dataURL.replace('_', '/')
    dataURL = dataURL.replace('-', '=')

    # get the base64 string
    image_b64_str = dataURL
    # convert string to bytes
    byte_data = base64.b64decode(image_b64_str)
    image_data = BytesIO(byte_data)
    # open Image with PIL
    img = Image.open(image_data)

    # save original image as png (for debugging)
    ts = time.time()
    # img.save('image' + str(ts) + '.png', 'PNG')

    # convert image to RGBA
    img = img.convert("RGBA")

    # preprocess the image for the model
    image_cropped = crop_image(img)  # crop the image and resize to 28x28
    image_normalized = normalize_image(image_cropped)  # normalize color after crop

    # convert image from RGBA to RGB
    img_rgb = convert_to_rgb(image_normalized)

    # convert image to numpy
    image_np = convert_to_np(img_rgb)

    # apply model and print prediction
    label, label_num, preds = get_prediction(model, image_np)
    print("This is a {}".format(label_num))

    # save classification results as a diagram
    view_classify(image_np, preds)

    # create plotly visualization
    graphs = [
        # plot with probabilities for each class of images
        {
            'data': [
                go.Bar(
                    x=preds.ravel().tolist(),
                    y=list(label_dict.values()),
                    orientation='h')
            ],
            'layout': {
                'title': 'Class Probabilities',
                'yaxis': {
                    'title': "Classes"
                },
                'xaxis': {
                    'title': "Probability",
                }
            }
        }
    ]

    # encode plotly graphs in JSON
    ids = ["graph-{}".format(i) for i, _ in enumerate(graphs)]
    graphJSON = json.dumps(graphs, cls=plotly.utils.PlotlyJSONEncoder)

    # render the hook.html passing prediction results
    return render_template(
        'hook.html',
        result=label_num,      # predicted class label
        ids=ids,               # plotly graph ids
        graphJSON=graphJSON,   # json plotly graphs
        dataURL=dataURL        # image to display with result
    )
def prepare_data(input_folder, output_file, mode, size, target_resolution):
    '''
    Main function that prepares a dataset from the raw challenge data to an hdf5 dataset
    '''

    assert (mode in ['2D', '3D']), 'Unknown mode: %s' % mode
    if mode == '2D' and not len(size) == 2:
        raise AssertionError('Inadequate number of size parameters')
    if mode == '3D' and not len(size) == 3:
        raise AssertionError('Inadequate number of size parameters')
    if mode == '2D' and not len(target_resolution) == 2:
        raise AssertionError(
            'Inadequate number of target resolution parameters')
    if mode == '3D' and not len(target_resolution) == 3:
        raise AssertionError(
            'Inadequate number of target resolution parameters')

    hdf5_file = h5py.File(output_file, "w")

    nx, ny = size
    # scale_vector = [config.pixel_size[0] / target_resolution[0],
    #                 config.pixel_size[1] / target_resolution[1]]
    count = 1
    train_addrs = []
    val_addrs = []
    masktrain_addrs = []
    maskval_addrs = []

    # If split_test_train is True, patients are split between train and validation.
    # When running the test, split_test_train must be False. `split` controls how many
    # patients go to validation: with 2, 50% of them are used for validation; with 5,
    # for example, one patient in every five ends up in validation, and so on.
    split_test_train = config.split_test_train
    if split_test_train:
        split = config.split
    else:
        split = 99999

    path_img = os.path.join(input_folder, 'img')
    path_mask = os.path.join(input_folder, 'mask')

    for folders_img, folders_mask in zip(sorted(os.listdir(path_img)),
                                         sorted(os.listdir(path_mask))):

        folder_path_img = os.path.join(path_img, folders_img)
        folder_path_mask = os.path.join(path_mask, folders_mask)

        if count % split == 0:  # validation
            path = os.path.join(folder_path_img, '*.png')
            for file in sorted(glob.glob(path)):
                val_addrs.append(file)
            path = os.path.join(folder_path_mask, '*.png')
            for file in sorted(glob.glob(path)):
                maskval_addrs.append(file)
        else:  # training
            path = os.path.join(folder_path_img, '*.png')
            for file in sorted(glob.glob(path)):
                train_addrs.append(file)
            path = os.path.join(folder_path_mask, '*.png')
            for file in sorted(glob.glob(path)):
                masktrain_addrs.append(file)

        count = count + 1

    train_shape = (len(train_addrs), nx, ny)
    val_shape = (len(val_addrs), nx, ny)

    if config.split_test_train:
        if len(train_addrs) != len(masktrain_addrs) or len(val_addrs) != len(maskval_addrs):
            raise AssertionError(
                'Error: Masks and Images do not have the same number !!!')

    hdf5_file.create_dataset("images_train", train_shape, np.float32)
    hdf5_file.create_dataset("masks_train", train_shape, np.uint8)

    if config.split_test_train:
        hdf5_file.create_dataset("images_val", val_shape, np.float32)
        hdf5_file.create_dataset("masks_val", val_shape, np.uint8)

    for i in range(len(train_addrs)):
        addr_img = train_addrs[i]
        addr_mask = masktrain_addrs[i]
        img = cv2.imread(addr_img, 0)  # 0 for grayscale
        mask = cv2.imread(addr_mask, 0)
        if config.standardize:
            img = image_utils.standardize_image(img)
        if config.normalize:
            img = image_utils.normalize_image(img)
        img = cv2.resize(img, (nx, ny), interpolation=cv2.INTER_AREA)
        mask = cv2.resize(mask, (nx, ny), interpolation=cv2.INTER_NEAREST)
        # img = crop_or_pad_slice_to_size(img, nx, ny)
        # mask = crop_or_pad_slice_to_size(mask, nx, ny)
        hdf5_file["images_train"][i, ...] = img[None]
        hdf5_file["masks_train"][i, ...] = mask[None]

    if config.split_test_train:
        for i in range(len(val_addrs)):
            addr_img = val_addrs[i]
            addr_mask = maskval_addrs[i]
            img = cv2.imread(addr_img, 0)
            mask = cv2.imread(addr_mask, 0)
            if config.standardize:
                img = image_utils.standardize_image(img)
            if config.normalize:
                img = image_utils.normalize_image(img)
            img = cv2.resize(img, (nx, ny), interpolation=cv2.INTER_AREA)
            mask = cv2.resize(mask, (nx, ny), interpolation=cv2.INTER_NEAREST)
            # img = crop_or_pad_slice_to_size(img, nx, ny)
            # mask = crop_or_pad_slice_to_size(mask, nx, ny)
            hdf5_file["images_val"][i, ...] = img[None]
            hdf5_file["masks_val"][i, ...] = mask[None]

    # After test train loop:
    hdf5_file.close()
def recognize_from_image(net):
    mask_paths = glob.glob('masks/*.jpg')
    N_mask = len(mask_paths)

    # input image loop
    for image_path in args.input:
        logger.info(image_path)

        # prepare ground truth
        gt_img = load_image(image_path)
        gt_img = cv2.cvtColor(gt_img, cv2.COLOR_BGRA2RGB)
        gt_img = np.array(
            Image.fromarray(gt_img).resize(
                (IMAGE_WIDTH, IMAGE_HEIGHT), Image.BILINEAR))
        gt_img = normalize_image(gt_img, 'ImageNet')
        gt_img = gt_img.transpose((2, 0, 1))  # channel first

        # prepare mask
        if args.mask_index is not None:
            mask_path = mask_paths[args.mask_index % N_mask]
        else:
            mask_path = mask_paths[random.randint(0, N_mask - 1)]
        mask = load_image(mask_path)
        mask = cv2.cvtColor(mask, cv2.COLOR_BGRA2RGB)
        mask = np.array(
            Image.fromarray(mask).resize(
                (IMAGE_WIDTH, IMAGE_HEIGHT), Image.BILINEAR))
        mask = mask.transpose((2, 0, 1)) / 255  # channel first

        # prepare input data
        img = gt_img * mask
        img = np.expand_dims(img, axis=0)
        mask = np.expand_dims(mask, axis=0)
        gt_img = np.expand_dims(gt_img, axis=0)
        logger.debug(f'input image shape: {img.shape}')

        # inference
        logger.info('Start inference...')
        if args.benchmark:
            logger.info('BENCHMARK mode')
            total_time = 0
            for i in range(args.benchmark_count):
                start = int(round(time.time() * 1000))
                output = net.predict({'image': img, 'mask': mask})
                end = int(round(time.time() * 1000))
                logger.info(f'\tailia processing time {end - start} ms')
                if i != 0:
                    total_time = total_time + (end - start)
            logger.info(
                f'\taverage time {total_time / (args.benchmark_count - 1)} ms')
        else:
            output = net.predict({'image': img, 'mask': mask})

        output, _ = output

        img = postprocess(img[0])
        mask = mask[0].transpose(1, 2, 0) * 255
        output = postprocess(output[0])
        gt_img = postprocess(gt_img[0])

        res_img = np.hstack((img, mask, output, gt_img))

        savepath = get_savepath(args.savepath, image_path, ext='.png')
        logger.info(f'saved at : {savepath}')
        cv2.imwrite(savepath, res_img)

    logger.info('Script finished successfully.')
def recognize_from_video(net):
    capture = get_capture(args.video)

    # allocate output buffer
    f_h = int(capture.get(cv2.CAP_PROP_FRAME_HEIGHT))
    f_w = int(capture.get(cv2.CAP_PROP_FRAME_WIDTH))
    h, w = (IMAGE_HEIGHT, IMAGE_WIDTH) if not args.v21 or args.model_type == 'large' \
        else (IMAGE_HEIGHT_SMALL, IMAGE_WIDTH_SMALL)
    zero_frame = np.zeros((f_h, f_w, 3))
    resized_img = midas_resize(zero_frame, h, w)
    save_h, save_w = resized_img.shape[0], resized_img.shape[1]

    output_frame = np.zeros((save_h, save_w * 2, 3))

    # create video writer if savepath is specified as video format
    if args.savepath != SAVE_IMAGE_PATH:
        logger.warning(
            'currently, video results cannot be output correctly...')
        writer = get_writer(args.savepath, save_h, save_w * 2)
    else:
        writer = None

    input_shape_set = False
    while True:
        ret, frame = capture.read()
        if (cv2.waitKey(1) & 0xFF == ord('q')) or not ret:
            break

        # resize to midas input size
        frame = midas_resize(frame, h, w)
        resized_img = normalize_image(frame, 'ImageNet')
        resized_img = resized_img.transpose((2, 0, 1))  # channel first
        resized_img = resized_img[np.newaxis, :, :, :]

        # predict
        if not input_shape_set:
            net.set_input_shape(resized_img.shape)
            input_shape_set = True
        result = net.predict(resized_img)

        # normalize to 16bit
        depth_min = result.min()
        depth_max = result.max()
        max_val = (2 ** 16) - 1
        if depth_max - depth_min > np.finfo("float").eps:
            out = max_val * (result - depth_min) / (depth_max - depth_min)
        else:
            out = 0

        # convert to 8bit
        res_img = (out.transpose(1, 2, 0) / 256).astype("uint8")
        res_img = cv2.cvtColor(res_img, cv2.COLOR_GRAY2BGR)
        output_frame[:, save_w:save_w * 2, :] = res_img
        output_frame[:, 0:save_w, :] = frame
        output_frame = output_frame.astype("uint8")

        cv2.imshow('depth', output_frame)

        # save results
        if writer is not None:
            writer.write(output_frame)

    capture.release()
    cv2.destroyAllWindows()
    if writer is not None:
        writer.release()

    logger.info('Script finished successfully.')
def compare_images():
    """
    This is a mode to determine if two input images have the same person
    by using the CNN model, which is used in DeepSORT to track the same person.
    It is assumed that there is always only one person in each image.
    We have not verified, and do not assume, the behavior in the case of
    multiple people. (Future work)
    """
    # net initialize
    detector = init_detector(args.env_id)
    extractor = ailia.Net(EX_MODEL_PATH, EX_WEIGHT_PATH, env_id=args.env_id)

    # prepare input data
    input_data = []
    for i in range(len(args.pairimage)):
        input_data.append(load_image(args.pairimage[i]))

    # inference
    print('Start inference...')
    features = []
    for i in range(len(input_data)):
        # do detection
        detector.compute(input_data[i], THRESHOLD, IOU)
        h, w = input_data[i].shape[0], input_data[i].shape[1]
        bbox_xywh, cls_conf, cls_ids = get_detector_result(detector, h, w)

        # select person class
        mask = cls_ids == 0
        if mask.sum() == 0:
            print('Detector could not detect any person '
                  f'in the input image: {args.pairimage[i]}')
            print('Program finished.')
            sys.exit(0)

        bbox_xywh = bbox_xywh[mask]
        # bbox dilation just in case bbox too small,
        # delete this line if using a better pedestrian detector
        bbox_xywh[:, 3:] *= 1.2
        cls_conf = cls_conf[mask]

        # image crop
        """
        [INFO] If more than one bounding box is detected, the one with the
        highest confidence is used as the correct box. It should be noted that
        this works because we assume that the input image has only one person.
        """
        x1, y1, x2, y2 = xywh_to_xyxy(bbox_xywh[np.argmax(cls_conf)], h, w)
        src_img = cv2.cvtColor(input_data[i], cv2.COLOR_BGRA2RGB)
        img_crop = src_img[y1:y2, x1:x2]

        # preprocess
        img_crop = normalize_image(
            resize(img_crop), 'ImageNet'
        )[np.newaxis, :, :, :].transpose(0, 3, 1, 2)

        if args.benchmark:
            print('BENCHMARK mode')
            for i in range(5):
                start = int(round(time.time() * 1000))
                feature = extractor.predict(img_crop)
                end = int(round(time.time() * 1000))
                print(f'\tailia processing time {end - start} ms')
        else:
            feature = extractor.predict(img_crop)
        features.append(feature[0])

    sim = cosin_metric(features[0], features[1])
    if sim >= (1 - MAX_COSINE_DISTANCE):
        print(f'{args.pairimage}: SAME person (confidence: {sim})')
    else:
        print(f'{args.pairimage}: Different person (confidence: {sim})')
def recognize_from_video():
    results = []
    idx_frame = 0

    # net initialize
    detector = init_detector(args.env_id)
    extractor = ailia.Net(EX_MODEL_PATH, EX_WEIGHT_PATH, env_id=args.env_id)

    # tracker class instance
    metric = NearestNeighborDistanceMetric(
        "cosine", MAX_COSINE_DISTANCE, NN_BUDGET
    )
    tracker = Tracker(
        metric,
        max_iou_distance=0.7,
        max_age=70,
        n_init=3
    )

    capture = webcamera_utils.get_capture(args.video)

    # create video writer
    if args.savepath is not None:
        writer = webcamera_utils.get_writer(
            args.savepath,
            int(capture.get(cv2.CAP_PROP_FRAME_HEIGHT)),
            int(capture.get(cv2.CAP_PROP_FRAME_WIDTH)),
        )
    else:
        writer = None

    print('Start Inference...')
    while True:
        idx_frame += 1
        ret, frame = capture.read()
        if (cv2.waitKey(1) & 0xFF == ord('q')) or not ret:
            break

        # In order to use ailia.Detector, the input should have 4 channels.
        input_img = cv2.cvtColor(frame, cv2.COLOR_BGR2BGRA)
        h, w = frame.shape[0], frame.shape[1]

        # do detection
        detector.compute(input_img, THRESHOLD, IOU)
        bbox_xywh, cls_conf, cls_ids = get_detector_result(detector, h, w)

        # select person class
        mask = cls_ids == 0
        bbox_xywh = bbox_xywh[mask]
        # bbox dilation just in case bbox too small,
        # delete this line if using a better pedestrian detector
        bbox_xywh[:, 3:] *= 1.2
        cls_conf = cls_conf[mask]

        # do tracking
        img_crops = []
        for box in bbox_xywh:
            x1, y1, x2, y2 = xywh_to_xyxy(box, h, w)
            img_crops.append(frame[y1:y2, x1:x2])

        if img_crops:
            # preprocess
            img_batch = np.concatenate([
                normalize_image(resize(img), 'ImageNet')[np.newaxis, :, :, :]
                for img in img_crops
            ], axis=0).transpose(0, 3, 1, 2)

            # TODO better to pass a batch at once
            # features = extractor.predict(img_batch)
            features = []
            for img in img_batch:
                features.append(extractor.predict(img[np.newaxis, :, :, :])[0])
            features = np.array(features)
        else:
            features = np.array([])

        bbox_tlwh = xywh_to_tlwh(bbox_xywh)
        detections = [
            Detection(bbox_tlwh[i], conf, features[i])
            for i, conf in enumerate(cls_conf) if conf > MIN_CONFIDENCE
        ]

        # run non-maximum suppression
        boxes = np.array([d.tlwh for d in detections])
        scores = np.array([d.confidence for d in detections])
        nms_max_overlap = 1.0
        indices = non_max_suppression(boxes, nms_max_overlap, scores)
        detections = [detections[i] for i in indices]

        # update tracker
        tracker.predict()
        tracker.update(detections)

        # update bbox identities
        outputs = []
        for track in tracker.tracks:
            if not track.is_confirmed() or track.time_since_update > 1:
                continue
            box = track.to_tlwh()
            x1, y1, x2, y2 = tlwh_to_xyxy(box, h, w)
            track_id = track.track_id
            outputs.append(np.array([x1, y1, x2, y2, track_id], dtype=np.int))
        if len(outputs) > 0:
            outputs = np.stack(outputs, axis=0)

        # draw box for visualization
        if len(outputs) > 0:
            bbox_tlwh = []
            bbox_xyxy = outputs[:, :4]
            identities = outputs[:, -1]
            frame = draw_boxes(frame, bbox_xyxy, identities)

            for bb_xyxy in bbox_xyxy:
                bbox_tlwh.append(xyxy_to_tlwh(bb_xyxy))

            results.append((idx_frame - 1, bbox_tlwh, identities))

        cv2.imshow('frame', frame)

        if writer is not None:
            writer.write(frame)

    if args.savepath is not None:
        write_results(args.savepath.split('.')[0] + '.txt', results, 'mot')
    else:
        write_results('result.txt', results, 'mot')

    capture.release()
    cv2.destroyAllWindows()
    if writer is not None:
        writer.release()
    print(f'Save results to {args.savepath}')
    print('Script finished successfully.')
def recognize_from_video():
    try:
        print('[INFO] Webcam mode is activated')
        RECORD_TIME = 80
        capture = cv2.VideoCapture(int(args.video))
        if not capture.isOpened():
            print("[ERROR] webcamera not found")
            sys.exit(1)
    except ValueError:
        if check_file_existance(args.video):
            capture = cv2.VideoCapture(args.video)

    frame_rate = capture.get(cv2.CAP_PROP_FPS)
    if FRAME_SKIP:
        action_recognize_fps = int(args.fps)
    else:
        action_recognize_fps = frame_rate

    if args.savepath != "":
        size = (int(capture.get(cv2.CAP_PROP_FRAME_WIDTH)),
                int(capture.get(cv2.CAP_PROP_FRAME_HEIGHT)))
        fmt = cv2.VideoWriter_fourcc('m', 'p', '4', 'v')
        writer = cv2.VideoWriter(args.savepath, fmt, action_recognize_fps, size)
    else:
        writer = None

    # pose estimation
    env_id = ailia.get_gpu_environment_id()
    print(f'env_id: {env_id}')
    if args.arch == "lw_human_pose":
        pose = ailia.PoseEstimator(
            MODEL_PATH, WEIGHT_PATH, env_id=env_id, algorithm=ALGORITHM)
        detector = None
    else:
        detector = ailia.Detector(
            DETECTOR_MODEL_PATH,
            DETECTOR_WEIGHT_PATH,
            len(COCO_CATEGORY),
            format=ailia.NETWORK_IMAGE_FORMAT_RGB,
            channel=ailia.NETWORK_IMAGE_CHANNEL_FIRST,
            range=ailia.NETWORK_IMAGE_RANGE_U_FP32,
            algorithm=ailia.DETECTOR_ALGORITHM_YOLOV3,
            env_id=env_id)
        pose = ailia.Net(POSE_MODEL_PATH, POSE_WEIGHT_PATH, env_id=env_id)

    # tracker class instance
    extractor = ailia.Net(EX_MODEL_PATH, EX_WEIGHT_PATH, env_id=env_id)
    metric = NearestNeighborDistanceMetric(
        "cosine", MAX_COSINE_DISTANCE, NN_BUDGET)
    tracker = Tracker(
        metric, max_iou_distance=0.7, max_age=70, n_init=3)

    # action recognition
    env_id = ailia.get_gpu_environment_id()
    print(f'env_id: {env_id}')
    model = ailia.Net(ACTION_MODEL_PATH, ACTION_WEIGHT_PATH, env_id=env_id)

    action_data = {}

    frame_nb = int(capture.get(cv2.CAP_PROP_FRAME_COUNT))
    idx_frame = 0

    time_start = time.time()
    while True:
        time_curr = time.time()
        if args.video == '0' and time_curr - time_start > RECORD_TIME:
            break
        ret, frame = capture.read()

        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

        if (not ret) or (frame_nb >= 1 and idx_frame >= frame_nb):
            break

        if FRAME_SKIP:
            mod = round(frame_rate / action_recognize_fps)
            if mod >= 1:
                if idx_frame % mod != 0:
                    idx_frame = idx_frame + 1
                    continue

        input_image, input_data = adjust_frame_size(
            frame, frame.shape[0], frame.shape[1],
        )
        input_data = cv2.cvtColor(input_data, cv2.COLOR_BGR2BGRA)

        # inference
        if args.arch == "lw_human_pose":
            _ = pose.compute(input_data)
        else:
            detector.compute(input_data, THRESHOLD, IOU)

        # deepsort format
        h, w = input_image.shape[0], input_image.shape[1]
        if args.arch == "lw_human_pose":
            bbox_xywh, cls_conf, cls_ids = get_detector_result_lw_human_pose(
                pose, h, w)
        else:
            bbox_xywh, cls_conf, cls_ids = get_detector_result(detector, h, w)

        mask = cls_ids == 0
        bbox_xywh = bbox_xywh[mask]
        # bbox dilation just in case bbox too small,
        # delete this line if using a better pedestrian detector
        if args.arch == "pose_resnet":
            # bbox_xywh[:, 3:] *= 1.2  # May need to be removed in the future
            cls_conf = cls_conf[mask]

        # do tracking
        img_crops = []
        for box in bbox_xywh:
            x1, y1, x2, y2 = xywh_to_xyxy(box, h, w)
            img_crops.append(input_image[y1:y2, x1:x2])

        if img_crops:
            # preprocess
            img_batch = np.concatenate([
                normalize_image(resize(img), 'ImageNet')[np.newaxis, :, :, :]
                for img in img_crops
            ], axis=0).transpose(0, 3, 1, 2)

            # TODO better to pass a batch at once
            # features = extractor.predict(img_batch)
            features = []
            for img in img_batch:
                features.append(extractor.predict(img[np.newaxis, :, :, :])[0])
            features = np.array(features)
        else:
            features = np.array([])

        bbox_tlwh = xywh_to_tlwh(bbox_xywh)
        detections = [
            Detection(bbox_tlwh[i], conf, features[i])
            for i, conf in enumerate(cls_conf) if conf > MIN_CONFIDENCE
        ]

        # run non-maximum suppression
        boxes = np.array([d.tlwh for d in detections])
        scores = np.array([d.confidence for d in detections])
        nms_max_overlap = 1.0
        indices = non_max_suppression(boxes, nms_max_overlap, scores)
        detections = [detections[i] for i in indices]

        # update tracker
        tracker.predict()
        tracker.update(detections)

        # update bbox identities
        outputs = []
        for track in tracker.tracks:
            if not track.is_confirmed() or track.time_since_update > 1:
                continue
            box = track.to_tlwh()
            x1, y1, x2, y2 = tlwh_to_xyxy(box, h, w)
            track_id = track.track_id
            outputs.append(np.array([x1, y1, x2, y2, track_id], dtype=np.int))
        if len(outputs) > 0:
            outputs = np.stack(outputs, axis=0)

        # action detection
        actions = []
        persons = []
        if len(outputs) > 0:
            bbox_xyxy = outputs[:, :4]
            identities = outputs[:, -1]
            for i, box in enumerate(bbox_xyxy):
                id = identities[i]
                if not (id in action_data):
                    action_data[id] = np.zeros(
                        (ailia.POSE_KEYPOINT_CNT - 1, TIME_RANGE, 3))
                # action recognition
                action, person = action_recognition(
                    box, input_image, pose, detector, model, action_data[id])
                actions.append(action)
                persons.append(person)

        # draw box for visualization
        if len(outputs) > 0:
            bbox_tlwh = []
            bbox_xyxy = outputs[:, :4]
            identities = outputs[:, -1]
            frame = draw_boxes(
                input_image, bbox_xyxy, identities, actions, action_data, (0, 0))

            for bb_xyxy in bbox_xyxy:
                bbox_tlwh.append(xyxy_to_tlwh(bb_xyxy))

        # draw skeleton
        for person in persons:
            if person is not None:
                display_result(input_image, person)

        if writer is not None:
            writer.write(input_image)

        # show progress
        if idx_frame == 0:
            print()
        print("\r" + str(idx_frame + 1) + " / " + str(frame_nb), end="")
        if idx_frame == frame_nb - 1:
            print()

        cv2.imshow('frame', input_image)
        idx_frame = idx_frame + 1

    if writer is not None:
        writer.release()

    capture.release()
    cv2.destroyAllWindows()
    print('Script finished successfully.')