def resize_SVHN_img(input_img):
    '''Resizes a single-channel SVHN crop to 28 x 28, inverting bright
    backgrounds.

    The image is resized directly to 28 x 28 (no square padding is applied,
    so the aspect ratio is not preserved). The bottom-right pixel of the
    input is used as a background sample: when it is brighter than 100, the
    resized image is replaced by `255 - image`.

    Args:
        input_img: a 2D image array; anything that is not 2D fails at the
            shape unpacking below. NOTE(review): the `255 - ...` inversion
            presumably expects 8-bit intensities -- confirm with callers

    Returns:
        the 28 x 28 image, inverted if the background sample was bright
    '''
    # Unpacking into exactly two names rejects inputs that are not 2D
    _num_rows, _num_cols = input_img.shape

    corner_value = input_img[-1, -1]
    small_img = etai.resize(
        input_img, width=28, height=28, interpolation=cv2.INTER_AREA
    )

    return 255 - small_img if corner_value > 100 else small_img
def _featurize(self, img):
    '''Computes the VGG-16 feature vector of an image.

    Grayscale images are converted to RGB and an alpha channel, if present,
    is dropped. The image is then resized to 224 x 224 and evaluated at the
    network's `fc2l` layer.

    Args:
        img: the image to featurize

    Returns:
        the feature vector for the image, a 1D array of length 4096
    '''
    if etai.is_gray(img):
        rgb_img = etai.gray_to_rgb(img)
    elif etai.has_alpha(img):
        # Keep only the first three channels
        rgb_img = img[:, :, :3]
    else:
        rgb_img = img

    model = self.vgg16
    batch = [etai.resize(rgb_img, 224, 224)]
    features = model.evaluate(batch, layer=model.fc2l)

    # A single image was submitted, so return its (only) result
    return features[0]
def embed_image(impath):
    '''Embeds an image with VGG-16 and writes the embedding to disk as a
    compressed .npz file.

    The image is read, resized to 224 x 224, and evaluated at the `fc2l`
    layer of a freshly constructed VGG-16 model. The resulting vector is
    logged and stored under the key `v` in `out/result_embed_image.npz`
    (resolved via `_abspath`).

    Args:
        impath: path to the image to embed
    '''
    resized = etai.resize(etai.read(impath), 224, 224)

    model = etav.VGG16()
    vec = model.evaluate([resized], layer=model.fc2l)[0]
    logger.info("Image embedded to vector of length %d", len(vec))
    logger.info("%s", vec)

    outpath = _abspath("out/result_embed_image.npz")
    etau.ensure_basedir(outpath)
    np.savez_compressed(outpath, v=vec)
    logger.info("Result saved to '%s'", outpath)
def predict(self, img):
    '''Runs the detector on a single image and returns per-class detections.

    The image is resized to 300 x 300, mean-subtracted, and passed through
    the module-level `net`; the raw output is decoded against the
    module-level `priors` by `self.detection`. Boxes are rescaled to the
    input image size, filtered per class by `self.thresh`, reduced with
    non-maximum suppression (overlap 0.45, at most 50 kept per class), and
    finally capped at `self.max_per_image` detections overall when that
    limit is positive.

    Args:
        img: an H x W x 3 image array. NOTE(review): the in-place mean
            subtraction requires a floating-point array and the means are
            divided by 255, so the input is presumably float in [0, 1] --
            confirm against callers

    Returns:
        a list with `num_classes` entries. Entry 0 is never filled and stays
        an empty list (presumably the background class); every other entry
        is a float32 array of shape (k, 5) whose rows hold four box
        coordinates followed by the detection score
    '''
    # One multiplier per box column: (width, height, width, height).
    # float32 matches what the previous torch.Tensor(...).numpy() produced.
    img_h, img_w = img.shape[0], img.shape[1]
    scale = np.array([img_w, img_h, img_w, img_h], dtype=np.float32)

    _t = {'im_detect': Timer(), 'misc': Timer()}
    assert img.shape[2] == 3

    # testing what should be done in the transform
    print('*** transforming the image ***')
    rgb_means = (104/255, 117/255, 123/255)
    rgb_std = (1, 1, 1)
    print(rgb_means)
    print(np.max(img))
    img_ = etai.resize(img, width=300, height=300)
    img_ -= rgb_means
    img_ /= rgb_std
    print(np.max(img_))
    print(np.min(img_))

    # HWC -> CHW, then add a leading batch dimension
    img_ = img_.transpose((2, 0, 1))
    x = torch.from_numpy(img_).unsqueeze(0)
    print(x.shape)
    print('*** finished transforming the image ***')

    if self.cuda:
        x = x.to("cuda")

    _t['im_detect'].tic()
    # `Variable(..., volatile=True)` has had no effect since PyTorch 0.4;
    # `torch.no_grad()` is what actually disables autograd for inference
    # and guarantees the outputs can be converted with `.numpy()`.
    with torch.no_grad():
        out = net(x, test=True)  # forward pass
        boxes, scores = self.detection.forward(out, priors)
    detect_time = _t['im_detect'].toc()

    # Only one image was submitted, so take the first batch entry
    boxes = boxes[0].cpu().numpy()
    scores = scores[0].cpu().numpy()

    # scale each detection back up to the image
    boxes *= scale

    _t['misc'].tic()
    all_boxes = [[] for _ in range(num_classes)]
    # Class index 0 is skipped on purpose
    for j in range(1, num_classes):
        inds = np.where(scores[:, j] > self.thresh)[0]
        if len(inds) == 0:
            all_boxes[j] = np.zeros([0, 5], dtype=np.float32)
            continue

        c_bboxes = boxes[inds]
        c_scores = scores[inds, j]
        c_dets = np.hstack((c_bboxes, c_scores[:, np.newaxis])).astype(
            np.float32, copy=False)

        keep = py_cpu_nms(c_dets, 0.45)
        keep = keep[:50]
        all_boxes[j] = c_dets[keep, :]

    if self.max_per_image > 0:
        image_scores = np.hstack(
            [all_boxes[j][:, -1] for j in range(1, num_classes)])
        if len(image_scores) > self.max_per_image:
            # Score of the max_per_image-th best detection across classes
            image_thresh = np.sort(image_scores)[-self.max_per_image]
            for j in range(1, num_classes):
                keep = np.where(all_boxes[j][:, -1] >= image_thresh)[0]
                all_boxes[j] = all_boxes[j][keep, :]
    nms_time = _t['misc'].toc()

    print('net time: ', detect_time)
    print('post time: ', nms_time)
    return all_boxes
from builtins import *
# pragma pylint: enable=redefined-builtin
# pragma pylint: enable=unused-wildcard-import
# pragma pylint: enable=wildcard-import

import os

import cv2

import eta.core.image as etai


def plot(img):
    '''Shows an image in an OpenCV window and blocks until a key is pressed.

    Args:
        img: the image to display; it is passed through `etai.rgb_to_bgr`
            before being handed to OpenCV
    '''
    window_title = "*** Press any key to exit ***"
    bgr_img = etai.rgb_to_bgr(img)
    cv2.imshow(window_title, bgr_img)
    cv2.waitKey(0)
    cv2.destroyAllWindows()


# Input images are looked up in the data/ directory next to this script
here = os.path.dirname(__file__)
path1 = os.path.join(here, "data/water.jpg")
path2 = os.path.join(here, "data/logo.png")

# Base image resized to width 1024, logo resized to width 400
img1 = etai.read(path1)
img1 = etai.resize(img1, width=1024)
img2 = etai.read(path2)
img2 = etai.resize(img2, width=400)

# Overlay offsets, rendered against the base image
x0 = etai.Width("30%").render(img=img1)
y0 = etai.Height("15%").render(img=img1)

img3 = etai.overlay(img1, img2, x0=x0, y0=y0)
plot(img3)