def load_train_data(image_path, load_size=286, fine_size=256, is_testing=False):
    """Load an image pair and return both images stacked along the channel axis.

    Args:
        image_path: Sequence whose first two items are the paths of image A
            and image B.
        load_size: Side length both images are resized to before the random
            crop (training only).
        fine_size: Side length of the returned images.
        is_testing: When true, skip augmentation and resize directly to
            ``fine_size``.

    Returns:
        ``np.concatenate((img_A, img_B), axis=2)`` after both images were
        mapped through ``x / 127.5 - 1``.

    Raises:
        ValueError: From ``np.random.randint`` when training with
            ``load_size < fine_size``.
    """
    img_A = imread(image_path[0])
    img_B = imread(image_path[1])
    if not is_testing:
        img_A = imresize(img_A, [load_size, load_size])
        img_B = imresize(img_B, [load_size, load_size])
        # Draw the crop origin from every valid offset, 0 included. The old
        # ceil(uniform(1e-2, d)) draw never produced 0 and produced 1 when
        # load_size == fine_size, which made the crop one pixel too small.
        max_offset = load_size - fine_size
        h1 = np.random.randint(0, max_offset + 1)
        w1 = np.random.randint(0, max_offset + 1)
        # Same window and same flip decision for both images so the pair
        # stays aligned.
        img_A = img_A[h1:h1 + fine_size, w1:w1 + fine_size]
        img_B = img_B[h1:h1 + fine_size, w1:w1 + fine_size]
        if np.random.random() > 0.5:
            img_A = np.fliplr(img_A)
            img_B = np.fliplr(img_B)
    else:
        img_A = imresize(img_A, [fine_size, fine_size])
        img_B = imresize(img_B, [fine_size, fine_size])
    # NOTE(review): the scaling below presumes imresize returns values in
    # 0..255 - confirm against the imresize this file imports.
    img_A = img_A / 127.5 - 1.
    img_B = img_B / 127.5 - 1.
    # img_AB shape: (fine_size, fine_size, input_c_dim + output_c_dim)
    img_AB = np.concatenate((img_A, img_B), axis=2)
    return img_AB
def scale(X, start_ratio=0.1, end_ratio=2, num=20, bg_val=0):
    """Build a stack of zoomed-out / zoomed-in versions of one image.

    For every ratio in ``np.linspace(start_ratio, end_ratio, num)`` the image
    is either padded with ``bg_val`` and resized back to its original size
    (ratio below 1), enlarged and centre-cropped back to its original size
    (ratio above 1), or copied unchanged.

    Args:
        X: Array of shape (rows, cols, channels). Only channel 0 is used by
            the resizing branches, so the result is only stackable for
            single-channel input (the unchanged-copy branch keeps every
            channel).
        start_ratio: First scale ratio.
        end_ratio: Last scale ratio.
        num: Number of ratios.
        bg_val: Constant used for the padding.

    Returns:
        ``np.vstack`` of the per-ratio images, each with a leading axis of 1.

    Raises:
        ValueError: If ``X`` has more than three dimensions.
    """
    if len(X.shape) > 3:
        raise ValueError("Can only create a dataset of scaled images "
                         "for only one original image")
    rows, cols, _ = X.shape
    scaled_X = []
    for ratio in np.linspace(start_ratio, end_ratio, num=num):
        new_rows, new_cols = int(rows * ratio), int(cols * ratio)
        if new_rows < rows or new_cols < cols:
            # Shrink: surround the image with background, then resize the
            # padded canvas back to the original shape.
            pad_r, pad_c = rows - new_rows, cols - new_cols
            top, left = pad_r // 2, pad_c // 2
            padded = np.pad(
                X,
                ((top, pad_r - top), (left, pad_c - left), (0, 0)),
                'constant',
                constant_values=bg_val)[:, :, 0]
            scaled = imresize(padded, (rows, cols))[:, :, None]
        elif new_rows > rows or new_cols > cols:
            # Enlarge, then cut the centre window. Explicit end indices avoid
            # the empty result a ``[a:-0]`` slice would give.
            enlarged = imresize(X[:, :, 0], (new_rows, new_cols))[:, :, None]
            top = (new_rows - rows) // 2
            left = (new_cols - cols) // 2
            scaled = enlarged[top:top + rows, left:left + cols]
        else:
            scaled = X.copy()
        scaled_X.append(scaled[None, :])
    return np.vstack(scaled_X)
def preprocess_image(img1, img2, scale_size, crop_size, flip, training):
    """Resize an image pair and, when training, apply the same random crop/flip.

    :param img1: first image of the pair
    :param img2: second image of the pair, processed identically to ``img1``
    :param scale_size: side length both images are resized to before the
        random crop (training only)
    :param crop_size: side length of the returned images
    :param flip: when true, the training pair is mirrored left-right with
        probability 0.5
    :param training: selects the augmenting path; otherwise both images are
        resized straight to ``crop_size``
    :return: tuple ``(img1, img2)``
    :raises ValueError: from ``np.random.randint`` when training with
        ``scale_size < crop_size``
    """
    if training:
        img1 = imresize(img1, (scale_size, scale_size))
        img2 = imresize(img2, (scale_size, scale_size))
        # randint's upper bound is exclusive: "+ 1" makes the last valid
        # offset reachable and keeps scale_size == crop_size from raising.
        max_offset = scale_size - crop_size
        ws = np.random.randint(0, max_offset + 1)
        hs = np.random.randint(0, max_offset + 1)
        img1 = img1[hs:hs + crop_size, ws:ws + crop_size]
        img2 = img2[hs:hs + crop_size, ws:ws + crop_size]
        # One shared coin toss keeps the pair aligned.
        if flip and np.random.random() > 0.5:
            img1 = np.fliplr(img1)
            img2 = np.fliplr(img2)
    else:
        img1 = imresize(img1, (crop_size, crop_size))
        img2 = imresize(img2, (crop_size, crop_size))
    return img1, img2
def draw_class_activation_map(img, class_activation_map, img_alpha=0.6, size=None):
    """Show a class activation map blended over a grayscale copy of ``img``.

    Args:
        img: Colour image; divided by 255 before blending, and its first
            three channels are read.
        class_activation_map: 2-D map; resized to the image's height/width.
        img_alpha: Weight of the grayscale image in the blend; the 'jet'
            coloured map gets ``1 - img_alpha``.
        size: Optional target length for the shorter image side.
    """
    if size is not None:
        shorter_side = np.minimum(img.shape[0], img.shape[1])
        ratio = size / shorter_side
        target_shape = [
            int(ratio * img.shape[0] + 0.5),
            int(ratio * img.shape[1] + 0.5)
        ]
        img = imresize(img, output_shape=target_shape, preserve_range=True)
    class_activation_map = imresize(
        class_activation_map, output_shape=[img.shape[0], img.shape[1]])

    # Colour the activation map with the 'jet' colormap.
    colormap = plt.get_cmap('jet')
    overlay = colormap(class_activation_map)

    # Bring the image to 0..1 and collapse it to one luminance channel.
    img = img / 255.0
    red, green, blue = img[:, :, 0], img[:, :, 1], img[:, :, 2]
    luminance = 0.299 * red + 0.587 * green + 0.114 * blue

    # Blend; only the first three colormap channels are used.
    composite = (img_alpha * np.expand_dims(luminance, axis=-1)
                 + (1 - img_alpha) * overlay[:, :, 0:3])

    plt.imshow(composite)
    plt.show()
def _get_obs(self):
    """Return the current observation for the configured ``obs_type``.

    * ``'q'``: 1-D concatenation of joint states, joint velocities and the
      vector from the pincher centroid to ``self.targetpos``.
    * otherwise: an array of shape
      ``(image_height, image_width, channels * len(self.camera_pos))`` with
      one rendered view per camera; ``channels`` is 4 for ``'rgbd'`` (colour
      plus clipped depth) and 3 for anything else.
    """
    if self.obs_type == 'q':
        pinchpos = np.array(self.body.getPincherCentroid())
        state, velocities = self.body.getJointStates()
        q = np.array(state)
        qdot = np.array(velocities)
        dist = self.targetpos - pinchpos
        return np.concatenate((q, qdot, dist), axis=0)

    with_depth = self.obs_type == 'rgbd'
    channels = 4 if with_depth else 3
    out_shape = (self.image_height, self.image_width)
    obs = np.empty(out_shape + (channels * len(self.camera_pos),))

    target_pos = (0, 0, 0)
    camera_up = (0, 0, 1)
    near_val, far_val = 0.01, 5
    fov = 80
    proj_mat = p.computeProjectionMatrixFOV(fov, 1.0, near_val, far_val)
    for i, camera_pos in enumerate(self.camera_pos):
        view_mat = p.computeViewMatrix(camera_pos, target_pos, camera_up)
        _w, _h, rgba, depth, _objects = p.getCameraImage(
            self.render_width, self.render_height, view_mat, proj_mat,
            shadow=0)
        rgba = rgba.astype(np.float32) / 255.0
        if with_depth:
            depth = depth.clip(0, 5)
            if rgba.shape[:2] != out_shape:
                # Previously the colour channels kept the render resolution
                # here, so writing the resized depth into them failed
                # whenever render size and image size differed.
                frame = np.empty(out_shape + (4,), dtype=np.float32)
                frame[:, :, :3] = imresize(rgba[:, :, :3], out_shape)
            else:
                frame = rgba
            # The depth map replaces the alpha channel.
            frame[:, :, 3] = imresize(depth, out_shape)
        else:
            # Drop the alpha channel before resizing.
            frame = imresize(rgba[:, :, :3], out_shape)
        obs[:, :, (i * channels):((i + 1) * channels)] = frame
    return obs
def getNextBatch(self):
    """Fill the preallocated batch buffers with the next videos and return them.

    For each of ``self.videoNumPerBatch`` videos, frame indices are spread
    evenly over the video with ``np.linspace`` and grouped into
    ``self.samplingFrameStackNumPerVideo`` stacks of ``self.stackDepth``
    frames. Channels 0-2 of ``self.sampleBatch`` receive the resized RGB
    frame, channels 3 and 4 the resized u/v optical-flow images minus 128.
    The video list is reshuffled whenever it has been consumed completely.

    Returns:
        ``(self.sampleBatch, self.labelBatch, self.clipMarkerBatch)``.
    """
    framesPerVideo = self.stackDepth * self.samplingFrameStackNumPerVideo
    frameSize = [self.frameHeight, self.frameWidth]
    for i in range(self.videoNumPerBatch):
        # Wrap around and reshuffle once every video has been served.
        if self.currentVideoInd == self.videoNum:
            self.currentVideoInd = 0
            rd.shuffle(self.videoNames)

        videoName = self.videoNames[self.currentVideoInd]
        rgbDir = os.path.join(self.rgbDataDir, videoName)
        flowDirU = os.path.join(self.opticalFlowDir_u, videoName)
        flowDirV = os.path.join(self.opticalFlowDir_v, videoName)

        # The label name is the video name without its first 2 and last 8
        # characters.
        self.labelBatch[i] = self.labelNames.index(videoName[2:-8])

        frameNames = sorted(os.listdir(rgbDir))
        # One frame fewer than listed is considered available.
        # NOTE(review): presumably because flow has one frame less - confirm.
        usableFrames = len(frameNames) - 1
        sampleFrameLocs = np.linspace(0, usableFrames - 1, framesPerVideo,
                                      dtype='int')

        for stackIte in range(self.samplingFrameStackNumPerVideo):
            first = stackIte * self.stackDepth
            stackFrameInds = sampleFrameLocs[first:first + self.stackDepth]
            row = i + stackIte * self.videoNumPerBatch
            for frameIte in range(self.stackDepth):
                frameName = frameNames[stackFrameInds[frameIte]]

                rgb = imread(os.path.join(rgbDir, frameName))
                rgb = imresize(rgb.astype('float64'), frameSize)
                self.sampleBatch[row, 0:3, frameIte, :, :] = \
                    rgb.transpose([2, 0, 1])

                flow_u = imread(os.path.join(flowDirU, 'frame' + frameName))
                flow_u = imresize(flow_u.astype('float64'), frameSize) - 128

                flow_v = imread(os.path.join(flowDirV, 'frame' + frameName))
                flow_v = imresize(flow_v.astype('float64'), frameSize) - 128

                self.sampleBatch[row, 3, frameIte, :, :] = flow_u
                self.sampleBatch[row, 4, frameIte, :, :] = flow_v
        self.currentVideoInd += 1
    return self.sampleBatch, self.labelBatch, self.clipMarkerBatch
def _resize_rescaled(image, height, width):
    """Resize ``image`` on a unit scale and restore its original magnitude."""
    peak = np.max(np.abs(image))
    if peak == 0:
        # All-zero image: nothing to normalise, and dividing would give NaN.
        peak = 1
    # Out-of-place division: the caller's array stays untouched and integer
    # input no longer fails on in-place true division.
    resized = imresize(image / peak, (height, width), mode='reflect')
    return resized * peak


def resize(images, height, width, *args, **kwargs):
    """Resize the left and right images of a stereo sample.

    Args:
        images: Triple ``(left_image, right_image, disp_image)``.
        height: Target number of rows.
        width: Target number of columns.
        *args, **kwargs: Accepted and ignored.

    Returns:
        ``[left_resized, right_resized, disp_image]``; ``disp_image`` is
        passed through unchanged.
    """
    left_image, right_image, disp_image = images
    left_image_resize = _resize_rescaled(left_image, height, width)
    right_image_resize = _resize_rescaled(right_image, height, width)
    return [left_image_resize, right_image_resize, disp_image]
def prepareGrayscaleImage(imageData, imagesMean):
    """Turn a grayscale image into a mean-subtracted 227x227x3 input.

    Steps: replicate the single channel three times, reverse the channel
    order, swap the two spatial axes, resize to 256x256, subtract
    ``imagesMean`` and resize to 227x227.

    Args:
        imageData: Grayscale image, either 2-D or with a trailing channel
            axis of length 1.
        imagesMean: Mean image subtracted after the first resize; must
            broadcast against a (256, 256, 3) array.

    Returns:
        ``double`` array of shape (227, 227, 3).
    """
    IMAGE_DIM = 256
    CROPPED_DIM = 227

    # np.tile with 3 repetitions needs an explicit channel axis, otherwise a
    # 2-D input is tiled along its columns instead.
    if imageData.ndim == 2:
        imageData = imageData[:, :, np.newaxis]
    rgbImage = np.tile(imageData, (1, 1, 3))
    # The original indices were 1-based (MATLAB style) and always raised in
    # NumPy; these are the 0-based equivalents.
    imageData = rgbImage[:, :, [2, 1, 0]]  # permute channels from RGB to BGR
    imageData = np.transpose(imageData, (1, 0, 2))  # flip width and height
    imageData = imresize(imageData, (IMAGE_DIM, IMAGE_DIM), 'bilinear')
    imageData = imageData - imagesMean  # subtract mean (W x H x C, BGR)
    imageData = imresize(imageData, (CROPPED_DIM, CROPPED_DIM), 'bilinear')

    preprocessedImage = np.zeros((CROPPED_DIM, CROPPED_DIM, 3), 'double')
    preprocessedImage[:, :, :] = imageData
    return preprocessedImage
def v_2D(output, grad):
    """Compute a gradient-weighted activation map and resize it.

    Args:
        output: Activation tensor; multiplied by the per-channel weights and
            summed over dim 1.
        grad: Gradient tensor; averaged over dims 2 and 3 to obtain one
            weight per (sample, channel).

    Returns:
        NumPy array: the ReLU-ed weighted sum passed through
        ``imresize(..., (128, 128, 128))``.
    """
    channel_weights = grad.mean(dim=(2, 3))
    n_samples, n_channels = channel_weights.size()
    weighted = channel_weights.view(n_samples, n_channels, 1, 1) * output
    activation = F.relu(weighted.sum(dim=1))
    activation_np = activation.data.cpu().numpy().astype('float')
    return imresize(activation_np, (128, 128, 128))
def draw_seg(self, img, seg_gt, segmentation, name):
    """Save the image, its predicted segmentation and, if given, the ground truth.

    Writes ``<name>_segmentation.png``, ``<name>.jpg`` and (when ``seg_gt``
    is not None) ``<name>_seg_gt.png`` into ``self.directory``. Both masks
    are stored as palette images using ``Extra/palette.npy``.
    """
    palette = np.load('Extra/palette.npy').tolist()
    height_width = (img.shape[0], img.shape[1])

    # order=0 with anti-aliasing off keeps the resized mask free of blended
    # label values.
    resized_mask = imresize(segmentation, height_width, order=0,
                            preserve_range=True, mode='constant',
                            anti_aliasing=False).astype(int)

    # The image is multiplied by 255 before the uint8 conversion.
    photo = Image.fromarray((img * 255).astype('uint8'))
    mask_picture = Image.fromarray((resized_mask).astype('uint8'), 'P')
    mask_picture.putpalette(palette)
    mask_picture.save(self.directory + '/%s_segmentation.png' % name, 'PNG')
    photo.save(self.directory + '/%s.jpg' % name, 'JPEG')

    if seg_gt is None:
        return
    gt_picture = Image.fromarray((seg_gt).astype('uint8'), 'P')
    gt_picture.putpalette(palette)
    gt_picture.save(self.directory + '/%s_seg_gt.png' % name, 'PNG')
def draw(self, img_path, name, dets, scores, cats, mask):
    """Render detections and a segmentation mask onto an image and save it.

    Args:
        img_path: Path of the image to open.
        name: Base name of the output file.
        dets: Per-detection boxes; entries 2 and 3 are added to entries 0 and
            1 before drawing (presumably x, y, w, h - confirm with caller).
        scores: Per-detection scores; the first four characters of their
            string form are printed.
        cats: Per-detection category ids.
        mask: Segmentation mask, resized to the image size.

    Returns:
        The annotated PIL image. Also sets ``self.last_path`` and resets
        ``self.view_classes`` to False.
    """
    image = Image.open(img_path)
    width, height = image.size
    mask = imresize(mask, (height, width), order=0,
                    preserve_range=True).astype(int)
    image = put_transparent_mask(image, mask, palette)

    painter = ImageDraw.Draw(image)
    for idx, cat in enumerate(cats):
        score = scores[idx]
        box = np.array(dets[idx])
        box[[2, 3]] += box[[0, 1]]
        color = colors[cat]
        draw_rectangle(painter, box, color, width=5)
        caption = self.loader.ids_to_cats[cat] + ' ' + str(score)[:4]
        painter.text(box[:2], caption, fill=color, font=font)

    out_name = name + '_processed' + self.loader.data_format
    path_to_save = opj(EVAL_DIR, 'demodemo', 'output', out_name)
    image.save(path_to_save, 'JPEG')
    self.last_path = path_to_save
    self.view_classes = False
    return image
def imread_tile(f, tile_shape, resize='resize', quantize=None):
    """Read an image and bring it to ``tile_shape`` as a uint8 array.

    Args:
        f: Path of the image.
        tile_shape: Two-element (rows, cols) target shape.
        resize: ``'extract'`` cuts a randomly placed tile, ``'crop'`` calls
            ``imcrop``, ``'resize'`` always resizes; any mode is followed by
            a resize when the result still differs from ``tile_shape``.
        quantize: ``'u8'`` collapses multi-band tiles to one band.

    Returns:
        ``np.uint8`` array.
    """
    assert (len(tile_shape) == 2)
    tile = imread(f)
    n_rows = tile.shape[0]
    n_cols = tile.shape[1]
    # Float images are multiplied by 255 at the end.
    if tile.dtype == float:
        scalef = 255
    else:
        scalef = 1
    # Band count of the file as read; it is not updated by the conversions
    # below.
    if tile.ndim == 2:
        nbands = 1
    else:
        nbands = tile.shape[2]

    if nbands == 1:
        gray = tile.squeeze()
        tile = d_[gray, gray, gray]
    elif nbands == 4:
        # Drop the last band.
        tile = tile[:, :, :-1]

    if resize == 'extract':
        row_slack = n_rows - tile_shape[0]
        col_slack = n_cols - tile_shape[1]
        row0 = 0 if row_slack < 0 else randint(row_slack)
        col0 = 0 if col_slack < 0 else randint(col_slack)
        print('before', tile.shape)
        tile = extract_tile(tile, (row0, col0), tile_shape[0])
        print(tile.shape)
    elif resize == 'crop':
        tile = imcrop(tile, tile_shape)

    n_rows, n_cols = tile.shape[0], tile.shape[1]
    if resize == 'resize' or (n_rows, n_cols) != tile_shape:
        tile = imresize(tile, tile_shape, preserve_range=True)
    tile = scalef * tile

    if quantize == 'u8' and nbands != 1:
        band_sum = tile.sum(axis=2)
        tile = 255 * (band_sum / (np.float64(256**nbands) - 1))
    return np.uint8(tile)
def generate(self, train=True):
    """Yield ``(preprocessed image batch, assigned box targets)`` forever.

    Iterates ``self.zip_gt_bbox`` repeatedly. Every record yields one image
    (resized to ``self.image_size`` and converted with ``gray2rgb``) and one
    target built from square boxes with a one-hot class in columns 4..13.
    A batch is emitted each time ``self.batch_size`` samples are collected.
    ``train`` is not used.
    """
    while True:
        inputs, targets = [], []
        for record in self.zip_gt_bbox:
            (imagename, num_objects, class_numbers,
             x_y_centres, size, rotation) = record

            img = imread(self.folder_path_prefix + imagename).astype('float32')
            img = imresize(img, self.image_size).astype('float32')
            img = skimage.color.gray2rgb(img)

            # Columns 0..3: x_min, y_min, x_max, y_max; columns 4..13: class.
            y = np.zeros((num_objects, 4 + 10))
            for k in range(num_objects):
                box = y[k]
                box[0] = int(x_y_centres[k].split('-')[0]) - int(size[k]) // 2
                box[1] = int(x_y_centres[k].split('-')[1]) - int(size[k]) // 2
                box[2] = box[0] + int(size[k])
                box[3] = box[1] + int(size[k])
                box[4 + int(class_numbers[k])] = 1.0
            # Rescale coordinates by 300 / 224.
            # NOTE(review): presumably 224 -> 300 pixel space - confirm.
            y[:, :4] = np.floor(y[:, :4] * 300 / 224)
            y = self.bbox_util.assign_boxes(y)

            inputs.append(img)
            targets.append(y)
            if len(targets) == self.batch_size:
                batch_images = np.array(inputs)
                batch_targets = np.array(targets)
                inputs, targets = [], []
                yield preprocess_input(batch_images), batch_targets
def center_crop(x, crop_h, crop_w, resize_h=64, resize_w=64):
    """Cut the centre ``crop_h`` x ``crop_w`` window and resize it.

    Args:
        x: Image array; the first two axes are rows and columns.
        crop_h: Height of the centre window.
        crop_w: Width of the centre window; ``None`` means ``crop_h``.
        resize_h: Output height.
        resize_w: Output width.

    Returns:
        The window resized to ``[resize_h, resize_w]``.
    """
    crop_w = crop_h if crop_w is None else crop_w
    height, width = x.shape[:2]
    top = int(round((height - crop_h) / 2.))
    left = int(round((width - crop_w) / 2.))
    window = x[top:top + crop_h, left:left + crop_w]
    return imresize(window, [resize_h, resize_w])
def _read_image(full_path, color=False, scale_factor=None): final_scale_factor = None if scale_factor is not None: if not (0 < scale_factor <= 20): msg = "'scale_factor' parameter must be > 0 and < 20." print(msg) raise ValueError(msg) final_scale_factor = float(scale_factor) # print("Loading image '%s'." % full_path) img = imread(full_path, as_gray=(not color)) # Checks if jpeg reading worked. Refer to skimage issue #3594 for more details. if img.ndim is 0: msg = ("Failed to read the image file %s, " "please make sure that libjpeg is installed." % full_path) print(msg) raise RuntimeError(msg) if final_scale_factor is not None and not np.isclose( final_scale_factor, 1.): h, w = img.shape[0], img.shape[1] h = int(final_scale_factor * h) w = int(final_scale_factor * w) img = imresize(img, (h, w)) if not color: # skimage.io.imread returns [0-1] float64 images when as_gray is True img = np.uint8(img * 255) return img
def loadFile(folder):
    """Load every image found in the leaf directories below ``folder``.

    Args:
        folder: Root directory to walk.

    Returns:
        Dict with three lists: ``"image"`` (images resized to (51, 51, 3)),
        ``"label"`` (file names) and ``"class"`` (parent directory name,
        filled only for paths containing ``'train'``). All lists are empty
        when no file is found.
    """
    fileList = []
    for root, dirs, files in os.walk(folder):
        # Only directories without sub-directories contribute files.
        if not dirs:
            fileList.extend(os.path.join(root, f) for f in files)

    file_dict = {"image": [], "label": [], "class": []}
    # Nothing to load: return early rather than index an empty list.
    if not fileList:
        return file_dict

    text = ''
    if 'train' in fileList[0]:
        text = 'Start fill train_dict'
    elif 'test' in fileList[0]:
        text = 'Start fill test_dict'

    for p in tqdm(fileList, ascii=True, ncols=85, desc=text):
        image = imresize(imread(p), (51, 51, 3))
        parts = p.split('/')
        file_dict['image'].append(image)
        file_dict['label'].append(str(parts[-1]))
        if 'train' in p:
            file_dict['class'].append(str(parts[-2]))
    return file_dict
def setup_mnist(self, img_res):
    """Populate ``self.mnist_X`` / ``self.mnist_y``, using an on-disk cache.

    When ``mnist_x.npy`` exists under ``ROOT_path/.keras/datasets`` both
    arrays are loaded from there. Otherwise the training split is loaded,
    normalised with ``self.normalize``, resized to ``img_res``, repeated to
    three channels and written to that cache.
    """
    print("Setting up MNIST...")
    x_cache = os.path.join(ROOT_path, '.keras/datasets/mnist_x.npy')
    y_cache = os.path.join(ROOT_path, '.keras/datasets/mnist_y.npy')

    if os.path.exists(x_cache):
        self.mnist_X = np.load(x_cache)
        self.mnist_y = np.load(y_cache)
    else:
        # Only the training split is used.
        (images, labels), (_, _) = mnist.load_data()

        images = self.normalize(images)
        images = np.array([imresize(digit, img_res) for digit in images])
        # Add a channel axis and replicate it three times.
        images = np.expand_dims(images, axis=-1)
        images = np.repeat(images, 3, axis=-1)

        self.mnist_X, self.mnist_y = images, labels

        np.save(x_cache, self.mnist_X)
        np.save(y_cache, self.mnist_y)
    print("+ Done.")
def mask4vis(cfg, curr_img, vis_size):
    """Convert an image with values in 0..1 into a uint8 picture for display.

    The input is clipped to [0, 1], resized to ``vis_size`` x ``vis_size``
    with ``order=3`` and scaled to 0..255. The result is inverted unless its
    last axis has length 3 or ``cfg.vis_depth_projs`` is set.
    """
    clipped = np.clip(curr_img, 0.0, 1.0)
    resized = imresize(clipped, (vis_size, vis_size), order=3)
    picture = np.clip(resized * 255, 0, 255).astype(dtype=np.uint8)
    if picture.shape[-1] == 3 or cfg.vis_depth_projs:
        return picture
    return 255 - picture
def load_mask(mask_path, shape, return_mask_img=False):
    """Load a grayscale mask, binarise it and expand it to a tensor shape.

    Args:
        mask_path: Path of the mask image.
        shape: 4-tuple; ``(_, channels, width, height)`` when the Keras image
            ordering is ``"th"``, otherwise ``(_, width, height, channels)``.
        return_mask_img: When true, return the 2-D binary mask itself.

    Returns:
        Either the 2-D float32 mask (values 0 or 1) or an array of shape
        ``shape[1:]`` holding that mask in every channel.
    """
    channels_first = K.image_dim_ordering() == "th"
    if channels_first:
        _, channels, width, height = shape
    else:
        _, width, height, channels = shape

    mask = imread(mask_path, mode="L")  # Grayscale mask load
    mask = imresize(mask, (width, height)).astype('float32')

    # One threshold for both classes. The earlier "<= 127 -> 0, > 128 -> 255"
    # pair left values in (127, 128] untouched, and dividing by the maximum
    # produced NaN for an all-black mask.
    mask = (mask > 127).astype('float32')

    if return_mask_img:
        return mask

    mask_tensor = np.empty(shape[1:])
    for i in range(channels):
        if channels_first:
            mask_tensor[i, :, :] = mask
        else:
            mask_tensor[:, :, i] = mask
    return mask_tensor
def preprocess(img, crop_factor=0.8):
    """Replicate the preprocessing we did on the VAE/GAN.

    This model used a crop_factor of 0.8 and crop size of [100, 100, 3].
    The image is centre-cropped to a square, an equal border is trimmed from
    every side when ``crop_factor < 1`` and the result is resized to 100x100.

    Parameters
    ----------
    img : np.ndarray
        Image whose first two axes are rows and columns.
    crop_factor : float, optional
        Fraction of the square's side kept after trimming.

    Returns
    -------
    np.ndarray
        The cropped image resized to (100, 100).
    """
    side = np.min(img.shape[:2])
    top = (img.shape[0] - side) // 2
    left = (img.shape[1] - side) // 2
    cropped = img[top:top + side, left:left + side]

    rows, cols = cropped.shape[:2]
    if crop_factor < 1.0:
        amt = (1 - crop_factor) / 2
        trim_r, trim_c = int(rows * amt), int(cols * amt)
        # Explicit end indices: a ``[t:-t]`` slice is empty when the trim
        # rounds down to 0 (small images or crop_factor close to 1).
        cropped = cropped[trim_r:rows - trim_r, trim_c:cols - trim_c]

    rsz = imresize(cropped, (100, 100), preserve_range=False)
    return rsz
def preprocess_image_batch(image_path, img_size=None, crop_size=None,
                           color_mode="rgb"):
    """Load one image and return it as a mean-subtracted batch of size 1.

    Args:
        image_path: Path of the image, read as RGB.
        img_size: Optional size the image is resized to; the resized result
            is multiplied by 255.
        crop_size: Optional (height, width) of a centre crop.
        color_mode: Not used.

    Returns:
        float32 array with a leading batch axis of length 1.
    """
    img = imread(image_path, pilmode='RGB')
    if img_size:
        # NOTE(review): the factor 255 presumes imresize returns values in
        # 0..1 - confirm against the imresize this file imports.
        img = 255 * np.array(imresize(img, img_size))
    img = img.astype('float32')

    # We normalize the colors (in RGB space) with the empirical means on the
    # training set
    img[:, :, 0] -= 123.68
    img[:, :, 1] -= 116.779
    img[:, :, 2] -= 103.939

    if crop_size:
        # Centre the crop on the actual image extent, so cropping also works
        # when no img_size was given (indexing img_size used to fail then).
        height, width = img.shape[0], img.shape[1]
        top = (height - crop_size[0]) // 2
        bottom = (height + crop_size[0]) // 2
        left = (width - crop_size[1]) // 2
        right = (width + crop_size[1]) // 2
        img = img[top:bottom, left:right, :]

    img_batch = np.stack([img], axis=0)
    return img_batch
def score_frame(model, history, ix, r, d, interp_func, mode="policy", task=0):
    """Compute an 84x84 saliency map by masking the input around grid points.

    Args:
        model, history, ix, interp_func, mode, task: Forwarded to
            ``run_through_model``.
        r: Radius of the blur, forwarded to ``get_mask``.
        d: Grid stride (d == 1 scores every pixel, d == 2 every other pixel
            in each direction, i.e. 25% of all pixels).

    Returns:
        float32 array of shape (84, 84), rescaled so that its maximum equals
        the maximum of the raw grid scores.
    """
    reference = run_through_model(model, history, ix, interp_func, mask=None,
                                  mode=mode, task=task)
    grid_side = int(84 / d) + 1
    scores = np.zeros((grid_side, grid_side))  # saliency scores S(t,i,j)
    for row in range(0, 84, d):
        for col in range(0, 84, d):
            mask = get_mask(center=[row, col], size=[84, 84], r=r)
            perturbed = run_through_model(model, history, ix, interp_func,
                                          mask=mask, mode=mode, task=task)
            # Half the squared difference to the unmasked output.
            difference = reference - perturbed
            scores[int(row / d), int(col / d)] = \
                difference.pow(2).sum().mul_(.5).item()
    pmax = scores.max()
    scores = imresize(scores, (84, 84)).astype(np.float32)
    scores = pmax * scores / scores.max()
    return scores
def get_data_imagenet(datadirb1, datadirb2, D=128):
    """Load two image folders as arrays scaled by ``x / 127.5 - 1``.

    Args:
        datadirb1: Directory of the first image set.
        datadirb2: Directory of the second image set.
        D: Side length every image is resized to.

    Returns:
        ``(b1, b2, 3, int(.8 * D))`` where ``b1``/``b2`` are float32 stacks
        of the images of each directory.
    """
    def _looks_like_image(path):
        # Substring test on the whole lower-cased path, as in the original.
        lowered = path.lower()
        return 'png' in lowered or 'jpeg' in lowered or 'jpg' in lowered

    paths1 = sorted(glob.glob('{}/*'.format(datadirb1)))
    paths2 = sorted(glob.glob('{}/*'.format(datadirb2)))
    paths1 = [path for path in paths1 if _looks_like_image(path)]
    paths2 = [path for path in paths2 if _looks_like_image(path)]

    b1 = [imresize(imread(path), (D, D)) for path in paths1]
    b2 = [imresize(imread(path), (D, D)) for path in paths2]

    # Give single-channel images three channels so everything stacks.
    b1 = [gray2rgb(im) if len(im.shape) == 2 else im for im in b1]
    b2 = [gray2rgb(im) if len(im.shape) == 2 else im for im in b2]

    print(b1[0].shape)
    print(b2[0].shape)

    b1 = np.stack(b1, axis=0).astype(np.float32)
    b2 = np.stack(b2, axis=0).astype(np.float32)

    print(b1.shape)
    print(b2.shape)

    b1 = (b1 / 127.5) - 1
    b2 = (b2 / 127.5) - 1
    return b1, b2, 3, int(.8 * D)
def load_tensor_image(filename, args):
    """Read an image file and return it as a normalised 1xCxHxW tensor.

    The image is resized to ``(args.img_height, args.img_width)`` unless
    ``args.no_resize`` is set or it already has that size, then mapped
    through ``(x / 255 - 0.45) / 0.225`` and moved to ``device``.
    """
    pixels = imread(filename).astype(np.float32)
    height, width, _ = pixels.shape
    if not args.no_resize:
        size_differs = height != args.img_height or width != args.img_width
        if size_differs:
            pixels = imresize(
                pixels, (args.img_height, args.img_width)).astype(np.float32)
    # HWC -> CHW, then add the batch axis.
    chw = np.transpose(pixels, (2, 0, 1))
    batch = torch.from_numpy(chw).unsqueeze(0)
    tensor_img = ((batch/255-0.45)/0.225).to(device)
    return tensor_img
def load_tensor_image(img, resize=(256, 320)):
    """Convert a BGR frame into a normalised 1xCxHxW tensor.

    Columns 30..509 of the frame are kept. When ``resize`` is truthy a
    resized preview is shown in the ``'resized'`` window and the frame is
    resized to ``resize`` after conversion to RGB float32. The result is
    mapped through ``(x / 255 - 0.45) / 0.225`` and moved to ``device``;
    its shape is printed.
    """
    frame = img[:, 30:510]
    if resize:
        # Preview of the resized frame, before colour conversion.
        preview = imresize(frame, resize)
        cv2.imshow('resized', preview)

    rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB).astype(np.float32)
    if resize:
        rgb = imresize(rgb, resize)

    # HWC -> CHW, then add the batch axis.
    chw = np.transpose(rgb, (2, 0, 1))
    batch = torch.from_numpy(chw).unsqueeze(0)
    tensor_img = ((batch / 255 - 0.45) / 0.225).to(device)
    print(f'tensor shape {tensor_img.shape}')
    return tensor_img
def _observation(self, img):
    """Crop, resize and rescale a frame into a channels-first float32 array.

    The frame is cropped with ``self.crop``, resized to ``self.img_size``,
    averaged over the last axis when ``self.grayscale`` is set, transposed
    from (H, W, C) to (C, H, W) and divided by 255.
    """
    frame = imresize(self.crop(img), self.img_size)
    if self.grayscale:
        frame = frame.mean(-1, keepdims=True)
    channels_first = np.transpose(frame, (2, 0, 1))
    return channels_first.astype('float32') / 255.
def _resize(images, size='smean'):
    """Resize every image to ``size`` and stack the results on a new axis 0.

    Each image is resized with ``order=1``, ``mode='constant'``, no
    anti-aliasing and ``preserve_range=True``.
    """
    resized = []
    for image in images:
        resized.append(
            imresize(image, size, order=1, mode='constant',
                     anti_aliasing=False, preserve_range=True))
    return np.stack(resized)
def sample_images(data_dir, batch_size, high_resolution_shape,
                  low_resolution_shape):
    """Draw a random batch of JPEGs as (high-res, low-res) array pairs.

    Args:
        data_dir: Prefix that ``'*.jpg'`` is appended to for globbing.
        batch_size: Number of images drawn (with replacement, via
            ``np.random.choice``).
        high_resolution_shape: Target shape of the high-resolution copies.
        low_resolution_shape: Target shape of the low-resolution copies.

    Returns:
        ``(high_resolution_images, low_resolution_images)`` as arrays. Each
        pair is mirrored left-right together with probability 0.5.
    """
    candidates = glob.glob(data_dir + '*.jpg')
    chosen = np.random.choice(candidates, size=batch_size)

    high_batch = []
    low_batch = []
    for path in chosen:
        pixels = imread(path, as_gray=False, pilmode='RGB')
        pixels = pixels.astype(np.float32)

        high = imresize(pixels, high_resolution_shape)
        low = imresize(pixels, low_resolution_shape)

        # Flip both versions together so they keep showing the same content.
        if np.random.random() < 0.5:
            high = np.fliplr(high)
            low = np.fliplr(low)

        high_batch.append(high)
        low_batch.append(low)
    return np.array(high_batch), np.array(low_batch)
def randomize_image(img, enlarge_size=286, output_size=256):
    """Apply resize / random-crop / random-flip augmentation to one image.

    Args:
        img: Input image.
        enlarge_size: Side length the image is resized to before cropping.
        output_size: Side length of the returned crop.

    Returns:
        An ``output_size`` x ``output_size`` crop, mirrored left-right with
        probability 0.5.

    Raises:
        ValueError: From ``np.random.randint`` when
            ``enlarge_size < output_size``.
    """
    img = imresize(img, [enlarge_size, enlarge_size])
    # Draw the crop origin from every valid offset, 0 included. The old
    # ceil(uniform(1e-2, d)) draw never produced 0 and produced 1 when
    # enlarge_size == output_size, which made the crop one pixel too small.
    max_offset = enlarge_size - output_size
    h1 = np.random.randint(0, max_offset + 1)
    w1 = np.random.randint(0, max_offset + 1)
    img = img[h1:h1 + output_size, w1:w1 + output_size]
    if np.random.random() > .5:
        img = np.fliplr(img)
    return img
def gradcam(self, X, layer_name, batch_size=16, channel_first=False, alpha=0.4):
    """Compute Grad-CAM style maps for ``X`` at the layer ``layer_name``.

    Args:
        X: Input batch; processed in chunks of ``batch_size``.
        layer_name: Name of the layer whose output gradients are used.
        batch_size: Chunk size for the gradient evaluation.
        channel_first: Whether the model (input and layer output) uses
            channels-first layout.
        alpha: Not used.

    Returns:
        Array of shape ``(n, W, H, 1)`` with ``W``/``H`` taken from
        ``self.model.input_shape``; every map is divided by its own maximum.
    """
    output_scalor = self.qoi(self.model.output)
    layer_output = self.model.get_layer(layer_name).output
    grad_list = K.gradients(output_scalor, [layer_output])
    layer_infl_fn = K.function(self.model.inputs, grad_list)

    # Ceil division; slicing past the end already handles the short last
    # batch, so no special case is needed.
    num_batch = -(-X.shape[0] // batch_size)
    generator = trange(num_batch) if self.verbose else range(num_batch)
    layer_infl = np.vstack([
        layer_infl_fn([X[i * batch_size:(i + 1) * batch_size]])[0]
        for i in generator
    ])

    if channel_first:
        _, _, W, H = self.model.input_shape
        # (n, c, h, w) -> (n, h, w, c). The previous permutation
        # (0, 3, 1, 2) did not produce the channels-last layout the code
        # below relies on.
        layer_infl = np.transpose(layer_infl, (0, 2, 3, 1))
    else:
        _, W, H, _ = self.model.input_shape
    # One weight per (sample, channel): mean gradient over the spatial axes.
    weight = np.mean(layer_infl, axis=(1, 2))

    weight_tensor = torch.from_numpy(weight).unsqueeze(-1)
    # A transposed array is not contiguous and ``view`` would reject it.
    infl_tensor = torch.from_numpy(np.ascontiguousarray(layer_infl))
    n, h, w, c = infl_tensor.size()
    cam = torch.bmm(infl_tensor.view(n, h * w, c), weight_tensor)
    # Restore the spatial axes in the order they were flattened.
    cam = cam.view(n, h, w).numpy()
    cam = np.maximum(cam, 0)

    result = np.vstack([imresize(img, (W, H))[None, :] for img in cam])
    # NOTE(review): a map that is zero everywhere divides by zero here.
    result /= np.max(result, axis=(1, 2), keepdims=True)
    return result[:, :, :, None]
def preproc_image(self, img):
    """Crop and resize an observation and return it as float32.

    The frame is cropped with ``self.crop`` and resized to ``self.img_size``.
    It is averaged over the last axis unless ``self.color`` is set, and
    transposed from [h, w, c] to [c, h, w] when ``self.dim_order`` is
    ``'theano'``.
    """
    frame = imresize(self.crop(img), self.img_size)
    if not self.color:
        frame = frame.mean(-1, keepdims=True)
    if self.dim_order == 'theano':
        frame = frame.transpose([2, 0, 1])  # [h, w, c] to [c, h, w]
    return frame.astype('float32')
def draw_seg(self, img, seg_gt, segmentation, name):
    """Applies generated segmentation mask to an image

    Writes ``<name>_segmentation.png`` and ``<name>.jpg`` into
    ``self.directory`` and, when ``seg_gt`` is not None, also
    ``<name>_seg_gt.png``. Both masks are stored as palette images using
    ``Extra/palette.npy``.
    """
    palette = np.load('Extra/palette.npy').tolist()
    img_size = (img.shape[0], img.shape[1])

    # order=0 keeps the resized mask free of blended label values.
    segmentation = imresize(segmentation, img_size, order=0,
                            preserve_range=True).astype(int)
    image = Image.fromarray((img * 255).astype('uint8'))
    segmentation_draw = Image.fromarray((segmentation).astype('uint8'), 'P')
    segmentation_draw.putpalette(palette)
    segmentation_draw.save(self.directory + '/%s_segmentation.png' % name,
                           'PNG')
    image.save(self.directory + '/%s.jpg' % name, 'JPEG')

    # Explicit None check: the truth value of an array with more than one
    # element is ambiguous and raises ValueError.
    if seg_gt is not None:
        seg_gt_draw = Image.fromarray((seg_gt).astype('uint8'), 'P')
        seg_gt_draw.putpalette(palette)
        seg_gt_draw.save(self.directory + '/%s_seg_gt.png' % name, 'PNG')
def preprocess(img, crop=True, resize=True, dsize=(224, 224)):
    """Scale, centre-crop and resize an image, returning float32.

    Args:
        img: Image array; uint8 input is divided by 255, other dtypes are
            used as they are.
        crop: Cut the centred square whose side is the shorter image side.
        resize: Resize the result to ``dsize``.
        dsize: Target size of the resize.

    Returns:
        float32 array.
    """
    result = img / 255.0 if img.dtype == np.uint8 else img

    if crop:
        rows, cols = result.shape[0], result.shape[1]
        side = min(result.shape[:2])
        top = int((rows - side) / 2)
        left = int((cols - side) / 2)
        result = result[top: top + side, left: left + side]

    if resize:
        result = imresize(result, dsize, preserve_range=True)

    return (result).astype(np.float32)
def preprocess(img, crop=True, resize=True, dsize=(224, 224)):
    """Reverse the channel order, subtract a fixed mean, crop and resize.

    Args:
        img: Image array with three channels on the last axis. uint8 input
            is used as is; other dtypes are multiplied by 255 first.
        crop: Cut the centred square whose side is the shorter image side.
        resize: Resize the result to ``dsize``.
        dsize: Target size of the resize.

    Returns:
        float32 array.
    """
    mean_img = np.array([164.76139251, 167.47864617, 181.13838569])
    reversed_channels = img[..., ::-1]
    if img.dtype == np.uint8:
        centred = (reversed_channels - mean_img).astype(np.float32)
    else:
        centred = reversed_channels * 255.0 - mean_img

    if crop:
        side = min(centred.shape[:2])
        top = int((centred.shape[0] - side) / 2)
        left = int((centred.shape[1] - side) / 2)
        centred = centred[top: top + side, left: left + side]

    if resize:
        centred = imresize(centred, dsize, preserve_range=True)

    return (centred).astype(np.float32)
def preprocess(img, crop=True, resize=True, dsize=(299, 299)):
    """Scale to 0..255, centre-crop, resize and subtract 117.

    Parameters
    ----------
    img : np.ndarray
        Image array. Anything that is not uint8 is multiplied by 255; the
        caller's array is left unmodified.
    crop : bool, optional
        Cut the centred square whose side is the shorter image side.
    resize : bool, optional
        Resize the result to ``dsize``.
    dsize : tuple, optional
        Target size of the resize.

    Returns
    -------
    np.ndarray
        float32 array with at least three dimensions (a 2-D result gets a
        trailing channel axis), minus 117.
    """
    if img.dtype != np.uint8:
        # Out-of-place on purpose: ``img *= 255.0`` rescaled the caller's
        # array, so preprocessing the same image twice scaled it twice.
        img = img * 255.0

    if crop:
        side = np.min(img.shape[:2])
        r = (img.shape[0] - side) // 2
        c = (img.shape[1] - side) // 2
        cropped = img[r: r + side, c: c + side]
    else:
        cropped = img

    if resize:
        rsz = imresize(cropped, dsize, preserve_range=True)
    else:
        rsz = cropped

    if rsz.ndim == 2:
        rsz = rsz[..., np.newaxis]

    rsz = rsz.astype(np.float32)
    # subtract imagenet mean
    return (rsz - 117)
def preprocess(img, crop=True, resize=True, dsize=(299, 299)):
    """Scale to 0..255, centre-crop, resize and subtract 117.

    Args:
        img: Image array. Anything that is not uint8 is multiplied by 255;
            the caller's array is left unmodified.
        crop: Cut the centred square whose side is the shorter image side.
        resize: Resize the result to ``dsize``.
        dsize: Target size of the resize.

    Returns:
        float32 array with at least three dimensions (a 2-D result gets a
        trailing channel axis), minus 117.
    """
    if img.dtype != np.uint8:
        # Work on a copy: the in-place ``*=`` changed the caller's array.
        img = img * 255.0

    cropped = img
    if crop:
        side = np.min(img.shape[:2])
        r = (img.shape[0] - side) // 2
        c = (img.shape[1] - side) // 2
        cropped = img[r: r + side, c: c + side]

    rsz = imresize(cropped, dsize, preserve_range=True) if resize else cropped

    if rsz.ndim == 2:
        rsz = rsz[..., np.newaxis]

    rsz = rsz.astype(np.float32)
    # subtract imagenet mean
    return (rsz - 117)
def setup_mnistm(self, img_res):
    """Populate ``self.mnistm_X`` / ``self.mnistm_y``, using an on-disk cache.

    When ``datasets/mnistm_x.npy`` exists both arrays are loaded from the
    cache. Otherwise the MNIST-M pickle is downloaded from
    ``self.mnistm_url`` (unless already unpacked), its ``b'train'`` images
    are normalised with ``self.normalize`` and resized to ``img_res``, and
    the result is cached. The labels are a copy of ``self.mnist_y``, so
    that attribute must already be set.
    """
    print ("Setting up MNIST-M...")

    if not os.path.exists('datasets/mnistm_x.npy'):

        # Download the MNIST-M pkl file
        filepath = 'datasets/keras_mnistm.pkl.gz'
        unpacked = filepath.replace('.gz', '')
        if not os.path.exists(unpacked):
            print('+ Downloading ' + self.mnistm_url)
            # Everything below writes into this directory.
            os.makedirs('datasets', exist_ok=True)
            # Context manager so the HTTP response is closed even when
            # writing fails.
            with urllib.request.urlopen(self.mnistm_url) as data, \
                    open(filepath, 'wb') as f:
                f.write(data.read())
            with open(unpacked, 'wb') as out_f, \
                    gzip.GzipFile(filepath) as zip_f:
                out_f.write(zip_f.read())
            os.unlink(filepath)

        # load MNIST-M images from pkl file
        # SECURITY NOTE: unpickling executes arbitrary code from the file;
        # this is only safe while self.mnistm_url points to a trusted source.
        with open('datasets/keras_mnistm.pkl', "rb") as f:
            data = pickle.load(f, encoding='bytes')

        # Normalize and rescale images
        mnistm_X = np.array(data[b'train'])
        mnistm_X = self.normalize(mnistm_X)
        mnistm_X = np.array([imresize(x, img_res) for x in mnistm_X])

        self.mnistm_X, self.mnistm_y = mnistm_X, self.mnist_y.copy()

        # Save formatted images
        np.save('datasets/mnistm_x.npy', self.mnistm_X)
        np.save('datasets/mnistm_y.npy', self.mnistm_y)
    else:
        self.mnistm_X = np.load('datasets/mnistm_x.npy')
        self.mnistm_y = np.load('datasets/mnistm_y.npy')

    print ("+ Done.")
def preprocess(img, crop=True, resize=True, dsize=(224, 224)):
    """Reverse the channel order, subtract a fixed mean, crop and resize.

    Parameters
    ----------
    img : np.ndarray
        Image with three channels on the last axis. uint8 input is used as
        is; other dtypes are multiplied by 255 before the mean is
        subtracted.
    crop : bool, optional
        Cut the centred square whose side is the shorter image side.
    resize : bool, optional
        Resize the result to ``dsize``.
    dsize : tuple, optional
        Target size of the resize.

    Returns
    -------
    np.ndarray
        float32 array.
    """
    mean_img = np.array([164.76139251, 167.47864617, 181.13838569])
    flipped = img[..., ::-1]
    is_bytes = img.dtype == np.uint8
    if is_bytes:
        work = (flipped - mean_img).astype(np.float32)
    else:
        work = flipped * 255.0 - mean_img

    if crop:
        height, width = work.shape[0], work.shape[1]
        short_edge = min(work.shape[:2])
        y0 = int((height - short_edge) / 2)
        x0 = int((width - short_edge) / 2)
        work = work[y0: y0 + short_edge, x0: x0 + short_edge]

    if resize:
        work = imresize(work, dsize, preserve_range=True)
    return (work).astype(np.float32)
def preprocess(img, crop=True, resize=True, dsize=(224, 224)):
    """Scale, centre-crop and resize an image, returning float32.

    Parameters
    ----------
    img : np.ndarray
        Image array; uint8 input is divided by 255, other dtypes are used
        unchanged.
    crop : bool, optional
        Cut the centred square whose side is the shorter image side.
    resize : bool, optional
        Resize the result to ``dsize``.
    dsize : tuple, optional
        Target size of the resize.

    Returns
    -------
    np.ndarray
        float32 array.
    """
    if img.dtype == np.uint8:
        img = img / 255.0

    def _center_square(picture):
        # Centred square with the shorter side as edge length.
        edge = min(picture.shape[:2])
        y0 = int((picture.shape[0] - edge) / 2)
        x0 = int((picture.shape[1] - edge) / 2)
        return picture[y0: y0 + edge, x0: x0 + edge]

    crop_img = _center_square(img) if crop else img
    if not resize:
        return (crop_img).astype(np.float32)
    return (imresize(crop_img, dsize, preserve_range=True)).astype(np.float32)
def setup_mnist(self, img_res):
    """Populate ``self.mnist_X`` / ``self.mnist_y``, using an on-disk cache.

    When ``datasets/mnist_x.npy`` exists both arrays are loaded from the
    ``datasets`` directory. Otherwise the training split is loaded,
    normalised with ``self.normalize``, resized to ``img_res``, repeated to
    three channels and written to that cache.
    """
    print ("Setting up MNIST...")
    x_cache = 'datasets/mnist_x.npy'
    y_cache = 'datasets/mnist_y.npy'

    if os.path.exists(x_cache):
        self.mnist_X = np.load(x_cache)
        self.mnist_y = np.load(y_cache)
    else:
        # Only the training split is used.
        (images, labels), (_, _) = mnist.load_data()

        images = self.normalize(images)
        images = np.array([imresize(digit, img_res) for digit in images])
        # Add a channel axis and replicate it three times.
        images = np.repeat(np.expand_dims(images, axis=-1), 3, axis=-1)

        self.mnist_X, self.mnist_y = images, labels

        np.save(x_cache, self.mnist_X)
        np.save(y_cache, self.mnist_y)

    print ("+ Done.")
def test_vgg_face():
    """Loads the VGG network and applies it to a test image.

    Interactive demo: shows the preprocessed ``bricks.png``, plots a
    saliency map for every ``BiasAdd`` operation and then runs three
    gradient-ascent ("deep dream") steps on ``street.png``.
    """
    with tf.Session() as sess:
        net = get_vgg_face_model()
        x = tf.placeholder(tf.float32, [1, 224, 224, 3], name='x')
        tf.import_graph_def(net['graph_def'], name='vgg',
                            input_map={'Placeholder:0': x})
        g = tf.get_default_graph()
        names = [op.name for op in g.get_operations()]

        og = plt.imread('bricks.png')[..., :3]
        img = preprocess(og)[np.newaxis, ...]
        plt.imshow(img[0])
        plt.show()

        # Let's visualize the network's gradient activation when
        # backpropagated to the original input image. This is effectively
        # telling us which pixels contribute to the predicted class or given
        # neuron.
        features = [name for name in names
                    if 'BiasAdd' in name.split()[-1]]
        from math import sqrt, ceil
        n_plots = ceil(sqrt(len(features) + 1))
        fig, axs = plt.subplots(n_plots, n_plots)
        plot_i = 0
        axs[0][0].imshow(img[0])
        for feature_i, featurename in enumerate(features):
            plot_i += 1
            feature = g.get_tensor_by_name(featurename + ':0')
            neuron = tf.reduce_max(feature, 1)
            saliency = tf.gradients(tf.reduce_sum(neuron), x)
            neuron_idx = tf.arg_max(feature, 1)
            this_res = sess.run([saliency[0], neuron_idx],
                                feed_dict={x: img})

            # Normalise by the largest absolute gradient so that the values
            # map into 0..255 below.
            grad = this_res[0][0] / np.max(np.abs(this_res[0]))
            ax = axs[plot_i // n_plots][plot_i % n_plots]
            ax.imshow((grad * 127.5 + 127.5).astype(np.uint8))
            ax.set_title(featurename)
            plt.waitforbuttonpress()

        # Deep Dreaming takes the backpropagated gradient activations and
        # simply adds it to the image, running the same process again and
        # again in a loop. There are many tricks one can add to this idea,
        # such as infinitely zooming into the image by cropping and scaling,
        # adding jitter by randomly moving the image around, or adding
        # constraints on the total activations.
        og = plt.imread('street.png')
        crop = 2
        img = preprocess(og)[np.newaxis, ...]
        layer = g.get_tensor_by_name(features[3] + ':0')
        n_els = layer.get_shape().as_list()[1]
        # NOTE(review): the index is drawn from 0..999 regardless of n_els -
        # confirm that the chosen layer has at least 1000 elements.
        neuron_i = np.random.randint(1000)
        layer_vec = np.zeros((1, n_els))
        layer_vec[0, neuron_i] = 1
        neuron = tf.reduce_max(layer, 1)
        saliency = tf.gradients(tf.reduce_sum(neuron), x)
        for it_i in range(3):
            print(it_i)
            this_res = sess.run(saliency[0], feed_dict={
                x: img,
                layer: layer_vec,
                'vgg/dropout_1/random_uniform:0': [[1.0]],
                'vgg/dropout/random_uniform:0': [[1.0]]})
            # Normalise by the gradient just computed. The previous code
            # divided by the mean of a stale ``grad`` left over from the
            # saliency loop (or from the previous iteration).
            grad = this_res[0] / np.mean(np.abs(this_res))
            # Zoom in slightly: trim the border and resize back to 224x224.
            img = img[:, crop:-crop - 1, crop:-crop - 1, :]
            img = imresize(img[0], (224, 224))[np.newaxis]
            img += grad
            plt.imshow(deprocess(img[0]))
def main(argv=None):  # pylint: disable=unused-argument
    """Time inference of the detection + segmentation network.

    Builds a ResNet-50 based network with a multibox head and a segmentation
    head, restores the checkpoint selected by ``args.ckpt`` and runs 200
    images from the loader through it. The mean, standard deviation and
    frames per second of the timed runs are logged.
    """
    net = ResNet
    depth = 50
    loader = VOCLoader('07', 'test')
    net = net(config=net_config, depth=depth, training=False)
    num_classes = 21
    batch_size = args.batch_size
    img_size = args.image_size
    # Single-image placeholder: the graph is evaluated one image at a time.
    image_ph = tf.placeholder(shape=[1, img_size, img_size, 3], dtype=tf.float32, name='img_ph')
    net.create_trunk(image_ph)
    bboxer = PriorBoxGrid(net_config)
    net.create_multibox_head(num_classes)
    confidence = tf.nn.softmax(tf.squeeze(net.outputs['confidence']))
    location = tf.squeeze(net.outputs['location'])
    good_bboxes = decode_bboxes(location, bboxer.tiling)
    detection_list = []
    score_list = []
    # Per-class post-processing; index 0 is skipped (presumably the
    # background class - confirm): confidence threshold, top-k, then NMS.
    for i in range(1, num_classes):
        class_mask = tf.greater(confidence[:, i], args.conf_thresh)
        class_scores = tf.boolean_mask(confidence[:, i], class_mask)
        class_bboxes = tf.boolean_mask(good_bboxes, class_mask)
        # Never ask top_k for more entries than survived the threshold.
        K = tf.minimum(tf.size(class_scores), args.top_k_nms)
        _, top_k_inds = tf.nn.top_k(class_scores, K)
        top_class_scores = tf.gather(class_scores, top_k_inds)
        top_class_bboxes = tf.gather(class_bboxes, top_k_inds)
        final_inds = tf.image.non_max_suppression(top_class_bboxes, top_class_scores, max_output_size=50, iou_threshold=args.nms_thresh)
        final_class_bboxes = tf.gather(top_class_bboxes, final_inds)
        final_scores = tf.gather(top_class_scores, final_inds)
        detection_list.append(final_class_bboxes)
        score_list.append(final_scores)
    net.create_segmentation_head(num_classes)
    # Per-pixel class index: argmax over the last axis.
    segmentation = tf.cast(tf.argmax(tf.squeeze(net.outputs['segmentation']), axis=-1), tf.int32)
    times = []
    with tf.Session(config=tf.ConfigProto(allow_soft_placement=True, log_device_placement=False)) as sess:
        sess.run(tf.global_variables_initializer())
        ckpt_path = train_dir + '/model.ckpt-%i000' % args.ckpt
        log.debug("Restoring checkpoint %s" % ckpt_path)
        saver = tf.train.Saver(tf.global_variables())
        saver.restore(sess, ckpt_path)
        for i in range(200):
            im = loader.load_image(loader.get_filenames()[i])
            im = imresize(im, (img_size, img_size))
            im = im.reshape((1, img_size, img_size, 3))
            # Only the sess.run call itself is timed.
            st = time.time()
            sess.run([detection_list, score_list, segmentation], feed_dict={image_ph: im})
            et = time.time()
            # The first 11 runs are not recorded (presumably warm-up).
            if i > 10:
                times.append(et-st)
        m = np.mean(times)
        s = np.std(times)
        fps = 1/m
        log.info("Mean={0:.2f}ms; Std={1:.2f}ms; FPS={2:.1f}".format(m*1000, s*1000, fps))