def generate_image(self, pre_imgs, targets, **kwargs):
    """Build saliency maps as the VBP model output times the rescaled input.

    Args:
        pre_imgs: Input image tensor; must support ``detach().numpy()``.
        targets: Forwarded unchanged to ``self.VBP_model.generate_image``.
        **kwargs: Forwarded unchanged to ``self.VBP_model.generate_image``.

    Returns:
        Tuple ``(sal_maps, probs, preds)``. ``probs`` and ``preds`` are
        returned exactly as produced by the VBP model.
    """
    vbp_maps, probs, preds = self.VBP_model.generate_image(
        pre_imgs, targets, **kwargs)

    # Element-wise product of the VBP output and the rescaled raw input.
    scaled_input = rescale_image(pre_imgs.detach().numpy())
    weighted = np.multiply(vbp_maps, scaled_input)

    # Axes are reordered with (0, 3, 1, 2) before the final rescale.
    return rescale_image(weighted.transpose(0, 3, 1, 2)), probs, preds
def generate_smooth_var_grad(pre_imgs, targets, n, sigma, model, **kwargs):
    """Average ``outputs**2 - smooth_grad**2`` over ``n`` noisy forward passes.

    Args:
        pre_imgs: 4-D image tensor; the accumulator is laid out as
            (batch_size, H, W, C) from its shape.
        targets: Forwarded to ``generate_smooth_grad`` and
            ``model.generate_image``.
        n: Number of noisy samples; must be at least 1.
        sigma: Noise level; divided per image by that image's
            ``max - min`` before being handed to ``normal_dist``.
        model: Object exposing ``generate_image(imgs, targets, **kwargs)``.
        **kwargs: Forwarded to the calls above; ``layer`` defaults to None.

    Returns:
        Tuple ``(smooth_var_grad, probs, preds)`` where ``probs`` and
        ``preds`` come from the last noisy pass.

    Raises:
        ValueError: If ``n`` is smaller than 1.
    """
    # With n < 1 the loop never runs: probs/preds would be unbound and the
    # average would divide by zero, so reject it up front.
    if n < 1:
        raise ValueError('n must be at least 1, got {!r}'.format(n))

    seed_everything()
    kwargs.setdefault('layer', None)

    # Accumulator shaped (batch_size, H, W, C).
    smooth_var_grad = np.zeros(
        pre_imgs.shape[:1] + pre_imgs.shape[2:] + pre_imgs.shape[1:2])
    smooth_grad, _, _ = generate_smooth_grad(
        pre_imgs, targets, n, sigma, model, **kwargs)

    # Per-image noise parameters.
    imgs_np = pre_imgs.detach().numpy()
    mins = imgs_np.min(axis=(1, 2, 3))
    maxs = imgs_np.max(axis=(1, 2, 3))
    mean = [0] * pre_imgs.size(0)
    # Already shape (batch_size,). Squeezing it would collapse a batch of one
    # to a 0-d array that cannot be iterated per image below.
    sigma = sigma / (maxs - mins)

    for _ in range(n):
        noise = np.array([
            normal_dist(img, mu, sd)
            for img, mu, sd in zip(pre_imgs, mean, sigma)
        ])
        noisy_imgs = pre_imgs + torch.Tensor(noise)
        outputs, probs, preds = model.generate_image(
            noisy_imgs, targets, **kwargs)
        smooth_var_grad = smooth_var_grad + (outputs**2 - smooth_grad**2)

    smooth_var_grad = smooth_var_grad / n
    smooth_var_grad = rescale_image(smooth_var_grad.transpose(0, 3, 1, 2))
    return smooth_var_grad, probs, preds
def __call__(self, img_info):
    """Rescale an image, remap its boxes to the new size, then randomly flip.

    Args:
        img_info: ``(img, bbox, label)`` triple. ``img.shape`` must unpack
            as ``(_, height, width)``.

    Returns:
        Tuple ``(img, bbox, label, scale)`` where ``scale`` is
        ``min(h / org_h, w / org_w)`` for the rescaled size ``(h, w)``.
    """
    img, bbox, label = img_info
    _, org_h, org_w = img.shape

    img = rescale_image(img, self.min_size, self.max_size)
    _, h, w = img.shape
    scale = min(h / org_h, w / org_w)

    # Source size is (height, width), matching the (h, w) target size.
    # Passing org_h twice mis-maps boxes for non-square images.
    bbox = rescale_box(bbox, (org_h, org_w), (h, w))

    img, y_flip, x_flip = random_flip_image(img)
    bbox = flip_box(bbox, (h, w), y_flip, x_flip)
    return img, bbox, label, scale
def __getitem__(self, item):
    """Load the DICOM at ``self.path[item]`` and return the processed image.

    The pixel array is rescaled with the file's ``RescaleSlope`` and
    ``RescaleIntercept``, passed through ``apply_window_policy`` with
    ``self.window_policy``, resized to 512x512 and, when ``self.transforms``
    is truthy, transformed via ``self.transforms(image=...)['image']``.

    Returns:
        The image with axes 0 and 2 swapped.
    """
    dataset = pydicom.dcmread(self.path[item])
    meta = get_dicom_meta(dataset)

    pixels = rescale_image(
        dataset.pixel_array,
        meta['RescaleSlope'],
        meta['RescaleIntercept'],
    )
    pixels = apply_window_policy(pixels, meta, self.window_policy)
    pixels = resize(pixels, (512, 512))

    if self.transforms:
        pixels = self.transforms(image=pixels)['image']

    return np.swapaxes(pixels, 0, 2)
def generate_image(self, pre_imgs, targets, **kwargs):
    """Multiply the GradCAM maps by the Guided Backprop maps.

    Args:
        pre_imgs: Images forwarded to both wrapped models.
        targets: Targets forwarded to both wrapped models.
        **kwargs: ``layer`` (default 8) and ``color`` (default False) are
            passed to ``self.GC_model``; other keys are ignored.

    Returns:
        Tuple ``(sal_maps, probs, preds)``. ``probs`` and ``preds`` come
        from the GradCAM model.
    """
    layer = kwargs.get('layer', 8)
    color = kwargs.get('color', False)

    gradcam_maps, probs, preds = self.GC_model.generate_image(
        pre_imgs, targets, layer=layer, color=color)
    guided_maps, _, _ = self.GB_model.generate_image(pre_imgs, targets)

    # Element-wise product, then axes reordered with (0, 3, 1, 2).
    product = np.multiply(gradcam_maps, guided_maps)
    return rescale_image(product.transpose(0, 3, 1, 2)), probs, preds
def generate_image(self, pre_imgs, targets, **kwargs):
    """Return the rescaled gradient of the target outputs w.r.t. the input.

    Args:
        pre_imgs: Input tensor; wrapped so that gradients are tracked.
        targets: Index tensor used to scatter ones along dim 1; must
            support ``unsqueeze``.
        **kwargs: Unused.

    Returns:
        Tuple ``(sal_maps, probs, preds)`` as numpy arrays, where ``probs``
        and ``preds`` are the max over dim 1 of the model outputs and its
        index.
    """
    pre_imgs = Variable(pre_imgs, requires_grad=True)
    outputs = self.model(pre_imgs)

    self.model.zero_grad()
    # One-hot mask selecting each sample's target column.
    target_mask = torch.zeros_like(outputs)
    target_mask = target_mask.scatter(1, targets.unsqueeze(1), 1).detach()
    outputs.backward(gradient=target_mask)

    probs, preds = outputs.detach().max(1)
    sal_maps = rescale_image(pre_imgs.grad.numpy())
    return sal_maps, probs.numpy(), preds.numpy()
def generate_image(self, pre_imgs, targets, **kwargs):
    """Average the gradients taken along a linear path of images.

    Args:
        pre_imgs: Input tensor; ``pre_imgs.size()`` fixes the map shape.
        targets: Forwarded to ``self.generate_gradients``.
        **kwargs: ``steps`` (default 10) is the divisor applied to each
            gradient and is passed to
            ``self.generate_images_on_linear_path``.

    Returns:
        Tuple ``(sal_maps, probs, preds)``. ``probs`` and ``preds`` are
        those of the last path image.
    """
    steps = kwargs.get('steps', 10)
    path_images = self.generate_images_on_linear_path(pre_imgs, steps)

    sal_maps = np.zeros(pre_imgs.size())
    for path_image in path_images:
        step_grad, probs, preds = self.generate_gradients(path_image, targets)
        sal_maps = sal_maps + (step_grad / steps)

    return rescale_image(sal_maps), probs, preds
def generate_image(self, pre_imgs, targets, **kwargs):
    """Compute gradient-weighted activation maps for one layer.

    Args:
        pre_imgs: Input tensor; wrapped so that gradients are tracked.
        targets: Class indices; converted with ``torch.LongTensor``.
        **kwargs: ``layer`` (default 8) indexes ``self.gradients`` and
            ``self.conv_outputs``; ``color`` (default False) is passed to
            ``resize_image`` for every map.

    Returns:
        Tuple ``(gradcams, probs, preds)`` as numpy arrays.
    """
    layer = kwargs.get('layer', 8)
    color = kwargs.get('color', False)

    targets = torch.LongTensor(targets)

    # Forward pass with gradient tracking on the input.
    pre_imgs = Variable(pre_imgs, requires_grad=True)
    outputs = self.model(pre_imgs)

    # Backpropagate a one-hot mask of the target classes.
    self.model.zero_grad()
    target_mask = torch.zeros_like(outputs)
    target_mask = target_mask.scatter(1, targets.unsqueeze(1), 1).detach()
    outputs.backward(gradient=target_mask)
    probs, preds = outputs.detach().max(1)

    # NOTE(review): self.gradients / self.conv_outputs are presumably filled
    # in during the passes above -- confirm where they are populated.
    layer_grads = self.gradients[layer].numpy()
    layer_acts = self.conv_outputs[layer].detach().numpy()

    # Channel weights: mean gradient over axes 2 and 3, with two trailing
    # singleton axes so they broadcast against the activations.
    weights = np.mean(layer_grads, axis=(2, 3))
    weights = weights[..., np.newaxis, np.newaxis]

    # Weighted sum over channels followed by ReLU.
    gradcams = np.maximum((weights * layer_acts).sum(axis=1), 0)
    gradcams = rescale_image(gradcams, channel=False)

    # Resize every map against its corresponding input image.
    colors = [color] * gradcams.shape[0]
    gradcams = np.array([
        resize_image(cam, img, flag)
        for cam, img, flag in zip(gradcams, pre_imgs, colors)
    ])
    return gradcams, probs.numpy(), preds.numpy()
def generate_image(self, pre_imgs, targets, **kwargs):
    """Return the rescaled gradient of the target outputs w.r.t. the input.

    Args:
        pre_imgs: Input tensor; wrapped so that gradients are tracked.
        targets: Class indices; converted with ``torch.LongTensor``.
        **kwargs: Unused.

    Returns:
        Tuple ``(sal_maps, probs, preds)`` as numpy arrays, where ``probs``
        and ``preds`` are the max over dim 1 of the model outputs and its
        index.
    """
    targets = torch.LongTensor(targets)

    tracked_imgs = Variable(pre_imgs, requires_grad=True)
    outputs = self.model(tracked_imgs)

    # Backpropagate a one-hot mask of the target classes.
    self.model.zero_grad()
    target_mask = torch.zeros_like(outputs)
    target_mask = target_mask.scatter(1, targets.unsqueeze(1), 1).detach()
    outputs.backward(gradient=target_mask)
    probs, preds = outputs.detach().max(1)

    sal_maps = rescale_image(tracked_imgs.grad.numpy())
    return sal_maps, probs.numpy(), preds.numpy()
def pipeline(img_name):
    """Run OCR on an uploaded JPEG and return the first date found.

    Args:
        img_name: File stem; the image is read from
            ``./static/uploaded_images/<img_name>.jpeg``.

    Returns:
        The date formatted as ``"%Y-%m-%d"``; ``None`` when ``find_date``
        returns ``None``; or the ``ValueError`` class itself (returned, not
        raised) when the image cannot be opened.
    """
    # NOTE(review): img_name is interpolated into the path unchecked --
    # confirm that callers sanitise it.
    path = f'./static/uploaded_images/{img_name}.jpeg'
    try:
        img = Image.open(path)
    except Exception:
        # Narrowed from a bare ``except`` so KeyboardInterrupt/SystemExit
        # propagate. The ValueError sentinel is kept because existing
        # callers may compare against it.
        return ValueError

    # Preprocessing: rescale, threshold, crop to the bounding box, denoise.
    img = rescale_image(img)
    thresh = threshold(img)
    _, bbox = find_bbox(thresh)
    img = crop_img(img, bbox)
    final_img = remove_noise_and_smooth(img)

    # OCR, then search the recognised text for a date.
    text = pytesseract.image_to_string(final_img)
    date = find_date(text)
    if date is None:
        return None
    return date.strftime("%Y-%m-%d")
def generate_image(self, pre_imgs, targets, **kwargs):
    """Run the deconvolution model on one layer's feature maps.

    Args:
        pre_imgs: Input tensor for ``self.model``.
        targets: Class indices; converted with ``torch.LongTensor`` but not
            used afterwards.
        **kwargs: ``layer`` (default 0) selects the entry of
            ``self.model.feature_maps`` to reconstruct from.

    Returns:
        Tuple ``(deconv_outputs, probs, preds)`` as numpy arrays.
    """
    layer = kwargs.get('layer', 0)

    targets = torch.LongTensor(targets)

    # Prediction: max over dim 1 of the detached model outputs.
    probs, preds = self.model(pre_imgs).detach().max(1)

    reconstruction = self.deconv_model(
        self.model.feature_maps[layer], layer, self.model.pool_locs)

    deconv_outputs = rescale_image(reconstruction.data.numpy())
    return deconv_outputs, probs.numpy(), preds.numpy()
def generate_image(self, pre_imgs, targets, **kwargs):
    """Average the gradients taken along a linear path of images.

    Args:
        pre_imgs: Input tensor; ``pre_imgs.size()`` fixes the map shape.
        targets: Class indices; converted with ``torch.LongTensor``.
        **kwargs: ``steps`` (default 10) is the divisor applied to each
            gradient and is passed to
            ``self.generate_images_on_linear_path``.

    Returns:
        Tuple ``(sal_maps, probs, preds)``. ``probs`` and ``preds`` are
        those of the last path image.
    """
    steps = kwargs.get('steps', 10)
    targets = torch.LongTensor(targets)

    accumulated = np.zeros(pre_imgs.size())
    for xbar_image in self.generate_images_on_linear_path(pre_imgs, steps):
        grad, probs, preds = self.generate_gradients(xbar_image, targets)
        accumulated = accumulated + (grad / steps)

    sal_maps = rescale_image(accumulated)
    return sal_maps, probs, preds
def generate_image(self, pre_imgs, targets, **kwargs):
    """Run the deconvolution model on one layer's feature maps.

    Args:
        pre_imgs: Input tensor for ``self.model``.
        targets: Unused.
        **kwargs: ``layer`` (default 0) selects the entry of
            ``self.model.feature_maps`` to reconstruct from.

    Returns:
        Tuple ``(deconv_outputs, probs, preds)`` as numpy arrays.
    """
    layer = kwargs.get('layer', 0)

    # Prediction: max over dim 1 of the detached model outputs.
    outputs = self.model(pre_imgs).detach()
    probs, preds = outputs.max(1)

    # The feature-map channel count and a cloned copy of the feature map
    # were computed here but never read, so they have been dropped.
    deconv_outputs = self.deconv_model(
        self.model.feature_maps[layer], layer, self.model.pool_locs)

    deconv_outputs = deconv_outputs.data.numpy()
    deconv_outputs = rescale_image(deconv_outputs)
    return (deconv_outputs, probs.numpy(), preds.numpy())
def generate_image(self, pre_imgs, targets, **kwargs):
    """Weight one layer's outputs by each sample's target-class weights.

    Args:
        pre_imgs: Input tensor; wrapped so that gradients are tracked.
        targets: Class indices; converted with ``torch.LongTensor``.
        **kwargs: ``layer`` (default 11) indexes ``self.conv_outputs``;
            ``color`` (default False) is passed to ``resize_image``.

    Returns:
        Tuple ``(cams, probs, preds)`` as numpy arrays.
    """
    layer = kwargs.get('layer', 11)
    color = kwargs.get('color', False)

    targets = torch.LongTensor(targets)

    pre_imgs = Variable(pre_imgs, requires_grad=True)
    outputs = self.model(pre_imgs)
    probs, preds = outputs.detach().max(1)

    # Selected layer output, (B, C, H, W).
    feature_maps = self.conv_outputs[layer].detach().numpy()
    # Weights of the first cam_mlp layer, (nb_class, C).
    class_weights = self.model.cam_mlp.mlp[0].weight.detach().numpy()

    # Pick the weight row of every sample's target class.
    batch_size = targets.shape[0]
    picked = np.zeros((batch_size, class_weights.shape[1]))
    for row in range(batch_size):
        picked[row] = class_weights[targets[row]]
    picked = picked[..., np.newaxis, np.newaxis]  # (B, C, 1, 1)

    # Weighted sum over the channel axis.
    cams = (picked * feature_maps).sum(1)
    cams = rescale_image(cams, channel=False)

    # Resize every map against its corresponding input image.
    colors = [color] * cams.shape[0]
    cams = np.array([
        resize_image(cam, img, flag)
        for cam, img, flag in zip(cams, pre_imgs, colors)
    ])
    return cams, probs.numpy(), preds.numpy()
def generate_image(self, pre_imgs, targets, **kwargs):
    """Return the channel-mean output of one layer, resized per image.

    Args:
        pre_imgs: Input tensor; wrapped so that gradients are tracked.
        targets: Class indices; converted with ``torch.LongTensor`` but not
            used afterwards.
        **kwargs: ``layer`` (default 11) indexes ``self.conv_outputs``;
            ``color`` (default False) is passed to ``resize_image``.

    Returns:
        Tuple ``(maps, probs, preds)`` as numpy arrays.
    """
    layer = kwargs.get('layer', 11)
    color = kwargs.get('color', False)

    targets = torch.LongTensor(targets)

    pre_imgs = Variable(pre_imgs, requires_grad=True)
    outputs = self.model(pre_imgs)
    probs, preds = outputs.detach().max(1)

    # Mean over axis 1 of the selected layer output.
    layer_mean = self.conv_outputs[layer].mean(axis=1).detach().numpy()
    layer_mean = rescale_image(layer_mean, channel=False)

    # Resize every map against its corresponding input image.
    colors = [color] * layer_mean.shape[0]
    resized = np.array([
        resize_image(fmap, img, flag)
        for fmap, img, flag in zip(layer_mean, pre_imgs, colors)
    ])
    return resized, probs.numpy(), preds.numpy()
def __getitem__(self, index):
    """Load one annotated image and rescale it together with its boxes.

    Reads ``Annotations/<id>.xml`` and ``JPEGImages/<id>.jpg`` under
    ``self.data_dir`` for ``self.ids[index]``.

    Returns:
        Tuple ``(img, bbox, label, (org_h, org_w))``: the rescaled image,
        float32 boxes ordered ``ymin, xmin, ymax, xmax`` mapped to the
        rescaled size, int32 label indices into ``VOC_BBOX_LABEL_NAMES``,
        and the original image height and width.
    """
    image_id = self.ids[index]
    annos = ET.parse(
        os.path.join(self.data_dir, 'Annotations', image_id + '.xml'))

    bbox = []
    label = []
    for obj in annos.findall('object'):
        pos = obj.find('bndbox')
        bbox.append([
            int(pos.find(tag).text)
            for tag in ['ymin', 'xmin', 'ymax', 'xmax']
        ])
        name = obj.find('name').text.lower().strip()
        label.append(VOC_BBOX_LABEL_NAMES.index(name))
    bbox = np.asarray(bbox).astype(np.float32)
    label = np.asarray(label).astype(np.int32)

    img_file = os.path.join(self.data_dir, 'JPEGImages', image_id + '.jpg')
    img = read_image(img_file)
    _, org_h, org_w = img.shape

    img = rescale_image(img, int(self.opt.min_size), int(self.opt.max_size))
    _, h, w = img.shape

    # Source size is (height, width), matching the (h, w) target size.
    # Passing org_h twice mis-maps boxes for non-square images.
    bbox = rescale_box(bbox, (org_h, org_w), (h, w))
    return img, bbox, label, (org_h, org_w)
def rescale(self, target_distance):
    """Rescale the image and mask to fit a distance in meters.

    Both are scaled by ``self.distance / target_distance``. Afterwards
    ``self.distance`` is set to ``target_distance`` and
    ``self.reference_point`` is reset to ``None``.
    """
    factor = self.distance / target_distance
    self.image = utils.rescale_image(self.image, factor)
    self.mask = utils.rescale_mask(self.mask, factor)
    self.distance = target_distance
    self.reference_point = None
loss_disc_memory = [] optimizer_d = optim.Adam(disc.parameters(), lr=0.0002) optimizer_g = optim.Adam(gen.parameters(), lr=0.0002) test_input_z = Variable( torch.Tensor(np.random.normal(0, 1, (1, LATENT_SPACE_DIM)))) for e in range(EPOCH): loss_gene_memory = [] loss_disc_memory = [] if e % 3 == 0: with torch.no_grad(): fake_image_test = gen(test_input_z) fake_image_test = vectors_to_images(fake_image_test, 1, 28) plot_image(rescale_image(fake_image_test.data[0][0], 0.5)) for idx, (images, _) in enumerate(train_loader): images = torch.Tensor(images) size_batch = images.shape[0] # Training Discriminator optimizer_d.zero_grad() noise_vector = Variable( torch.Tensor(np.random.normal(0, 1, (size_batch, LATENT_SPACE_DIM)))) real_images = images.view(-1, IMAGE_SIZE**2) pred_real = disc(real_images) fake_image = gen(noise_vector).detach() pred_fake = disc(fake_image) loss_real = loss_fucntion(pred_real,
# Initialize the first state with the same 4 images current_state = np.array([obs, obs, obs, obs]) # Main episode loop t = 0 frame_counter += 1 while t < args.max_episode_length: # Stop the episode if it takes too long if frame_counter > args.max_frames_number: DQA.quit() # Render the game if args.video: img = utils.rescale_image(env.render()).convert("RGB") pyg_img = pygame.image.fromstring(img.tobytes(), img.size, img.mode) surface.fill((255, 255, 255)) surface.blit(pyg_img, (0, 0)) pygame.display.update() # Select an action using the DQA action = DQA.get_action(np.asarray([current_state])) # Observe reward and next state obs, reward, done, info = env.step(action) obs = utils.preprocess_observation(obs) next_state = utils.get_next_state(current_state, obs) frame_counter += 1
def _convert_obs(self, obs):
    """Pass an observation through rgb2gray, rescale_image, normalize_image.

    Args:
        obs: Observation handed to ``utils.rgb2gray``.

    Returns:
        The result of ``utils.normalize_image`` on the rescaled gray image.
    """
    gray = utils.rgb2gray(obs)
    rescaled = utils.rescale_image(gray)
    return utils.normalize_image(rescaled)