def fit_agl(self,
            x: Union[np.ndarray, torch.Tensor],
            y: Union[np.ndarray, torch.Tensor],
            lam: Union[float, int],
            max_iters: int = 1000,
            smooth: Union[float, int] = 0,
            weights: List[Union[int, float]] = None):
    """Fits the adaptive group lasso."""
    if self.beta is None and weights is None:
        print("Initial beta estimation is not available, please run fit or fit_gic first.")
        return None
    if weights is None:
        weights = self.compute_weights(self.beta)
    x = remove_intercept(x)
    x = numpy_to_torch(x)
    y = numpy_to_torch(y)
    x = self.normalize(x)
    x_basis = self.basis_expansion_(x, self.df, self.degree)
    group_size = [self.df] * len(weights)
    x_basis, group_size = add_intercept(x_basis, group_size)
    beta_agl = self.solve(x_basis, y, lam, group_size, max_iters, weights, smooth=smooth)
    self.beta_agl = beta_agl
    self.beta = beta_agl
    return self

def fit_2(self,
          x: Union[np.ndarray, torch.Tensor],
          y: Union[np.ndarray, torch.Tensor],
          num_lams: int,
          max_iters: int = 1000,
          an: Union[int, float] = None,
          smooth: Union[float, int] = 0):
    """Fits the group lasso followed by the adaptive group lasso;
    reusing the basis expansion saves time."""
    x = numpy_to_torch(x)
    y = numpy_to_torch(y)
    x = remove_intercept(x)
    x = self.normalize(x)
    x_basis = self.basis_expansion_(x, self.df, self.degree)
    group_size = [self.df] * x.shape[1]
    x_basis, group_size = add_intercept(x_basis, group_size)
    # group lasso path; the smallest lambda gives the initial estimate
    result = self.fit_path(x_basis, y, group_size, num_lams, max_iters, smooth=smooth)
    beta_gl = result[min(result.keys())]
    weights = self.compute_weights(beta_gl)
    # adaptive group lasso path with data-driven weights
    result = self.fit_path(x_basis, y, group_size, num_lams, max_iters,
                           smooth=smooth, weights=weights)
    best_gic = np.inf
    best_lam = 0
    best_beta = None
    if an is None:
        an = np.log(x.shape[1]) / x.shape[0]
    for lam in result.keys():
        beta_full = result[lam]
        gic = self.compute_gic(x_basis, y, beta_full, an, group_size)
        print(f"lam: {lam}, gic: {gic}")
        if gic < best_gic:
            best_lam = lam
            best_beta = beta_full
            best_gic = gic
    self.beta_agl_gic = best_beta
    self.beta = best_beta
    num_nz, nz = compute_nonzeros(best_beta, group_size)
    print(f"The best lam {best_lam} and the best gic {best_gic}. "
          f"Finally selected {num_nz - 1} nonzeros: {nz}")
    return self

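# Minimal usage sketch for the two-stage fit. This is hypothetical: the
# enclosing class is instantiated here as `model`, but its name and
# constructor arguments are not shown in this excerpt and are assumptions.
#
# import numpy as np
#
# rng = np.random.default_rng(0)
# x = rng.standard_normal((200, 10))
# y = x[:, 0] ** 2 + np.sin(x[:, 1]) + 0.1 * rng.standard_normal(200)
#
# model = GAM(df=5, degree=3, data_class='regression')  # hypothetical ctor
# model.fit_2(x, y, num_lams=50)   # group lasso, then adaptive group lasso
# y_hat = model.predict(x)         # predict (below) uses the GIC-selected beta
#
# The same result can be assembled manually with fit_gic followed by
# fit_agl, at the cost of recomputing the basis expansion.
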
def eval_regions(bw, thr=0.0):
    """Scores connected regions of the binary mask `bw` by the confidence
    drop they cause when blurred out (relies on the script-level globals
    target_shape, img, blurred_img, model, category, args, HAS_CUDA)."""
    mask_labeled = labelize(bw)
    regions = regionprops(mask_labeled)

    prop_saliency = []
    doa = []
    min_area = int(np.prod(target_shape) * thr)
    valid_idx = []

    for pid, props in enumerate(regions):
        if props.area < min_area:
            continue
        valid_idx.append(pid)

        # extract proposal mask
        prop_mask = np.ones(bw.shape, dtype=np.float32)
        for u, v in props.coords:
            prop_mask[u, v] = 0.

        # compute contribution
        prop_mask = utils.numpy_to_torch(prop_mask, use_cuda=HAS_CUDA)
        perturbated_input = img.mul(prop_mask) + blurred_img.mul(1 - prop_mask)
        drop = 1. - predict(model, perturbated_input, category)
        prop_saliency.append(drop)
        doa.append(drop / props.area)  # drop-over-area
        # print('Region saliency: {:.6f}'.format(prop_saliency[-1]))

    prop_saliency = np.asarray(prop_saliency)
    doa = np.asarray(doa)
    regions = np.asarray(regions)

    # keep the top-k regions ranked by saliency
    idx = np.argsort(prop_saliency)[::-1][:args.top_k]
    prop_saliency = prop_saliency[idx]
    doa = doa[idx]
    regions = regions[valid_idx][idx]

    return regions, prop_saliency, doa

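# Minimal usage sketch (hypothetical: it assumes the heatmap `upsampled_mask`
# returned by compute_heatmap below, plus the script-level globals that
# eval_regions relies on — model, img, blurred_img, target_shape, args,
# HAS_CUDA):
#
# saliency = 1. - upsampled_mask.data.cpu().numpy()[0, 0]    # high = important
# bw = np.uint8(saliency > 0.5) * 255                        # binarize at 0.5
# regions, prop_saliency, doa = eval_regions(bw, thr=0.005)  # drop tiny regions
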
def fit(self,
        x: Union[np.ndarray, torch.Tensor],
        y: Union[np.ndarray, torch.Tensor],
        lam: Union[float, int],
        max_iters: int = 1000,
        weight: List[Union[int, float]] = None,
        smooth: Union[float, int] = 0):
    """Fits the GAM model."""
    x = remove_intercept(x)
    x = numpy_to_torch(x)
    y = numpy_to_torch(y)
    x = self.normalize(x)
    x_basis = self.basis_expansion_(x, self.df, self.degree)
    group_size = [self.df] * x.shape[1]
    self.beta = self.solve(x_basis, y, lam, group_size, max_iters, weight, smooth=smooth)
    return self

def fit_gic(self,
            x: Union[np.ndarray, torch.Tensor],
            y: Union[np.ndarray, torch.Tensor],
            num_lams: int,
            max_iters: int = 1000,
            an: Union[int, float] = None,
            smooth: Union[int, float] = 0):
    """Fits the group lasso and selects lambda by GIC."""
    x = numpy_to_torch(x)
    y = numpy_to_torch(y)
    x = remove_intercept(x)
    x = self.normalize(x)
    x_basis = self.basis_expansion_(x, self.df, self.degree)
    group_size = [self.df] * x.shape[1]
    x_basis, group_size = add_intercept(x_basis, group_size)
    result = self.fit_path(x_basis, y, group_size, num_lams, max_iters, smooth=smooth)
    best_gic = np.inf
    if an is None:
        # default GIC rate: df * log(log(n)) * log(p) / n
        an = self.df * np.log(np.log(x.shape[0])) * np.log(x.shape[1]) / x.shape[0]
    for lam in result.keys():
        gic = self.compute_gic(x_basis, y, result[lam], an, group_size)
        # print(f"lam: {lam}, gic: {gic}")
        if gic < best_gic:
            best_lam = lam
            best_beta = result[lam]
            best_gic = gic
    self.beta_gic = best_beta
    self.beta = best_beta
    print(f"The best lam {best_lam} and the best gic {best_gic}.")
    return self

def predict(self, x: Union[np.ndarray, torch.Tensor]):
    """Predicts the response for x."""
    x = numpy_to_torch(x)
    x = remove_intercept(x)
    x = self.normalize_test(x)
    x_basis = self.basis_expansion_(x, self.df, self.degree)
    x_basis = add_intercept(x_basis)
    eta = torch.matmul(x_basis, self.beta)
    if self.data_class == 'regression':
        return eta
    elif self.data_class == 'classification':
        return torch.where(sigmoid(eta) > 0.5,
                           torch.ones(len(eta)),
                           torch.zeros(len(eta)))
    elif self.data_class == 'gamma':
        return torch.exp(-eta)
    else:
        return torch.round(torch.exp(eta))

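# Response scale returned by predict, as coded above (note that
# 'classification' returns hard 0/1 labels rather than probabilities,
# and 'gamma' uses exp(-eta) rather than a log link):
#   regression:      eta
#   classification:  1{sigmoid(eta) > 0.5}
#   gamma:           exp(-eta)
#   otherwise:       round(exp(eta))   # e.g. Poisson counts
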
def plot_functions(self, x: Union[np.ndarray, torch.Tensor], cols: int = 5):
    """Plots the estimated component functions of the selected variables."""
    x = numpy_to_torch(x)
    x = remove_intercept(x)
    x_n = self.normalize_test(x)
    x_basis = self.basis_expansion_(x_n, self.df, self.degree)
    beta = self.beta[1:]  # drop the intercept
    nz, nzs = compute_nonzeros(beta, [self.df] * x.shape[1])
    nrows = nz // cols + 1
    fig, ax = plt.subplots(nrows=nrows, ncols=cols, figsize=(20, 12))
    # back to the original scale, assuming inputs were transformed by log(x + 0.1)
    x_o = torch.exp(x) - 0.1
    for i, j in enumerate(nzs):
        eta = torch.matmul(x_basis[:, self.df * j:self.df * (j + 1)],
                           beta[self.df * j:self.df * (j + 1)].double())
        ax.flatten()[i].scatter(x_o.detach().numpy()[:, j], eta.detach().numpy())
        ax.flatten()[i].title.set_text(f"Variable {j + 1}")
    plt.show()

def compute_heatmap(model, original_img, params, mask_init,
                    use_cuda=False, gpu_id=0, verbose=False):
    '''Compute image heatmaps according to:
    https://arxiv.org/abs/1704.03296
    Interpretable Explanations of Black Boxes by Meaningful Perturbation

    Params:
        model        : deep neural network or other black box model, e.g. VGG
        original_img : input image, RGB-8bit
        params       : namedtuple/recordclass of settings
        mask_init    : initial heatmap
        use_cuda     : enable/disable GPU usage
    '''
    # scale between 0 and 1 with 32-bit color depth
    img = np.float32(original_img) / 255

    # generate a perturbated version of the input image
    blurred_img_numpy = cv2.GaussianBlur(img, (11, 11), 10)

    # prepare images to feed to the model
    img = utils.preprocess_image(img, use_cuda, gpu_id=gpu_id)  # original image
    blurred_img = utils.preprocess_image(
        blurred_img_numpy, use_cuda, gpu_id=gpu_id)  # blurred version of input image
    mask = utils.numpy_to_torch(mask_init, use_cuda=use_cuda, gpu_id=gpu_id)  # init mask

    upsample = torch.nn.Upsample(size=params.target_shape, mode='bilinear')
    blur = utils.BlurTensor(use_cuda, gpu_id=gpu_id)
    if use_cuda:
        upsample = upsample.cuda(gpu_id)

    # optimize only the heatmap
    optimizer = torch.optim.Adam([mask], lr=params.learning_rate)

    # compute the target output
    target_preds = model(img)
    targets = torch.nn.Softmax(dim=1)(target_preds)
    category, target_prob, label = utils.get_class_info(targets)
    if verbose:
        print("Category with highest probability:", (label, category, target_prob))

    if params.target_id is not None and category != params.target_id:
        print("Wrong classification! Skipping")
        return None

    loss_history = []
    if verbose:
        print("Optimizing.. ")

    for i in range(params.max_iterations):
        # upsample the mask before using it: the single-channel mask is
        # duplicated to 3 channels since it perturbs a 224x224 RGB image.
        # NOTE: the upsampled mask is only used to compute the
        # perturbation on the input image
        upsampled_mask = upsample(mask)
        if params.blur:
            upsampled_mask = blur(upsampled_mask, 5)
        upsampled_mask = upsampled_mask.expand(1, 3, *params.target_shape)

        # use the (upsampled) mask to perturbate the input image:
        # blend the blurred image and the original (scaled) image
        # according to the current (upsampled) mask
        perturbated_input = img.mul(upsampled_mask) + \
            blurred_img.mul(1 - upsampled_mask)

        # gaussian noise is added to the preprocessed image at each
        # iteration, inspired by Google's SmoothGrad
        # https://arxiv.org/abs/1706.03825
        # https://pair-code.github.io/saliency/
        noise = np.zeros(params.target_shape + (3,), dtype=np.float32)
        if params.noise_sigma != 0:
            noise = noise + cv2.randn(noise, 0., params.noise_sigma)
        noise = utils.numpy_to_torch(noise, use_cuda=use_cuda, gpu_id=gpu_id)
        noisy_perturbated_input = perturbated_input + noise * params.noise_scale

        # compute current prediction
        preds = model(noisy_perturbated_input)
        outputs = torch.nn.Softmax(dim=1)(preds)

        # compute the loss and the regularizers
        class_loss = outputs[0, category]
        l1_loss = params.l1_coeff * l1_reg(mask)
        tv_loss = params.tv_coeff * tv_reg(mask, params.tv_beta)
        lasso_loss = params.lasso_coeff * lasso_reg(mask)
        less_loss = params.less_coeff * less_reg(preds, target_preds)
        losses = [class_loss, l1_loss, tv_loss, lasso_loss, less_loss]
        total_loss = sum(losses)

        # convert loss tensors to scalars
        losses = [total_loss.data.cpu().squeeze().numpy()[0]] + \
                 [l.data.cpu().numpy()[0] for l in losses]
        loss_history.append(losses)

        # update the optimization process
        optimizer.zero_grad()
        total_loss.backward()
        optimizer.step()

        # optional: clamping seems to give better results;
        # should be useless, but numerical issues happen
        mask.data.clamp_(0, 1)

    # upsample the computed final mask
    upsampled_mask = upsample(mask)
    if params.blur:
        upsampled_mask = blur(upsampled_mask, 5)
    perturbated_input = img.mul(upsampled_mask) + \
        blurred_img.mul(1 - upsampled_mask)

    # compute the prediction probabilities before
    # and after the perturbation and masking
    outputs = torch.nn.Softmax(dim=1)(model(perturbated_input))
    output_prob = outputs[0, category].data.cpu().squeeze().numpy()[0]

    # compute the prediction on the completely blurred image
    outputs = torch.nn.Softmax(dim=1)(model(blurred_img))
    blurred_prob = outputs[0, category].data.cpu().squeeze().numpy()[0]

    return (upsampled_mask, blurred_img_numpy, target_prob, output_prob,
            blurred_prob, np.asarray(loss_history), category)

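# Minimal usage sketch for compute_heatmap. This is hypothetical: the
# settings container is built here with namedtuple from exactly the fields
# the function reads; the model choice, coefficient values, and image path
# are placeholders, not values from this repository.
#
# from collections import namedtuple
# import cv2
# import numpy as np
# import torch
# import torchvision.models as models
#
# Params = namedtuple('Params', [
#     'target_shape', 'learning_rate', 'max_iterations', 'target_id',
#     'blur', 'noise_sigma', 'noise_scale', 'tv_beta',
#     'l1_coeff', 'tv_coeff', 'lasso_coeff', 'less_coeff'])
# params = Params(target_shape=(224, 224), learning_rate=0.1,
#                 max_iterations=500, target_id=None, blur=True,
#                 noise_sigma=0.2, noise_scale=1.0, tv_beta=3,
#                 l1_coeff=0.01, tv_coeff=0.2, lasso_coeff=0.01,
#                 less_coeff=0.0)
#
# model = models.vgg19(pretrained=True).eval()
# original_img = cv2.imread('examples/input.jpg')          # placeholder path
# original_img = cv2.resize(original_img, params.target_shape)
# mask_init = 0.5 * np.ones((28, 28), dtype=np.float32)    # low-res init mask
#
# result = compute_heatmap(model, original_img, params, mask_init,
#                          use_cuda=torch.cuda.is_available(), verbose=True)
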
def compute_heatmap_using_superpixels(model, original_img, params, mask_init=None,
                                      use_cuda=False, gpu_id=0, verbose=False):
    """Variant of compute_heatmap that optimizes one value per SLIC
    superpixel instead of a low-resolution pixel mask."""
    img = np.float32(original_img) / 255
    blurred_img_numpy = cv2.GaussianBlur(img, (11, 11), 10)

    # associate to each pixel the id of the corresponding superpixel
    segm_img = slic(img.copy()[:, :, ::-1], n_segments=2000, compactness=10, sigma=0.5)
    s2p = Superpixel2Pixel(segm_img, use_cuda, gpu_id=gpu_id)

    # generate the superpixel initialization vector
    nb_segms = np.max(segm_img) + 1
    segm_init = np.zeros((nb_segms,), dtype=np.float32)
    if mask_init is None:
        segm_init = segm_init + 0.5
    else:
        for i in range(nb_segms):
            segm_init[i] = np.mean(mask_init[segm_img == i])
            # segm_init[i] = 0.5 if segm_init[i] < 0.5 else segm_init[i]

    blur = utils.BlurTensor(use_cuda, gpu_id=gpu_id)

    # create the superpixel mask variable
    if use_cuda:
        segm = Variable(torch.from_numpy(segm_init).cuda(gpu_id), requires_grad=True)
    else:
        segm = Variable(torch.from_numpy(segm_init), requires_grad=True)

    img = utils.preprocess_image(img, use_cuda, gpu_id=gpu_id)  # original image
    blurred_img = utils.preprocess_image(
        blurred_img_numpy, use_cuda, gpu_id=gpu_id)  # blurred version of input image

    optimizer = torch.optim.Adam([segm], lr=params.learning_rate)

    target_preds = model(img)
    targets = torch.nn.Softmax(dim=1)(target_preds)
    category, target_prob, label = utils.get_class_info(targets)
    if verbose:
        print("Category with highest probability:", (label, category, target_prob))

    loss_history = []
    if verbose:
        print("Optimizing.. ")

    for i in range(params.max_iterations):
        upsampled_mask = s2p(segm).unsqueeze(0).unsqueeze(0)
        if params.blur:
            upsampled_mask = blur(upsampled_mask, 5)
        upsampled_mask = upsampled_mask.expand(1, 3, *params.target_shape)

        perturbated_input = img.mul(upsampled_mask) + \
            blurred_img.mul(1 - upsampled_mask)

        noise = np.zeros(params.target_shape + (3,), dtype=np.float32)
        if params.noise_sigma != 0:
            noise = noise + cv2.randn(noise, 0., params.noise_sigma)
        noise = utils.numpy_to_torch(noise, use_cuda=use_cuda, gpu_id=gpu_id)
        noisy_perturbated_input = perturbated_input + noise * params.noise_scale

        preds = model(noisy_perturbated_input)
        outputs = torch.nn.Softmax(dim=1)(preds)

        # regularize the superpixel values directly; only the TV term
        # uses the upsampled (pixel-space) mask
        current_mask = segm  # upsampled_mask
        class_loss = outputs[0, category]
        l1_loss = params.l1_coeff * l1_reg(current_mask)
        tv_loss = params.tv_coeff * tv_reg(upsampled_mask, params.tv_beta)
        lasso_loss = params.lasso_coeff * lasso_reg(current_mask)
        less_loss = params.less_coeff * less_reg(preds, target_preds)
        losses = [class_loss, l1_loss, tv_loss, lasso_loss, less_loss]
        total_loss = sum(losses)

        losses = [total_loss.data.cpu().squeeze().numpy()[0]] + \
                 [l.data.cpu().numpy()[0] for l in losses]
        loss_history.append(losses)

        optimizer.zero_grad()
        total_loss.backward()
        optimizer.step()

        segm.data.clamp_(0, 1)

    if params.blur:
        upsampled_mask = blur(upsampled_mask, 5)
    perturbated_input = img.mul(upsampled_mask) + \
        blurred_img.mul(1 - upsampled_mask)

    outputs = torch.nn.Softmax(dim=1)(model(perturbated_input))
    output_prob = outputs[0, category].data.cpu().squeeze().numpy()[0]

    outputs = torch.nn.Softmax(dim=1)(model(blurred_img))
    blurred_prob = outputs[0, category].data.cpu().squeeze().numpy()[0]

    return (upsampled_mask, blurred_img_numpy, target_prob, output_prob,
            blurred_prob, np.asarray(loss_history), category)

def solution_path(self, x: Union[np.ndarray, torch.Tensor],
                  y: Union[np.ndarray, torch.Tensor],
                  num_lams: int,
                  group_size: Union[int, List[int]],
                  max_iters: int = 1000,
                  smooth: Union[float, int] = 0,
                  recompute_hg: bool = True,
                  weight: List[Union[int, List[int]]] = None) \
        -> (List[torch.Tensor], List[float]):
    """
    Fits the model along a path of lambdas.

    :param x: the design matrix
    :param y: the response
    :param num_lams: number of lambdas
    :param group_size: list of group sizes, or a single group size if all
        groups are of the same size
    :param max_iters: the maximum number of iterations
    :param smooth: smoothness parameter
    :param recompute_hg: whether to recompute hg
    :param weight: feature weights
    :return: coefficients along the path and the corresponding lambdas
    """
    x = numpy_to_torch(x)
    y = numpy_to_torch(y)
    self.group_size = group_size
    if isinstance(group_size, int):
        group_size = [1] + [group_size] * (x.shape[1] // group_size)
    if weight is None:
        weight = [0] + [1] * len(group_size)
    weights = [np.sqrt(group_size[i]) * weight[i] for i in range(len(group_size))]
    assert np.sum(group_size) == x.shape[1], \
        "Sum of group sizes does not match the number of variables."

    betas = []
    # largest lambda that keeps every penalized group at zero, padded slightly
    lam_max = self.find_max_lambda(x, y, weights[1:], group_size[1:])
    lam_max *= (1 + 1 / num_lams * 10)
    lams = list(np.linspace(0, lam_max, num_lams))
    lams.remove(0)
    lams.sort(reverse=True)

    lam_last = None
    for lam in lams:
        if not betas:
            # at lam_max only the intercept is active: start from the null model
            beta_full = torch.tensor([self.null_estimate(y)] + [0] * (sum(group_size) - 1))
            betas.append(beta_full)
            lam_last = lam
        else:
            beta = betas[-1]
            # screen groups with the strong rule, then solve on the survivors
            strong_index = self.strong_rule(x, y, beta, group_size, lam, lam_last, weights)
            x_s, group_size_s, weight_s = self.strong_x(x, group_size, strong_index, weights)
            beta_s = self.solve(x_s, y, lam, group_size_s, max_iters, weight_s,
                                smooth, recompute_hg, weight_multiplied=True)
            beta_full = self.strong_to_full_beta(beta_s, group_size, strong_index)
            # re-solve until no screened-out group violates the KKT conditions
            v = self.fail_kkt(x, y, beta_full, group_size, lam, strong_index, weights)
            while len(v) > 0:
                strong_index = list(set(strong_index + v))
                x_s, group_size_s, weight_s = self.strong_x(x, group_size, strong_index, weights)
                beta_s = self.solve(x_s, y, lam, group_size_s, max_iters, weight_s,
                                    smooth, recompute_hg, weight_multiplied=True)
                beta_full = self.strong_to_full_beta(beta_s, group_size, strong_index)
                v = self.fail_kkt(x, y, beta_full, group_size, lam, strong_index, weights)
            betas.append(beta_full)
            lam_last = lam
            num_nz, nz = compute_nonzeros(beta_full, group_size)
            print(f"Fitted lam = {lam}, {num_nz - 1} nonzero variables {nz}")
            # stop early once the active set grows past 2n parameters
            if sum([group_size[i] for i in nz]) > 2 * x.shape[0]:
                lams = lams[:lams.index(lam) + 1]
                break
    return betas, lams

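# Note on the screening above. The exact criterion lives in
# self.strong_rule (not shown in this excerpt), but it is a sequential
# strong rule in the sense of Tibshirani et al. (2012),
# https://arxiv.org/abs/1011.2234: when moving from lam_last to the
# smaller lam, group g stays a candidate roughly when its gradient norm
# at the previous solution is large,
#     ||grad_g|| >= w_g * (2 * lam - lam_last),
# and fail_kkt then checks the discarded groups, re-solving whenever any
# of them violates the KKT optimality conditions.
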
def solve(self, x: Union[np.ndarray, torch.Tensor],
          y: Union[np.ndarray, torch.Tensor],
          lam: Union[float, int],
          group_size: Union[int, List[int]],
          max_iters: int = 1000,
          weight: List[Union[int, List[int]]] = None,
          smooth: Union[float, int] = 0,
          recompute_hg: bool = True,
          beta_warm: torch.Tensor = None,
          weight_multiplied: bool = False) -> torch.Tensor:
    """
    Fits the model with a user-specified lambda.

    :param x: the design matrix
    :param y: the response
    :param lam: the lambda for group lasso
    :param group_size: list of group sizes, or a single group size if all
        groups are of the same size
    :param weight: feature weights
    :param max_iters: the maximum number of iterations
    :param smooth: smoothness parameter
    :param recompute_hg: whether to recompute hg
    :param beta_warm: warm start of beta
    :param weight_multiplied: whether the sqrt(group size) factor is already
        folded into the weights
    :return: coefficients
    """
    if isinstance(group_size, int):
        group_size = [group_size] * (x.shape[1] // group_size)
    assert np.sum(group_size) == x.shape[1], \
        f"Sum of group sizes {sum(group_size)} does not match number of variables {x.shape[1]}."
    assert lam >= 0, "Tuning parameter lam must be non-negative."

    # initialize parameters
    self.smoothness_penalty = smooth
    x = numpy_to_torch(x)
    y = numpy_to_torch(y)
    x, y = check_xy(x, y)
    x, group_size = add_intercept(x, group_size)
    if weight is None:
        weight = [1] * len(group_size)
    if not weight_multiplied:
        weights = [np.sqrt(group_size[i]) * weight[i] for i in range(len(group_size))]
    else:
        weights = weight[:]
    x1 = x.clone()
    # x1, self.R = self.group_orthogonalization(x, group_size)
    beta, error, iters, loss = self.initialize(group_size)
    if beta_warm is not None and beta_warm.shape == beta.shape:
        beta = beta_warm
    intercept_err = np.inf
    beta_old = beta.clone()
    num_groups = len(group_size)
    hg = None

    # block coordinate descent over the groups
    while (error > self.tol or intercept_err > self.tol) and iters <= max_iters:
        iters += 1
        for g in range(num_groups):
            group_idx_start, group_idx_end = self.find_group_index(group_size, g)
            if recompute_hg or hg is None or g <= 2:
                hg = self.compute_hg(x1, y, beta, group_idx_start, group_idx_end)
            derivative = self.compute_grad(x1, y, beta)
            if g == 0:
                # intercept group: gradient step with line search
                d = self.compute_d(False, derivative, beta, lam,
                                   group_idx_start, group_idx_end, hg)
                alpha = self.line_search(x1, y, beta, d, group_size, g, lam)
                beta = beta + alpha * d
            else:
                # penalized group: closed-form quadratic-majorization update
                beta[group_idx_start:group_idx_end] = self.close_form_QM(
                    beta, derivative, hg, lam, group_idx_start, group_idx_end,
                    weights[g], smooth)
        error = torch.norm(beta[1:] - beta_old[1:])
        intercept_err = abs(beta[0].detach().numpy() - beta_old[0].detach().numpy())
        beta_old = beta.clone()
    # beta = self.group_orthogonalization_inverse(beta, self.R, group_size)
    return beta

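# Minimal usage sketch for solve on synthetic grouped data (hypothetical:
# the enclosing class is instantiated here as `model`; its name and
# constructor are not shown in this excerpt):
#
# import numpy as np
#
# rng = np.random.default_rng(0)
# n, groups, size = 100, 6, 4                    # 6 groups of 4 features
# x = rng.standard_normal((n, groups * size))
# beta_true = np.zeros(groups * size)
# beta_true[:size] = 1.0                         # only the first group is active
# y = x @ beta_true + 0.1 * rng.standard_normal(n)
#
# beta = model.solve(x, y, lam=0.1, group_size=size)
# # add_intercept prepends an intercept, so len(beta) == 1 + groups * size
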
# _, mask_bw_ref = cv2.threshold(mask_bw_ref, 132, 255, cv2.THRESH_BINARY)
_, mask_bw_ref = cv2.threshold(mask_bw_ref, 204, 255, cv2.THRESH_BINARY)

mask_numpy = cv2.imread(os.path.join(d, 'sharp/mask.png'), 1)
mask_numpy = 1. - np.float32(mask_numpy) / 255  # binarize mask

# invert the image to correctly compute the regions
mask_bw = np.uint8(255. - mask_numpy[:, :, 0] * 255.)
_, mask_bw = cv2.threshold(mask_bw, 128, 255, cv2.THRESH_BINARY)

# convert images to torch tensors
img = utils.preprocess_image(scaled_img, use_cuda=HAS_CUDA)
blurred_img = utils.preprocess_image(blurred_img_numpy, use_cuda=HAS_CUDA)
mask = utils.numpy_to_torch(mask_numpy, use_cuda=HAS_CUDA)

if args.verbose:
    print('Computing classification confidence drop')

target_probs = predict(model, img, None)
category, target_prob, label = utils.get_class_info(target_probs)
if args.verbose:
    print('Category with highest probability:', (label, category, target_prob))

perturbated_input = img.mul(mask) + blurred_img.mul(1 - mask)
perturbated_prob = predict(model, perturbated_input, category)
if args.verbose:
    print('Confidence after perturbing the input image: {:.6f}'.format(
        perturbated_prob))