def test_s_test_sample(self):
    estimated_ihvp = s_test_sample(
        self.model,
        self.x_test,
        self.y_test,
        self.train_loader,
        gpu=self.gpu,
        damp=0.0,
        r=10,
        recursion_depth=10_000,
        batch_size=500,
    )
    flat_estimated_ihvp = parameters_to_vector(estimated_ihvp)

    print("LiSSA")
    self.assertTrue(self.check_estimation(flat_estimated_ihvp))

    print("Influence")
    inf_app, inf_rea = [], []
    for i, (x_train, y_train) in enumerate(
            self.model.train_dataloader(batch_size=1, shuffle=False)):
        grads_train = grad_z(x_train, y_train, self.model, gpu=self.gpu)
        flat_grads_train = parameters_to_vector(grads_train)
        inf_app.append(-torch.sum(flat_grads_train * flat_estimated_ihvp /
                                  len(self.model.training_set)).item())
        inf_rea.append(-torch.sum(flat_grads_train * self.real_ihvp /
                                  len(self.model.training_set)).item())
    np.save("influence.npy", {'inf_app': inf_app, 'inf_rea': inf_rea})
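# --- Hedged usage sketch (not part of the original test) ---
# The test above writes the approximate (LiSSA-based) and reference influences
# to "influence.npy". One minimal way to compare the two offline is sketched
# below; the choice of Pearson correlation as the comparison metric is an
# assumption, not something the test itself prescribes.
import numpy as np

results = np.load("influence.npy", allow_pickle=True).item()
inf_app = np.asarray(results["inf_app"])  # influences from the estimated IHVP
inf_rea = np.asarray(results["inf_rea"])  # influences from the exact IHVP

# A correlation close to 1 suggests the LiSSA estimate ranks training points
# similarly to the exact inverse-Hessian-vector product.
corr = np.corrcoef(inf_app, inf_rea)[0, 1]
print(f"Pearson correlation between approximate and exact influences: {corr:.4f}")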
def calc_grad_z(model, train_loader, save_pth=False, gpu=-1, start=0):
    """Calculates grad_z and can save the output to files. One grad_z
    should be computed for each training data sample.

    Arguments:
        model: pytorch model, for which grad_z should be calculated
        train_loader: pytorch dataloader, which can load the train data
        save_pth: Path, path where to save the grad_z files if desired.
            Omitting this argument will skip saving
        gpu: int, device id to use for GPU, -1 for CPU (default)
        start: int, index of the first training sample for which to
            calculate grad_z. Default is 0

    Returns:
        grad_zs: list of torch tensors, contains the grad_z tensors
        save_pth: Path, path where grad_z files were saved to, or False
            if they were not saved."""
    if save_pth and isinstance(save_pth, str):
        save_pth = Path(save_pth)
    if not save_pth:
        logging.info("ATTENTION: Not saving grad_z files!")

    grad_zs = []
    for i in range(start, len(train_loader.dataset)):
        z, t = train_loader.dataset[i]
        z = train_loader.collate_fn([z])
        t = train_loader.collate_fn([t])
        grad_z_vec = grad_z(z, t, model, gpu=gpu)
        if save_pth:
            grad_z_vec = [g.cpu() for g in grad_z_vec]
            torch.save(grad_z_vec, save_pth.joinpath(f"{i}.grad_z"))
        else:
            grad_zs.append(grad_z_vec)
        display_progress("Calc. grad_z: ", i - start,
                         len(train_loader.dataset) - start)

    return grad_zs, save_pth
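# --- Hedged usage sketch (not part of the library code) ---
# calc_grad_z is typically run once over the whole training set so that the
# per-sample gradients can be reused across many test points. The call below is
# a sketch only: `model`, `train_loader` and the cache directory are
# placeholders, and only the signature is taken from the function above.
#
#     grad_zs, save_pth = calc_grad_z(
#         model,                      # trained pytorch model
#         train_loader,               # DataLoader over the training set
#         save_pth="grad_z_cache",    # hypothetical directory; converted to a Path internally
#         gpu=-1,                     # CPU; pass a CUDA device id >= 0 otherwise
#         start=0,                    # resume from a later index after an interrupted run
#     )
#     # With save_pth set, the returned list stays empty and each gradient is
#     # written to grad_z_cache/{i}.grad_z, from where it can be restored with torch.load.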
def calc_influence_single(
    model,
    train_loader,
    test_loader,
    test_id_num,
    gpu,
    recursion_depth,
    r,
    s_test_vec=None,
    time_logging=False,
):
    """Calculates the influences of all training data points on a single
    test dataset image.

    Arguments:
        model: pytorch model
        train_loader: DataLoader, loads the training dataset
        test_loader: DataLoader, loads the test dataset
        test_id_num: int, id of the test sample for which to calculate the
            influence function
        gpu: int, identifies the gpu id, -1 for cpu
        recursion_depth: int, number of recursions to perform during s_test
            calculation, increases accuracy. r*recursion_depth should equal
            the training dataset size.
        r: int, number of iterations over which to average the h_estimate
            calculation; r*recursion_depth should equal the training dataset
            size.
        s_test_vec: list of torch tensor, contains s_test vectors. If left
            empty it will also be calculated
        time_logging: bool, if True, logs the time taken for each grad_z call

    Returns:
        influence: list of float, influences of all training data samples
            for one test sample
        harmful: list of int, indices of the training samples sorted by
            harmfulness
        helpful: list of int, indices of the training samples sorted by
            helpfulness
        test_id_num: int, the number of the test dataset point the
            influence was calculated for"""
    # Calculate s_test vectors if not provided
    if not s_test_vec:
        z_test, t_test = test_loader.dataset[test_id_num]
        z_test = test_loader.collate_fn([z_test])
        t_test = test_loader.collate_fn([t_test])
        s_test_vec = s_test_sample(
            model,
            z_test,
            t_test,
            train_loader,
            gpu,
            recursion_depth=recursion_depth,
            r=r,
        )

    # Calculate the influence function
    train_dataset_size = len(train_loader.dataset)
    influences = []
    for i in tqdm(range(train_dataset_size)):
        z, t = train_loader.dataset[i]
        z = train_loader.collate_fn([z])
        t = train_loader.collate_fn([t])

        if time_logging:
            time_a = datetime.datetime.now()
        grad_z_vec = grad_z(z, t, model, gpu=gpu)
        if time_logging:
            time_b = datetime.datetime.now()
            time_delta = time_b - time_a
            logging.info(f"Time for grad_z iter:"
                         f" {time_delta.total_seconds() * 1000}")
        with torch.no_grad():
            tmp_influence = (
                -sum([
                    ####################
                    # TODO: potential bottleneck, takes 17% of execution time
                    # torch.sum(k * j).data.cpu().numpy()
                    ####################
                    torch.sum(k * j).data
                    for k, j in zip(grad_z_vec, s_test_vec)
                ]) / train_dataset_size)

        influences.append(tmp_influence)
        # display_progress("Calc. influence function: ", i, train_dataset_size)

    harmful = np.argsort(influences)
    helpful = harmful[::-1]

    return influences, harmful.tolist(), helpful.tolist(), test_id_num
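# --- Hedged illustration (not part of the library code) ---
# The inner loop above implements the classic influence-function estimate
#     I(z, z_test) ~= -(1/n) * grad_L(z)^T * s_test,  with  s_test ~= H^{-1} grad_L(z_test),
# accumulated parameter-block by parameter-block. With flattened tensors the
# per-sample score reduces to a single dot product; the helper below is a
# hypothetical convenience wrapper (not an existing function in this repository)
# that assumes torch is already imported at module level.
def influence_from_flat_vectors(flat_grad_z: torch.Tensor,
                                flat_s_test: torch.Tensor,
                                train_dataset_size: int) -> float:
    """Dot-product form of the per-sample influence computed in the loop above."""
    return (-torch.dot(flat_grad_z, flat_s_test) / train_dataset_size).item()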
def setUpClass(cls) -> None:
    pl.seed_everything(0)

    cls.n_features = 10
    cls.n_params = 2 * cls.n_features

    cls.model = LinearRegression(cls.n_features)
    gpus = 1 if torch.cuda.is_available() else 0
    trainer = pl.Trainer(gpus=gpus, max_epochs=10)
    # trainer.fit(cls.model)
    print(tuple(cls.model.parameters()))

    use_sklearn = True
    if use_sklearn:
        train_dataset = DummyDataset(cls.n_features)
        clf = SklearnLR()
        clf.fit(train_dataset.data, train_dataset.targets)

        with torch.no_grad():
            cls.model.linear.weight = torch.nn.Parameter(
                torch.tensor([clf.coef_], dtype=torch.float))
            cls.model.linear.bias = torch.nn.Parameter(
                torch.tensor([clf.intercept_], dtype=torch.float))

    cls.train_loader = cls.model.train_dataloader(batch_size=40000)

    # Setup test point data
    cls.test_idx = 8
    cls.x_test = torch.tensor([cls.model.test_set.data[[cls.test_idx]]],
                              dtype=torch.float)
    cls.y_test = torch.tensor([cls.model.test_set.targets[[cls.test_idx]]],
                              dtype=torch.float)

    # Compute estimated IHVP
    cls.gpu = 1 if torch.cuda.is_available() else -1

    # Compute and flatten grad
    grads = grad_z(cls.x_test, cls.y_test, cls.model, gpu=cls.gpu)
    flat_grads = parameters_to_vector(grads)

    print("Grads:")
    print(flat_grads)

    # Make model functional
    params, names = make_functional(cls.model)
    # Make params regular Tensors instead of nn.Parameter
    params = tuple(p.detach().requires_grad_() for p in params)
    flat_params = parameters_to_vector(params)

    # Initialize Hessian
    h = torch.zeros([flat_params.shape[0], flat_params.shape[0]])

    # Compute real IHVP
    for x_train, y_train in cls.train_loader:
        if cls.gpu >= 0:
            x_train, y_train = x_train.cuda(), y_train.cuda()

        def f(flat_params_):
            split_params = tensor_to_tuple(flat_params_, params)
            load_weights(cls.model, names, split_params)
            out = cls.model(x_train)
            loss = calc_loss(out, y_train)
            return loss

        batch_h = hessian(f, flat_params, strict=True)

        with torch.no_grad():
            h += batch_h / float(len(cls.train_loader))

    print("Hessian:")
    print(h)

    complete_x_train = cls.train_loader.dataset.data
    # Closed-form Hessian of the MSE loss, weights block only: 2 * X^T X / n
    real_hessian = (complete_x_train.T @ complete_x_train /
                    complete_x_train.shape[0] * 2)
    print(real_hessian)

    print(np.linalg.norm(real_hessian - h.cpu().numpy()[:10, :10]))

    np.save("hessian_pytorch.npy", h.cpu().numpy())

    # Make the model back `nn`
    with torch.no_grad():
        load_weights(cls.model, names, params, as_params=True)

    inv_h = torch.inverse(h)
    print("Inverse Hessian")
    print(inv_h)
    cls.real_ihvp = inv_h @ flat_grads
    print("Real IHVP")
    print(cls.real_ihvp)
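# --- Hedged sanity-check sketch (not part of the test suite) ---
# The setup above compares the autograd Hessian against the closed-form MSE
# Hessian 2 * X^T X / n (weights block only). The standalone snippet below
# reproduces that identity on random data; the shapes and the plain MSE loss
# are assumptions chosen to mirror the linear-regression test, not taken from
# the repository.
import torch
from torch.autograd.functional import hessian as torch_hessian

n, d = 64, 10
X, y = torch.randn(n, d), torch.randn(n)
w = torch.zeros(d, requires_grad=True)

def mse(w_):
    return torch.mean((X @ w_ - y) ** 2)

auto_h = torch_hessian(mse, w)      # autograd Hessian of the MSE loss
closed_h = 2 * X.T @ X / n          # closed-form Hessian w.r.t. the weights
print(torch.allclose(auto_h, closed_h, atol=1e-5))  # expected: True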
def setUpClass(cls) -> None:
    pl.seed_everything(0)

    cls.n_features = 10
    cls.n_classes = 3
    cls.n_params = cls.n_classes * cls.n_features + cls.n_features

    cls.wd = wd = 1e-2  # weight decay; wd = 1 / (n * C) for sklearn's C
    cls.model = LogisticRegression(cls.n_classes, cls.n_features, wd=cls.wd)

    gpus = 1 if torch.cuda.is_available() else 0
    trainer = pl.Trainer(gpus=gpus, max_epochs=10)
    # trainer.fit(cls.model)

    use_sklearn = True
    if use_sklearn:
        cls.train_dataset = cls.model.training_set  # DummyDataset(cls.n_features, cls.n_classes)

        multi_class = "multinomial" if cls.model.n_classes != 2 else "auto"
        clf = SklearnLogReg(C=1 / len(cls.train_dataset) / wd,
                            tol=1e-8,
                            max_iter=1000,
                            multi_class=multi_class)
        clf.fit(cls.train_dataset.data, cls.train_dataset.targets)

        with torch.no_grad():
            cls.model.linear.weight = torch.nn.Parameter(
                torch.tensor(clf.coef_, dtype=torch.float))
            cls.model.linear.bias = torch.nn.Parameter(
                torch.tensor(clf.intercept_, dtype=torch.float))

    # Setup test point data
    cls.test_idx = 5
    cls.x_test = torch.tensor(cls.model.test_set.data[[cls.test_idx]],
                              dtype=torch.float)
    cls.y_test = torch.tensor(cls.model.test_set.targets[[cls.test_idx]],
                              dtype=torch.long)

    # Compute estimated IHVP
    cls.gpu = 1 if torch.cuda.is_available() else -1

    if cls.gpu >= 0:
        cls.model = cls.model.cuda()
        cls.x_test = cls.x_test.cuda()
        cls.y_test = cls.y_test.cuda()

    cls.train_loader = cls.model.train_dataloader(batch_size=40000)

    # Compute and flatten grad
    grads = grad_z(cls.x_test, cls.y_test, cls.model, gpu=cls.gpu)
    flat_grads = parameters_to_vector(grads)

    print("Grads:")
    print(flat_grads)

    # Make model functional
    params, names = make_functional(cls.model)
    # Make params regular Tensors instead of nn.Parameter
    params = tuple(p.detach().requires_grad_() for p in params)
    flat_params = parameters_to_vector(params)

    # Initialize Hessian
    h = torch.zeros([flat_params.shape[0], flat_params.shape[0]])
    if cls.gpu == 1:
        h = h.cuda()

    # Compute real IHVP
    for x_train, y_train in cls.train_loader:
        if cls.gpu >= 0:
            x_train, y_train = x_train.cuda(), y_train.cuda()

        f = make_loss_f(cls.model, params, names, x_train, y_train, wd=wd)

        batch_h = hessian(f, flat_params, strict=True)

        with torch.no_grad():
            h += batch_h / float(len(cls.train_loader))

    # Symmetrize to remove numerical asymmetry before inversion
    h = (h + h.transpose(0, 1)) / 2

    print("Hessian:")
    print(h)

    np.save("hessian_pytorch.npy", h.cpu().numpy())

    from numpy import linalg as LA
    ei = LA.eig(h.cpu().numpy())[0]
    print('ei=', ei)
    print("max,min eigen value=", ei.max(), ei.min())
    assert ei.min() > 0, "Error: Non-positive Eigenvalues"

    # Make the model back `nn`
    with torch.no_grad():
        load_weights(cls.model, names, params, as_params=True)

    inv_h = torch.inverse(h)
    print("Inverse Hessian")
    print(inv_h)
    cls.real_ihvp = inv_h @ flat_grads

    print("Real IHVP")
    print(cls.real_ihvp)
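# --- Hedged note on the sklearn regularisation strength (not part of the test) ---
# The choice C = 1 / (n * wd) above makes sklearn's objective
#     (1/2) * ||w||^2 + C * sum_i loss_i
# equivalent, up to an overall constant factor, to the per-sample weight-decay
# objective the PyTorch LogisticRegression module is assumed to use,
#     (1/n) * sum_i loss_i + (wd/2) * ||w||^2,
# so both optimisers should converge to the same parameters.
# Worked example: with n = 1000 training points and wd = 1e-2,
#     C = 1 / (1000 * 1e-2) = 0.1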
def calc_influence_single(
    model,
    train_loader,
    test_loader,
    test_id_num,
    gpu,
    recursion_depth,
    r,
    damp=0.01,
    scale=25,
    s_test_vec=None,
    time_logging=False,
    exact=False,
    batch_size=1,
):
    """Calculates the influences of all training data points on a single
    test dataset image.

    Arguments:
        model: pytorch model
        train_loader: DataLoader, loads the training dataset
        test_loader: DataLoader, loads the test dataset
        test_id_num: int or list of int, id(s) of the test sample(s) for which
            to calculate the influence function
        gpu: int, identifies the gpu id, -1 for cpu
        recursion_depth: int, number of recursions to perform during s_test
            calculation, increases accuracy. r*recursion_depth should equal
            the training dataset size.
        r: int, number of iterations over which to average the h_estimate
            calculation; r*recursion_depth should equal the training dataset
            size.
        damp: float, damping factor used in the s_test estimation
        scale: float, scaling factor used in the s_test estimation
        s_test_vec: list of torch tensor, contains s_test vectors. If left
            empty it will also be calculated
        time_logging: bool, if True, logs the time taken for each grad_z call
        exact: bool, forwarded to s_test_sample to select the exact instead of
            the approximate s_test computation
        batch_size: int, batch size forwarded to s_test_sample

    Returns:
        influence: array of float, influences of all training data samples for
            one test sample, i.e. the predicted change in test loss after
            removing the corresponding training sample
        harmful: list of int, indices of the training samples sorted by
            harmfulness
        helpful: list of int, indices of the training samples sorted by
            helpfulness
        test_id_num: int or list of int, the id of the test dataset points the
            influence was calculated for"""
    # Calculate s_test vectors if not provided
    if s_test_vec is None:
        if isinstance(test_id_num, int):
            test_id_num = [test_id_num]
        z_test, t_test = list(
            zip(*[test_loader.dataset[i] for i in test_id_num]))
        z_test = test_loader.collate_fn(z_test)
        t_test = test_loader.collate_fn(t_test)

        s_test_vec = s_test_sample(
            model,
            z_test,
            t_test,
            train_loader,
            gpu,
            recursion_depth=recursion_depth,
            r=r,
            damp=damp,
            scale=scale,
            exact=exact,
            batch_size=batch_size,
        )

    # Calculate the influence function
    train_dataset_size = len(train_loader.dataset)
    loss_diffs = []  # predicted value of new loss - original loss
    for i in tqdm(range(train_dataset_size)):
        z, t = train_loader.dataset[i]
        z = train_loader.collate_fn([z])
        t = train_loader.collate_fn([t])

        if time_logging:
            time_a = datetime.datetime.now()
        grad_z_vec = grad_z(z, t, model, gpu=gpu)
        if time_logging:
            time_b = datetime.datetime.now()
            time_delta = time_b - time_a
            logging.info(f"Time for grad_z iter:"
                         f" {time_delta.total_seconds() * 1000}")
        with torch.no_grad():
            tmp_loss_diff = (
                sum([
                    ####################
                    # TODO: potential bottleneck, takes 17% of execution time
                    # torch.sum(k * j).data.cpu().numpy()
                    ####################
                    torch.sum(k * j).data
                    for k, j in zip(grad_z_vec, s_test_vec)
                ]) / train_dataset_size)

        loss_diffs.append(tmp_loss_diff.item())

    harmful = np.argsort(loss_diffs)
    helpful = harmful[::-1]

    return np.array(loss_diffs), harmful.tolist(), helpful.tolist(), test_id_num
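# --- Hedged usage sketch (not part of the library code) ---
# Example call computing influences for two test points at once. The loaders
# and hyperparameter values are placeholders: only the signature above is
# assumed, r * recursion_depth should roughly match the training set size, and
# damp/scale tune the s_test estimation.
#
#     loss_diffs, harmful, helpful, test_ids = calc_influence_single(
#         model,
#         train_loader,
#         test_loader,
#         test_id_num=[0, 1],     # ids of the test samples
#         gpu=-1,
#         recursion_depth=1000,
#         r=10,
#         damp=0.01,
#         scale=25,
#     )
#     # harmful[0] is the index of the training point whose removal is predicted
#     # to decrease the test loss the most; helpful[0] the one predicted to
#     # increase it the most.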