def forward_naive(self):
    """Forward the perturbed input and accumulate the naive estimator term.

    Channel 0 of ``self.data`` (last axis) is scaled by
    ``1 + self.perturbation``; every other channel is copied unchanged.
    Samples are drawn from the model on this perturbed input without
    gradient tracking. For each sample, ``sample[:, target] * log_prob``
    is accumulated, averaged over the number of samples and finally summed
    over axis 0.

    NOTE(review): this looks like a score-function (REINFORCE-style)
    surrogate whose gradient w.r.t. ``self.perturbation`` is what the
    caller wants -- confirm against the caller.

    Returns:
        tuple: ``(sample_mu, aux_estimate)`` where ``sample_mu`` is the
        second value returned by ``attack_utils.forward_model`` and
        ``aux_estimate`` is the reduced estimator term described above.
    """
    device = self.params.device

    # Allocate directly on the target device rather than creating the
    # buffer on the default device and copying it over afterwards.
    perturbed_data = torch.zeros(self.data.shape, device=device)
    perturbed_data[:, :, 0] = self.data[:, :, 0] * (1 + self.perturbation)
    perturbed_data[:, :, 1:] = self.data[:, :, 1:]

    # NOTE(review): `model` is resolved as a module-level name here, not
    # `self.model` -- confirm that this is intended.
    # Only the sampling call runs under no_grad; the log-probability
    # accumulation below is kept outside of it (assumed to need gradients).
    with torch.no_grad():
        samples, sample_mu, _ = attack_utils.forward_model(
            model, perturbed_data, self.id_batch, self.v_batch,
            self.hidden, self.cell, self.params)

    # Advance the recurrent state over the first `test_predict_start`
    # entries along axis 0 of the perturbed input. Only the final
    # hidden/cell state is used afterwards.
    hidden = self.hidden
    cell = self.cell
    for t in range(self.params.test_predict_start):
        _, _, hidden, cell = model(
            perturbed_data[t].unsqueeze(0), self.id_batch, hidden, cell)

    # One accumulator entry per index along axis 1 of `self.data`.
    aux_estimate = torch.zeros(self.data.shape[1], device=device)
    n_samples = samples.shape[0]
    for sample_idx in range(n_samples):
        sample = samples[sample_idx]
        log_prob = attack_utils.forward_log_prob(
            model, sample, perturbed_data, self.id_batch, self.v_batch,
            hidden, cell, self.params)
        aux_estimate += sample[:, self.params.target] * log_prob

    aux_estimate /= float(n_samples)
    aux_estimate = aux_estimate.sum(0)

    return sample_mu, aux_estimate
def forward(self):
    """Run the model on the input with the current perturbation applied.

    Channel 0 of ``self.data`` (last axis) is scaled by
    ``1 + self.perturbation``; all remaining channels are copied as-is.
    The result is passed to ``attack_utils.forward_model``. No
    ``torch.no_grad`` is applied here; callers that want a gradient-free
    evaluation must wrap the call themselves.

    Returns:
        tuple: ``(samples, sample_mu, sample_sigma)`` exactly as produced
        by ``attack_utils.forward_model``.
    """
    # Allocate directly on the target device rather than creating the
    # buffer on the default device and copying it over afterwards.
    perturbed_data = torch.zeros(self.data.shape, device=self.params.device)
    perturbed_data[:, :, 0] = self.data[:, :, 0] * (1 + self.perturbation)
    perturbed_data[:, :, 1:] = self.data[:, :, 1:]

    # NOTE(review): `model` is resolved as a module-level name here, not
    # `self.model` -- confirm that this is intended.
    samples, sample_mu, sample_sigma = attack_utils.forward_model(
        model, perturbed_data, self.id_batch, self.v_batch,
        self.hidden, self.cell, self.params)

    return samples, sample_mu, sample_sigma
def attack_batch(self, data, id_batch, v_batch, labels, hidden, cell, estimator):
    """Attack one batch for every mode and every value in ``self.params.c``.

    For each mode (``"zero"`` then ``"double"``), the values of
    ``self.params.c`` are processed in groups of ``self.params.batch_c``.
    The inputs are tiled so that one ``AttackModule`` optimises all
    coefficients of a group at once. After ``self.params.n_iterations``
    optimisation steps, the group is split back per coefficient. For each
    tolerance index, the perturbation with the smallest distance among
    those whose norm is within ``self.params.tolerance[index]`` is kept
    per batch element. Finally the best perturbations are replayed on the
    un-tiled inputs to record the perturbed outputs.

    Args:
        data: Input tensor; ``data.shape[1]`` is used as the batch size
            and the tensor is tiled along axis 1.
        id_batch: Tensor tiled along axis 1.
        v_batch: Tensor tiled along axis 0.
        labels: Tensor tiled along axis 0; passed to
            ``AttackModule.generate_target``.
        hidden: Recurrent hidden state, tiled along axis 1.
        cell: Recurrent cell state, tiled along axis 1.
        estimator: ``"ours"`` or ``"naive"``; selects
            ``self.attack_step_ours`` or ``self.attack_step_naive``.

    Returns:
        tuple: ``(original_mu, original_sigma, best_c, best_perturbation,
        best_distance, perturbed_output_mu, perturbed_output_sigma,
        targets, lines)``. The ``best_*`` and ``perturbed_output_*`` values
        are dicts keyed by mode holding NumPy arrays whose first axis is
        the tolerance index. ``targets`` maps each mode to the last target
        slice seen. ``lines`` is a list of
        ``[estimator, mode, c, mean_norm, mean_distance]`` rows.

    Raises:
        ValueError: If ``estimator`` is neither ``"ours"`` nor ``"naive"``.
    """
    # Resolve the step function once, so an invalid estimator fails before
    # any model evaluation instead of inside the optimisation loop.
    if estimator == "ours":
        attack_step = self.attack_step_ours
    elif estimator == "naive":
        attack_step = self.attack_step_naive
    else:
        raise ValueError("No such estimator")

    batch_size = data.shape[1]
    device = self.params.device
    c_values = self.params.c
    n_tolerances = self.max_pert_len

    # Reference output on the unperturbed input.
    # NOTE(review): `model` is resolved as a module-level name here, while
    # `self.model` is used for AttackModule below -- confirm intended.
    with torch.no_grad():
        _, original_mu, original_sigma = attack_utils.forward_model(
            model, data, id_batch, v_batch, hidden, cell, self.params)

    # Result buffers: one dict entry per mode, first axis = tolerance index.
    shape = (n_tolerances,) + data.shape[:2]
    c_shape = (n_tolerances, data.shape[1])
    out_shape = (n_tolerances,) + original_mu.shape
    best_perturbation = {"double": np.zeros(shape), "zero": np.zeros(shape)}
    best_c = {"double": np.zeros(c_shape), "zero": np.zeros(c_shape)}
    best_distance = {"double": np.full(c_shape, np.inf),
                     "zero": np.full(c_shape, np.inf)}
    perturbed_output_mu = {"double": np.zeros(out_shape),
                           "zero": np.zeros(out_shape)}
    perturbed_output_sigma = {"double": np.zeros(out_shape),
                              "zero": np.zeros(out_shape)}

    modes = ["zero", "double"]
    targets = {}
    lines = []

    for mode in modes:
        print("mode",mode)

        # Loop on values of c to find a successful attack with minimum
        # perturbation, `batch_c` coefficients at a time.
        for i in range(0, len(c_values), self.params.batch_c):
            bound = min(self.params.batch_c, len(c_values) - i)
            print("c",self.params.c[i:i+bound])

            # Tile every input `bound` times so that copy j of the batch
            # is optimised with coefficient c_values[i + j].
            batched_data = data.repeat(1, bound, 1)
            batched_id_batch = id_batch.repeat(1, bound)
            batched_v_batch = v_batch.repeat(bound, 1)
            batched_labels = labels.repeat(bound)
            batched_hidden = hidden.repeat(1, bound, 1)
            batched_cell = cell.repeat(1, bound, 1)
            batched_c = torch.cat(
                [c_values[i + j] * torch.ones(batch_size, device=device)
                 for j in range(bound)],
                dim=0)

            attack_module = AttackModule(
                self.model, self.params, batched_c, batched_data,
                batched_id_batch, batched_v_batch, batched_hidden,
                batched_cell)
            batched_target = attack_module.generate_target(batched_labels, mode)
            optimizer = optim.Adam([attack_module.perturbation],
                                   lr=self.params.learning_rate)

            # Optimisation steps on the perturbation.
            for k in range(self.params.n_iterations):
                attack_step(attack_module, optimizer, k, batched_target)

            # Evaluate the attack: forward the perturbed input once more
            # without gradients, then score each coefficient separately.
            with torch.no_grad():
                _, batched_perturbed_output, _ = attack_module()

                for j in range(bound):
                    c = c_values[i + j]
                    left = batch_size * j
                    right = batch_size * (j + 1)

                    target = batched_target[left:right]
                    targets[mode] = target
                    perturbed_output = batched_perturbed_output[left:right]
                    # Keep this in its own local: rebinding the `v_batch`
                    # parameter here would leak into the next tiling step
                    # and into the final replay below.
                    v_batch_slice = batched_v_batch[left:right]

                    loss_fn = attack_utils.AttackLoss(self.params, c, v_batch_slice)
                    norm_per_sample, distance_per_sample, _, _, _, _ = loss_fn(
                        attack_module.perturbation[:, left:right],
                        perturbed_output, target)

                    # The square root is taken before comparing against
                    # the tolerance (presumably `norm_per_sample` is a
                    # squared norm -- confirm in AttackLoss).
                    numpy_norm = np.sqrt(utils.convert_from_tensor(norm_per_sample))
                    numpy_distance = utils.convert_from_tensor(distance_per_sample)
                    numpy_perturbation = utils.convert_from_tensor(
                        attack_module.perturbation.data[:, left:right])

                    for tol_idx in range(n_tolerances):
                        # Batch elements where this c is within tolerance
                        # and beats the best distance recorded so far.
                        improved = np.logical_and(
                            numpy_norm <= self.params.tolerance[tol_idx],
                            numpy_distance < best_distance[mode][tol_idx])

                        best_perturbation[mode][tol_idx][:, improved] = \
                            numpy_perturbation[:, improved]
                        best_distance[mode][tol_idx, improved] = \
                            numpy_distance[improved]
                        best_c[mode][tol_idx, improved] = c

                    # Save norm and distance for the c plot.
                    mean_numpy_norm = np.mean(numpy_norm)
                    mean_distance = np.mean(np.sqrt(numpy_distance))
                    lines.append([estimator, mode, c,
                                  mean_numpy_norm, mean_distance])

        # Replay the best perturbation of every tolerance level on the
        # original (un-tiled) inputs to record the perturbed outputs.
        with torch.no_grad():
            attack_module = AttackModule(
                self.model, self.params, 0, data, id_batch, v_batch,
                hidden, cell)

            for tol_idx in range(n_tolerances):
                attack_module.perturbation.data = torch.tensor(
                    best_perturbation[mode][tol_idx], device=device).float()
                _, replay_mu, replay_sigma = attack_module()

                perturbed_output_mu[mode][tol_idx] = replay_mu.cpu().numpy()
                perturbed_output_sigma[mode][tol_idx] = replay_sigma.cpu().numpy()

    return (original_mu, original_sigma, best_c, best_perturbation,
            best_distance, perturbed_output_mu, perturbed_output_sigma,
            targets, lines)