def log_linear_quantize(input, sf, bits):
    """Quantize ``input`` linearly in the log-magnitude domain.

    The sign of every element is kept; the natural log of the magnitude is
    quantized with ``linear_quantize`` and mapped back through ``exp``.

    Args:
        input: tensor to quantize.
        sf: scale factor forwarded to ``linear_quantize``.
        bits: bit width, must be >= 1.

    Returns:
        A tensor shaped like ``input``. For ``bits == 1`` this is just
        ``torch.sign(input)``.

    Raises:
        AssertionError: if ``bits < 1``.
    """
    assert bits >= 1, bits
    sign = torch.sign(input)
    if bits == 1:
        # Previously returned the tuple (sign, 0.0, 0.0); every other path
        # returns a single tensor, so return a tensor here as well.
        return sign
    # The tiny offset keeps log() finite for exact zeros.
    log_magnitude = torch.log(torch.abs(input) + 1e-20)
    quantized = linear_quantize(log_magnitude, sf, bits)
    return torch.exp(quantized) * sign

def log_minmax_quantize(input, bits):
    """Quantize ``input`` with min/max scaling in the log-magnitude domain.

    The sign of every element is kept; the natural log of the magnitude is
    quantized with ``min_max_quantize`` and mapped back through ``exp``.

    Args:
        input: tensor to quantize.
        bits: bit width, must be >= 1.

    Returns:
        A tensor shaped like ``input``. For ``bits == 1`` this is just
        ``torch.sign(input)``.

    Raises:
        AssertionError: if ``bits < 1``.
    """
    assert bits >= 1, bits
    sign = torch.sign(input)
    if bits == 1:
        # Previously returned the tuple (sign, 0.0, 0.0); every other path
        # returns a single tensor, so return a tensor here as well.
        return sign
    # The tiny offset keeps log() finite for exact zeros.
    log_magnitude = torch.log(torch.abs(input) + 1e-20)
    quantized = min_max_quantize(log_magnitude, bits)
    return torch.exp(quantized) * sign

def _mu_law(self, x):
    """Apply mu-law style companding to ``x``.

    Computes ``sign(x) * log(1 + n_categories * |x|) / log(n_categories + 1)``
    where ``n_categories`` is read from ``self``.
    """
    # One-element tensor holding the log base argument (n_categories + 1).
    base = self._variable(torch.FloatTensor(1))
    base[:] = self.n_categories + 1

    sign = torch.sign(x)
    magnitude = torch.abs(x)
    compressed = torch.log(1 + (self.n_categories * magnitude)) / torch.log(base)
    return sign * compressed

def kurtosis_score(x, dim=0):
    '''Compute a kurtosis-test z-score and p-value for ``x``.

    Tests the null hypothesis that the population kurtosis is that of a
    normal distribution, ``kurtosis = 3(n-1)/(n+1)``. Modelled on
    `scipy.stats.kurtosistest`.

    Args:
        x: sample data.
        dim: dimension along which the statistic is computed. Default is 0.
            Both arguments are first passed through ``_x_n_dim``
            (NOTE(review): presumably it normalises the data and resolves
            the sample count ``n`` -- confirm against that helper).

    Returns:
        statistic: the z-score for the test.
        p-value: ``1 + erf(-sqrt(0.5) * |Z|)``.

    Raises:
        ValueError: if fewer than 20 elements are available.
    '''
    x, n, dim = _x_n_dim(x, dim)
    if n < 20:
        raise ValueError(
            "Number of elements has to be >= 20 to compute kurtosis")

    # Sample kurtosis: fourth moment over squared second moment.
    fourth_moment = (x**4).mean(dim)
    second_moment = (x**2).mean(dim)
    b2 = fourth_moment / second_moment**2

    # Expected value and variance of b2 under normality.
    expected = 3.0 * (n - 1) / (n + 1)
    variance = 24.0 * n * (n - 2) * (n - 3) / ((n + 1)**2 * (n + 3) * (n + 5))
    standardized = (b2 - expected) / math.sqrt(variance)

    sqrt_beta1 = 6.0 * (n * n - 5 * n + 2) / ((n + 7) * (n + 9)) *\
        math.sqrt((6.0 * (n + 3) * (n + 5)) / (n * (n - 2) * (n - 3)))
    big_a = 6.0 + 8.0 / sqrt_beta1 * \
        (2.0 / sqrt_beta1 + math.sqrt(1 + 4.0 / (sqrt_beta1**2)))

    first = 1 - 2 / (9.0 * big_a)
    denominator = 1 + standardized * math.sqrt(2 / (big_a - 4.0))
    second = torch.sign(denominator) * torch.pow(
        (1 - 2.0 / big_a) / torch.abs(denominator), 1 / 3.0)

    z_score = (first - second) / math.sqrt(2 / (9.0 * big_a))
    p_value = 1 + torch.erf(-math.sqrt(0.5) * torch.abs(z_score))
    return z_score, p_value

def torch_sign(value):
    """
    Like :func:`torch.sign` but also works for plain Python numbers.

    Numbers yield ``-1``, ``0`` or ``1``; anything else is handed to
    :func:`torch.sign`.
    """
    if not isinstance(value, numbers.Number):
        return torch.sign(value)
    is_positive = value > 0
    is_negative = value < 0
    return is_positive - is_negative

def _predict(self, x, get_raw_results=False, **kwargs):
    """Score ``x`` with the linear model ``x @ self._w + self._b``.

    Args:
        x: a ``Variable``/tensor, or array-like data that is converted to a
            float32 tensor first.
        get_raw_results: when true, return the raw scores instead of signs.
        **kwargs: accepted and ignored.

    Returns:
        The raw scores (with dimension 1 squeezed) or their element-wise sign.
    """
    if not isinstance(x, Variable):
        as_float = np.asarray(x).astype(np.float32)
        x = Variable(torch.from_numpy(as_float))

    scores = x.mm(self._w)
    # Bias is added in place, then the column dimension is dropped.
    scores = scores.add_(self._b.expand_as(scores)).squeeze(1)
    return scores if get_raw_results else torch.sign(scores)

def forward(self, input):
    """Apply a linear layer whose weight and bias are perturbed by fresh noise.

    Two Gaussian vectors are sampled, passed through
    ``sign(e) * sqrt(|e|)``, and their product forms the weight noise; the
    output-side vector forms the bias noise.
    """
    def _scale(eps):
        return torch.sign(eps) * torch.sqrt(torch.abs(eps))

    # Hack: Force noise vectors to be function of input so they are put into
    # predict_net and not init_net when tracing with ONNX
    in_features = input.size()[1]
    raw_in = torch.randn(1, in_features, device=input.device)
    raw_out = torch.randn(
        self.out_dimension - in_features + in_features,
        1,
        device=input.device,
    )
    noise_in = _scale(raw_in)
    noise_out = _scale(raw_out)

    # Add noise to bias and weights
    weight_noise = torch.mul(noise_in, noise_out)
    noisy_bias = self.bias + self.sigma_bias * noise_out.t()
    noisy_weight = self.weight + self.sigma_weight * weight_noise
    return input.matmul(noisy_weight.t()) + noisy_bias

def fgsm(classifier, x, loss_func, attack_params):
    """Take one fast-gradient-sign step on ``x``.

    Args:
        classifier: callable applied to the input.
        x: input whose ``.data`` is attacked.
        loss_func: callable applied to the classifier output.
        attack_params: mapping; ``attack_params['eps']`` is the step size.

    Returns:
        ``to_var`` of the input moved by ``eps * sign(d loss / d input)``.
    """
    step = attack_params['eps']
    x_adv = to_var(x.data)
    loss = loss_func(classifier(x_adv))
    gradient = grad(loss, x_adv, retain_graph=False)[0]
    perturbed = x_adv + step * torch.sign(gradient)
    # Re-wrap the raw data so the result carries no graph history.
    return to_var(perturbed.data)

def tanh_quantize(input, bits):
    """Quantize ``input`` uniformly in tanh space and map back with arctanh.

    Args:
        input: tensor to quantize.
        bits: bit width, must be >= 1; ``bits == 1`` returns ``sign(input)``.

    Returns:
        The quantized tensor. A level that lands exactly on -1 or 1 maps to
        -inf/+inf, because ``(1 + v) / (1 - v)`` is then 0 or a division
        by zero.
    """
    assert bits >= 1, bits
    if bits == 1:
        return torch.sign(input)

    squashed = torch.tanh(input)            # [-1, 1]
    unit = (squashed + 1.0) / 2             # [0, 1]
    levels = math.pow(2.0, bits) - 1
    # floor(x + 0.5) rounds to the nearest level.
    snapped = torch.floor(unit * levels + 0.5) / levels
    snapped = 2 * snapped - 1               # back to [-1, 1]
    # arctanh written out explicitly
    return 0.5 * torch.log((1 + snapped) / (1 - snapped))

def linear_quantize(input, sf, bits):
    """Quantize ``input`` to a signed fixed-point grid.

    The step is ``2 ** -sf`` and the integer range is
    ``[-2 ** (bits - 1), 2 ** (bits - 1) - 1]``.

    Args:
        input: tensor to quantize.
        sf: scale factor (number of fractional bits).
        bits: bit width, must be >= 1; ``bits == 1`` returns
            ``sign(input) - 1``.

    Returns:
        The rounded, clipped tensor on the original scale.
    """
    assert bits >= 1, bits
    if bits == 1:
        return torch.sign(input) - 1

    step = math.pow(2.0, -sf)
    half_range = math.pow(2.0, bits - 1)
    lowest, highest = -half_range, half_range - 1
    # floor(x + 0.5) rounds to the nearest grid index.
    grid_index = torch.floor(input / step + 0.5)
    return torch.clamp(grid_index, lowest, highest) * step

def forward(self, input):
    """Apply a linear layer with freshly resampled noise.

    The ``epsilon_input`` and ``epsilon_output`` buffers are refilled with
    standard-normal samples in place, passed through
    ``sign(x) * sqrt(|x|)`` and combined into the weight noise (and the bias
    noise when a bias is present).

    Args:
        input: input passed to ``F.linear``.

    Returns:
        ``F.linear(input, weight + sigma_weight * noise, bias)`` where the
        bias is perturbed only if ``self.bias`` is not ``None``.
    """
    def _scale(x):
        return torch.sign(x) * torch.sqrt(torch.abs(x))

    # Fixed typo: this used to read ``self.epsison_input``, which raised
    # AttributeError because the buffer used below is ``epsilon_input``.
    self.epsilon_input.normal_()
    self.epsilon_output.normal_()

    eps_in = _scale(self.epsilon_input.data)
    eps_out = _scale(self.epsilon_output.data)

    bias = self.bias
    if bias is not None:
        bias = bias + self.sigma_bias * eps_out.t()
    noise_v = torch.mul(eps_in, eps_out)
    return F.linear(input, self.weight + self.sigma_weight * noise_v, bias)

def forward(self, input, doc_lens):
    """Score every sentence of every document in the batch.

    :param input: (B*S, L) word ids; pad id is 0 and real ids are positive
    :param doc_lens: (B) number of sentences per document
    :return: (B * S) concatenated per-sentence probabilities
    """
    # (B*S); counting non-zero ids gives each sentence's length
    sent_lens = torch.sum(torch.sign(input), dim=1).data

    embedded = self.embed(input)                          # (B*S, L, D)
    # word level GRU
    word_states = self.word_RNN(embedded)[0]              # (B*S, L, 2*H)
    word_out = self.max_pool1d(word_states, sent_lens)    # (B*S, 2*H)
    # make sent features (pad with zeros)
    padded_docs = self.pad_doc(word_out, doc_lens)        # (B, max_doc_len, 2*H)
    # sent level GRU
    sent_out = self.sent_RNN(padded_docs)[0]              # (B, max_doc_len, 2*H)
    docs = self.max_pool1d(sent_out, doc_lens)            # (B, 2*H)

    batch_probs = []
    for index, doc_len in enumerate(doc_lens):
        valid_hidden = sent_out[index, :doc_len, :]               # (doc_len, 2*H)
        doc = torch.tanh(self.fc(docs[index])).unsqueeze(0)       # (1, 2*H)
        # running summary of the sentences selected so far
        s = torch.zeros(1, 2 * self.args.hidden_dim).to(opt.device)
        doc_probs = []
        for position, hidden in enumerate(valid_hidden):
            h = hidden.view(1, -1)                                # (1, 2*H)

            # absolute and relative position embeddings
            abs_index = torch.LongTensor([[position]]).to(opt.device)
            abs_features = self.abs_pos_embed(abs_index).squeeze(0)
            rel_bucket = int((position + 1) * 9.0 / doc_len)
            rel_index = torch.LongTensor([[rel_bucket]]).to(opt.device)
            rel_features = self.rel_pos_embed(rel_index).squeeze(0)

            # classification layer, every term is (1, 1)
            content = self.content(h)
            salience = self.salience(h, doc)
            novelty = -1 * self.novelty(h, torch.tanh(s))
            abs_p = self.abs_pos(abs_features)
            rel_p = self.rel_pos(rel_features)
            prob = torch.sigmoid(
                content + salience + novelty + abs_p + rel_p + self.bias)

            s = s + torch.mm(prob, h)                             # (1, 2*H)
            doc_probs.append(prob)
        batch_probs.append(torch.cat(doc_probs).squeeze())        # (S)
    return torch.cat(batch_probs).squeeze()                       # (B * S)

def min_max_quantize(input, bits):
    """Quantize ``input`` uniformly between its own minimum and maximum.

    Args:
        input: tensor to quantize.
        bits: bit width, must be >= 1; ``bits == 1`` returns
            ``sign(input) - 1``.

    Returns:
        A tensor shaped like ``input`` snapped to ``2 ** bits`` evenly spaced
        levels spanning ``[input.min(), input.max()]``. A constant input is
        returned unchanged (as a copy).

    Raises:
        AssertionError: if ``bits < 1``.
    """
    assert bits >= 1, bits
    if bits == 1:
        return torch.sign(input) - 1

    # float() works for 0-dim tensors on any device; the previous
    # ``.numpy()[0]`` indexing fails on the 0-dim result of min()/max().
    min_val = float(input.min())
    max_val = float(input.max())
    span = max_val - min_val
    if span == 0:
        # All elements are equal: nothing to quantize, and rescaling would
        # divide by zero.
        return input.clone()

    input_rescale = (input - min_val) / span
    n = math.pow(2.0, bits) - 1
    # floor(x + 0.5) rounds to the nearest level.
    v = torch.floor(input_rescale * n + 0.5) / n
    return v * span + min_val

def forward(self, x, doc_lens):
    """Score every sentence of every document in the batch.

    Sentences are encoded with the convolutions in ``self.convs`` followed by
    max-over-time pooling, documents with ``self.sent_RNN``; each sentence
    then gets a probability built from content, salience, novelty and
    position terms.

    Args:
        x: word ids, one row per sentence.
        doc_lens: number of sentences in each document.

    Returns:
        All per-sentence probabilities concatenated and squeezed.
    """
    H = self.args.hidden_size
    use_cuda = self.args.device is not None

    x = self.embed(x)  # (N,L,D)
    # The channel-first view is the same for every convolution, so build it
    # once instead of once per conv.
    channels_first = x.permute(0, 2, 1)
    conv_out = [conv(channels_first) for conv in self.convs]
    pooled = [F.max_pool1d(c, c.size(2)).squeeze(2) for c in conv_out]
    x = torch.cat(pooled, 1)

    # make sent features (pad with zeros)
    x = self.pad_doc(x, doc_lens)
    # sent level GRU
    sent_out = self.sent_RNN(x)[0]                  # (B,max_doc_len,2*H)
    docs = self.max_pool1d(sent_out, doc_lens)      # (B,2*H)
    docs = self.fc(docs)

    probs = []
    for index, doc_len in enumerate(doc_lens):
        valid_hidden = sent_out[index, :doc_len, :]  # (doc_len,2*H)
        doc = docs[index].unsqueeze(0)
        # running summary of the sentences scored so far
        s = Variable(torch.zeros(1, 2 * H))
        if use_cuda:
            s = s.cuda()
        for position, h in enumerate(valid_hidden):
            h = h.view(1, -1)                        # (1,2*H)

            # position embeddings
            abs_index = Variable(torch.LongTensor([[position]]))
            if use_cuda:
                abs_index = abs_index.cuda()
            abs_features = self.abs_pos_embed(abs_index).squeeze(0)

            rel_index = int(round((position + 1) * 9.0 / doc_len))
            rel_index = Variable(torch.LongTensor([[rel_index]]))
            if use_cuda:
                rel_index = rel_index.cuda()
            rel_features = self.rel_pos_embed(rel_index).squeeze(0)

            # classification layer
            content = self.content(h)
            salience = self.salience(h, doc)
            novelty = -1 * self.novelty(h, torch.tanh(s))
            abs_p = self.abs_pos(abs_features)
            rel_p = self.rel_pos(rel_features)
            # torch.sigmoid replaces the deprecated F.sigmoid.
            prob = torch.sigmoid(
                content + salience + novelty + abs_p + rel_p + self.bias)
            s = s + torch.mm(prob, h)
            probs.append(prob)
    return torch.cat(probs).squeeze()

def extract_filters(self):
    """Build the convolution weight by indexing into the shared filter map.

    The filter map is duplicated along dim 0 and, unless it is spatially
    1x1, padded with its index-1 row and column. ``self.ids`` then selects
    the entries that make up the
    ``(out_channels, in_channels // groups, k_h, k_w)`` weight.

    Returns:
        The gathered weight, or -- when ``self.binary_filtermap`` is set -- a
        copy in which each output filter is replaced by its sign pattern
        scaled by that filter's L1 norm.
    """
    n_out = self.out_channels
    n_in = self.in_channels // self.groups
    kernel_h = self.kernel_size[0]
    kernel_w = self.kernel_size[1]

    # no channel compression: just duplicate the map along dim 0
    padded = torch.cat((self.filtermap, self.filtermap), 0)

    # not for 1x1 conv, do the padding on the spatial dims
    if self.filtermap.size()[1] > 1 and self.filtermap.size()[2] > 1:
        extra_row = padded[:, 1, :][:, None, :]
        padded = torch.cat((padded, extra_row), 1)
        extra_col = padded[:, :, 1][:, :, None]
        padded = torch.cat((padded, extra_col), 2)

    indices = self.ids.detach()
    weight = padded.view(-1, 1).index_select(0, indices)
    weight = weight.view(n_out, n_in, kernel_h, kernel_w)

    if not self.binary_filtermap:
        return weight

    binarized = weight.clone()
    for filter_idx in range(0, n_out):
        real_filter = weight[filter_idx, :, :, :]
        scale = torch.norm(real_filter.view(-1, 1), 1)
        binarized[filter_idx, :, :, :] = torch.sign(real_filter) * scale
    return binarized

def __call__(self, x_mu):
    """Expand mu-law encoded values back to the range [-1, 1].

    Args:
        x_mu (FloatTensor/LongTensor or ndarray): encoded values in
            ``[0, self.qc - 1]``.

    Returns:
        x (FloatTensor or ndarray): the expanded signal.

    Raises:
        TypeError: if ``x_mu`` is neither an ndarray nor a tensor.
    """
    mu = self.qc - 1.
    if isinstance(x_mu, np.ndarray):
        x = (x_mu / mu) * 2 - 1.
        return np.sign(x) * (np.exp(np.abs(x) * np.log1p(mu)) - 1.) / mu
    if isinstance(x_mu, torch.Tensor):
        # Any non-float tensor is cast, not only a CPU LongTensor.
        if not x_mu.is_floating_point() and not x_mu.is_complex():
            x_mu = x_mu.float()
        # Keep mu as a float32 tensor, but on the input's device so CUDA
        # inputs do not hit a device mismatch.
        mu = torch.tensor([mu], dtype=torch.float32, device=x_mu.device)
        x = (x_mu / mu) * 2 - 1.
        return torch.sign(x) * (torch.exp(torch.abs(x) * torch.log1p(mu)) - 1.) / mu
    # Previously fell through and failed on an unbound local.
    raise TypeError(
        "x_mu must be a numpy.ndarray or torch.Tensor, got {}".format(
            type(x_mu).__name__))

def __call__(self, x):
    """Compress a signal with mu-law companding and quantize it.

    Args:
        x (FloatTensor/LongTensor or ndarray): signal to encode.

    Returns:
        x_mu (LongTensor or ndarray): integer codes; inputs in ``[-1, 1]``
        map to ``[0, self.qc - 1]``.

    Raises:
        TypeError: if ``x`` is neither an ndarray nor a tensor.
    """
    mu = self.qc - 1.
    if isinstance(x, np.ndarray):
        x_mu = np.sign(x) * np.log1p(mu * np.abs(x)) / np.log1p(mu)
        # Adding 0.5 before truncation rounds to the nearest code.
        return ((x_mu + 1) / 2 * mu + 0.5).astype(int)
    if isinstance(x, torch.Tensor):
        # Any non-float tensor is cast, not only a CPU LongTensor.
        if not x.is_floating_point() and not x.is_complex():
            x = x.float()
        # Keep mu as a float32 tensor, but on the input's device so CUDA
        # inputs do not hit a device mismatch.
        mu = torch.tensor([mu], dtype=torch.float32, device=x.device)
        x_mu = torch.sign(x) * torch.log1p(mu * torch.abs(x)) / torch.log1p(mu)
        return ((x_mu + 1) / 2 * mu + 0.5).long()
    # Previously fell through and failed on an unbound local.
    raise TypeError(
        "x must be a numpy.ndarray or torch.Tensor, got {}".format(
            type(x).__name__))

def generate(self, original_image, org_class, target_class, image_path):
    """Run up to 10 targeted sign-gradient steps towards ``target_class``.

    After each step the image is recreated and re-preprocessed, then
    classified again. As soon as that confirmation prediction equals
    ``target_class`` the adversarial image is written to disk and the loop
    stops.

    Args:
        original_image: source image handed to ``preprocess_image``.
        org_class: original class, used for logging and the file name.
        target_class: class the attack aims for.
        image_path: string inserted into the output file name.

    Returns:
        Always 1.
    """
    # Wrap the target class so it can be fed to the loss
    target_label = Variable(torch.from_numpy(np.asarray([target_class])))
    criterion = nn.CrossEntropyLoss()
    # The original image converted to a Variable
    processed_image = preprocess_image(original_image)

    for step in range(10):
        print('Iteration:', str(step))
        # Zero out previous gradients
        processed_image.grad = None
        # Output of the model for the current image
        logits = self.model(processed_image)
        # The label is cast to long; leaving it as-is caused a type error
        loss = criterion(logits, target_label.long())
        loss.backward()

        # processed_image.grad.data is the gradient flowing back from the
        # first layer; it could also be obtained with hooks
        adv_noise = self.alpha * torch.sign(processed_image.grad.data)
        # Subtract the noise from the current image
        processed_image.data = processed_image.data - adv_noise

        # Confirm the image is still adversarial after recreation: the
        # recreated image holds integers between 1 and 255 and the
        # adversarial effect is sometimes lost in that process
        recreated_image = recreate_image(processed_image)
        confirmation_input = preprocess_image(recreated_image)
        confirmation_out = self.model(confirmation_input)
        # Prediction of the same model on the recreated image
        _, confirmation_prediction = confirmation_out.data.max(1)
        # Confidence of that prediction
        confirmation_confidence = \
            nn.functional.softmax(confirmation_out)[0][confirmation_prediction].data.numpy()[0]
        # Convert tensor to int (0 or 1, i.e. fake or real)
        confirmation_prediction = confirmation_prediction.numpy()[0]

        # Stop as soon as the prediction equals the target class
        if confirmation_prediction == target_class:
            print('Original image was predicted as:', org_class,
                  'with adversarial noise converted to:', confirmation_prediction,
                  'and predicted with confidence of:', confirmation_confidence)
            # Noise image: original image - generated image
            original_image = cv2.resize(original_image, (224, 224))
            recreate_img = recreated_image.transpose(0, 1, 2)
            noise_image = original_image - recreate_img
            # Write image
            out_file = ('../generated/targeted/adv_img_from_' + image_path + '_' +
                        str(org_class) + '_to_' + str(confirmation_prediction) +
                        '.jpg')
            cv2.imwrite(out_file, recreate_img)
            # Model result (fake/real) for the FGSM-generated image
            deepfake_output = self.model(processed_image)
            _, FGSM_result = deepfake_output.data.max(1)
            print("FGSM result : ", FGSM_result.numpy()[0])
            break
    return 1

def _f(self, x):
    """Return the signed square root ``sign(x) * sqrt(|x|)``."""
    sign = torch.sign(x)
    root = torch.sqrt(torch.abs(x))
    return sign * root

def _mu2float(self, mdata):
    """Expand mu-law compressed values back to linear amplitudes.

    Each element ``x`` becomes
    ``sign(x) * (nvals ** |x| - 1) / (nvals - 1)`` with ``nvals`` read from
    ``self``.

    Args:
        mdata: iterable of tensors (iterating a tensor yields its slices).

    Returns:
        A list with one expanded tensor per element of ``mdata``.
    """
    d = 1 / (self.nvals - 1)
    # torch.pow is the exponentiation function; the former ``torch.power``
    # does not exist and raised AttributeError.
    return [
        torch.sign(x) * d * (torch.pow(self.nvals, torch.abs(x)) - 1)
        for x in mdata
    ]

def nonzero(x):
    """Return a tensor that is 1 where ``x`` is non-zero and 0 where it is zero."""
    magnitude = torch.abs(x)
    return torch.sign(magnitude)

def linf_step(self, x, g, lr):
    """Move ``x`` by ``lr`` along the element-wise sign of ``g``."""
    direction = torch.sign(g)
    return x + lr * direction

def updateBN():
    """Add an L1 subgradient to every BatchNorm2d scale gradient.

    For each ``nn.BatchNorm2d`` module in the global ``model``,
    ``args.s * sign(weight)`` is added in place to ``weight.grad``.
    """
    for module in model.modules():
        if not isinstance(module, nn.BatchNorm2d):
            continue
        penalty = args.s * torch.sign(module.weight.data)  # L1
        module.weight.grad.data.add_(penalty)

def forward(self, x):
    """Soft-threshold ``x`` by the non-negative part of ``self.gamma``.

    Magnitudes are reduced by the threshold and floored at zero; signs are
    kept.
    """
    threshold = torch.clamp(self.gamma, min=0.)
    shrunk = torch.clamp(x.abs() - threshold, min=0.)
    return torch.sign(x) * shrunk

def mean_value_coordinates_3D(query, vertices, faces, verbose=False):
    """
    Tao Ju et.al. MVC for 3D triangle meshes

    Computes, for every query point, one weight per mesh vertex. Per-face
    weights are accumulated onto vertices with ``scatter_add`` and then
    normalised by their sum (a zero sum is replaced by one).

    params:
        query    (B,P,3)
        vertices (B,N,3)
        faces    (B,F,3)
        verbose  when true, also return the per-face weights ``wi``
    return:
        wj       (B,P,N) normalised weights; ``(wj, wi)`` when verbose

    NOTE(review): ``normalize``, ``check_values``, ``scatter_add`` and ``PI``
    are defined outside this block -- their exact behaviour should be
    confirmed there.
    """
    B, F, _ = faces.shape
    _, P, _ = query.shape
    _, N, _ = vertices.shape
    # u_i = p_i - x (B,P,N,3)
    uj = vertices.unsqueeze(1) - query.unsqueeze(2)
    # \|u_i\| (B,P,N,1)
    dj = torch.norm(uj, dim=-1, p=2, keepdim=True)
    uj = normalize(uj, dim=-1)
    # gather triangle B,P,F,3,3
    ui = torch.gather(uj.unsqueeze(2).expand(-1,-1,F,-1,-1), 3, faces.unsqueeze(1).unsqueeze(-1).expand(-1,P,-1,-1,3))
    # li = \|u_{i+1}-u_{i-1}\| (B,P,F,3)
    li = torch.norm(ui[:,:,:,[1, 2, 0],:] - ui[:, :, :,[2, 0, 1],:], dim=-1, p=2)
    # Pull values at or beyond +/-2 back to +/-(2-eps); subtracting the
    # detached copy changes the value while leaving the gradient path intact.
    eps = 2e-5
    li = torch.where(li>=2, li-(li.detach()-(2-eps)), li)
    li = torch.where(li<=-2, li-(li.detach()+(2-eps)), li)
    # asin(x) is inf at +/-1
    # θi = 2arcsin[li/2] (B,P,F,3)
    theta_i = 2*torch.asin(li/2)
    assert(check_values(theta_i))
    # B,P,F,1
    h = torch.sum(theta_i, dim=-1, keepdim=True)/2
    # wi← sin[θi]d{i−1}d{i+1}
    # (B,P,F,3) ci ← (2sin[h]sin[h−θi])/(sin[θ_{i+1}]sin[θ_{i−1}])−1
    ci = 2*torch.sin(h)*torch.sin(h-theta_i)/(torch.sin(theta_i[:,:,:,[1, 2, 0]])*torch.sin(theta_i[:,:,:,[2, 0, 1]]))-1
    # NOTE: because of floating point ci can be slightly larger than 1, causing problem with sqrt(1-ci^2)
    # NOTE: sqrt(x)' is nan for x=0, hence use eps
    eps = 1e-5
    ci = torch.where(ci>=1, ci-(ci.detach()-(1-eps)), ci)
    ci = torch.where(ci<=-1, ci-(ci.detach()+(1-eps)), ci)
    # si← sign[det[u1,u2,u3]]sqrt(1-ci^2)
    # (B,P,F)*(B,P,F,3)
    si = torch.sign(torch.det(ui)).unsqueeze(-1)*torch.sqrt(1-ci**2)  # sqrt gradient nan for 0
    assert(check_values(si))
    # (B,P,F,3)
    di = torch.gather(dj.unsqueeze(2).squeeze(-1).expand(-1,-1,F,-1), 3, faces.unsqueeze(1).expand(-1,P,-1,-1))
    assert(check_values(di))
    # if si.requires_grad:
    #     vertices.register_hook(save_grad("mvc/dv"))
    #     li.register_hook(save_grad("mvc/dli"))
    #     theta_i.register_hook(save_grad("mvc/dtheta"))
    #     ci.register_hook(save_grad("mvc/dci"))
    #     si.register_hook(save_grad("mvc/dsi"))
    #     di.register_hook(save_grad("mvc/ddi"))
    # wi← (θi −c[i+1]θ[i−1] −c[i−1]θ[i+1])/(disin[θi+1]s[i−1])
    # B,P,F,3
    # CHECK is there a 2* in the denominator
    wi = (theta_i-ci[:,:,:,[1,2,0]]*theta_i[:,:,:,[2,0,1]]-ci[:,:,:,[2,0,1]]*theta_i[:,:,:,[1,2,0]])/(di*torch.sin(theta_i[:,:,:,[1,2,0]])*si[:,:,:,[2,0,1]])
    # if ∃i,|si| ≤ ε, set wi to 0. coplaner with T but outside
    # ignore coplaner outside triangle
    # alternative check
    # (B,F,3,3)
    # triangle_points = torch.gather(vertices.unsqueeze(1).expand(-1,F,-1,-1), 2, faces.unsqueeze(-1).expand(-1,-1,-1,3))
    # # (B,P,F,3), (B,1,F,3) -> (B,P,F,1)
    # determinant = dot_product(triangle_points[:,:,:,0].unsqueeze(1)-query.unsqueeze(2),
    #                           torch.cross(triangle_points[:,:,:,1]-triangle_points[:,:,:,0],
    #                                       triangle_points[:,:,:,2]-triangle_points[:,:,:,0], dim=-1).unsqueeze(1), dim=-1, keepdim=True).detach()
    # # (B,P,F,1)
    # sqrdist = determinant*determinant / (4 * sqrNorm(torch.cross(triangle_points[:,:,:,1]-triangle_points[:,:,:,0], triangle_points[:,:,:,2]-triangle_points[:,:,:,0], dim=-1), keepdim=True))
    wi = torch.where(torch.any(torch.abs(si) <= 1e-5, keepdim=True, dim=-1), torch.zeros_like(wi), wi)
    # wi = torch.where(sqrdist <= 1e-5, torch.zeros_like(wi), wi)
    # if π −h < ε, x lies on t, use 2D barycentric coordinates
    # inside triangle
    inside_triangle = (PI-h).squeeze(-1)<1e-4
    # set all F for this P to zero
    wi = torch.where(torch.any(inside_triangle, dim=-1, keepdim=True).unsqueeze(-1), torch.zeros_like(wi), wi)
    # CHECK is it di https://www.cse.wustl.edu/~taoju/research/meanvalue.pdf or li http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.516.1856&rep=rep1&type=pdf
    wi = torch.where(inside_triangle.unsqueeze(-1).expand(-1,-1,-1,wi.shape[-1]), torch.sin(theta_i)*di[:,:,:,[2,0,1]]*di[:,:,:,[1,2,0]], wi)
    # sum over all faces face -> vertex (B,P,F*3) -> (B,P,N)
    wj = scatter_add(wi.reshape(B,P,-1).contiguous(), faces.unsqueeze(1).expand(-1,P,-1,-1).reshape(B,P,-1), 2, out_size=(B,P,N))
    # close to vertex (B,P,N)
    close_to_point = dj.squeeze(-1) < 1e-8
    # set all F for this P to zero
    wj = torch.where(torch.any(close_to_point, dim=-1, keepdim=True), torch.zeros_like(wj), wj)
    # a query that coincides with a vertex gets weight 1 on that vertex
    wj = torch.where(close_to_point, torch.ones_like(wj), wj)
    # (B,P,1)
    sumWj = torch.sum(wj, dim=-1, keepdim=True)
    # avoid dividing by zero when every weight of a query is zero
    sumWj = torch.where(sumWj==0, torch.ones_like(sumWj), sumWj)
    wj_normalised = wj / sumWj
    # if wj.requires_grad:
    #     saved_variables["mvc/wi"] = wi
    #     wi.register_hook(save_grad("mvc/dwi"))
    #     wj.register_hook(save_grad("mvc/dwj"))
    if verbose:
        return wj_normalised, wi
    else:
        return wj_normalised

# Iterative sign-gradient attack over every .png in input_dir: the
# perturbation eta is updated for `epochs` steps of size `alpha` against class
# 0 and the perturbed image is saved under the same name in output_dir.
alpha = 1e-4
epochs = 170
cnt = 0
for filename in os.listdir(input_dir):
    if not filename.endswith('.png'):
        continue
    cnt += 1

    trans = transforms.Compose([transforms.ToTensor()])
    rev = transforms.ToPILImage()
    target = torch.tensor([0])

    image = Image.open(os.path.join(input_dir, filename)).convert('RGB')
    image = trans(image).unsqueeze(0)
    image.requires_grad_()

    # eta accumulates the perturbation; it is not itself differentiated.
    eta = torch.zeros(image.shape)
    for step in range(epochs):
        print(cnt, step)
        output = model(image + eta)
        loss = criterion(output, target)
        loss.backward()
        eta += -alpha * torch.sign(image.grad.data)
        # reset the gradient so the next step does not accumulate
        image.grad.data.zero_()

    adv = (image + eta).squeeze()
    adv = rev(adv)
    adv.save(os.path.join(output_dir, filename))

def trades_loss(model, loss_fn, x_natural, y, norm, optimizer, step_size=0.003,
                epsilon=0.031, perturb_steps=10, beta=1.0, version=None,
                device="gpu"):
    """Compute the natural loss plus a KL-based robustness term.

    An adversarial example is crafted with the model in eval mode, then the
    model is switched back to train mode and the combined loss is computed.

    Args:
        model: network under training.
        loss_fn: loss applied to ``model(x_natural)`` and ``y``.
        x_natural: clean input batch.
        y: labels for ``loss_fn``.
        norm: ``np.inf`` for the sign-gradient attack, ``2`` for the L2
            attack. Any other value skips the attack and uses the clamped,
            randomly perturbed start point.
        optimizer: optimizer whose gradients are zeroed before the final
            forward passes.
        step_size: step of the L-inf attack.
        epsilon: perturbation budget.
        perturb_steps: number of attack iterations.
        beta: weight of the robust term.
        version: when it contains ``"sum"`` the robust term is multiplied by
            the batch size again.
        device: device the random start noise is moved to.
            NOTE(review): the default "gpu" does not look like a valid torch
            device string -- confirm callers always pass one.

    Returns:
        ``(outputs, loss)`` where ``outputs`` is ``model(x_natural)``.
    """
    # define KL-loss
    criterion_kl = nn.KLDivLoss(reduction='sum')
    model.eval()
    batch_size = len(x_natural)

    # generate adversarial example
    if norm == np.inf:
        x_adv = x_natural.detach() + 0.001 * torch.randn(
            x_natural.shape).to(device).detach()
        for _ in range(perturb_steps):
            x_adv.requires_grad_()
            with torch.enable_grad():
                loss_kl = criterion_kl(F.log_softmax(model(x_adv), dim=1),
                                       F.softmax(model(x_natural), dim=1))
            grad = torch.autograd.grad(loss_kl, [x_adv])[0]
            x_adv = x_adv.detach() + step_size * torch.sign(grad.detach())
            # project back into the epsilon ball and the valid pixel range
            x_adv = torch.min(torch.max(x_adv, x_natural - epsilon),
                              x_natural + epsilon)
            x_adv = torch.clamp(x_adv, 0.0, 1.0)
    elif norm == 2:
        delta = 0.001 * torch.randn(x_natural.shape).to(device).detach()
        delta = Variable(delta.data, requires_grad=True)

        # Setup optimizers
        optimizer_delta = optim.SGD([delta], lr=epsilon / perturb_steps * 2)

        for _ in range(perturb_steps):
            adv = x_natural + delta

            # optimize
            optimizer_delta.zero_grad()
            with torch.enable_grad():
                loss = (-1) * criterion_kl(F.log_softmax(model(adv), dim=1),
                                           F.softmax(model(x_natural), dim=1))
            loss.backward()
            # renorming gradient
            grad_norms = delta.grad.view(batch_size, -1).norm(p=2, dim=1)
            delta.grad.div_(grad_norms.view(-1, 1, 1, 1))
            # avoid nan or inf if gradient is 0
            if (grad_norms == 0).any():
                delta.grad[grad_norms == 0] = torch.randn_like(
                    delta.grad[grad_norms == 0])
            optimizer_delta.step()

            # projection
            delta.data.add_(x_natural)
            delta.data.clamp_(0, 1).sub_(x_natural)
            delta.data.renorm_(p=2, dim=0, maxnorm=epsilon)
        x_adv = Variable(x_natural + delta, requires_grad=False)
    else:
        # This branch used to clamp ``x_adv`` before it was ever assigned and
        # raised UnboundLocalError. Build the same randomly perturbed start
        # point the L-inf branch uses and clamp that instead.
        x_adv = x_natural.detach() + 0.001 * torch.randn(
            x_natural.shape).to(device).detach()
        x_adv = torch.clamp(x_adv, 0.0, 1.0)

    model.train()

    x_adv = Variable(torch.clamp(x_adv, 0.0, 1.0), requires_grad=False)
    # zero gradient
    optimizer.zero_grad()
    # calculate robust loss
    outputs = model(x_natural)
    loss_natural = loss_fn(outputs, y)
    loss_robust = (1.0 / batch_size) * criterion_kl(
        F.log_softmax(model(x_adv), dim=1),
        F.softmax(model(x_natural), dim=1))
    if version is not None and "sum" in version:
        loss = loss_natural + beta * batch_size * loss_robust
    else:
        loss = loss_natural + beta * loss_robust
    return outputs, loss

def backward(ctx, grad_output):
    """Pass the gradient through, or quantize it to a signed power of two.

    When ``lin_back`` is set a clone of ``grad_output`` is returned.
    Otherwise each element becomes ``sign(g) * 2 ** e`` where ``e`` is
    ``round(log2(|g|))`` clamped to ``[fsr - 2 ** bitwight, fsr]``.
    """
    if lin_back:
        grad_input = grad_output.clone()
        return grad_input

    exponent = torch.round(torch.log2(torch.abs(grad_output)))
    exponent = torch.clamp(exponent, fsr - 2**bitwight, fsr)
    base = torch.ones_like(grad_output) * 2
    return torch.sign(grad_output) * torch.pow(base, exponent)

def forward(ctx, input):
    """Quantize ``input`` to powers of two.

    Each element becomes ``2 ** e`` where ``e`` is ``round(log2(|x|))``
    clamped to ``[fsr - 2 ** bitwight, fsr]``; when ``with_sign`` is set the
    result also carries the sign of the input.
    """
    exponent = torch.round(torch.log2(torch.abs(input)))
    exponent = torch.clamp(exponent, fsr - 2**bitwight, fsr)
    magnitude = torch.pow(torch.ones_like(input) * 2, exponent)
    if with_sign:
        return torch.sign(input) * magnitude
    return magnitude

def generate_with_adv(model, data_feed, config, evaluator, num_batch=1, dest_f=None):
    """Generate responses and score them with the recognition network.

    For every batch the generated sequences are truncated at the first EOS,
    fed through ``model.qzx_forward`` and compared with the latent ids used
    for generation. Policy accuracy is tracked overall and separately for
    user and system turns.

    Returns:
        The policy accuracy ``corr_pi_cnt / adv_cnt``, or ``None`` when the
        model outputs lack the latent entries.
    """
    eos_id = model.rev_vocab[EOS]
    model.eval()
    shown_batches = data_feed.num_batch if num_batch is None else num_batch
    logger.info("Generation with Adversarial: {} batches".format(shown_batches))

    adv_nll = 0.0
    corr_cnt = 0.0
    adv_cnt = 0.0
    corr_pi_cnt = 0.0
    # the tiny start values keep the per-speaker ratios defined when a
    # speaker never occurs
    sys_pi_cnt = 1e-18
    sys_corr_pi_cnt = 0.0
    usr_pi_cnt = 1e-18
    usr_corr_pi_cnt = 0.0

    if num_batch is not None:
        gen_with_cond(model, data_feed, config, num_batch)

    data_feed.epoch_init(config, shuffle=False, verbose=False)
    while True:
        batch = data_feed.next_batch()
        if batch is None:
            break
        outputs, labels = model(batch, mode=GEN, gen_type=config.gen_type)
        try:
            y_ids = outputs[DecoderRNN.KEY_LATENT]
            qy_ids = outputs[DecoderRNN.KEY_RECOG_LATENT]
            log_py = outputs[DecoderRNN.KEY_POLICY]
            _, max_py = torch.max(log_py, dim=1)
            corr_flag = max_py == qy_ids.view(-1)
            corr_flag = corr_flag.cpu().data.numpy()
            corr_pi_cnt += np.sum(corr_flag)
            for b_id in range(config.batch_size):
                for y_idx in range(config.y_size):
                    flat_idx = b_id * config.y_size + y_idx
                    if model.rev_vocab.get(USR) in batch.outputs[b_id]:
                        usr_pi_cnt += 1
                        if corr_flag[flat_idx]:
                            usr_corr_pi_cnt += 1
                    else:
                        sys_pi_cnt += 1
                        if corr_flag[flat_idx]:
                            sys_corr_pi_cnt += 1
        except Exception as e:
            logger.error(e)
            logger.info("No latent. Skip")
            return

        seq_terminate = Variable(torch.zeros(config.batch_size, 1))
        if config.use_gpu:
            seq_terminate = seq_terminate.cuda().long()

        # find adversarial loss here. EOS the sequence: once EOS has been
        # seen, every later token of that sequence is zeroed.
        truncated_steps = []
        for t in outputs[DecoderRNN.KEY_SEQUENCE]:
            is_eos = t == eos_id
            seq_terminate = torch.sign(seq_terminate+is_eos.long())
            truncated_steps.append(((1.0-seq_terminate)*t).long())
        norm_out_utts = torch.cat(truncated_steps, dim=1)

        qzx_results = model.qzx_forward(norm_out_utts)
        log_qy = F.log_softmax(qzx_results.qy_logits, dim=1)
        nll = -1.0 * log_qy.gather(1, y_ids)
        _, max_qy = torch.max(log_qy, dim=1)

        corr_cnt += torch.sum(max_qy==y_ids.view(-1)).cpu().data.numpy()
        adv_nll += torch.sum(nll).cpu().data.numpy()
        adv_cnt += log_qy.size(0)

    # print adversarial PPL
    avg_adv_nll = adv_nll/adv_cnt
    acc = corr_cnt/adv_cnt
    pi_acc = corr_pi_cnt/adv_cnt
    usr_pi_acc = usr_corr_pi_cnt/usr_pi_cnt
    sys_pi_acc = sys_corr_pi_cnt/sys_pi_cnt
    logger.info("Adversarial NLL {}, PPL {} Acc {} PI Acc {} Sys Acc {} Usr Acc {}"
                .format(avg_adv_nll, np.exp(avg_adv_nll), acc, pi_acc, sys_pi_acc, usr_pi_acc))
    logger.info("Generation Done")
    return pi_acc

def main():
    """Craft an adversarial version of every image under the image folder.

    Loads an ensemble of face-recognition models, then for each image runs
    ``steps`` sign-gradient updates that increase the summed MSE between the
    models' features of the original and of the perturbed image. The result
    is written as a .jpg into a fixed output directory.
    """
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    InceptionResnet_model_1 = InceptionResnetV1(
        pretrained='vggface2').eval().to(device)
    print('load InceptionResnet-vggface2.pt successfully')
    InceptionResnet_model_2 = InceptionResnetV1(
        pretrained='casia-webface').eval().to(device)
    print('load InceptionResnet-casia-webface.pt successfully')
    IR_50_model_1 = IR_50([112, 112])
    IR_50_model_1.load_state_dict(
        torch.load(
            '/notebooks/Workspace/tmp/pycharm_project_314/TianChi/Face_recognition/irse/model/backbone_ir50_asia.pth'
        ))
    IR_50_model_1.eval().to(device)
    print('load IR_50 successfully')
    IR_152_model_1 = IR_152([112, 112])
    IR_152_model_1.load_state_dict(
        torch.load(
            '/notebooks/Workspace/tmp/pycharm_project_314/TianChi/Face_recognition/irse/model/Backbone_IR_152_Epoch_112_Batch_2547328_Time_2019-07-13-02-59_checkpoint.pth'
        ))
    IR_152_model_1.eval().to(device)
    print('load IR_152 successfully')
    # IR_152_model_2 = IR_152([112, 112])
    # IR_152_model_2.load_state_dict(
    #     torch.load(
    #         '/notebooks/Workspace/tmp/pycharm_project_314/TianChi/Face_recognition/irse/model/Head_ArcFace_Epoch_112_Batch_2547328_Time_2019-07-13-02-59_checkpoint.pth'))
    # IR_152_model_2.eval().to(device)
    # print('load IR_152_ArcFace successfully')
    import insightface
    Insightface_iresnet34 = insightface.iresnet34(pretrained=True)
    Insightface_iresnet34.eval().to(device)
    print('load Insightface_iresnet34 successfully')
    Insightface_iresnet50 = insightface.iresnet50(pretrained=True)
    Insightface_iresnet50.eval().to(device)
    print('load Insightface_iresnet50 successfully')
    Insightface_iresnet100 = insightface.iresnet100(pretrained=True)
    Insightface_iresnet100.eval().to(device)
    print('load Insightface_iresnet100 successfully')
    ###########################vgg16
    from Face_recognition.vgg16.vgg16 import CenterLossModel, loadCheckpoint
    vgg16_checkpoint = loadCheckpoint(
        '/notebooks/Workspace/tmp/pycharm_project_314/TianChi/Face_recognition/vgg16/model'
    )
    VGG16 = CenterLossModel(embedding_size=512,
                            num_classes=712,
                            checkpoint=vgg16_checkpoint).eval().to(device)
    print('load VGG16 successfully')
    ###########################resnet34
    criterion = nn.MSELoss()
    # cpu
    # collect all images to attack
    paths = []
    picpath = '/notebooks/Workspace/tmp/pycharm_project_314/TianChi/images'
    for root, dirs, files in os.walk(picpath):
        for f in files:
            paths.append(os.path.join(root, f))
    random.shuffle(paths)
    # paras
    eps = 1  # per-step size in 1/255 units (used as eps / 255. below)
    steps = 50
    output_path = './output_img'  # not used in this function
    momentum = 1.0
    for path in tqdm(paths):
        start = time.time()
        print('processing ' + path + ' ===============>')
        image = Image.open(path)
        # define paras
        # in_tensor is origin tensor of image
        # in_variable changes with gradient
        in_tensor = img2tensor(np.array(image))
        # print(in_tensor.shape)
        in_variable = in_tensor.detach().to(device)
        in_tensor = in_tensor.squeeze().to(device)
        adv = None
        # in_tensor= img2tensor_224(image)
        # # print(in_tensor.shape)
        # in_variable = in_tensor.to(device)
        # in_tensor = in_tensor.squeeze().to(device)
        # adv = None
        #
        # # origin feature
        # Features of the unperturbed image: the reference for the loss below.
        origin_InceptionResnet_model_1 = InceptionResnet_model_1(in_variable)
        origin_InceptionResnet_model_2 = InceptionResnet_model_2(in_variable)
        origin_IR_50_model_1 = IR_50_model_1(in_variable)
        origin_IR_152_model_1 = IR_152_model_1(in_variable)
        # # origin_IR_152_model_2 = IR_152_model_2(in_variable)
        origin_Insightface_iresent34 = Insightface_iresnet34(in_variable)
        origin_Insightface_iresent50 = Insightface_iresnet50(in_variable)
        origin_Insightface_iresent100 = Insightface_iresnet100(in_variable)
        #######
        origin_VGG16 = VGG16.forward_GetFeature(in_variable)
        # 1. untarget attack -> random noise
        # 2. target attack -> x = alpha * target + (1 - alpha) * x
        perturbation = torch.Tensor(3, 112, 112).uniform_(-0.1, 0.1).to(device)
        in_variable = in_variable + perturbation
        in_variable.data.clamp_(-1.0, 1.0)
        in_variable.requires_grad = True
        g_noise = 0.0  # sum gradient
        for i in range(steps):
            # print('step: ' + str(i))
            # in_variable = in_variable.to(device)
            out_InceptionResnet_model_1 = InceptionResnet_model_1(in_variable)
            out_InceptionResnet_model_2 = InceptionResnet_model_2(in_variable)
            out_IR_50_model_1 = IR_50_model_1(in_variable)
            out_IR_152_model_1 = IR_152_model_1(in_variable)
            # # out_IR_152_model_2 = IR_152_model_2(in_variable)
            out_Insightface_iresent34 = Insightface_iresnet34(in_variable)
            out_Insightface_iresent50 = Insightface_iresnet50(in_variable)
            out_Insightface_iresent100 = Insightface_iresnet100(in_variable)
            #####
            out_VGG16 = VGG16.forward_GetFeature(in_variable)
            #####
            # Summed feature distance over all models; the update below moves
            # the image in the direction that increases it.
            loss = criterion(origin_InceptionResnet_model_1, out_InceptionResnet_model_1) + \
                criterion(origin_InceptionResnet_model_2, out_InceptionResnet_model_2) + \
                criterion(origin_IR_50_model_1, out_IR_50_model_1) + \
                criterion(origin_IR_152_model_1, out_IR_152_model_1) + \
                criterion(origin_Insightface_iresent34, out_Insightface_iresent34) + \
                criterion(origin_Insightface_iresent50, out_Insightface_iresent50) + \
                criterion(origin_Insightface_iresent100, out_Insightface_iresent100) + \
                criterion(origin_VGG16, out_VGG16)
            # print('loss : %f' % loss)
            # compute gradients
            loss.backward(retain_graph=True)
            # Momentum accumulation of the L1-normalised gradient.
            g_noise = momentum * g_noise + (in_variable.grad / in_variable.grad.data.norm(1))
            g_noise = g_noise / g_noise.data.norm(1)
            # Even steps blur and clamp the accumulated gradient; odd steps
            # pass it through TVLoss instead.
            if i % 2 == 0:
                kernel = gkern(3, 2).astype(np.float32)
                gaussian_blur1 = GaussianBlur(kernel).to(device)
                g_noise = gaussian_blur1(g_noise)
                g_noise = torch.clamp(g_noise, -0.1, 0.1)
            else:
                addition = TVLoss()
                g_noise = addition(g_noise)
            in_variable.data = in_variable.data + (
                (eps / 255.) * torch.sign(g_noise)
            )  # * torch.from_numpy(mat).unsqueeze(0).float()
            in_variable.grad.data.zero_()  # unnecessary
        # deprocess image
        adv = in_variable.data.cpu().numpy()[0]  # (3, 112, 112)
        perturbation = (adv - in_tensor.cpu().numpy())
        adv = adv * 128.0 + 127.0
        # channel-first -> channel-last, then reverse the channel order
        adv = adv.swapaxes(0, 1).swapaxes(1, 2)
        adv = adv[..., ::-1]
        adv = np.clip(adv, 0, 255).astype(np.uint8)
        sample_dir = '/notebooks/Workspace/tmp/pycharm_project_314/TianChi/main_4_output-8-29/'
        if not os.path.exists(sample_dir):
            os.makedirs(sample_dir)
        # output name: input file name with its extension replaced by .jpg
        advimg = sample_dir + path.split('/')[-1].split('.')[-2] + '.jpg'
        cv2.imwrite(advimg, adv)
        print("save path is " + advimg)
        print('cost time is %.2f s ' % (time.time() - start))

def main():
    """Craft adversarial face images with a six-model feature-space attack.

    Steps performed:
      1. Load six face-recognition backbones (placed on ``device_0`` /
         ``device_1``) and put them in eval mode.
      2. Obtain the target feature ``mean_face`` from
         ``cal_mean_face_by_extend_dataset(models)``.
      3. For every batch of ``CustomDataset``: start from the clean images plus
         small uniform noise and run ``steps`` momentum sign-gradient updates
         that ascend ``alpha * MSE(avg_feat, clean_feat)
         - beta * MSE(avg_feat, mean_face)``, where ``avg_feat`` is the mean of
         the six model outputs. After each update the image is clipped to
         within +/-0.1 of the clean image.
      4. De-normalise each result and write it as a JPEG into ``sample_dir``.
    """
    sample_dir = './test_DI-2-FGSM-3/'
    if not os.path.exists(sample_dir):
        os.makedirs(sample_dir)

    # ------------------------------------------------------------------
    # Load the model ensemble.
    # ------------------------------------------------------------------
    InceptionResnet_model_1 = InceptionResnetV1(
        pretrained='vggface2').eval().to(device_0)
    print('load InceptionResnet-vggface2.pt successfully')

    IR_50_model_1 = IR_50([112, 112])
    IR_50_model_1.load_state_dict(
        torch.load(
            '/notebooks/Workspace/tmp/pycharm_project_314/TianChi/Face_recognition/irse/model/backbone_ir50_asia.pth'
        ))
    IR_50_model_1.eval().to(device_0)
    print('load IR_50 successfully')

    Insightface_iresnet34 = insightface.iresnet34(pretrained=True)
    Insightface_iresnet34.eval().to(device_1)
    print('load Insightface_iresnet34 successfully')

    Insightface_iresnet50 = insightface.iresnet50(pretrained=True)
    Insightface_iresnet50.eval().to(device_1)
    print('load Insightface_iresnet50 successfully')

    Insightface_iresnet100 = insightface.iresnet100(pretrained=True)
    Insightface_iresnet100.eval().to(device_1)
    print('load Insightface_iresnet100 successfully')

    arc_face_ir_se_50 = Arcface()
    arc_face_ir_se_50.eval()
    arc_face_ir_se_50.to(device_0)

    models = [
        InceptionResnet_model_1,
        IR_50_model_1,
        Insightface_iresnet34,
        Insightface_iresnet50,
        Insightface_iresnet100,
        arc_face_ir_se_50,
    ]

    criterion = nn.MSELoss()  # evaluated on CPU tensors (outputs are .cpu()'d)

    # NOTE(review): ``paths`` is not consumed by any active code below. The
    # walk + shuffle is kept because shuffling advances the global ``random``
    # state, which later project code may depend on -- confirm, then delete.
    paths = []
    picpath = '/notebooks/Workspace/tmp/pycharm_project_314/TianChi/images'
    for root, dirs, files in os.walk(picpath):
        for f in files:
            paths.append(os.path.join(root, f))
    random.shuffle(paths)

    # ------------------------------------------------------------------
    # Attack hyper-parameters.
    # ------------------------------------------------------------------
    eps = 1          # per-step size, in units of 1/255
    steps = 50       # gradient steps per batch
    momentum = 0.3   # decay applied to the accumulated gradient
    alpha = 0.35     # weight of the "move away from the clean feature" term
    beta = 0.3       # weight of the "move towards mean_face" term
    max_shift = 0.1  # maximum per-pixel deviation from the clean image
    num_images = 712  # dataset size used only for the progress read-out

    print('cal mean feature face #########################')
    mean_face = cal_mean_face_by_extend_dataset(models)
    print('finish cal mean face...')

    print('######attack...##################')
    from mydataset import CustomDataset
    custom_dataset = CustomDataset()
    train_loader = torch.utils.data.DataLoader(dataset=custom_dataset,
                                               batch_size=6,
                                               shuffle=True)
    count = 0
    progressRate = 0.0
    for x, path in train_loader:
        start = time.time()
        print('processing ' + str(progressRate) + ' ===============>')

        # Clean reference image. ``detach()`` shares storage with ``x``, so the
        # tensor that gets perturbed in place below MUST be a clone; otherwise
        # the reference features would be computed on the perturbed image.
        origin_variable = x.detach()
        in_variable = origin_variable.clone()

        # Per-pixel box around the clean image that the result must stay in.
        in_variable_max = origin_variable + max_shift
        in_variable_min = origin_variable - max_shift

        # Random start: small uniform noise, then clamp to the input range.
        perturbation = torch.Tensor(x.size(0), 3, 112,
                                    112).uniform_(-0.05, 0.05)  # experimental setting
        in_variable += perturbation
        in_variable.data.clamp_(-1.0, 1.0)
        in_variable.requires_grad = True
        g_noise = torch.zeros_like(in_variable)

        # Features of the clean image under every model, averaged.
        origin_InceptionResnet_model_1 = InceptionResnet_model_1(
            origin_variable.to(device_0)).cpu()
        origin_IR_50_model_1 = IR_50_model_1(
            origin_variable.to(device_0)).cpu()
        origin_Insightface_iresent34 = Insightface_iresnet34(
            origin_variable.to(device_1)).cpu()
        origin_Insightface_iresent50 = Insightface_iresnet50(
            origin_variable.to(device_1)).cpu()
        origin_Insightface_iresent100 = Insightface_iresnet100(
            origin_variable.to(device_1)).cpu()
        origin_arcface = arc_face_ir_se_50(origin_variable.to(device_0)).cpu()

        origin_average_out = (origin_InceptionResnet_model_1 +
                              origin_IR_50_model_1 +
                              origin_Insightface_iresent34 +
                              origin_Insightface_iresent50 +
                              origin_Insightface_iresent100 +
                              origin_arcface) / 6.

        # Iterative momentum sign-gradient attack.
        for step in range(steps):
            print('step: ' + str(step))
            mediate_InceptionResnet_model_1 = InceptionResnet_model_1(
                in_variable.to(device_0)).cpu()
            mediate_IR_50_model_1 = IR_50_model_1(
                in_variable.to(device_0)).cpu()
            mediate_Insightface_iresent34 = Insightface_iresnet34(
                in_variable.to(device_1)).cpu()
            mediate_Insightface_iresent50 = Insightface_iresnet50(
                in_variable.to(device_1)).cpu()
            mediate_Insightface_iresent100 = Insightface_iresnet100(
                in_variable.to(device_1)).cpu()
            mediate_arcface = arc_face_ir_se_50(in_variable.to(device_0)).cpu()

            mediate_average_out = (mediate_InceptionResnet_model_1 +
                                   mediate_IR_50_model_1 +
                                   mediate_Insightface_iresent34 +
                                   mediate_Insightface_iresent50 +
                                   mediate_Insightface_iresent100 +
                                   mediate_arcface) / 6.

            # loss1: distance to the clean feature; loss2: distance to mean_face.
            loss1 = criterion(mediate_average_out, origin_average_out)
            loss2 = criterion(mediate_average_out, mean_face)
            loss = alpha * loss1 - beta * loss2

            # retain_graph is required: the clean-image features above were
            # computed once, outside this loop, and their graph is traversed
            # again by every backward() call.
            loss.backward(retain_graph=True)

            # Momentum accumulation of the L1-normalised gradient.
            g_noise = momentum * g_noise + (
                in_variable.grad / in_variable.grad.data.norm(1))
            g_noise = g_noise / g_noise.data.norm(1)

            # Blend a Gaussian-blurred copy with the TVLoss-processed copy.
            kernel = gkern(3, 2).astype(np.float32)
            gaussian_blur1 = GaussianBlur(kernel)
            g1 = gaussian_blur1(g_noise)
            addition = TVLoss()
            g2 = addition(g_noise)
            g_noise = 0.25 * g1 + 0.75 * g2

            # Signed step, then project back into the box around the clean image.
            in_variable.data = in_variable.data + (
                (eps / 255.) * torch.sign(g_noise))
            in_variable.data = clip_by_tensor(in_variable.data,
                                              in_variable_min.data,
                                              in_variable_max.data)
            in_variable.grad.data.zero_()

        # De-process and save every image of the batch.
        adv_batch = in_variable.data.cpu().numpy()
        for idx in range(len(adv_batch)):
            adv = adv_batch[idx]  # (3, 112, 112)
            adv = adv * 128.0 + 127.0  # presumably undoes the input normalisation -- confirm
            adv = adv.swapaxes(0, 1).swapaxes(1, 2)  # CHW -> HWC
            adv = adv[..., ::-1]  # reverse channel order (presumably RGB -> BGR for cv2)
            adv = np.clip(adv, 0, 255).astype(np.uint8)

            advimg = sample_dir + path[idx].split('/')[-1].split(
                '.')[-2] + '.jpg'
            print(advimg)
            cv2.imwrite(advimg, adv)
            print("save path is " + advimg)

        print('cost time is %.2f s ' % (time.time() - start))
        # Count the images actually processed (the last batch may be short).
        count += x.size(0)
        progressRate = count / float(num_images)

def soft_thresholding(input, alpha):
    """Apply the soft-thresholding (shrinkage) operator element-wise.

    Computes ``sign(input) * max(|input| - alpha, 0)``.

    Works for tensors of any shape and on any device: the zero floor is
    applied with ``torch.clamp`` instead of comparing against a freshly
    allocated CUDA vector of length ``len(input)``.

    Args:
        input: tensor to shrink.
        alpha: threshold; a scalar or a tensor broadcastable to ``input``.

    Returns:
        Tensor with the same shape and device as ``input``.
    """
    shrunk_magnitude = torch.clamp(torch.abs(input) - alpha, min=0)
    return torch.sign(input) * shrunk_magnitude

def green_coordinates_3D(query, vertices, faces, face_normals=None, verbose=False):
    """
    Green coordinates of query points with respect to a triangular cage.

    Lipman et.al. sum_{i in N}(phi_i*v_i)+sum_{j in F}(psi_j*n_j)
    http://www.wisdom.weizmann.ac.il/~ylipman/GC/CageMesh_GreenCoords.cpp
    params:
        query    (B,P,D), D=3
        vertices (B,N,D), D=3
        faces    (B,F,3)
        face_normals  optional (B,F,D); computed with
                      compute_face_normals_and_areas when None
        verbose  not used by this function
    return:
        phi_i (B,P,N)  per-vertex weights, divided by their sum over N
        psi_j (B,P,F)  per-face weights (non-negative: abs of the signed sum)
        exterior_flag (B,P,1)  True where the un-normalised vertex weights
                               sum to less than 0.5 (last dim kept by keepdim)
    """
    B, F, _ = faces.shape
    _, P, D = query.shape
    _, N, D = vertices.shape
    # (B,F,D)
    n_t = face_normals
    if n_t is None:
        # compute face normal
        n_t, _ = compute_face_normals_and_areas(vertices, faces)

    # everything below uses a detached copy of the vertices; any normals
    # computed above were computed from the non-detached ones
    vertices = vertices.detach()
    # (B,N,D) (B,F,3) -> (B,F,3,3) face points
    v_jl = torch.gather(vertices.unsqueeze(1).expand(-1,F,-1,-1), 2, faces.unsqueeze(-1).expand(-1,-1,-1,3))

    # v_jl = v_jl - x (B,P,F,3,3): face corners relative to each query point
    v_jl = v_jl.view(B,1,F,3,3) - query.view(B,P,1,1,3)
    # (B,P,F,D).(B,1,F,D) -> (B,P,F,1)*(B,P,F,D) projection of v1_x on the normal
    p = dot_product(v_jl[:,:,:,0,:], n_t.unsqueeze(1).expand(-1,P,-1,-1), dim=-1, keepdim=True)*n_t.unsqueeze(1)

    # B,P,F,3,D -> B,P,F,3
    # sign of ((v_l - p) x (v_{l+1} - p)) . n for each of the three edges;
    # the index list [1,2,0] selects the "next" corner of every edge
    s_l = torch.sign(dot_product(torch.cross(v_jl-p.unsqueeze(-2), v_jl[:,:,:,[1,2,0],:]-p.unsqueeze(-2), dim=-1), n_t.view(B,1,F,1,D)))
    # import pdb; pdb.set_trace()
    # (B,P,F,3)
    I_l = _gcTriInt(p, v_jl, v_jl[:,:,:,[1,2,0],:], None)
    # (B,P,F)
    I = -torch.abs(torch.sum(s_l*I_l, dim=-1))
    # face weight is -I, i.e. |sum_l s_l * I_l|
    GC_face = -I
    assert(check_values(GC_face))
    # same integral helper evaluated with a zero first argument and the edge
    # end points swapped
    II_l = _gcTriInt(torch.zeros_like(p), v_jl[:,:,:,[1,2,0], :], v_jl, None)
    # (B,P,F,3,D)
    N_l = torch.cross(v_jl[:,:,:,[1,2,0],:], v_jl, dim=-1)
    N_l_norm = torch.norm(N_l, dim=-1, p=2)
    # edges whose cross product is (near) zero contribute nothing
    II_l.masked_fill_(N_l_norm<1e-7, 0)
    # normalize but ignore those with small norms
    N_l = torch.where((N_l_norm>1e-7).unsqueeze(-1), N_l/N_l_norm.unsqueeze(-1), N_l)
    # (B,P,F,D)
    omega = n_t.unsqueeze(1)*I.unsqueeze(-1)+torch.sum(N_l*II_l.unsqueeze(-1), dim=-2)
    eps = 1e-6
    # (B,P,F,3); 1e-10 guards the division
    phi_jl = dot_product(N_l[:,:,:,[1,2,0],:], omega.unsqueeze(-2), dim=-1)/(dot_product(N_l[:,:,:,[1,2,0],:], v_jl, dim=-1)+1e-10)
    # on the same plane don't contribute to phi
    phi_jl.masked_fill_((torch.norm(omega, p=2, dim=-1)<eps).unsqueeze(-1), 0)
    # sum per face weights to per vertex weights
    GC_vertex = scatter_add(phi_jl.reshape(B,P,-1).contiguous(), faces.unsqueeze(1).expand(-1,P,-1,-1).reshape(B,P,-1), 2, out_size=(B,P,N))
    assert(check_values(GC_vertex))

    # NOTE the point is inside the face, remember factor 2
    # insideFace = (torch.norm(omega,dim=-1)<1e-5)&torch.all(s_l>0,dim=-1)
    # phi_jl = torch.where(insideFace.unsqueeze(-1), phi_jl, torch.zeros_like(phi_jl))

    # normalize so the vertex weights of every query point sum to ~1;
    # the flag is taken from the sum BEFORE normalisation
    sumGC_V = torch.sum(GC_vertex, dim=2, keepdim=True)

    exterior_flag = sumGC_V<0.5

    GC_vertex = GC_vertex/(sumGC_V+1e-10)
    # GC_vertex.masked_fill_(sumGC_V.abs()<eps, 0.0)

    return GC_vertex, GC_face, exterior_flag

def _eps(self, shape, dtype, device): r = torch.normal(mean=0.0, std=1.0, size=(shape,), dtype=dtype, device=device) return torch.abs(torch.sqrt(torch.abs(r))) * torch.sign(r)

def forward(self, x, doc_lens):
    """Score every sentence of every document with a selection probability.

    Pipeline: embed the tokens, run the word-level RNN, pool each sentence
    with attention, pad the sentences into documents, run the sentence-level
    RNN, pool each document with attention, then walk through the sentences
    of each document and combine content, salience, novelty and position
    terms through a sigmoid.

    All helper tensors (masks, running summary, position indices) are
    created on the device of the data they are combined with, so the method
    runs on CPU as well as on GPU.

    Args:
        x: (N, L) integer token ids, one row per sentence. Id 0 is treated as
            padding by the word mask (ids are assumed non-negative).
        doc_lens: number of sentences in each document; ``len(doc_lens)`` is
            the batch size B. Presumably ``sum(doc_lens) == N`` -- this is
            what ``self.pad_doc`` is expected to rely on; confirm there.

    Returns:
        The per-sentence probabilities, concatenated in document order and
        squeezed.
    """
    N = x.size(0)
    L = x.size(1)
    B = len(doc_lens)
    H = self.args.hidden_size

    # 1 at padding positions (id == 0), 0 elsewhere; stays on x's device.
    word_mask = torch.ones_like(x) - torch.sign(x)
    word_mask = word_mask.detach().to(torch.uint8).view(N, 1, L)

    x = self.embed(x)  # (N,L,D)
    x, _ = self.word_RNN(x)

    # word-level attention
    query = self.word_query.expand(N, -1, -1).contiguous()
    self.attn.set_mask(word_mask)
    word_out = self.attn(query, x)[0].squeeze(1)  # (N,2*H)

    x = self.pad_doc(word_out, doc_lens)
    # sentence-level RNN
    sent_out = self.sent_RNN(x)[0]  # (B,max_doc_len,2*H)
    device = sent_out.device

    # 1 at padded sentence slots, 0 at real sentences.
    max_doc_len = max(doc_lens)
    sent_mask = torch.ones(B, max_doc_len, dtype=torch.uint8, device=device)
    for doc_idx, doc_len in enumerate(doc_lens):
        sent_mask[doc_idx, :doc_len] = 0
    sent_mask = sent_mask.view(B, 1, max_doc_len)

    # sentence-level attention (applied to the padded word-attention output)
    query = self.sent_query.expand(B, -1, -1).contiguous()
    self.attn.set_mask(sent_mask)
    docs = self.attn(query, x)[0].squeeze(1)  # (B,2*H)

    probs = []
    for index, doc_len in enumerate(doc_lens):
        valid_hidden = sent_out[index, :doc_len, :]  # (doc_len,2*H)
        doc = torch.tanh(self.fc(docs[index])).unsqueeze(0)
        # running, probability-weighted summary of the sentences seen so far
        s = torch.zeros(1, 2 * H, device=device)
        for position, h in enumerate(valid_hidden):
            h = h.view(1, -1)  # (1,2*H)

            # absolute position embedding
            abs_index = torch.tensor([[position]],
                                     dtype=torch.long,
                                     device=device)
            abs_features = self.abs_pos_embed(abs_index).squeeze(0)

            # relative position, bucketed into the range 0..9
            rel_index = int(round((position + 1) * 9.0 / doc_len))
            rel_index = torch.tensor([[rel_index]],
                                     dtype=torch.long,
                                     device=device)
            rel_features = self.rel_pos_embed(rel_index).squeeze(0)

            # classification layer
            content = self.content(h)
            salience = self.salience(h, doc)
            novelty = -1 * self.novelty(h, torch.tanh(s))
            abs_p = self.abs_pos(abs_features)
            rel_p = self.rel_pos(rel_features)
            prob = torch.sigmoid(content + salience + novelty + abs_p +
                                 rel_p + self.bias)
            s = s + torch.mm(prob, h)
            probs.append(prob)
    return torch.cat(probs).squeeze()

def forward(ctx, input):
    """Return the element-wise sign of ``input`` (-1, 0 or +1).

    ``ctx`` is accepted for the autograd-function calling convention and is
    not used.
    """
    signs = torch.sign(input)
    return signs

def rprop(opfunc, x, config, state=None):
    """A plain implementation of RPROP (resilient back-propagation).

    Every component of ``x`` has its own step size. The step size is
    multiplied by ``etaplus`` when the gradient kept its sign since the last
    iteration, by ``etaminus`` when it flipped, and left alone otherwise; it
    is then limited to ``[stepsizemin, stepsizemax]``. Components whose
    gradient flipped are not moved in that iteration.

    ARGS:
    - `opfunc` : a function that takes a single input (X), the point of
                 evaluation, and returns f(X) and df/dX. The returned
                 gradient tensor is modified in place (flipped components
                 are zeroed).
    - `x`      : the initial point; updated in place
    - `config` : dictionary of hyper-parameters (may be None when `state`
                 is given, in which case all defaults are used)
    - `config['stepsize']`    : initial step size, common to all components
                                (default 0.1)
    - `config['etaplus']`     : multiplicative increase factor, > 1 (default 1.2)
    - `config['etaminus']`    : multiplicative decrease factor, < 1 (default 0.5)
    - `config['stepsizemax']` : maximum stepsize allowed (default 50)
    - `config['stepsizemin']` : minimum stepsize allowed (default 1e-6)
    - `config['niter']`       : number of iterations (default 1)
    - `state`  : dictionary holding the optimizer state between calls;
                 defaults to `config`. After a call it contains the tensors
                 `delta` (last gradient), `stepsize` (per-component step
                 sizes) and `sign` (work buffer).

    RETURN:
    - `x`     : the new x vector (the same object that was passed in)
    - `f(x)`  : list with the function value evaluated before each update

    RAISES:
    - ValueError if both `config` and `state` are None.

    (Martin Riedmiller, Koray Kavukcuoglu 2013)
    """
    if config is None and state is None:
        raise ValueError("rprop requires a dictionary to retain state between iterations")

    # (0) get/update state
    if config is None:
        # only a state table was supplied: run with default hyper-parameters
        config = {}
    if state is None:
        state = config
    stepsize = config.get('stepsize', 0.1)
    etaplus = config.get('etaplus', 1.2)
    etaminus = config.get('etaminus', 0.5)
    stepsizemax = config.get('stepsizemax', 50.0)
    stepsizemin = config.get('stepsizemin', 1e-06)
    niter = config.get('niter', 1)

    hfx = []

    for _ in range(niter):
        # (1) evaluate f(x) and df/dx
        fx, dfdx = opfunc(x)

        # init temp storage, matching the gradient's type and device
        if 'delta' not in state:
            state['delta'] = dfdx.new(dfdx.size()).zero_()
            state['stepsize'] = dfdx.new(dfdx.size()).fill_(stepsize)
            state['sign'] = dfdx.new(dfdx.size())

        # sign of derivative from last step to this one
        torch.mul(dfdx, state['delta'], out=state['sign']).sign_()

        # masks of >0, <0 and ==0 entries; computed on the gradient's device
        same_direction = state['sign'] > 0
        flipped = state['sign'] < 0
        neutral = state['sign'] == 0

        # turn the sign buffer into the per-component step-size factor
        state['sign'][same_direction] = etaplus
        state['sign'][flipped] = etaminus
        state['sign'][neutral] = 1

        # update stepsizes with step size updates
        state['stepsize'].mul_(state['sign'])

        # threshold step sizes: cap at the maximum first, then raise to the
        # minimum
        state['stepsize'].clamp_(max=stepsizemax)
        state['stepsize'].clamp_(min=stepsizemin)

        # for dir<0, dfdx=0
        # for dir>=0 dfdx=dfdx
        dfdx[flipped] = 0
        torch.sign(dfdx, out=state['sign'])

        # update weights: x <- x - sign(dfdx) * stepsize
        x.addcmul_(state['sign'], state['stepsize'], value=-1)

        # remember the (masked) gradient for the next sign comparison
        state['delta'].copy_(dfdx)

        hfx.append(fx)

    # return x*, table of f(x) values from each step
    return x, hfx

def cbrt(x):
    """Real cube root of ``x``, element-wise.

    The magnitude is computed as ``exp(log(|x|) / 3)`` and the sign of ``x``
    is re-applied afterwards, so negative inputs yield negative roots.
    """
    log_magnitude = torch.log(torch.abs(x))
    root_magnitude = torch.exp(log_magnitude / 3.0)
    return torch.sign(x) * root_magnitude

def main():
    """Evaluate a pre-trained face network on one IJB-A 1:1 verification split.

    Steps:
      1. Build the network selected by ``--model_type`` and load the
         checkpoint at ``--model_path``.
      2. Turn it into a feature extractor (drop the classifier layer, or keep
         only the first ``fc`` layer for the 512-d variants).
      3. Extract a feature for every face crop of the split selected by
         ``--fold`` (cached as a ``.mat`` file under ``cache-<model_type>``).
      4. Average-pool the features of every template, optionally apply signed
         square-root normalisation, L2-normalise, score every comparison pair
         (negative L2 distance, or dot product with ``--cosine``) and report
         TAR at fixed FAR levels. Results are written as YAML and ``.mat``.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('-e', '--exp_name', default='ijba_eval')
    parser.add_argument('-g', '--gpu', type=int, default=0)
    parser.add_argument('-d', '--data_dir',
                        default='/home/renyi/arunirc/data1/datasets/CS2')
    parser.add_argument('-p', '--protocol_dir',
                        default='/home/renyi/arunirc/data1/datasets/IJB-A/IJB-A_11_sets/')
    # NOTE(review): only the values 1 and 10 are accepted here; if every
    # split should be selectable this probably wants range(1, 11) -- confirm.
    parser.add_argument('--fold', type=int, default=1, choices=[1,10])
    parser.add_argument('--sqrt', action='store_true', default=False,
                        help='Add signed sqrt normalization')
    parser.add_argument('--cosine', action='store_true', default=False,
                        help='Use cosine similarity instead of L2 distance')
    parser.add_argument('--batch_size', type=int, default=100)
    parser.add_argument('-m', '--model_path', default=MODEL_PATH,
                        help='Path to pre-trained model')
    parser.add_argument('--model_type', default=MODEL_TYPE,
                        choices=['resnet50', 'resnet101', 'resnet101-512d', 'resnet101-512d-norm'])
    args = parser.parse_args()

    # CUDA setup
    os.environ['CUDA_VISIBLE_DEVICES'] = str(args.gpu)
    cuda = torch.cuda.is_available()
    torch.manual_seed(1337)
    if cuda:
        torch.cuda.manual_seed(1337)
        torch.backends.cudnn.enabled = True
        torch.backends.cudnn.benchmark = True  # enable if all images are same size

    # -----------------------------------------------------------------------------
    # 1. Model
    # -----------------------------------------------------------------------------
    num_class = 8631
    if args.model_type == 'resnet50':
        model = torchvision.models.resnet50(pretrained=False)
        model.fc = torch.nn.Linear(2048, num_class)
    elif args.model_type == 'resnet101':
        model = torchvision.models.resnet101(pretrained=False)
        model.fc = torch.nn.Linear(2048, num_class)
    elif args.model_type == 'resnet101-512d':
        model = torchvision.models.resnet101(pretrained=False)
        layers = []
        layers.append(torch.nn.Linear(2048, 512))
        layers.append(torch.nn.Linear(512, num_class))
        model.fc = torch.nn.Sequential(*layers)
    elif args.model_type == 'resnet101-512d-norm':
        model = torchvision.models.resnet101(pretrained=False)
        layers = []
        layers.append(torch.nn.Linear(2048, 512))
        layers.append(models.NormFeat(scale_factor=50.0))
        layers.append(torch.nn.Linear(512, num_class))
        model.fc = torch.nn.Sequential(*layers)
    else:
        raise NotImplementedError

    checkpoint = torch.load(args.model_path)
    if checkpoint['arch'] == 'DataParallel':
        # The weights were saved from a DataParallel wrapper: wrap, load,
        # then unwrap again.
        model = torch.nn.DataParallel(model, device_ids=[0, 1, 2, 3, 4])
        model.load_state_dict(checkpoint['model_state_dict'])
        model = model.module  # get network module from inside its DataParallel wrapper
    else:
        model.load_state_dict(checkpoint['model_state_dict'])

    if cuda:
        model = model.cuda()

    # Convert the trained network into a "feature extractor"
    feature_map = list(model.children())
    if args.model_type == 'resnet101-512d' or args.model_type == 'resnet101-512d-norm':
        # keep the backbone and only the first (2048 -> 512) fc layer
        model.eval()
        extractor = model
        extractor.fc = nn.Sequential(extractor.fc[0])
    else:
        # drop the final classifier layer
        feature_map.pop()
        extractor = nn.Sequential(*feature_map)

    extractor.eval()  # ALWAYS set to evaluation mode (fixes BatchNorm, dropout, etc.)

    # -----------------------------------------------------------------------------
    # 2. Dataset
    # -----------------------------------------------------------------------------
    # Honour the --fold command-line option (default 1).
    fold_id = args.fold
    file_ext = '.jpg'
    RGB_MEAN = [ 0.485, 0.456, 0.406 ]
    RGB_STD = [ 0.229, 0.224, 0.225 ]

    test_transform = transforms.Compose([
        transforms.Scale((224,224)),
        transforms.ToTensor(),
        transforms.Normalize(mean = RGB_MEAN,
                             std = RGB_STD),
    ])

    pairs_path = osp.join(args.protocol_dir, 'split%d' % fold_id,
                          'verify_comparisons_%d.csv' % fold_id)
    pairs = utils.read_ijba_pairs(pairs_path)
    protocol_file = osp.join(args.protocol_dir, 'split%d' % fold_id,
                             'verify_metadata_%d.csv' % fold_id)
    metadata = utils.get_ijba_1_1_metadata(protocol_file)  # dict
    assert np.all(np.unique(pairs) == np.unique(metadata['template_id']))  # sanity-check
    path_list = np.array([osp.join(args.data_dir, str(x)+file_ext)
                          for x in metadata['sighting_id'] ])  # face crops saved as <sighting_id.jpg>

    # Create data loader
    test_loader = torch.utils.data.DataLoader(
        data_loader.IJBADataset(
            path_list, test_transform, split=fold_id),
        batch_size=args.batch_size, shuffle=False )

    # -----------------------------------------------------------------------------
    # 3. Feature extraction
    # -----------------------------------------------------------------------------
    print('Feature extraction...')
    cache_dir = osp.join(here, 'cache-' + args.model_type)
    if not osp.exists(cache_dir):
        os.makedirs(cache_dir)

    feat_path = osp.join(cache_dir, 'feat-fold-%d.mat' % fold_id)

    if not osp.exists(feat_path):
        features = []
        for batch_idx, images in tqdm.tqdm(enumerate(test_loader),
                                           total=len(test_loader),
                                           desc='Extracting features'):
            x = Variable(images, volatile=True)  # test-time memory conservation
            if cuda:
                x = x.cuda()
            feat = extractor(x)
            if cuda:
                feat = feat.data.cpu()  # free up GPU
            else:
                feat = feat.data
            features.append(feat)

        features = torch.cat(features, dim=0)  # (n_batch*batch_sz) x 512
        sio.savemat(feat_path, {'feat': features.cpu().numpy() })
    else:
        # reuse the cached features of an earlier run
        dat = sio.loadmat(feat_path)
        features = torch.FloatTensor(dat['feat'])
        del dat
        print('Loaded.')

    # -----------------------------------------------------------------------------
    # 4. Verification
    # -----------------------------------------------------------------------------
    scores = []
    labels = []

    # labels: is_same_subject
    print('Computing pair labels . . . ')
    for pair in tqdm.tqdm(pairs):  # TODO - check tqdm
        sel_t0 = np.where(metadata['template_id'] == pair[0])
        sel_t1 = np.where(metadata['template_id'] == pair[1])
        subject0 = np.unique(metadata['subject_id'][sel_t0])
        subject1 = np.unique(metadata['subject_id'][sel_t1])
        labels.append(int(subject0 == subject1))
    labels = np.array(labels)
    print('done')

    # templates: average pool, then L2-normalize
    print('Pooling templates . . . ')
    pooled_features = []
    template_set = np.unique(metadata['template_id'])
    for tid in tqdm.tqdm(template_set):
        sel = np.where(metadata['template_id'] == tid)
        # pool template: 1 x n x 512 -> 1 x 512
        feat = features[sel,:].mean(1)
        if args.sqrt:  # signed-square-root normalization
            feat = torch.mul(torch.sign(feat),torch.sqrt(torch.abs(feat)+1e-12))
        pooled_features.append(F.normalize(feat, p=2, dim=1) )
    pooled_features = torch.cat(pooled_features, dim=0)  # (n_batch*batch_sz) x 512
    print('done')

    print('Computing pair distances . . . ')
    for pair in tqdm.tqdm(pairs):
        sel_t0 = np.where(template_set == pair[0])
        sel_t1 = np.where(template_set == pair[1])
        if args.cosine:
            # features are L2-normalised, so the dot product is the cosine
            feat_dist = torch.dot(torch.squeeze(pooled_features[sel_t0]),
                                  torch.squeeze(pooled_features[sel_t1]))
        else:
            feat_dist = (pooled_features[sel_t0] - pooled_features[sel_t1]).norm(p=2, dim=1)
            # negate so that, like the cosine, a larger score means "more similar"
            feat_dist = -torch.squeeze(feat_dist)
            feat_dist = feat_dist.numpy()
        scores.append(feat_dist)  # score: negative of L2-distance
    scores = np.array(scores)

    # Metrics: TAR (tpr) at FAR (fpr)
    fpr, tpr, thresholds = sklearn.metrics.roc_curve(labels, scores)
    fpr_levels = [0.0001, 0.001, 0.01, 0.1]
    f_interp = interpolate.interp1d(fpr, tpr)
    tpr_at_fpr = [ f_interp(x) for x in fpr_levels ]

    for (far, tar) in zip(fpr_levels, tpr_at_fpr):
        print('TAR @ FAR=%.4f : %.4f' % (far, tar))

    res = {}
    res['TAR'] = tpr_at_fpr
    res['FAR'] = fpr_levels
    with open( osp.join(cache_dir, 'result-1-1-fold-%d.yaml' % fold_id),
               'w') as f:
        yaml.dump(res, f, default_flow_style=False)

    sio.savemat(osp.join(cache_dir, 'roc-1-1-fold-%d.mat' % fold_id),
                {'fpr': fpr, 'tpr': tpr, 'thresholds': thresholds,
                 'tpr_at_fpr': tpr_at_fpr})

def quantize_output(self, output, wrapper, **kwargs):
    """Binarise ``output`` to -1 / +1.

    The element-wise sign is taken and every exact zero is mapped to +1, so
    the result contains no zeros. ``wrapper`` and ``kwargs`` are accepted but
    not used here.
    """
    signs = torch.sign(output)
    # zeros have no sign: send them to +1
    return torch.where(signs == 0, torch.ones_like(signs), signs)

def _float2mu(self, data): y=[ torch.sign(x)*( (torch.log(1+(self.nvals-1)*abs(x)))/self.lognvals) for x in data] #[-1,1] float return y

def linf_step(x, g, lr):
    """Take one signed-gradient step.

    Moves every component of ``x`` by ``lr`` in the direction of the sign of
    the corresponding component of ``g`` and returns the result as a new
    tensor.
    """
    direction = ch.sign(g)
    return x + lr * direction

def compute_accuracy(predictions, labels):
    '''Count the predictions whose sign equals the label.

    Note that the value returned is the NUMBER of matches (a CPU tensor), not
    a ratio; divide by the number of samples to obtain an accuracy.
    '''
    signed_predictions = torch.sign(predictions)
    num_correct = signed_predictions.eq(labels).sum()
    return num_correct.data.cpu()

def calculate_energy(self, start_state, end_state, p_hat, flows=None):
    """Compute a per-position energy and store it in ``flows``.

    Args:
        start_state: tensor whose last dimension is the start feature size;
            it is flattened to ``(-1, start_dim)``.
        end_state: 4-D tensor ``(bsz, tnsz, tsz, end_dim)``; flattened to
            ``(-1, end_dim)`` and shifted by ``b_hat``.
        p_hat: pair ``(w_hat, b_hat)``.
        flows: dict naming the quantities to fill in. Only the key ``'e'``
            is supported. Defaults to a fresh ``{'e': None}`` on every call.

    Returns:
        ``flows`` with ``flows['e']`` set to the energy reshaped to
        ``(bsz, tnsz, tsz)``.

    Raises:
        ValueError: if ``self.target_type`` is neither ``'mvn'`` nor
            ``'residual'``.
        AttributeError: if ``flows`` contains a key other than ``'e'``.
    """
    # The default used to be a dict literal in the signature. That single
    # dict was shared by every call, so a later call silently overwrote the
    # result handed out by an earlier one. Build a new dict per call instead.
    if flows is None:
        flows = {'e': None}

    with torch.no_grad():
        # prepare data
        w_hat = p_hat[0]
        b_hat = p_hat[1]
        start_dim = start_state.size(-1)
        bsz, tnsz, tsz, end_dim = end_state.size()
        start_state = start_state.view(-1, start_dim)
        end_state = end_state.view(-1, end_dim) - b_hat

        # distance: element-wise KL term between softmax(|w_hat|) and
        # self.field, summed over the last axis
        distance = F.kl_div(input=torch.log_softmax(torch.abs(w_hat), dim=-1),
                            target=self.field.to(w_hat.device),
                            reduction='none').float()
        distance = distance.sum(-1)

        # time: outer product of the start/end signs, used to sign w_hat
        sign_pair = torch.matmul(torch.sign(start_state.unsqueeze(-1)),
                                 torch.sign(end_state.unsqueeze(-2)))
        w_hat_x = w_hat * sign_pair
        # fraction of the absolute weight mass that is positive after signing
        freq = (torch.relu(w_hat_x).sum(-1) / torch.abs(w_hat_x).sum(-1))
        num_nan = torch.isnan(freq).float().mean()
        if num_nan > 0.1:
            logging.warning(
                "Energy sequence impose the nan {}".format(num_nan))
        # 0/0 rows (all-zero weights) are treated as perfectly balanced
        freq[torch.isnan(freq)] = 0.5
        time = torch.abs(freq - 0.5) + 1

        if self.target_type == 'mvn':
            energy = torch.pow(time * torch.abs(start_state), 2)
        elif self.target_type == 'residual':
            energy = distance * torch.abs(start_state) * freq
        else:
            # previously fell through and failed later on an unbound name
            raise ValueError(
                "unsupported target_type: {!r}".format(self.target_type))
        energy = energy.mean(-1)
        energy = energy.view(bsz, tnsz, tsz)

        # NOTE: an earlier experiment (Gaussian low-pass filtering of the
        # energy along the last axis, differencing, and division by the
        # shifted frequency) was disabled at this point.

        # calculate energy
        for key in flows.keys():
            if key == 'e':
                # dump
                flows[key] = energy
            else:
                raise AttributeError(
                    "'{}' type of augmentation factor is not defined!".
                    format(key))
    return flows

def h_func(x, epsilon=10e-2):
    """Squash ``x`` element-wise: ``sign(x) * (sqrt(|x| + 1) - 1) + epsilon * x``."""
    compressed = torch.sqrt(torch.abs(x) + 1) - 1
    return torch.sign(x) * compressed + epsilon * x

def h_inv_func(x, epsilon=10e-2):
    """Evaluate ``sign(x) * (((sqrt(1 + 4*eps*(|x| + 1 + eps)) - 1) / (2*eps))**2 - 1)``.

    ``epsilon`` must be non-zero because it appears in the denominator.
    """
    under_root = 1 + 4 * epsilon * (torch.abs(x) + 1 + epsilon)
    ratio = (torch.sqrt(under_root) - 1) / (2 * epsilon)
    return torch.sign(x) * (ratio ** 2 - 1)

def odd(fn):
    """Return the odd extension of ``fn``: ``x -> sign(x) * fn(|x|)``.

    The returned callable carries ``fn``'s metadata (name, docstring, ...)
    via ``update_wrapper``.
    """
    def odd_extension(x):
        return torch.sign(x) * fn(abs(x))

    return update_wrapper(odd_extension, fn)

def threshold(z):
    """Zero the negative entries of ``z`` IN PLACE, then return its sign.

    The result is 1 where ``z`` was strictly positive and 0 elsewhere. Note
    that the caller's tensor is modified.
    """
    negative = z < 0
    z[negative] = 0
    return torch.sign(z)

def attack_single_run(self, x, y, x_init=None):
    """Run one restart of the iterative gradient attack on the batch ``x``.

    The step-size schedule and momentum update resemble Auto-PGD
    (NOTE(review): confirm against the class this method belongs to).

    Args:
        x: clean inputs. A leading batch axis is added when ``x`` has fewer
            than ``self.ndims`` dimensions. Values are presumably in [0, 1],
            since every iterate is clamped to that range.
        y: labels compared against ``logits.max(1)[1]``.
        x_init: optional starting point; when given it replaces the random
            initialisation.

    Returns:
        Tuple ``(x_best, acc, loss_best, x_best_adv)``: the iterate with the
        highest per-sample loss, the per-sample "still correctly classified"
        flag, the best per-sample loss, and the latest iterate that was
        misclassified (initialised to the starting point).

    Raises:
        ValueError: for an unsupported ``self.loss``.
    """
    if len(x.shape) < self.ndims:
        x = x.unsqueeze(0)
        y = y.unsqueeze(0)

    # --- random starting point, depending on the threat model -------------
    if self.norm == 'Linf':
        t = 2 * torch.rand(x.shape).to(self.device).detach() - 1
        x_adv = x + self.eps * torch.ones_like(
            x).detach() * self.normalize(t)
    elif self.norm == 'L2':
        t = torch.randn(x.shape).to(self.device).detach()
        x_adv = x + self.eps * torch.ones_like(
            x).detach() * self.normalize(t)
    elif self.norm == 'L1':
        t = torch.randn(x.shape).to(self.device).detach()
        delta = L1_projection(x, t, self.eps)
        x_adv = x + t + delta

    # a user-supplied start overrides the random one
    if not x_init is None:
        x_adv = x_init.clone()
        if self.norm == 'L1' and self.verbose:
            print('[custom init] L1 perturbation {:.5f}'.format(
                (x_adv - x).abs().view(x.shape[0], -1).sum(1).max()))

    x_adv = x_adv.clamp(0., 1.)
    x_best = x_adv.clone()
    x_best_adv = x_adv.clone()
    # per-iteration bookkeeping, one column per sample
    loss_steps = torch.zeros([self.n_iter, x.shape[0]]).to(self.device)
    loss_best_steps = torch.zeros([self.n_iter + 1, x.shape[0]]).to(self.device)
    acc_steps = torch.zeros_like(loss_best_steps)

    # --- select the per-sample loss ---------------------------------------
    # For non-TF models the criterion maps (logits, y) -> loss; for TF models
    # it maps inputs to (logits, loss, grad) directly.
    if not self.is_tf_model:
        if self.loss == 'ce':
            criterion_indiv = nn.CrossEntropyLoss(reduction='none')
        elif self.loss == 'ce-targeted-cfts':
            criterion_indiv = lambda x, y: -1. * F.cross_entropy(
                x, y, reduction='none')
        elif self.loss == 'dlr':
            criterion_indiv = self.dlr_loss
        elif self.loss == 'dlr-targeted':
            criterion_indiv = self.dlr_loss_targeted
        elif self.loss == 'ce-targeted':
            criterion_indiv = self.ce_loss_targeted
        else:
            raise ValueError('unknowkn loss')
    else:
        if self.loss == 'ce':
            criterion_indiv = self.model.get_logits_loss_grad_xent
        elif self.loss == 'dlr':
            criterion_indiv = self.model.get_logits_loss_grad_dlr
        elif self.loss == 'dlr-targeted':
            criterion_indiv = self.model.get_logits_loss_grad_target
        else:
            raise ValueError('unknowkn loss')

    # --- gradient at the starting point, averaged over eot_iter passes ----
    x_adv.requires_grad_()
    grad = torch.zeros_like(x)
    for _ in range(self.eot_iter):
        if not self.is_tf_model:
            with torch.enable_grad():
                logits = self.model(x_adv)
                loss_indiv = criterion_indiv(logits, y)
                loss = loss_indiv.sum()
            grad += torch.autograd.grad(loss, [x_adv])[0].detach()
        else:
            if self.y_target is None:
                logits, loss_indiv, grad_curr = criterion_indiv(x_adv, y)
            else:
                logits, loss_indiv, grad_curr = criterion_indiv(
                    x_adv, y, self.y_target)
            grad += grad_curr
    grad /= float(self.eot_iter)
    grad_best = grad.clone()

    acc = logits.detach().max(1)[1] == y
    acc_steps[0] = acc + 0
    loss_best = loss_indiv.detach().clone()

    # initial step size: alpha * eps, broadcastable over the sample dims
    alpha = 2. if self.norm in ['Linf', 'L2'
                               ] else 1. if self.norm in ['L1'] else 2e-2
    step_size = alpha * self.eps * torch.ones(
        [x.shape[0], *([1] * self.ndims)]).to(self.device).detach()
    x_adv_old = x_adv.clone()
    counter = 0
    # k: number of iterations between two step-size checks
    k = self.n_iter_2 + 0
    if self.norm == 'L1':
        k = max(int(.04 * self.n_iter), 1)
        n_fts = math.prod(self.orig_dim)
        if x_init is None:
            topk = .2 * torch.ones([x.shape[0]], device=self.device)
            sp_old = n_fts * torch.ones_like(topk)
        else:
            topk = L0_norm(x_adv - x) / n_fts / 1.5
            sp_old = L0_norm(x_adv - x)
        #print(topk[0], sp_old[0])
        adasp_redstep = 1.5
        adasp_minstep = 10.
        #print(step_size[0].item())
    counter3 = 0

    loss_best_last_check = loss_best.clone()
    reduced_last_check = torch.ones_like(loss_best)
    n_reduced = 0

    # NOTE(review): this overwrites the L1-branch n_fts computed from
    # self.orig_dim above and assumes x has at least three trailing dims.
    n_fts = x.shape[-3] * x.shape[-2] * x.shape[-1]
    u = torch.arange(x.shape[0], device=self.device)
    for i in range(self.n_iter):
        ### gradient step
        with torch.no_grad():
            x_adv = x_adv.detach()
            # displacement of the previous iteration, used as momentum term
            grad2 = x_adv - x_adv_old
            x_adv_old = x_adv.clone()

            # no momentum on the very first iteration
            a = 0.75 if i > 0 else 1.0

            if self.norm == 'Linf':
                x_adv_1 = x_adv + step_size * torch.sign(grad)
                # project onto the eps-box around x and onto [0, 1]
                x_adv_1 = torch.clamp(
                    torch.min(torch.max(x_adv_1, x - self.eps),
                              x + self.eps), 0.0, 1.0)
                # blend the new step with the previous displacement, re-project
                x_adv_1 = torch.clamp(
                    torch.min(
                        torch.max(
                            x_adv + (x_adv_1 - x_adv) * a + grad2 * (1 - a),
                            x - self.eps), x + self.eps), 0.0, 1.0)
            elif self.norm == 'L2':
                x_adv_1 = x_adv + step_size * self.normalize(grad)
                # rescale the perturbation so its norm is at most eps
                x_adv_1 = torch.clamp(
                    x + self.normalize(x_adv_1 - x) *
                    torch.min(self.eps * torch.ones_like(x).detach(),
                              self.lp_norm(x_adv_1 - x)), 0.0, 1.0)
                x_adv_1 = x_adv + (x_adv_1 - x_adv) * a + grad2 * (1 - a)
                x_adv_1 = torch.clamp(
                    x + self.normalize(x_adv_1 - x) *
                    torch.min(self.eps * torch.ones_like(x).detach(),
                              self.lp_norm(x_adv_1 - x)), 0.0, 1.0)
            elif self.norm == 'L1':
                # keep only the gradient entries whose magnitude reaches the
                # per-sample threshold picked from the sorted magnitudes
                grad_topk = grad.abs().view(x.shape[0], -1).sort(-1)[0]
                topk_curr = torch.clamp((1. - topk) * n_fts,
                                        min=0,
                                        max=n_fts - 1).long()
                grad_topk = grad_topk[u, topk_curr].view(
                    -1, *[1] * (len(x.shape) - 1))
                sparsegrad = grad * (grad.abs() >= grad_topk).float()
                # sign step normalised by the number of active coordinates
                x_adv_1 = x_adv + step_size * sparsegrad.sign() / (
                    sparsegrad.sign().abs().view(x.shape[0], -1).sum(
                        dim=-1).view(-1, *[1] * (len(x.shape) - 1)) + 1e-10)

                delta_u = x_adv_1 - x
                delta_p = L1_projection(x, delta_u, self.eps)
                x_adv_1 = x + delta_u + delta_p

            x_adv = x_adv_1 + 0.

        ### get gradient
        x_adv.requires_grad_()
        grad = torch.zeros_like(x)
        for _ in range(self.eot_iter):
            if not self.is_tf_model:
                with torch.enable_grad():
                    logits = self.model(x_adv)
                    loss_indiv = criterion_indiv(logits, y)
                    loss = loss_indiv.sum()
                grad += torch.autograd.grad(loss, [x_adv])[0].detach()
            else:
                if self.y_target is None:
                    logits, loss_indiv, grad_curr = criterion_indiv(
                        x_adv, y)
                else:
                    logits, loss_indiv, grad_curr = criterion_indiv(
                        x_adv, y, self.y_target)
                grad += grad_curr
        grad /= float(self.eot_iter)

        pred = logits.detach().max(1)[1] == y
        # a sample stays "accurate" only if it was never misclassified so far
        acc = torch.min(acc, pred)
        acc_steps[i + 1] = acc + 0
        # remember the current iterate for every sample that is misclassified
        ind_pred = (pred == 0).nonzero(as_tuple=False).squeeze()
        x_best_adv[ind_pred] = x_adv[ind_pred] + 0.
        if self.verbose:
            str_stats = ' - step size: {:.5f} - topk: {:.2f}'.format(
                step_size.mean(),
                topk.mean() * n_fts) if self.norm in ['L1'] else ''
            print(
                '[m] iteration: {} - best loss: {:.6f} - robust accuracy: {:.2%}{}'
                .format(i, loss_best.sum(), acc.float().mean(), str_stats))
            #print('pert {}'.format((x - x_best_adv).abs().view(x.shape[0], -1).sum(-1).max()))

        ### check step size
        with torch.no_grad():
            y1 = loss_indiv.detach().clone()
            loss_steps[i] = y1 + 0
            # samples whose loss improved: update best point / grad / loss
            ind = (y1 > loss_best).nonzero(as_tuple=False).squeeze()
            x_best[ind] = x_adv[ind].clone()
            grad_best[ind] = grad[ind].clone()
            loss_best[ind] = y1[ind] + 0
            loss_best_steps[i + 1] = loss_best + 0

            counter3 += 1

            # every k iterations decide whether to shrink the step size
            if counter3 == k:
                if self.norm in ['Linf', 'L2']:
                    fl_oscillation = self.check_oscillation(
                        loss_steps, i, k, loss_best, k3=self.thr_decr)
                    # also reduce when the step was not reduced at the last
                    # check and the best loss has not improved since then
                    fl_reduce_no_impr = (1. - reduced_last_check) * (
                        loss_best_last_check >= loss_best).float()
                    fl_oscillation = torch.max(fl_oscillation,
                                               fl_reduce_no_impr)
                    reduced_last_check = fl_oscillation.clone()
                    loss_best_last_check = loss_best.clone()

                    if fl_oscillation.sum() > 0:
                        ind_fl_osc = (fl_oscillation > 0).nonzero(
                            as_tuple=False).squeeze()
                        step_size[ind_fl_osc] /= 2.0
                        n_reduced = fl_oscillation.sum()

                        # restart the flagged samples from their best point
                        x_adv[ind_fl_osc] = x_best[ind_fl_osc].clone()
                        grad[ind_fl_osc] = grad_best[ind_fl_osc].clone()

                    k = max(k - self.size_decr, self.n_iter_min)

                elif self.norm == 'L1':
                    # adapt sparsity level and step size from the L0 norm of
                    # the best perturbation found so far
                    sp_curr = L0_norm(x_best - x)
                    fl_redtopk = (sp_curr / sp_old) < .95
                    topk = sp_curr / n_fts / 1.5
                    step_size[fl_redtopk] = alpha * self.eps
                    step_size[~fl_redtopk] /= adasp_redstep
                    step_size.clamp_(alpha * self.eps / adasp_minstep,
                                     alpha * self.eps)
                    sp_old = sp_curr.clone()

                    x_adv[fl_redtopk] = x_best[fl_redtopk].clone()
                    grad[fl_redtopk] = grad_best[fl_redtopk].clone()

                counter3 = 0
                #k = max(k - self.size_decr, self.n_iter_min)

    #
    return (x_best, acc, loss_best, x_best_adv)

def _gcTriInt(p, v1, v2, x):
    """
    Triangle-integral helper from the 3D Green Coordinates pseudo code.

    Degenerate configurations (angles within a small tolerance of 0 or pi)
    are collected in ``filter_mask`` and their result is forced to 0.

    params:
        p  (B,P,F,3)
        v1 (B,P,F,3,3)
        v2 (B,P,F,3,3)
        x  (B,P,F,3) or None; when None, ``c`` is computed from ``p`` alone
    return:
        (B,P,F,3)
    """
    eps = 1e-6          # tolerance on cosines
    angle_eps = 1e-3    # tolerance on angles (radians)
    div_guard = 1e-12   # added to denominators / sqrt arguments
    # (B,P,F,3,D)
    p_v1 = p.unsqueeze(-2)-v1
    v2_p = v2-p.unsqueeze(-2)
    v2_v1 = v2-v1
    # (B,P,F,3)
    p_v1_norm = torch.norm(p_v1, dim=-1, p=2)
    # (B,P,F,3)
    # cosine of alpha, the angle between (v2 - v1) and (p - v1)
    tempval = dot_product(v2_v1, p_v1, dim=-1)/(p_v1_norm*torch.norm(v2_v1, dim=-1, p=2)+div_guard)
    tempval.clamp_(-1.0,1.0)
    filter_mask = tempval.abs()>(1-eps)
    # keep acos away from +/-1
    tempval.clamp_(-1.0+eps,1.0-eps)
    alpha = torch.acos(tempval)
    filter_mask = filter_mask | (torch.abs(alpha-np.pi)<angle_eps)|(torch.abs(alpha)<angle_eps)
    # cosine of beta, the angle between (v1 - p) and (v2 - p)
    tempval = dot_product(-p_v1, v2_p, dim=-1)/(p_v1_norm*torch.norm(v2_p, dim=-1, p=2)+div_guard)
    tempval.clamp_(-1.0, 1.0)
    filter_mask = filter_mask|(torch.abs(tempval)>(1-eps))
    tempval.clamp_(-1.0+eps,1.0-eps)
    beta = torch.acos(tempval)
    assert(check_values(alpha))
    assert(check_values(beta))
    # (B,P,F,3)
    lambd = (p_v1_norm*torch.sin(alpha))**2
    # c (B,P,F,1)
    if x is not None:
        c = torch.sum((p-x)*(p-x), dim=-1,keepdim=True)
    else:
        c = torch.sum(p*p, dim=-1,keepdim=True)
    # theta in (pi-alpha, pi-alpha-beta)
    # (B,P,F,3)
    theta_1 = torch.clamp(np.pi - alpha, 0, np.pi)
    theta_2 = torch.clamp(np.pi - alpha - beta, -np.pi, np.pi)
    S_1, S_2 = torch.sin(theta_1), torch.sin(theta_2)
    C_1, C_2 = torch.cos(theta_1), torch.cos(theta_2)
    sqrt_c = torch.sqrt(c+div_guard)
    sqrt_lmbd = torch.sqrt(lambd+div_guard)
    # theta_half is only referenced by the commented-out variants below
    theta_half = theta_1/2
    filter_mask = filter_mask | ((C_1-1).abs()<eps)
    # S^2 / (1 - C)^2, set to 0 where cos(theta) is ~1
    sqcot_1 = torch.where((C_1-1).abs()<eps, torch.zeros_like(C_1), S_1*S_1/((1-C_1)**2+div_guard))
    # sqcot_1 = torch.where(theta_half.abs()<angle_eps, torch.zeros_like(theta_half), 1/(torch.tan(theta_half)**2+div_guard))
    theta_half = theta_2/2
    filter_mask = filter_mask | ((C_2-1).abs()<eps)
    sqcot_2 = torch.where((C_2-1).abs()<eps, torch.zeros_like(C_2), S_2*S_2/((1-C_2)**2+div_guard))
    # sqcot_2 = torch.where(theta_half.abs()<angle_eps, torch.zeros_like(theta_half), 1/(torch.tan(theta_half)**2+div_guard))
    # I=-0.5*Sign(sx)* ( 2*sqrtc*atan((sqrtc*cx) / (sqrt(a+c*sx*sx) ) )+
    #     sqrta*log(((sqrta*(1-2*c*cx/(c*(1+cx)+a+sqrta*sqrt(a+c*sx*sx)))))*(2*sx*sx/pow((1-cx),2))))
    # assign a value to invalid entries, backward
    inLog = sqrt_lmbd*(1-2*c*C_1/( div_guard +c*(1+C_1)+lambd+sqrt_lmbd*torch.sqrt(lambd+c*S_1*S_1+div_guard) ) )*2*sqcot_1
    # masked / non-positive arguments become 1 so that log() yields 0
    inLog.masked_fill_(filter_mask | (inLog<=0), 1.0)
    # inLog = torch.where(invalid_values|(lambd==0), torch.ones_like(theta_1), div_guard +sqrt_lmbd*(1-2*c*C_1/( div_guard +c*(1+C_1)+lambd+sqrt_lmbd*torch.sqrt(lambd+c*S_1*S_1)+div_guard ) )*2*cot_1)
    I_1 = -0.5*torch.sign(S_1)*(2*sqrt_c*torch.atan((sqrt_c*C_1) / (torch.sqrt(lambd+S_1*S_1*c+div_guard) ) )+sqrt_lmbd*torch.log(inLog))
    assert(check_values(I_1))
    # same expression evaluated at theta_2
    inLog = sqrt_lmbd*(1-2*c*C_2/( div_guard +c*(1+C_2)+lambd+sqrt_lmbd*torch.sqrt(lambd+c*S_2*S_2+div_guard) ) )*2*sqcot_2
    inLog.masked_fill_(filter_mask | (inLog<=0), 1.0)
    I_2 = -0.5*torch.sign(S_2)*(2*sqrt_c*torch.atan((sqrt_c*C_2) / (torch.sqrt(lambd+S_2*S_2*c+div_guard) ) )+sqrt_lmbd*torch.log(inLog))
    assert(check_values(I_2))
    myInt = -1/(4*np.pi)*torch.abs(I_1-I_2-sqrt_c*beta)
    # degenerate entries contribute nothing
    myInt.masked_fill_(filter_mask, 0.0)
    return myInt

y = torch.from_numpy(np.arange(6).reshape(1, 2, 3) * 2); print(y) # tensor([[[ 0, 2, 4], [ 6, 8, 10]]]) index = torch.tensor([[[0, 1, 2], [0, 1, 2]]], dtype=torch.long) print(x.scatter_add(-1, index, y)) # tensor([[[ 0, 3, 6], [ 9, 12, 15]]]) index = torch.tensor([[[0, 2, 1], [0, 1, 2]]], dtype=torch.long) print(x.scatter_add(-1, index, y)) # tensor([[[ 0, 5, 4], [ 9, 12, 15]]]); 索引 shape 和 y.shape 相同, 是将 y 加到 x 对应位置上 index = torch.tensor([[[0, 1, 3], [0, 1, 2]]], dtype=torch.long) print(x.scatter_add(-1, index, y)) # RuntimeError: Invalid index in scatterAdd; 无法索引到 3 # for different shape x = torch.arange(6).view(2, 3); print(x) # tensor([[0, 1, 2], [3, 4, 5]]) y = torch.arange(4).view(2, 2); print(y) # tensor([[0, 1], [2, 3]]) index = torch.tensor([[0, 1], [1, 0]]); print(index) # tensor([[0, 1], [1, 0]]) print(x.scatter_add(1, index, y)) # tensor([[0, 2, 2], [6, 6, 5]]) ## sign a = torch.tensor([0, 1, 2, 3, -4]); print(a) # tensor([ 0, 1, 2, 3, -4]) print(torch.sign(a)) # tensor([ 0, 1, 1, 1, -1]) ## to device x = torch.LongTensor([[1], [2], [3]]); print(x, x.dtype) # tensor([[1], [2], [3]]) torch.int64 print(x.to('cpu')) print(x.to('cuda')) print(x.device) # cpu ## dtype x = torch.LongTensor([[1], [2], [3]]); print(x, x.dtype) # tensor([[1], [2], [3]]) torch.int64 print(x.type(torch.int).dtype) # torch.int32 print(torch.tensor(x).dtype) # torch.int64 print(torch.tensor([1, 2, 3])) # tensor([1, 2, 3]); 是整数 print(torch.randint(3, (1, 2))) # tensor([[1., 0.]]); 是 float print(torch.randint(3, (1, 2), dtype=torch.long)) # tensor([[1, 1]]); 整形

def rprop(opfunc, x, config, state=None):
    """ A plain implementation of RPROP

    ARGS:
    - `opfunc` : a function that takes a single input (X), the point of
                 evaluation, and returns f(X) and df/dX
    - `x`      : the initial point (updated in place)
    - `config` : a dictionary of hyper-parameters; may be None when `state`
                 is given, in which case they are read from `state`
    - `state`  : a dictionary describing the state of the optimizer; after
                 each call the state is modified. Defaults to `config`.
    - `config['stepsize']`    : initial step size, common to all components
                                (default 0.1)
    - `config['etaplus']`     : multiplicative increase factor, > 1 (default 1.2)
    - `config['etaminus']`    : multiplicative decrease factor, < 1 (default 0.5)
    - `config['stepsizemax']` : maximum stepsize allowed (default 50)
    - `config['stepsizemin']` : minimum stepsize allowed (default 1e-6)
    - `config['niter']`       : number of iterations (default 1)

    RETURN:
    - `x`     : the new x vector
    - `f(x)`  : list of the function values, each evaluated before the
                corresponding update

    RAISES:
    - ValueError if both `config` and `state` are None

    NOTE: the gradient tensor returned by `opfunc` is modified in place
    (entries whose sign flipped since the previous step are zeroed).

    (Martin Riedmiller, Koray Kavukcuoglu 2013)
    """
    if config is None and state is None:
        raise ValueError(
            "rprop requires a dictionary to retain state between iterations")

    # (0) get/update state
    state = state if state is not None else config
    # Hyper-parameters used to be read with `config.get` unconditionally,
    # which crashed when only `state` was supplied.
    params = config if config is not None else state
    stepsize = params.get('stepsize', 0.1)
    etaplus = params.get('etaplus', 1.2)
    etaminus = params.get('etaminus', 0.5)
    stepsizemax = params.get('stepsizemax', 50.0)
    stepsizemin = params.get('stepsizemin', 1e-06)
    niter = params.get('niter', 1)

    hfx = []

    for i in range(niter):
        # (1) evaluate f(x) and df/dx
        fx, dfdx = opfunc(x)

        # init temp storage
        if 'delta' not in state:
            state['delta'] = torch.zeros_like(dfdx)
            state['stepsize'] = torch.full_like(dfdx, stepsize)
            state['sign'] = torch.empty_like(dfdx)
            # Boolean masks allocated next to the gradient. This replaces
            # CPU ByteTensors that were moved to the GPU only when the type
            # name of `x` contained 'Cuda'.
            for mask_name in ('bytesign', 'psign', 'nsign', 'zsign',
                              'dminmax'):
                state[mask_name] = torch.zeros_like(dfdx, dtype=torch.bool)

        # sign of derivative from last step to this one
        torch.mul(dfdx, state['delta'], out=state['sign']).sign_()

        # get indices of >0, <0 and ==0 entries
        torch.gt(state['sign'], 0, out=state['psign'])
        torch.lt(state['sign'], 0, out=state['nsign'])
        torch.eq(state['sign'], 0, out=state['zsign'])

        # get step size updates
        state['sign'][state['psign']] = etaplus
        state['sign'][state['nsign']] = etaminus
        state['sign'][state['zsign']] = 1

        # update stepsizes with step size updates
        state['stepsize'].mul_(state['sign'])

        # threshold step sizes
        # >50 => 50
        torch.gt(state['stepsize'], stepsizemax, out=state['dminmax'])
        state['stepsize'][state['dminmax']] = stepsizemax
        # <1e-6 ==> 1e-6
        torch.lt(state['stepsize'], stepsizemin, out=state['dminmax'])
        state['stepsize'][state['dminmax']] = stepsizemin

        # for dir<0, dfdx=0
        # for dir>=0 dfdx=dfdx
        dfdx[state['nsign']] = 0
        torch.sign(dfdx, out=state['sign'])

        # update weights: x -= sign(dfdx) * stepsize
        # (keyword `value` replaces the deprecated positional-scalar overload)
        x.addcmul_(state['sign'], state['stepsize'], value=-1)

        # update state['dfdx'] with current dfdx
        state['delta'].copy_(dfdx)

        hfx.append(fx)

    # return x*, table of f(x) values from each step
    return x, hfx

def updateBN():
    """Add an L1 sub-gradient to every BatchNorm2d scale in the global ``model``.

    For each ``nn.BatchNorm2d`` module, ``args.s * sign(weight)`` is added
    in place to the existing gradient of ``weight``. The gradients must
    already be populated when this is called.
    """
    bn_layers = (m for m in model.modules() if isinstance(m, nn.BatchNorm2d))
    for layer in bn_layers:
        penalty = args.s * torch.sign(layer.weight.data)
        layer.weight.grad.data.add_(penalty)  # L1