def test_backprop_mode_affects_chainerx(self):
    # chainer.{no,force}_backprop_mode should affect chainerx's
    # counterpart.
    assert chainerx.is_backprop_required()

    # nobp
    with chainer.no_backprop_mode():
        assert not chainerx.is_backprop_required()

        # nobp > forcebp
        with chainer.force_backprop_mode():
            assert chainerx.is_backprop_required()

        # nobp > nobp
        with chainer.no_backprop_mode():
            assert not chainerx.is_backprop_required()

    assert chainerx.is_backprop_required()

    # forcebp
    with chainer.force_backprop_mode():
        assert chainerx.is_backprop_required()

        # forcebp > forcebp
        with chainer.force_backprop_mode():
            assert chainerx.is_backprop_required()

        # forcebp > nobp
        with chainer.no_backprop_mode():
            assert not chainerx.is_backprop_required()

    assert chainerx.is_backprop_required()
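# A minimal sketch of the nesting semantics the test above relies on:
# force_backprop_mode re-enables graph construction inside no_backprop_mode,
# and the innermost context always wins. Plain Chainer exposes the same
# state via chainer.config.enable_backprop.
import chainer

assert chainer.config.enable_backprop
with chainer.no_backprop_mode():
    assert not chainer.config.enable_backprop
    with chainer.force_backprop_mode():
        assert chainer.config.enable_backprop  # innermost context wins
assert chainer.config.enable_backprop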
def test_force_backprop_mode(self):
    with chainer.no_backprop_mode():
        with chainer.force_backprop_mode():
            y = self.x + 1
    self.assertTrue(y.creator_node is not None)

    y = self.x + 1
    self.assertTrue(y.creator_node is not None)

    with chainer.force_backprop_mode():
        y = self.x + 1
    self.assertTrue(y.creator_node is not None)
def all_prob(self):
    with chainer.force_backprop_mode():
        if self.min_prob > 0:
            # This branch is disabled: the assert makes the return below
            # unreachable.
            assert False
            return (F.softmax(self.beta * self.logits)
                    * (1 - self.min_prob * self.n)) + self.min_prob
        else:
            # Consider using something like
            # https://stable-baselines.readthedocs.io/en/master/_modules/stable_baselines/common/distributions.html#MultiCategoricalProbabilityDistribution
            # and https://stable-baselines.readthedocs.io/en/master/common/distributions.html
            return self.get_all_prob_or_log_prob(is_log=False)
def greedy_actions(self):
    with chainer.force_backprop_mode():
        a = self.mu
        if self.min_action is not None:
            a = F.maximum(
                self.xp.broadcast_to(self.min_action, a.data.shape), a)
        if self.max_action is not None:
            a = F.minimum(
                self.xp.broadcast_to(self.max_action, a.data.shape), a)
        return a
def max_as_distribution(self):
    """Return the return distributions of the greedy actions.

    Returns:
        chainer.Variable: Return distributions. Its shape will be
            (batch_size, n_atoms).
    """
    with chainer.force_backprop_mode():
        return self.q_dist[self.xp.arange(self.q_values.shape[0]),
                           self.greedy_actions.array]
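# A minimal NumPy sketch (hypothetical shapes) of the advanced indexing used
# in max_as_distribution above: for each row b, pick the atom distribution of
# that row's greedy action in one vectorized lookup.
import numpy as np

q_dist = np.random.rand(4, 3, 51)      # (batch_size, n_actions, n_atoms)
greedy = np.array([2, 0, 1, 1])        # greedy action per batch element
picked = q_dist[np.arange(4), greedy]  # row b -> q_dist[b, greedy[b], :]
assert picked.shape == (4, 51)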
def predict(self, image, label, backprop=True):
    with chainer.using_config('train', False):
        if backprop:
            with chainer.force_backprop_mode():
                ret = self.model.predictor(
                    self.model.preprocess((image, label, 0.)))
        else:
            with chainer.no_backprop_mode():
                ret = self.model.predictor(
                    self.model.preprocess((image, label, 0.)))
    return ret[0]
def craft(self, image, label):
    original_image = image.copy()
    image = chainer.Parameter(image)
    xp = chainer.cuda.get_array_module(image.data)
    grads = xp.empty((image.shape[0], self.n_class) + image.shape[1:],
                     dtype=xp.float32)
    for i in range(self.max_iter):
        prediction = self.predict(image, label, backprop=True)
        changed = xp.argmax(prediction.data, axis=1) != label
        if changed.all():
            break
        for k in range(self.n_class):
            image.grad = None
            with chainer.force_backprop_mode():
                self.backprop(loss=F.sum(prediction[:, k]))
            grads[:, k] = image.grad
        grads -= grads[list(range(image.shape[0])), label].reshape(
            grads.shape[0], 1, *grads.shape[2:])
        prediction = prediction.data
        prediction -= prediction[list(range(image.shape[0])), label].reshape(
            (image.shape[0], 1))
        w_norm = (grads ** 2).sum(axis=tuple(range(2, grads.ndim)))
        fw, fw2 = chainer.cuda.elementwise(
            'T w, T f', 'T fw, T fw2',
            '''
            f = abs(f);
            if (f > 0 || w > 0) {
                fw = f / sqrt(w);
                fw2 = (f + 0.0001) / w;
            } else {
                // correct class label
                fw = 1000000000.0;
                fw2 = 0.0;
            }
            ''',
            'deep_fool')(w_norm, prediction)
        change = xp.argmin(fw, axis=1)
        tmp = image.data + (1 + self.overshoot) * fw2[
            list(range(image.shape[0])), change].reshape(
                (-1, ) + (1, ) * (image.ndim - 1)) * (
                    grads[list(range(image.shape[0])), change])
        image.data[~changed] = tmp[~changed]
    prediction = self.predict(image, label, backprop=False)
    changed = xp.argmax(prediction.data, axis=1) != label
    image = image.data
    l2_dist = xp.sqrt(
        ((image - original_image) ** 2).sum(axis=tuple(range(1, image.ndim))))
    l2_dist[~changed] = 1e9
    self.l2_history.extend(list(l2_dist.get()))
    sys.stdout.write('\r' + str(len(self.l2_history)).rjust(5, '0'))
    sys.stdout.flush()
    return image
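# A minimal NumPy sketch (illustrative values, not the author's code) of what
# the elementwise kernel above computes per class k: the linearized distance
# to class k's decision boundary is |f_k| / ||w_k||, and the minimal step
# along w_k has magnitude (|f_k| + 1e-4) / ||w_k||^2, the DeepFool update.
import numpy as np

f = np.array([0.0, 1.5, -0.7])           # logit differences vs. true class
w_norm_sq = np.array([1e-12, 4.0, 9.0])  # ||grad_k - grad_label||^2 per class
dist = np.abs(f) / np.sqrt(w_norm_sq)
dist[0] = 1e9                            # true class: excluded from argmin
k = np.argmin(dist)                      # class with the closest boundary
step = (np.abs(f[k]) + 1e-4) / w_norm_sq[k]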
def __init__(self, model, args):
    xp = model.xp
    # convert to tuple of Variable
    if not isinstance(args, Sequence):
        args = [args]
    args = [
        chainer.Variable(a) if not isinstance(a, chainer.Variable) else a
        for a in args
    ]

    # Collect computational-graph information for links and functions
    # separately. Focusing on Links alone would keep the pruning
    # implementation simpler, but to also handle cases where bare Functions
    # are interposed, such as conv-bn-gap-fc, both are analyzed and the
    # results are merged using the ids of the VariableNodes.
    # This part is planned to be refactored into something more elegant.
    with chainer.using_config('train', False), chainer.force_backprop_mode():
        with TraceLinkHook() as link_hook:
            outs = model(*args)
        if isinstance(outs, Mapping):
            outs = list(outs.values())
        if not isinstance(outs, Sequence):
            outs = [outs]
        with TraceFunctionHook() as func_hook:
            for out in outs:
                out.grad = xp.ones_like(out.array)
                out.backward()

    self.links = link_hook.graph  # type: Sequence[Node]

    # get global link name
    mapping = {id(link): name for name, link in model.namedlinks()}

    def replace_name(node):
        node.name = mapping[node.id]
        return node

    self.links = [replace_name(node) for node in self.links]

    self.functions = func_hook.graph  # type: Sequence[Node]
    self.functions = list(reversed(self.functions))

    nodes = list()
    nodes.extend(self.links)
    nodes.extend(self.functions)
    self.graph = self._traverse_connections(nodes)
def attack_loss(self, image, target, label):
    """max(Z(X)_real - max{Z(X)_i : i != real}, -kappa)

    :param image: current adversarial image
    :param target: target label
    :return: attack loss
    :rtype: chainer.Variable
    """
    with chainer.force_backprop_mode():
        Z = self.predict(image, label)
        xp = chainer.cuda.get_array_module(Z)
        # build the mask from the underlying array, not the Variable
        tmp = xp.ones_like(Z.data, dtype=xp.float32) * 1e20
        tmp[list(range(tmp.shape[0])), target] = -1e20
        other = chainer.functions.minimum(Z, tmp)
        other = chainer.functions.max(other, 1)
        real = Z[list(range(tmp.shape[0])), target]
        Z_diff = real - other
        return chainer.functions.maximum(
            Z_diff + self._confidence,
            xp.zeros_like(Z_diff, dtype=xp.float32)), Z.data
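# A minimal NumPy sketch (hypothetical logits) of the +/-1e20 masking trick in
# attack_loss above: overwriting the target column with -1e20 before the
# row-wise max yields max_{i != target} Z_i without any Python loop.
import numpy as np

Z = np.array([[2.0, 5.0, 1.0],
              [4.0, 0.0, 3.0]], dtype=np.float32)
target = np.array([1, 0])
tmp = np.full_like(Z, 1e20)
tmp[np.arange(2), target] = -1e20
other = np.minimum(Z, tmp).max(axis=1)  # [2., 3.]: best non-target logits
real = Z[np.arange(2), target]          # [5., 4.]: target logits
margin = real - other                   # [3., 1.]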
def backward(self, inputs, grads):
    xp = chainer.cuda.get_array_module(*grads)
    y_array = self.output_data[0]
    grad_y = grads[0]
    y1_array, y2_array = xp.split(y_array, 2, axis=1)
    grad_y1, grad_y2 = xp.split(grad_y, 2, axis=1)
    a, b = y1_array.copy(), y2_array.copy()
    ga, gb = grad_y1.copy(), grad_y2.copy()
    for res_unit in self.chainlist[::-1]:
        b_var = chainer.Variable(b)
        with chainer.force_backprop_mode():
            c_var = res_unit(b_var)
            c_var.grad = ga
            c_var.backward()
        a -= c_var.array
        gb += b_var.grad
        a, b = b, a
        ga, gb = gb, ga
    gx = xp.concatenate((ga, gb), axis=1)
    return gx,
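# A sketch of the forward pass this backward would invert (an assumption: the
# actual forward is not shown in this snippet). If each unit maps
# (a, b) -> (b + f(a), a), then one iteration of the loop above exactly undoes
# one unit, so activations are recomputed instead of stored, RevNet-style.
def forward_sketch(chainlist, a, b):
    for res_unit in chainlist:
        # a' = b + f(a), b' = a; the backward recovers (a, b) from (a', b')
        # via a = b', b = a' - f(b').
        a, b = b + res_unit(a).array, a
    return a, b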
def max(self):
    with chainer.force_backprop_mode():
        if self.min_action is None and self.max_action is None:
            return F.reshape(self.v, (self.batch_size, ))
        else:
            return self.evaluate_actions(self.greedy_actions)
def craft(self, image, label):
    """
    image is assumed to be in range [0, 255]
    :param image:
    :param label:
    :return:
    """
    adversarial_image_org = (image / 255. - 0.5) * 2. * 0.999999
    xp = chainer.cuda.get_array_module(adversarial_image_org)
    cost = xp.ones(image.shape[0], dtype=xp.float32) * self._initial_c
    upper_bound = xp.ones(label.shape) * self._max_c
    lower_bound = xp.ones(label.shape) * self._min_c
    o_best_l2 = xp.ones(label.shape) * 1e10
    o_best_logit = xp.ones(label.shape) * -1
    o_best_attack = image.copy()
    with chainer.force_backprop_mode():
        for i in range(self._max_binary_step):
            msg_base = '\riter: ' + str(i) + ' '
            adversarial_image = chainer.Parameter(
                xp.arctanh(adversarial_image_org))
            opt = chainer.optimizers.Adam(alpha=self._lr)
            adversarial_image.update_rule = opt.create_update_rule()
            prev = xp.ones(adversarial_image.shape[0]) * 1e20
            best_l2 = xp.ones(label.shape) * 1e10
            best_logit = xp.ones(label.shape) * -1
            for j in range(self._max_iter):
                # get scores
                feed_image = (chainer.functions.tanh(adversarial_image)
                              * 0.5 + 0.5) * 255.
                l2_dist, loss_data, logit, loss = self.loss(
                    feed_image, label, image, cost, label)
                # when optimization is stuck, break
                if j % (self._max_iter // 10) + 1 == self._max_iter // 10:
                    if (loss_data > prev * .9999).all():
                        break
                    prev[:] = xp.minimum(prev, loss_data)
                if j % (self._max_iter // 10) + 1 == self._max_iter // 10:
                    cmp = self.compare(logit, label)
                    cmpl2 = best_l2 > l2_dist
                    change = cmpl2 & ~cmp
                    if change.any():
                        best_l2[change] = l2_dist[change]
                        best_logit[change] = 1
                    o_cmpl2 = o_best_l2 > l2_dist
                    change = o_cmpl2 & ~cmp
                    if change.any():
                        o_best_l2[change] = l2_dist[change]
                        o_best_attack[change] = (
                            xp.tanh(adversarial_image.data[change])
                            * 0.5 + 0.5) * 255.
                        o_best_logit[change] = 1
                self.cleargrads()
                adversarial_image.grad = None
                loss.backward()
                adversarial_image.update()
            # binary search for cost
            success = o_best_logit == 1
            sys.stdout.write(msg_base + 'success: ' + str(success.sum()))
            sys.stdout.flush()
            success = best_logit == 1
            upper_bound[success] = cost[success]
            lower_bound[~success] = cost[~success]
            do_bin_search = (upper_bound < 1e9) & ~success
            cost[do_bin_search] = (upper_bound + lower_bound)[do_bin_search] * .5
            cost[~do_bin_search] *= 10
    sys.stdout.write('\n')
    self.l2_history.extend(list(np.sqrt(o_best_l2.get())))
    return o_best_attack
def calculate_local_lipschitz(self):
    print('\rlocal Lipschitz start', flush=True)
    iterator = self.iterator
    preprocess = self.preprocess
    target = self.target
    eval_func = self.eval_func or (lambda x: target(preprocess(x)))
    device = self.device or chainer.cuda.cupy.cuda.get_device_id()
    assert device >= 0

    if self.eval_hook:
        self.eval_hook(self)

    # Compute grads and sample gradient norms.
    if hasattr(iterator, 'reset'):
        iterator.reset()
        it = iterator
    else:
        it = copy.copy(iterator)

    self.global_grad = chainer.cuda.cupy.zeros(
        (self.n_class, self.n_class), dtype=chainer.cuda.cupy.float32)
    margin_list = []
    size = 0
    total = len(it.dataset)
    for batch in it:
        size += len(batch)
        sys.stdout.write('\r{0}/{1}'.format(size, total))
        sys.stdout.flush()
        x, t = self.converter(batch, device)
        xp = chainer.cuda.get_array_module(x)
        c = xp.ones((1, ), dtype=np.float32)
        local_grad = xp.zeros((self.n_class, self.n_class), dtype=xp.float32)
        with chainer.force_backprop_mode():
            for _ in range(100):
                noise = xp.random.normal(size=x.shape).astype(xp.float32)
                normalize(noise)
                x2 = chainer.Parameter(x + noise)
                y, t, _ = eval_func((x2, t, c))
                for i in range(self.n_class):
                    for j in range(i + 1, self.n_class):
                        if i == j:
                            continue
                        target.cleargrads()
                        x2.grad = None
                        F.sum(y[:, i] - y[:, j]).backward()
                        norm = xp.max(
                            xp.sqrt((x2.grad ** 2).sum(
                                axis=tuple(range(1, x2.ndim)))))
                        local_grad[i, j] = max(local_grad[i, j], norm)
        for i in range(self.n_class):
            for j in range(i + 1, self.n_class):
                local_grad[j, i] = local_grad[i, j]
        self.global_grad[:] = xp.maximum(self.global_grad, local_grad)
        with chainer.no_backprop_mode():
            y, t, _ = eval_func((x, t, c))
        y = y.array
        grad = local_grad[t]
        margins = self.get_margin(
            y, y[list(range(t.size)), t].reshape(t.size, 1), grad)
        margins = xp.min(margins, axis=1)
        margin_list.extend(list(margins.get()))
    return margin_list
def all_log_prob(self):
    with chainer.force_backprop_mode():
        return F.log(self.all_prob)
def all_prob(self):
    with chainer.force_backprop_mode():
        return mellowmax.maximum_entropy_mellowmax(self.values)
def max(self):
    with chainer.force_backprop_mode():
        return F.select_item(self.q_values, self.greedy_actions)
def __forward(self, train, support_sets, support_lbls, x_set, x_lbl=None):
    model = self
    mod = self.__mod
    gpu = self.__gpu
    n_out = self.__n_out
    batch_size = support_sets[0].shape[0]
    N = len(support_sets)
    self.cleargrads()
    keys, Ws = self.embed_key(train, support_sets, support_lbls, x_set)
    key_mems, x_keys = keys
    grad_mems = []
    grad_mems1 = []
    for i in range(N):
        self.cleargrads()
        x = support_sets[i]
        x = Variable(x)
        with chainer.no_backprop_mode():
            with chainer.force_backprop_mode():
                x = F.reshape(x, (1, 1, 28, 28))
                h = model.block1_1(x, train)
                h = F.max_pooling_2d(h, ksize=2, stride=2)
                h = F.dropout(h, ratio=0.0, train=train)
                h = model.block1_2(h, train)
                h = F.max_pooling_2d(h, ksize=2, stride=2)
                h = F.dropout(h, ratio=0.0, train=train)
                h = model.block1_3(h, train)
                h = F.max_pooling_2d(h, ksize=2, stride=2)
                h = F.dropout(h, ratio=0.0, train=train)
                h = model.block1_4(h, train)
                h = F.max_pooling_2d(h, ksize=2, stride=2)
                h = F.dropout(h, ratio=0.0, train=train)
                h = model.block1_5(h, train)
                h = F.max_pooling_2d(h, ksize=2, stride=2)
                h = F.reshape(h, (1, 64))
                h = F.dropout(h, ratio=0.0, train=train)
                h = F.relu(model.fc1(h))
                h = F.dropout(h, ratio=0.0, train=train)
                y = model.fc2(h)
        y_batch = mod.array(support_lbls[i], dtype=np.int32)
        lbl = Variable(y_batch)
        with chainer.no_backprop_mode():
            with chainer.force_backprop_mode():
                support_loss = F.softmax_cross_entropy(y, lbl)
                support_loss.backward(retain_grad=True)
        grads = []
        grad_sections = []
        grads.append(model.block1_1.conv.W.grad.reshape(-1, 1))
        grad_sections.append(grads[-1].shape[0])
        grads.append(model.block1_2.conv.W.grad.reshape(-1, 1))
        grad_sections.append(grad_sections[-1] + grads[-1].shape[0])
        grads.append(model.block1_3.conv.W.grad.reshape(-1, 1))
        grad_sections.append(grad_sections[-1] + grads[-1].shape[0])
        grads.append(model.block1_4.conv.W.grad.reshape(-1, 1))
        grad_sections.append(grad_sections[-1] + grads[-1].shape[0])
        grads.append(model.block1_5.conv.W.grad.reshape(-1, 1))
        grads1 = []
        grad_sections1 = []
        grads1.append(model.fc1.W.grad.reshape(-1, 1))
        grad_sections1.append(grads1[-1].shape[0])
        grads1.append(model.fc2.W.grad.reshape(-1, 1))
        meta_in = mod.concatenate(grads, axis=0)
        meta_in = cuda.to_cpu(meta_in)
        meta_in = logAndSign(meta_in, k=7)
        meta_in = mod.array(meta_in)
        meta_in = Variable(meta_in)
        with chainer.no_backprop_mode():
            with chainer.force_backprop_mode():
                meta_outs = F.relu(
                    model.m_l1(F.dropout(meta_in, ratio=0.0, train=train)))
                meta_outs = F.relu(
                    model.m_ll1(
                        F.dropout(meta_outs, ratio=0.0, train=train)))
                meta_outs = model.meta_lstm_l2(
                    F.dropout(meta_outs, ratio=0.0, train=train))
                grad_mems.append(meta_outs)
        meta_in = mod.concatenate(grads1, axis=0)
        meta_in = cuda.to_cpu(meta_in)
        meta_in = logAndSign(meta_in, k=7)
        meta_in = mod.array(meta_in)
        meta_in = Variable(meta_in)
        with chainer.no_backprop_mode():
            with chainer.force_backprop_mode():
                meta_outs = F.relu(
                    model.mc_l1(F.dropout(meta_in, ratio=0.0, train=train)))
                meta_outs = F.relu(
                    model.mc_ll1(
                        F.dropout(meta_outs, ratio=0.0, train=train)))
                meta_outs = model.meta_g_lstm_l2(
                    F.dropout(meta_outs, ratio=0.0, train=train))
                grad_mems1.append(meta_outs)
    grad_mems = F.concat(grad_mems, axis=1)
    grad_mems1 = F.concat(grad_mems1, axis=1)
    x_keys = F.split_axis(x_keys, x_set.shape[0], axis=0)
    x = Variable(x_set)
    with chainer.no_backprop_mode():
        with chainer.force_backprop_mode():
            xs = F.split_axis(x, x_set.shape[0], axis=0)
    x_loss = 0
    preds = []
    for x, x_key, lbl in zip(xs, x_keys, x_lbl):
        x_key = F.reshape(x_key, (1, -1))
        sc = F.softmax(cosine_similarity2d(key_mems, x_key))
        meta_outs = F.matmul(grad_mems, sc, transb=True)
        meta_outs1 = F.matmul(grad_mems1, sc, transb=True)
        meta_outs = F.split_axis(meta_outs, grad_sections, axis=0)
        meta_outs1 = F.split_axis(meta_outs1, grad_sections1, axis=0)
        block1_1_W = F.reshape(meta_outs[0], model.block1_1.conv.W.data.shape)
        block1_2_W = F.reshape(meta_outs[1], model.block1_2.conv.W.data.shape)
        block1_3_W = F.reshape(meta_outs[2], model.block1_3.conv.W.data.shape)
        block1_4_W = F.reshape(meta_outs[3], model.block1_4.conv.W.data.shape)
        block1_5_W = F.reshape(meta_outs[4], model.block1_5.conv.W.data.shape)
        fc1_W = F.reshape(meta_outs1[0], model.fc1.W.data.shape)
        fc2_W = F.reshape(meta_outs1[1], model.fc2.W.data.shape)
        x = F.reshape(x, (1, 1, 28, 28))
        x = F.dropout(x, ratio=0.0, train=train)
        h = model.block1_1(x, train) + model.block1_1.call_on_W(
            x, block1_1_W, train)
        h = F.max_pooling_2d(h, ksize=2, stride=2)
        h = F.dropout(h, ratio=0.0, train=train)
        h = model.block1_2(h, train) + model.block1_2.call_on_W(
            h, block1_2_W, train)
        h = F.max_pooling_2d(h, ksize=2, stride=2)
        h = F.dropout(h, ratio=0.0, train=train)
        h = model.block1_3(h, train) + model.block1_3.call_on_W(
            h, block1_3_W, train)
        h = F.max_pooling_2d(h, ksize=2, stride=2)
        h = F.dropout(h, ratio=0.0, train=train)
        h = model.block1_4(h, train) + model.block1_4.call_on_W(
            h, block1_4_W, train)
        h = F.max_pooling_2d(h, ksize=2, stride=2)
        h = F.dropout(h, ratio=0.0, train=train)
        h = model.block1_5(h, train) + model.block1_5.call_on_W(
            h, block1_5_W, train)
        h = F.max_pooling_2d(h, ksize=2, stride=2)
        h = F.reshape(h, (1, 64))
        h = F.dropout(h, ratio=0.0, train=train)
        h = F.relu(model.fc1(h)) + F.relu(F.matmul(h, fc1_W, transb=True))
        h = F.dropout(h, ratio=0.0, train=train)
        y = model.fc2(h) + F.matmul(h, fc2_W, transb=True)
        y_batch = mod.array(lbl, dtype=np.int32).reshape((1, ))
        lbl = Variable(y_batch)
        with chainer.no_backprop_mode():
            with chainer.force_backprop_mode():
                x_loss += F.softmax_cross_entropy(y, lbl)
        preds += mod.argmax(y.data, 1).tolist()
    return preds, x_loss
def all_log_prob(self):
    with chainer.force_backprop_mode():
        return F.log_softmax(self.beta * self.logits)
def wrapper(self, structure, Rc, *params):
    differentiate_more = self._order > 0
    with chainer.using_config('enable_backprop', differentiate_more):
        G = func(self, structure, Rc, *params)
        yield F.stack([F.stack(g) for g in G])

    n_atom = len(G[0])
    diff_positions = []
    diff_indices = []
    for i_pos, i_idx, j_pos, j_idx in structure.get_neighbor_info(
            Rc, ['i_positions', 'i_indices', 'j_positions', 'j_indices']):
        diff_positions.extend([i_pos, j_pos])
        diff_indices.extend([i_idx, j_idx])

    differentiate_more = self._order > 1
    with chainer.using_config('enable_backprop', differentiate_more):
        dG = []
        for g in G:
            with chainer.force_backprop_mode():
                grad = chainer.grad(
                    g, diff_positions,
                    enable_double_backprop=differentiate_more)
            dg = [
                # by center atom itself
                F.concat([
                    F.sum(dg_, axis=0)
                    for dg_ in F.split_axis(
                        grad[2*i], diff_indices[2*i][1:], axis=0)
                ], axis=0)
                # by neighbor atoms
                + F.concat([
                    F.sum(dg_, axis=0)
                    for dg_ in F.split_axis(
                        grad[2*i+1], diff_indices[2*i+1][1:], axis=0)
                ], axis=0)
                for i in range(n_atom)
            ]
            dG.append(dg)
        yield F.stack([F.stack(dg) for dg in dG])

    differentiate_more = self._order > 2
    with chainer.using_config('enable_backprop', differentiate_more):
        d2G = []
        for dg in dG:
            d2g = []
            for j in range(3 * n_atom):
                with chainer.force_backprop_mode():
                    grad = chainer.grad(
                        [dg_[j] for dg_ in dg], diff_positions,
                        enable_double_backprop=differentiate_more)
                d2g_ = [
                    # by center atom itself
                    F.concat([
                        F.sum(d2g_, axis=0)
                        for d2g_ in F.split_axis(
                            grad[2*i], diff_indices[2*i][1:], axis=0)
                    ], axis=0)
                    # by neighbor atoms
                    + F.concat([
                        F.sum(d2g_, axis=0)
                        for d2g_ in F.split_axis(
                            grad[2*i+1], diff_indices[2*i+1][1:], axis=0)
                    ], axis=0)
                    for i in range(n_atom)
                ]
                d2g.append(d2g_)
            d2G.append(d2g)
        yield F.stack([F.stack([F.stack(d2g_) for d2g_ in d2g])
                       for d2g in d2G]).transpose(0, 2, 1, 3)
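# A minimal runnable sketch (generic Chainer, not this project's API) of the
# chainer.grad pattern used in wrapper above: requesting gradients as
# Variables with enable_double_backprop=True so they can be differentiated
# again for the next derivative order.
import numpy as np
import chainer
import chainer.functions as F

x = chainer.Variable(np.array([1.0, 2.0], dtype=np.float32))
y = F.sum(x ** 3)
gx, = chainer.grad([y], [x], enable_double_backprop=True)  # dy/dx = 3x^2
ggx, = chainer.grad([F.sum(gx)], [x])                      # d2y/dx2 = 6x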
def embed_key(self, train, support_sets, support_lbls, x_set):
    mod = self.__mod
    model = self
    IT = 5
    model.meta_lstm_l1.reset_state()
    model.meta_g_lstm_l1.reset_state()
    N = len(support_sets)
    for i in xrange(0, N, N / IT):
        self.cleargrads()
        x = mod.concatenate(support_sets[i:(i + N / IT)], axis=0)
        x = Variable(x)
        with chainer.no_backprop_mode():
            with chainer.force_backprop_mode():
                x = F.reshape(x, (-1, 1, 28, 28))
                x = F.dropout(x, ratio=0.0, train=train)
                h = model.key_1(x, train)
                h = F.max_pooling_2d(h, ksize=2, stride=2)
                h = F.dropout(h, ratio=0.0, train=train)
                h = model.key_2(h, train)
                h = F.max_pooling_2d(h, ksize=2, stride=2)
                h = F.dropout(h, ratio=0.0, train=train)
                h = model.key_3(h, train)
                h = F.max_pooling_2d(h, ksize=2, stride=2)
                h = F.dropout(h, ratio=0.0, train=train)
                h = model.key_4(h, train)
                h = F.max_pooling_2d(h, ksize=2, stride=2)
                h = F.dropout(h, ratio=0.0, train=train)
                h = model.key_5(h, train)
                h = F.max_pooling_2d(h, ksize=2, stride=2)
                h = F.reshape(h, (-1, 64))
                h = F.dropout(h, ratio=0.0, train=train)
                h = F.relu(model.key_fc1(h))
                y = model.key_fc2(h)
        y_batch = mod.array(support_lbls[i:(i + N / IT)],
                            dtype=np.int32).reshape((-1, ))
        lbl = Variable(y_batch)
        with chainer.no_backprop_mode():
            with chainer.force_backprop_mode():
                loss = F.softmax_cross_entropy(y, lbl)
                loss.backward(retain_grad=True)
        grads = []
        grad_sections = []
        grads.append(model.key_1.conv.W.grad.reshape(-1, 1))
        grad_sections.append(grads[-1].shape[0])
        grads.append(model.key_2.conv.W.grad.reshape(-1, 1))
        grad_sections.append(grad_sections[-1] + grads[-1].shape[0])
        grads.append(model.key_3.conv.W.grad.reshape(-1, 1))
        grad_sections.append(grad_sections[-1] + grads[-1].shape[0])
        grads.append(model.key_4.conv.W.grad.reshape(-1, 1))
        grad_sections.append(grad_sections[-1] + grads[-1].shape[0])
        grads.append(model.key_5.conv.W.grad.reshape(-1, 1))
        grads1 = []
        grad_sections1 = []
        grads1.append(model.key_fc1.W.grad.reshape(-1, 1))
        grad_sections1.append(grads1[-1].shape[0])
        grads1.append(model.key_fc2.W.grad.reshape(-1, 1))
        meta_in = mod.concatenate(grads, axis=0)
        meta_in = cuda.to_cpu(meta_in)
        meta_in = logAndSign(meta_in, k=7)
        meta_in = mod.array(meta_in)
        meta_in = Variable(meta_in)
        with chainer.no_backprop_mode():
            with chainer.force_backprop_mode():
                meta_outs = model.meta_lstm_l1(
                    F.dropout(meta_in, ratio=0.0, train=train))
                meta_outs = model.meta_lstm_ll1(
                    F.dropout(meta_outs, ratio=0.0, train=train))
        meta_in = mod.concatenate(grads1, axis=0)
        meta_in = cuda.to_cpu(meta_in)
        meta_in = logAndSign(meta_in, k=7)
        meta_in = mod.array(meta_in)
        meta_in = Variable(meta_in)
        with chainer.no_backprop_mode():
            with chainer.force_backprop_mode():
                meta_outs1 = model.meta_g_lstm_l1(
                    F.dropout(meta_in, ratio=0.0, train=train))
                meta_outs1 = model.meta_g_lstm_ll1(
                    F.dropout(meta_outs1, ratio=0.0, train=train))
    meta_outs = F.split_axis(meta_outs, grad_sections, axis=0)
    meta_outs1 = F.split_axis(meta_outs1, grad_sections1, axis=0)
    key_1_W = F.reshape(meta_outs[0], model.key_1.conv.W.data.shape)
    key_2_W = F.reshape(meta_outs[1], model.key_2.conv.W.data.shape)
    key_3_W = F.reshape(meta_outs[2], model.key_3.conv.W.data.shape)
    key_4_W = F.reshape(meta_outs[3], model.key_4.conv.W.data.shape)
    key_5_W = F.reshape(meta_outs[4], model.key_5.conv.W.data.shape)
    key_fc1_W = F.reshape(meta_outs1[0], model.key_fc1.W.data.shape)
    key_fc2_W = F.reshape(meta_outs1[1], model.key_fc2.W.data.shape)
    self.cleargrads()
    keys = []
    for x in [support_sets, x_set]:
        x = mod.asarray(x, dtype=np.float32).reshape((-1, 1, 28, 28))
        x = Variable(x)
        with chainer.no_backprop_mode():
            with chainer.force_backprop_mode():
                x = F.dropout(x, ratio=0.0, train=train)
                h = model.key_1(x, train) + model.key_1.call_on_W(
                    x, key_1_W, train)
                h = F.max_pooling_2d(h, ksize=2, stride=2)
                h = F.dropout(h, ratio=0.0, train=train)
                h = model.key_2(h, train) + model.key_2.call_on_W(
                    h, key_2_W, train)
                h = F.max_pooling_2d(h, ksize=2, stride=2)
                h = F.dropout(h, ratio=0.0, train=train)
                h = model.key_3(h, train) + model.key_3.call_on_W(
                    h, key_3_W, train)
                h = F.max_pooling_2d(h, ksize=2, stride=2)
                h = F.dropout(h, ratio=0.0, train=train)
                h = model.key_4(h, train) + model.key_4.call_on_W(
                    h, key_4_W, train)
                h = F.max_pooling_2d(h, ksize=2, stride=2)
                h = F.dropout(h, ratio=0.0, train=train)
                h = model.key_5(h, train) + model.key_5.call_on_W(
                    h, key_5_W, train)
                h = F.max_pooling_2d(h, ksize=2, stride=2)
                h = F.reshape(h, (-1, 64))
                h = F.dropout(h, ratio=0.0, train=train)
                h = model.key_fc1(h) + F.matmul(h, key_fc1_W, transb=True)
        keys.append(h)
    Ws = [
        key_1_W, key_2_W, key_3_W, key_4_W, key_5_W, key_fc1_W, key_fc2_W
    ]
    return keys, Ws
def q_values(self):
    with chainer.force_backprop_mode():
        return F.mean(self.quantiles, axis=1)
def greedy_actions(self):
    with chainer.force_backprop_mode():
        return self.maximizer()
def max(self):
    with chainer.force_backprop_mode():
        return self.evaluator(self.greedy_actions)
def entropy(self):
    with chainer.force_backprop_mode():
        return -F.sum(self.all_prob * self.all_log_prob, axis=1)
def all_log_prob(self):
    with chainer.force_backprop_mode():
        if self.min_prob > 0:
            return F.log(self.all_prob)
        else:
            return F.log_softmax(self.beta * self.logits)
def craft(self, image, label):
    """
    image is assumed to be in range [0, 1]
    :param image:
    :param label:
    :return:
    """
    adversarial_image_org = (image - 0.5) * 2. * 0.999999
    xp = chainer.cuda.get_array_module(adversarial_image_org)
    cost = xp.ones(image.shape[0], dtype=xp.float32) * self._initial_c
    upper_bound = xp.ones(label.shape) * self._max_c
    lower_bound = xp.ones(label.shape) * self._min_c
    o_best_l2 = xp.ones(label.shape) * 1e10
    o_best_logit = xp.ones(label.shape) * -1
    o_best_attack = image.copy()
    with chainer.force_backprop_mode():
        for i in range(self._max_binary_step):
            msg_base = '\riter: ' + str(i) + ' '
            best_logit = xp.ones(label.shape) * -1
            best_l2 = xp.ones(label.shape) * 1e10
            for r in range(self.n_restart):
                if r > 0:
                    start_img = adversarial_image_org + xp.random.normal(
                        scale=1e-2 / float(
                            np.sqrt(np.prod(image.shape[1:]))),
                        size=image.shape).astype(xp.float32)
                else:
                    start_img = adversarial_image_org
                adversarial_image = chainer.Parameter(xp.arctanh(start_img))
                opt = chainer.optimizers.Adam(alpha=self._lr)
                adversarial_image.update_rule = opt.create_update_rule()
                for j in range(self._max_iter):
                    # get scores
                    feed_image = (chainer.functions.tanh(adversarial_image)
                                  * 0.5 + 0.5)
                    l2_dist, loss_data, logit, loss = self.loss(
                        feed_image, label, image, cost, label)
                    cmp = self.compare(logit, label)
                    cmpl2 = best_l2 > l2_dist
                    change = cmpl2 & ~cmp
                    if change.any():
                        best_l2[change] = l2_dist[change]
                        best_logit[change] = 1
                    o_cmpl2 = o_best_l2 > l2_dist
                    change = o_cmpl2 & ~cmp
                    if change.any():
                        o_best_l2[change] = l2_dist[change]
                        o_best_attack[change] = (
                            xp.tanh(adversarial_image.data[change])
                            * 0.5 + 0.5)
                        o_best_logit[change] = 1
                    self.cleargrads()
                    adversarial_image.grad = None
                    loss.backward()
                    adversarial_image.update()
            # binary search for cost
            success = o_best_logit == 1
            if not self.noprint:
                sys.stdout.write(msg_base + 'success: ' + str(success.sum()))
                sys.stdout.flush()
            success = best_logit == 1
            upper_bound[success] = xp.minimum(upper_bound[success],
                                              cost[success])
            lower_bound[~success] = xp.maximum(lower_bound[~success],
                                               cost[~success])
            do_bin_search = upper_bound < (self._max_c - 1)
            cost[do_bin_search] = (upper_bound + lower_bound)[do_bin_search] * .5
            cost[~do_bin_search & ~success] *= 10
    if not self.noprint:
        sys.stdout.write('\n')
    self.l2_history.extend(list(np.sqrt(o_best_l2.get())))
    return o_best_attack
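# A scalar paraphrase (illustrative only, per example) of the binary search
# over the cost c used in both craft variants above: a successful attack
# tightens the upper bound, a failure raises the lower bound, and c is
# bisected once an upper bound exists; until then it grows by a factor of 10.
def update_cost(c, lo, hi, success, hi_cap=1e10):
    if success:
        hi = min(hi, c)
    else:
        lo = max(lo, c)
    if hi < hi_cap:               # an upper bound has been found: bisect
        return (hi + lo) * 0.5, lo, hi
    return c * 10.0, lo, hi       # otherwise keep growing c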