Example #1
    def test_backprop_mode_affects_chainerx(self):
        # chainer.{no,force}_backprop_mode should affect chainerx's
        # counterpart.

        assert chainerx.is_backprop_required()

        # nobp
        with chainer.no_backprop_mode():
            assert not chainerx.is_backprop_required()

            # nobp > forcebp
            with chainer.force_backprop_mode():
                assert chainerx.is_backprop_required()

            # nobp > nobp
            with chainer.no_backprop_mode():
                assert not chainerx.is_backprop_required()

        assert chainerx.is_backprop_required()

        # forcebp
        with chainer.force_backprop_mode():
            assert chainerx.is_backprop_required()

            # forcebp > forcebp
            with chainer.force_backprop_mode():
                assert chainerx.is_backprop_required()

            # forcebp > nobp
            with chainer.no_backprop_mode():
                assert not chainerx.is_backprop_required()

        assert chainerx.is_backprop_required()
Example #2
    def test_force_backprop_mode(self):
        with chainer.no_backprop_mode():
            with chainer.force_backprop_mode():
                y = self.x + 1
        self.assertTrue(y.creator_node is not None)

        y = self.x + 1
        self.assertTrue(y.creator_node is not None)

        with chainer.force_backprop_mode():
            y = self.x + 1
        self.assertTrue(y.creator_node is not None)
Example #3
 def all_prob(self):
     with chainer.force_backprop_mode():
         if self.min_prob > 0:
             assert False
             return (F.softmax(self.beta * self.logits) *
                     (1 - self.min_prob * self.n)) + self.min_prob
         else:
             # consider using something like https://stable-baselines.readthedocs.io/en/master/_modules/stable_baselines/common/distributions.html#MultiCategoricalProbabilityDistribution
             # and https://stable-baselines.readthedocs.io/en/master/common/distributions.html
             return self.get_all_prob_or_log_prob(is_log=False)
Example #4
 def greedy_actions(self):
     with chainer.force_backprop_mode():
         a = self.mu
         if self.min_action is not None:
             a = F.maximum(
                 self.xp.broadcast_to(self.min_action, a.data.shape), a)
         if self.max_action is not None:
             a = F.minimum(
                 self.xp.broadcast_to(self.max_action, a.data.shape), a)
         return a
Example #5
    def max_as_distribution(self):
        """Return the return distributions of the greedy actions.

        Returns:
            chainer.Variable: Return distributions. Its shape will be
                (batch_size, n_atoms).
        """
        with chainer.force_backprop_mode():
            return self.q_dist[self.xp.arange(self.q_values.shape[0]),
                               self.greedy_actions.array]
Example #6
 def predict(self, image, label, backprop=True):
     with chainer.using_config('train', False):
         if backprop:
             with chainer.force_backprop_mode():
                 ret = self.model.predictor(
                     self.model.preprocess((image, label, 0.)))
         else:
             with chainer.no_backprop_mode():
                 ret = self.model.predictor(
                     self.model.preprocess((image, label, 0.)))
         return ret[0]
Example #7
 def craft(self, image, label):
     original_image = image.copy()
     image = chainer.Parameter(image)
     xp = chainer.cuda.get_array_module(image.data)
     grads = xp.empty((image.shape[0], self.n_class) + image.shape[1:],
                      dtype=xp.float32)
     for i in range(self.max_iter):
         prediction = self.predict(image, label, backprop=True)
         changed = xp.argmax(prediction.data, axis=1) != label
         if changed.all():
             break
         for k in range(self.n_class):
             image.grad = None
             with chainer.force_backprop_mode():
                 self.backprop(loss=F.sum(prediction[:, k]))
             grads[:, k] = image.grad
         grads -= grads[list(range(image.shape[0])),
                        label].reshape(grads.shape[0], 1, *grads.shape[2:])
         prediction = prediction.data
         prediction -= prediction[list(range(image.shape[0])),
                                  label].reshape((image.shape[0], 1))
         w_norm = (grads**2).sum(axis=tuple(range(2, grads.ndim)))
         fw, fw2 = chainer.cuda.elementwise(
             'T w, T f', 'T fw, T fw2', '''
             f = abs(f);
             if (f > 0 || w > 0) {
               fw = f / sqrt(w);
               fw2 = (f + 0.0001) / w;
             } else {
               // correct class label
               fw = 1000000000.0;
               fw2 = 0.0;
             }
             ''', 'deep_fool')(w_norm, prediction)
         change = xp.argmin(fw, axis=1)
         tmp = image.data + (1 + self.overshoot) * fw2[
             list(range(image.shape[0])), change].reshape(
                 (-1, ) + (1, ) *
                 (image.ndim - 1)) * (grads[list(range(image.shape[0])),
                                            change])
         image.data[~changed] = tmp[~changed]
     prediction = self.predict(image, label, backprop=False)
     changed = xp.argmax(prediction.data, axis=1) != label
     image = image.data
     l2_dist = xp.sqrt(
         ((image -
           original_image)**2).sum(axis=tuple(range(1, image.ndim))))
     l2_dist[~changed] = 1e9
     self.l2_history.extend(list(l2_dist.get()))
     sys.stdout.write('\r' + str(len(self.l2_history)).rjust(5, '0'))
     sys.stdout.flush()
     return image
Example #8
    def __init__(self, model, args):
        xp = model.xp

        # convert to tuple of Variable
        if not isinstance(args, Sequence):
            args = [args]
        args = [
            chainer.Variable(a) if not isinstance(a, chainer.Variable) else a
            for a in args
        ]

        # Collect the computational-graph information for links and functions separately.
        # Pruning would be simpler to implement by looking only at Links, but to also handle
        # cases where Functions are interposed (e.g. conv-bn-gap-fc), both graphs are analyzed
        # and the results are merged using VariableNode ids.
        # This part is planned to be rewritten more elegantly.
        with chainer.using_config('train',
                                  False), chainer.force_backprop_mode():
            with TraceLinkHook() as link_hook:
                outs = model(*args)
            if isinstance(outs, Mapping):
                outs = list(outs.values())
            if not isinstance(outs, Sequence):
                outs = [outs]

            with TraceFunctionHook() as func_hook:
                for out in outs:
                    out.grad = xp.ones_like(out.array)
                    out.backward()

        self.links = link_hook.graph  # type: Sequence[Node]
        # get global link name
        mapping = {id(link): name for name, link in model.namedlinks()}

        def replace_name(node):
            node.name = mapping[node.id]
            return node

        self.links = [replace_name(node) for node in self.links]

        self.functions = func_hook.graph  # type: Sequence[Node]
        self.functions = list(reversed(self.functions))

        nodes = list()
        nodes.extend(self.links)
        nodes.extend(self.functions)

        self.graph = self._traverse_connections(nodes)
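The comments in the example above describe tracing the link graph and the function graph separately (TraceLinkHook and TraceFunctionHook are project-specific classes) and merging the results by VariableNode id. As a rough sketch of just the function-side tracing, assuming a standard Chainer installation, the built-in chainer.FunctionHook can record which functions execute while the graph is forced to be built; the RecordFunctionsHook class below is a hypothetical illustration, not part of the project above.

    # Hypothetical illustration of function tracing under force_backprop_mode.
    import chainer
    import chainer.functions as F
    import numpy as np

    class RecordFunctionsHook(chainer.FunctionHook):
        # Collects the label of every FunctionNode executed in forward passes.
        name = 'RecordFunctionsHook'

        def __init__(self):
            self.labels = []

        def forward_postprocess(self, function, in_data):
            self.labels.append(function.label)

    x = chainer.Variable(np.random.rand(2, 3).astype(np.float32))
    with chainer.using_config('train', False), chainer.force_backprop_mode():
        with RecordFunctionsHook() as hook:
            y = F.relu(x) * 2
    print(hook.labels)  # function labels in execution order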
Example #9
File: cw.py Project: ytsmiling/lmt
    def attack_loss(self, image, target, label):
        """
        max(Z(X)_{real} - max{Z(X)_i:i\neq real}, -\kappa)
        :param image: current adversarial image
        :param target: target label
        :return: attack loss
        :rtype: chainer.Variable
        """

        with chainer.force_backprop_mode():
            Z = self.predict(image, label)
        xp = chainer.cuda.get_array_module(Z)
        tmp = xp.ones_like(Z, dtype=xp.float32) * 1e20
        tmp[list(range(tmp.shape[0])), target] = -1e20
        other = chainer.functions.minimum(Z, tmp)
        other = chainer.functions.max(other, 1)
        real = Z[list(range(tmp.shape[0])), target]
        Z_diff = real - other
        return chainer.functions.maximum(Z_diff + self._confidence,
                                         xp.zeros_like(Z_diff,
                                                       dtype=xp.float32)), Z.data
Example #10
    def backward(self, inputs, grads):
        xp = chainer.cuda.get_array_module(*grads)
        y_array = self.output_data[0]
        grad_y = grads[0]

        y1_array, y2_array = xp.split(y_array, 2, axis=1)
        grad_y1, grad_y2 = xp.split(grad_y, 2, axis=1)

        a, b = y1_array.copy(), y2_array.copy()
        ga, gb = grad_y1.copy(), grad_y2.copy()
        for res_unit in self.chainlist[::-1]:
            b_var = chainer.Variable(b)
            with chainer.force_backprop_mode():
                c_var = res_unit(b_var)
                c_var.grad = ga
                c_var.backward()
            a -= c_var.array
            gb += b_var.grad
            a, b = b, a
            ga, gb = gb, ga

        gx = xp.concatenate((ga, gb), axis=1)
        return gx,
Example #11
 def max(self):
     with chainer.force_backprop_mode():
         if self.min_action is None and self.max_action is None:
             return F.reshape(self.v, (self.batch_size, ))
         else:
             return self.evaluate_actions(self.greedy_actions)
Example #12
    def craft(self, image, label):
        """
        image is assumed to be in range [0, 255]
        :param image:
        :param label:
        :return:
        """

        adversarial_image_org = (image / 255. - 0.5) * 2. * 0.999999
        xp = chainer.cuda.get_array_module(adversarial_image_org)
        cost = xp.ones(image.shape[0], dtype=xp.float32) * self._initial_c
        upper_bound = xp.ones(label.shape) * self._max_c
        lower_bound = xp.ones(label.shape) * self._min_c
        o_best_l2 = xp.ones(label.shape) * 1e10
        o_best_logit = xp.ones(label.shape) * -1
        o_best_attack = image.copy()
        with chainer.force_backprop_mode():
            for i in range(self._max_binary_step):
                msg_base = '\riter: ' + str(i) + ' '
                adversarial_image = chainer.Parameter(
                    xp.arctanh(adversarial_image_org))
                opt = chainer.optimizers.Adam(alpha=self._lr)
                adversarial_image.update_rule = opt.create_update_rule()
                prev = xp.ones(adversarial_image.shape[0]) * 1e20
                best_l2 = xp.ones(label.shape) * 1e10
                best_logit = xp.ones(label.shape) * -1
                for j in range(self._max_iter):
                    # get scores
                    feed_image = (chainer.functions.tanh(adversarial_image) *
                                  0.5 + 0.5) * 255.
                    l2_dist, loss_data, logit, loss = self.loss(
                        feed_image, label, image, cost, label)
                    # when optimization stuck, break
                    if j % (self._max_iter // 10) + 1 == self._max_iter // 10:
                        if (loss_data > prev * .9999).all():
                            break
                        prev[:] = xp.minimum(prev, loss_data)
                    if j % (self._max_iter // 10) + 1 == self._max_iter // 10:
                        cmp = self.compare(logit, label)
                        cmpl2 = best_l2 > l2_dist
                        change = cmpl2 & ~cmp
                        if change.any():
                            best_l2[change] = l2_dist[change]
                            best_logit[change] = 1
                        o_cmpl2 = o_best_l2 > l2_dist
                        change = o_cmpl2 & ~cmp
                        if change.any():
                            o_best_l2[change] = l2_dist[change]
                            o_best_attack[change] = (
                                xp.tanh(adversarial_image.data[change]) * 0.5 +
                                0.5) * 255.
                            o_best_logit[change] = 1
                    self.cleargrads()
                    adversarial_image.grad = None
                    loss.backward()
                    adversarial_image.update()
                # binary search for cost
                success = o_best_logit == 1
                sys.stdout.write(msg_base + 'success: ' + str(success.sum()))
                sys.stdout.flush()
                success = best_logit == 1
                upper_bound[success] = cost[success]
                lower_bound[~success] = cost[~success]
                do_bin_search = (upper_bound < 1e9) & ~success
                cost[do_bin_search] = (upper_bound +
                                       lower_bound)[do_bin_search] * .5
                cost[~do_bin_search] *= 10
            sys.stdout.write('\n')
            self.l2_history.extend(list(np.sqrt(o_best_l2.get())))
            return o_best_attack
Example #13
    def calculate_local_lipschitz(self):
        print('\rlocal Lipschitz start', flush=True)
        iterator = self.iterator
        preprocess = self.preprocess
        target = self.target
        eval_func = self.eval_func or (lambda x: target(preprocess(x)))
        device = self.device or chainer.cuda.cupy.cuda.get_device_id()
        assert device >= 0

        if self.eval_hook:
            self.eval_hook(self)

        # Compute gradients on sampled (perturbed) inputs
        if hasattr(iterator, 'reset'):
            iterator.reset()
            it = iterator
        else:
            it = copy.copy(iterator)

        self.global_grad = chainer.cuda.cupy.zeros(
            (self.n_class, self.n_class), dtype=chainer.cuda.cupy.float32)

        margin_list = []
        size = 0
        total = len(it.dataset)
        for batch in it:
            size += len(batch)
            sys.stdout.write('\r{0}/{1}'.format(size, total))
            sys.stdout.flush()
            x, t = self.converter(batch, device)
            xp = chainer.cuda.get_array_module(x)
            c = xp.ones((1, ), dtype=np.float32)
            local_grad = xp.zeros((self.n_class, self.n_class),
                                  dtype=xp.float32)
            with chainer.force_backprop_mode():
                for _ in range(100):
                    noise = xp.random.normal(size=x.shape).astype(xp.float32)
                    normalize(noise)
                    x2 = chainer.Parameter(x + noise)
                    y, t, _ = eval_func((x2, t, c))
                    for i in range(self.n_class):
                        for j in range(i + 1, self.n_class):
                            if i == j:
                                continue
                            target.cleargrads()
                            x2.grad = None
                            F.sum(y[:, i] - y[:, j]).backward()
                            norm = xp.max(
                                xp.sqrt((x2.grad**2).sum(
                                    axis=tuple(range(1, x2.ndim)))))
                            local_grad[i, j] = max(local_grad[i, j], norm)
            for i in range(self.n_class):
                for j in range(i + 1, self.n_class):
                    local_grad[j, i] = local_grad[i, j]
                    self.global_grad[:] = xp.maximum(self.global_grad,
                                                     local_grad)
            with chainer.no_backprop_mode():
                y, t, _ = eval_func((x, t, c))
                y = y.array
            grad = local_grad[t]
            margins = self.get_margin(
                y, y[list(range(t.size)), t].reshape(t.size, 1), grad)
            margins = xp.min(margins, axis=1)
            margin_list.extend(list(margins.get()))

        return margin_list
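For reference, a compact sketch of what the innermost sampling loop above computes: for a class pair (i, j), the largest gradient norm of the score difference over random perturbations of the input, used as a local Lipschitz estimate of that margin. The helper name estimate_pair_lipschitz and the classifier argument f are illustrative only, and the noise normalization of the original loop is omitted.

    # Hypothetical CPU/NumPy version of the per-pair estimate used above.
    import chainer
    import chainer.functions as F
    import numpy as np

    def estimate_pair_lipschitz(f, x, i, j, n_samples=100):
        # f: a chainer Link mapping a batch of inputs to class scores.
        best = 0.0
        for _ in range(n_samples):
            noise = np.random.normal(size=x.shape).astype(np.float32)
            x2 = chainer.Parameter(x + noise)
            with chainer.force_backprop_mode():
                y = f(x2)
                f.cleargrads()
                F.sum(y[:, i] - y[:, j]).backward()
            g = x2.grad
            norm = np.max(np.sqrt((g ** 2).sum(axis=tuple(range(1, g.ndim)))))
            best = max(best, float(norm))
        return best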
Example #14
 def all_log_prob(self):
     with chainer.force_backprop_mode():
         return F.log(self.all_prob)
Example #15
 def all_prob(self):
     with chainer.force_backprop_mode():
         return mellowmax.maximum_entropy_mellowmax(self.values)
Example #16
 def max(self):
     with chainer.force_backprop_mode():
         return F.select_item(self.q_values, self.greedy_actions)
Example #17
    def __forward(self, train, support_sets, support_lbls, x_set, x_lbl=None):
        model = self
        mod = self.__mod
        gpu = self.__gpu
        n_out = self.__n_out
        batch_size = support_sets[0].shape[0]
        N = len(support_sets)

        self.cleargrads()
        keys, Ws = self.embed_key(train, support_sets, support_lbls, x_set)
        key_mems, x_keys = keys

        grad_mems = []
        grad_mems1 = []

        for i in range(N):
            self.cleargrads()

            x = support_sets[i]
            x = Variable(x)
            with chainer.no_backprop_mode():
                with chainer.force_backprop_mode():
                    x = F.reshape(x, (1, 1, 28, 28))

                    h = model.block1_1(x, train)
                    h = F.max_pooling_2d(h, ksize=2, stride=2)
                    h = F.dropout(h, ratio=0.0, train=train)
                    h = model.block1_2(h, train)
                    h = F.max_pooling_2d(h, ksize=2, stride=2)
                    h = F.dropout(h, ratio=0.0, train=train)
                    h = model.block1_3(h, train)
                    h = F.max_pooling_2d(h, ksize=2, stride=2)
                    h = F.dropout(h, ratio=0.0, train=train)
                    h = model.block1_4(h, train)
                    h = F.max_pooling_2d(h, ksize=2, stride=2)
                    h = F.dropout(h, ratio=0.0, train=train)
                    h = model.block1_5(h, train)
                    h = F.max_pooling_2d(h, ksize=2, stride=2)
                    h = F.reshape(h, (1, 64))
                    h = F.dropout(h, ratio=0.0, train=train)
                    h = F.relu(model.fc1(h))
                    h = F.dropout(h, ratio=0.0, train=train)
                    y = model.fc2(h)

            y_batch = mod.array(support_lbls[i], dtype=np.int32)
            lbl = Variable(y_batch)
            with chainer.no_backprop_mode():
                with chainer.force_backprop_mode():
                    support_loss = F.softmax_cross_entropy(y, lbl)

            support_loss.backward(retain_grad=True)

            grads = []
            grad_sections = []
            grads.append(model.block1_1.conv.W.grad.reshape(-1, 1))
            grad_sections.append(grads[-1].shape[0])

            grads.append(model.block1_2.conv.W.grad.reshape(-1, 1))
            grad_sections.append(grad_sections[-1] + grads[-1].shape[0])

            grads.append(model.block1_3.conv.W.grad.reshape(-1, 1))
            grad_sections.append(grad_sections[-1] + grads[-1].shape[0])

            grads.append(model.block1_4.conv.W.grad.reshape(-1, 1))
            grad_sections.append(grad_sections[-1] + grads[-1].shape[0])

            grads.append(model.block1_5.conv.W.grad.reshape(-1, 1))

            grads1 = []
            grad_sections1 = []
            grads1.append(model.fc1.W.grad.reshape(-1, 1))
            grad_sections1.append(grads1[-1].shape[0])

            grads1.append(model.fc2.W.grad.reshape(-1, 1))

            meta_in = mod.concatenate(grads, axis=0)
            meta_in = cuda.to_cpu(meta_in)
            meta_in = logAndSign(meta_in, k=7)
            meta_in = mod.array(meta_in)

            meta_in = Variable(meta_in)
            with chainer.no_backprop_mode():
                with chainer.force_backprop_mode():

                    meta_outs = F.relu(
                        model.m_l1(F.dropout(meta_in, ratio=0.0, train=train)))
                    meta_outs = F.relu(
                        model.m_ll1(
                            F.dropout(meta_outs, ratio=0.0, train=train)))
                    meta_outs = model.meta_lstm_l2(
                        F.dropout(meta_outs, ratio=0.0, train=train))

                    grad_mems.append(meta_outs)

            meta_in = mod.concatenate(grads1, axis=0)
            meta_in = cuda.to_cpu(meta_in)
            meta_in = logAndSign(meta_in, k=7)
            meta_in = mod.array(meta_in)

            meta_in = Variable(meta_in)

            with chainer.no_backprop_mode():
                with chainer.force_backprop_mode():
                    meta_outs = F.relu(
                        model.mc_l1(F.dropout(meta_in, ratio=0.0,
                                              train=train)))
                    meta_outs = F.relu(
                        model.mc_ll1(
                            F.dropout(meta_outs, ratio=0.0, train=train)))
                    meta_outs = model.meta_g_lstm_l2(
                        F.dropout(meta_outs, ratio=0.0, train=train))
                    grad_mems1.append(meta_outs)

        grad_mems = F.concat(grad_mems, axis=1)
        grad_mems1 = F.concat(grad_mems1, axis=1)

        x_keys = F.split_axis(x_keys, x_set.shape[0], axis=0)

        x = Variable(x_set)
        with chainer.no_backprop_mode():
            with chainer.force_backprop_mode():
                xs = F.split_axis(x, x_set.shape[0], axis=0)

        x_loss = 0
        preds = []
        for x, x_key, lbl in zip(xs, x_keys, x_lbl):
            x_key = F.reshape(x_key, (1, -1))
            sc = F.softmax(cosine_similarity2d(key_mems, x_key))
            meta_outs = F.matmul(grad_mems, sc, transb=True)
            meta_outs1 = F.matmul(grad_mems1, sc, transb=True)

            meta_outs = F.split_axis(meta_outs, grad_sections, axis=0)
            meta_outs1 = F.split_axis(meta_outs1, grad_sections1, axis=0)

            block1_1_W = F.reshape(meta_outs[0],
                                   model.block1_1.conv.W.data.shape)
            block1_2_W = F.reshape(meta_outs[1],
                                   model.block1_2.conv.W.data.shape)
            block1_3_W = F.reshape(meta_outs[2],
                                   model.block1_3.conv.W.data.shape)
            block1_4_W = F.reshape(meta_outs[3],
                                   model.block1_4.conv.W.data.shape)
            block1_5_W = F.reshape(meta_outs[4],
                                   model.block1_5.conv.W.data.shape)
            fc1_W = F.reshape(meta_outs1[0], model.fc1.W.data.shape)
            fc2_W = F.reshape(meta_outs1[1], model.fc2.W.data.shape)

            x = F.reshape(x, (1, 1, 28, 28))
            x = F.dropout(x, ratio=0.0, train=train)
            h = model.block1_1(x, train) + model.block1_1.call_on_W(
                x, block1_1_W, train)
            h = F.max_pooling_2d(h, ksize=2, stride=2)
            h = F.dropout(h, ratio=0.0, train=train)
            h = model.block1_2(h, train) + model.block1_2.call_on_W(
                h, block1_2_W, train)
            h = F.max_pooling_2d(h, ksize=2, stride=2)
            h = F.dropout(h, ratio=0.0, train=train)
            h = model.block1_3(h, train) + model.block1_3.call_on_W(
                h, block1_3_W, train)
            h = F.max_pooling_2d(h, ksize=2, stride=2)
            h = F.dropout(h, ratio=0.0, train=train)
            h = model.block1_4(h, train) + model.block1_4.call_on_W(
                h, block1_4_W, train)
            h = F.max_pooling_2d(h, ksize=2, stride=2)
            h = F.dropout(h, ratio=0.0, train=train)
            h = model.block1_5(h, train) + model.block1_5.call_on_W(
                h, block1_5_W, train)
            h = F.max_pooling_2d(h, ksize=2, stride=2)
            h = F.reshape(h, (1, 64))
            h = F.dropout(h, ratio=0.0, train=train)
            h = F.relu(model.fc1(h)) + F.relu(F.matmul(h, fc1_W, transb=True))
            h = F.dropout(h, ratio=0.0, train=train)
            y = model.fc2(h) + F.matmul(h, fc2_W, transb=True)

            y_batch = mod.array(lbl, dtype=np.int32).reshape((1, ))
            lbl = Variable(y_batch)
            with chainer.no_backprop_mode():
                with chainer.force_backprop_mode():
                    x_loss += F.softmax_cross_entropy(y, lbl)
                    preds += mod.argmax(y.data, 1).tolist()

        return preds, x_loss
Example #18
 def all_log_prob(self):
     with chainer.force_backprop_mode():
         return F.log_softmax(self.beta * self.logits)
Example #19
        def wrapper(self, structure, Rc, *params):
            differentiate_more = self._order > 0
            with chainer.using_config('enable_backprop', differentiate_more):
                G = func(self, structure, Rc, *params)
                yield F.stack([F.stack(g) for g in G])

            n_atom = len(G[0])
            diff_positions = []
            diff_indices = []
            for i_pos, i_idx, j_pos, j_idx in structure.get_neighbor_info(
                    Rc,
                    ['i_positions', 'i_indices', 'j_positions', 'j_indices']
                ):
                diff_positions.extend([i_pos, j_pos])
                diff_indices.extend([i_idx, j_idx])

            differentiate_more = self._order > 1
            with chainer.using_config('enable_backprop', differentiate_more):
                dG = []
                for g in G:
                    with chainer.force_backprop_mode():
                        grad = chainer.grad(
                            g, diff_positions,
                            enable_double_backprop=differentiate_more)
                    dg = [
                        # by center atom itself
                        F.concat([
                            F.sum(dg_, axis=0)
                            for dg_ in F.split_axis(
                                grad[2*i], diff_indices[2*i][1:], axis=0
                                )
                            ], axis=0)
                        # by neighbor atoms
                        + F.concat([
                            F.sum(dg_, axis=0)
                            for dg_ in F.split_axis(
                                grad[2*i+1], diff_indices[2*i+1][1:], axis=0
                                )
                            ], axis=0)
                        for i in range(n_atom)
                    ]
                    dG.append(dg)
                yield F.stack([F.stack(dg) for dg in dG])

            differentiate_more = self._order > 2
            with chainer.using_config('enable_backprop', differentiate_more):
                d2G = []
                for dg in dG:
                    d2g = []
                    for j in range(3 * n_atom):
                        with chainer.force_backprop_mode():
                            grad = chainer.grad(
                                [dg_[j] for dg_ in dg], diff_positions,
                                enable_double_backprop=differentiate_more)
                        d2g_ = [
                            # by center atom itself
                            F.concat([
                                F.sum(d2g_, axis=0)
                                for d2g_ in F.split_axis(
                                    grad[2*i], diff_indices[2*i][1:], axis=0
                                    )
                                ], axis=0)
                            # by neighbor atoms
                            + F.concat([
                                F.sum(d2g_, axis=0)
                                for d2g_ in F.split_axis(
                                    grad[2*i+1], diff_indices[2*i+1][1:], axis=0
                                    )
                                ], axis=0)
                            for i in range(n_atom)
                        ]
                        d2g.append(d2g_)
                    d2G.append(d2g)
                yield F.stack([F.stack([F.stack(d2g_) for d2g_ in d2g])
                               for d2g in d2G]).transpose(0, 2, 1, 3)
Example #20
    def embed_key(self, train, support_sets, support_lbls, x_set):
        mod = self.__mod
        model = self
        IT = 5

        model.meta_lstm_l1.reset_state()
        model.meta_g_lstm_l1.reset_state()

        N = len(support_sets)
        for i in xrange(0, N, N / IT):
            self.cleargrads()

            x = mod.concatenate(support_sets[i:(i + N / IT)], axis=0)
            x = Variable(x)
            with chainer.no_backprop_mode():
                with chainer.force_backprop_mode():
                    x = F.reshape(x, (-1, 1, 28, 28))
                    x = F.dropout(x, ratio=0.0, train=train)
                    h = model.key_1(x, train)
                    h = F.max_pooling_2d(h, ksize=2, stride=2)
                    h = F.dropout(h, ratio=0.0, train=train)
                    h = model.key_2(h, train)
                    h = F.max_pooling_2d(h, ksize=2, stride=2)
                    h = F.dropout(h, ratio=0.0, train=train)
                    h = model.key_3(h, train)
                    h = F.max_pooling_2d(h, ksize=2, stride=2)
                    h = F.dropout(h, ratio=0.0, train=train)
                    h = model.key_4(h, train)
                    h = F.max_pooling_2d(h, ksize=2, stride=2)
                    h = F.dropout(h, ratio=0.0, train=train)
                    h = model.key_5(h, train)
                    h = F.max_pooling_2d(h, ksize=2, stride=2)
                    h = F.reshape(h, (-1, 64))
                    h = F.dropout(h, ratio=0.0, train=train)
                    h = F.relu(model.key_fc1(h))
                    y = model.key_fc2(h)

            y_batch = mod.array(support_lbls[i:(i + N / IT)],
                                dtype=np.int32).reshape((-1, ))
            lbl = Variable(y_batch)
            with chainer.no_backprop_mode():
                with chainer.force_backprop_mode():
                    loss = F.softmax_cross_entropy(y, lbl)
            loss.backward(retain_grad=True)

            grads = []
            grad_sections = []
            grads.append(model.key_1.conv.W.grad.reshape(-1, 1))
            grad_sections.append(grads[-1].shape[0])

            grads.append(model.key_2.conv.W.grad.reshape(-1, 1))
            grad_sections.append(grad_sections[-1] + grads[-1].shape[0])

            grads.append(model.key_3.conv.W.grad.reshape(-1, 1))
            grad_sections.append(grad_sections[-1] + grads[-1].shape[0])

            grads.append(model.key_4.conv.W.grad.reshape(-1, 1))
            grad_sections.append(grad_sections[-1] + grads[-1].shape[0])

            grads.append(model.key_5.conv.W.grad.reshape(-1, 1))

            grads1 = []
            grad_sections1 = []
            grads1.append(model.key_fc1.W.grad.reshape(-1, 1))
            grad_sections1.append(grads1[-1].shape[0])

            grads1.append(model.key_fc2.W.grad.reshape(-1, 1))

            meta_in = mod.concatenate(grads, axis=0)
            meta_in = cuda.to_cpu(meta_in)
            meta_in = logAndSign(meta_in, k=7)
            meta_in = mod.array(meta_in)
            meta_in = Variable(meta_in)
            with chainer.no_backprop_mode():
                with chainer.force_backprop_mode():
                    meta_outs = model.meta_lstm_l1(
                        F.dropout(meta_in, ratio=0.0, train=train))
                    meta_outs = model.meta_lstm_ll1(
                        F.dropout(meta_outs, ratio=0.0, train=train))

            meta_in = mod.concatenate(grads1, axis=0)
            meta_in = cuda.to_cpu(meta_in)
            meta_in = logAndSign(meta_in, k=7)
            meta_in = mod.array(meta_in)
            meta_in = Variable(meta_in)
            with chainer.no_backprop_mode():
                with chainer.force_backprop_mode():
                    meta_outs1 = model.meta_g_lstm_l1(
                        F.dropout(meta_in, ratio=0.0, train=train))
                    meta_outs1 = model.meta_g_lstm_ll1(
                        F.dropout(meta_outs1, ratio=0.0, train=train))

        meta_outs = F.split_axis(meta_outs, grad_sections, axis=0)
        meta_outs1 = F.split_axis(meta_outs1, grad_sections1, axis=0)

        key_1_W = F.reshape(meta_outs[0], model.key_1.conv.W.data.shape)
        key_2_W = F.reshape(meta_outs[1], model.key_2.conv.W.data.shape)
        key_3_W = F.reshape(meta_outs[2], model.key_3.conv.W.data.shape)
        key_4_W = F.reshape(meta_outs[3], model.key_4.conv.W.data.shape)
        key_5_W = F.reshape(meta_outs[4], model.key_5.conv.W.data.shape)
        key_fc1_W = F.reshape(meta_outs1[0], model.key_fc1.W.data.shape)
        key_fc2_W = F.reshape(meta_outs1[1], model.key_fc2.W.data.shape)

        self.cleargrads()

        keys = []
        for x in [support_sets, x_set]:
            x = mod.asarray(x, dtype=np.float32).reshape((-1, 1, 28, 28))
            x = Variable(x)
            with chainer.no_backprop_mode():
                with chainer.force_backprop_mode():
                    x = F.dropout(x, ratio=0.0, train=train)
                    h = model.key_1(x, train) + model.key_1.call_on_W(
                        x, key_1_W, train)
                    h = F.max_pooling_2d(h, ksize=2, stride=2)
                    h = F.dropout(h, ratio=0.0, train=train)
                    h = model.key_2(h, train) + model.key_2.call_on_W(
                        h, key_2_W, train)
                    h = F.max_pooling_2d(h, ksize=2, stride=2)
                    h = F.dropout(h, ratio=0.0, train=train)
                    h = model.key_3(h, train) + model.key_3.call_on_W(
                        h, key_3_W, train)
                    h = F.max_pooling_2d(h, ksize=2, stride=2)
                    h = F.dropout(h, ratio=0.0, train=train)
                    h = model.key_4(h, train) + model.key_4.call_on_W(
                        h, key_4_W, train)
                    h = F.max_pooling_2d(h, ksize=2, stride=2)
                    h = F.dropout(h, ratio=0.0, train=train)
                    h = model.key_5(h, train) + model.key_5.call_on_W(
                        h, key_5_W, train)
                    h = F.max_pooling_2d(h, ksize=2, stride=2)
                    h = F.reshape(h, (-1, 64))
                    h = F.dropout(h, ratio=0.0, train=train)
                    h = model.key_fc1(h) + F.matmul(h, key_fc1_W, transb=True)
                    keys.append(h)

        Ws = [
            key_1_W, key_2_W, key_3_W, key_4_W, key_5_W, key_fc1_W, key_fc2_W
        ]
        return keys, Ws
Example #21
 def q_values(self):
     with chainer.force_backprop_mode():
         return F.mean(self.quantiles, axis=1)
Example #22
 def greedy_actions(self):
     with chainer.force_backprop_mode():
         return self.maximizer()
Example #23
 def max(self):
     with chainer.force_backprop_mode():
         return self.evaluator(self.greedy_actions)
Example #24
 def entropy(self):
     with chainer.force_backprop_mode():
         return -F.sum(self.all_prob * self.all_log_prob, axis=1)
Example #25
 def all_log_prob(self):
     with chainer.force_backprop_mode():
         if self.min_prob > 0:
             return F.log(self.all_prob)
         else:
             return F.log_softmax(self.beta * self.logits)
Example #26
File: cw.py Project: ytsmiling/lmt
    def craft(self, image, label):
        """
        image is assumed to be in range [0, 1]
        :param image:
        :param label:
        :return:
        """

        adversarial_image_org = (image - 0.5) * 2. * 0.999999
        xp = chainer.cuda.get_array_module(adversarial_image_org)
        cost = xp.ones(image.shape[0], dtype=xp.float32) * self._initial_c
        upper_bound = xp.ones(label.shape) * self._max_c
        lower_bound = xp.ones(label.shape) * self._min_c
        o_best_l2 = xp.ones(label.shape) * 1e10
        o_best_logit = xp.ones(label.shape) * -1
        o_best_attack = image.copy()
        with chainer.force_backprop_mode():
            for i in range(self._max_binary_step):
                msg_base = '\riter: ' + str(i) + ' '
                best_logit = xp.ones(label.shape) * -1
                best_l2 = xp.ones(label.shape) * 1e10
                for r in range(self.n_restart):
                    if r > 0:
                        start_img = adversarial_image_org + xp.random.normal(
                            scale=1e-2 / float(
                                np.sqrt(np.prod(image.shape[1:]))),
                            size=image.shape).astype(xp.float32)
                    else:
                        start_img = adversarial_image_org
                    adversarial_image = chainer.Parameter(xp.arctanh(start_img))
                    opt = chainer.optimizers.Adam(alpha=self._lr)
                    adversarial_image.update_rule = opt.create_update_rule()
                    for j in range(self._max_iter):
                        # get scores
                        feed_image = (chainer.functions.tanh(
                            adversarial_image) * 0.5 + 0.5)
                        l2_dist, loss_data, logit, loss = self.loss(feed_image,
                                                                    label,
                                                                    image, cost,
                                                                    label)
                        cmp = self.compare(logit, label)
                        cmpl2 = best_l2 > l2_dist
                        change = cmpl2 & ~cmp
                        if change.any():
                            best_l2[change] = l2_dist[change]
                            best_logit[change] = 1
                        o_cmpl2 = o_best_l2 > l2_dist
                        change = o_cmpl2 & ~cmp
                        if change.any():
                            o_best_l2[change] = l2_dist[change]
                            o_best_attack[change] = (xp.tanh(
                                adversarial_image.data[change]) * 0.5 + 0.5)
                            o_best_logit[change] = 1
                        self.cleargrads()
                        adversarial_image.grad = None
                        loss.backward()
                        adversarial_image.update()
                # binary search for cost
                success = o_best_logit == 1
                if not self.noprint:
                    sys.stdout.write(msg_base + 'success: ' + str(success.sum()))
                    sys.stdout.flush()
                success = best_logit == 1
                upper_bound[success] = xp.minimum(upper_bound[success],
                                                  cost[success])
                lower_bound[~success] = xp.maximum(lower_bound[~success],
                                                   cost[~success])
                do_bin_search = upper_bound < (self._max_c - 1)
                cost[do_bin_search] = (upper_bound + lower_bound)[
                                          do_bin_search] * .5
                cost[~do_bin_search & ~success] *= 10
            if not self.noprint:
                sys.stdout.write('\n')
            self.l2_history.extend(list(np.sqrt(o_best_l2.get())))
            return o_best_attack