Example #1
    def __init__(self, model, batch_size, width_min=None, prob=0.9):
        assert isinstance(model, ClassifierDifferentiable)
        assert len(model.x_shape) == 3
        Attack.__init__(self, model, batch_size)

        self.width_max = self.model.x_shape[1]
        self.width_min = width_min if width_min else self.width_max // 2
        self.prob = prob

        self.x_dim = 1
        for x in self.model.x_shape:
            self.x_dim *= x

        self.batch_size = batch_size
        self.xs_ph = tf.placeholder(model.x_dtype, (batch_size, self.x_dim))
        self.ys_ph = tf.placeholder(model.y_dtype, (batch_size, ))

        self.xs_var = tf.Variable(tf.zeros_like(self.xs_ph))
        self.xs_adv = tf.Variable(tf.zeros_like(self.xs_ph))
        self.ys_var = tf.Variable(tf.zeros_like(self.ys_ph))
        self.g_var = tf.Variable(tf.zeros_like(self.xs_var))

        # apply the input diversity transform (defined elsewhere in the
        # class) to the adversarial batch before the forward pass
        xs_adv_di = self._input_diversity(
            tf.reshape(self.xs_adv, (self.batch_size, *self.model.x_shape)))
        logits, _ = self.model.logits_and_labels(xs_adv_di)
        loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
            labels=self.ys_var, logits=logits)
        self.grad = tf.gradients(loss, self.xs_adv)[0]
        self.grad_l1 = tf.reduce_sum(tf.abs(self.grad), axis=1, keepdims=True)
        self.setup = [
            self.xs_var.assign(self.xs_ph),
            self.xs_adv.assign(self.xs_ph),
            self.ys_var.assign(self.ys_ph),
            tf.variables_initializer([self.g_var])
        ]
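
A minimal driver sketch for the graph above, assuming a TF1 session named session, an attack instance named attack, and flattened numpy batches xs of shape (batch_size, x_dim) and ys; the iterative update op is built elsewhere in the class, so only the setup and one gradient fetch are shown:

    # hypothetical usage; session, attack, xs, ys are assumptions
    session.run(attack.setup, feed_dict={attack.xs_ph: xs, attack.ys_ph: ys})
    # one backward pass through the randomly diversified input
    grad, grad_l1 = session.run([attack.grad, attack.grad_l1])
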
Example #2
    def __init__(self, model, batch_size):
        assert isinstance(model, Classifier)
        if batch_size != 1:
            raise NotImplementedError

        Attack.__init__(self, model=model, batch_size=batch_size)

        self.xs_ph = tf.placeholder(model.x_dtype, (None,) + self.model.x_shape)
        self.labels_pred = self.model.labels(self.xs_ph)
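
This graph exposes only labels_pred, which is all a decision-based attack gets to observe: predicted labels, never gradients or logits. A minimal query sketch, assuming a session named session, an attack instance attack, and a numpy batch xs of shape (1,) + x_shape (batch_size must be 1 here):

    # hypothetical label-only query; session, attack, xs are assumptions
    labels = session.run(attack.labels_pred, feed_dict={attack.xs_ph: xs})
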
Example #3
    def __init__(self, model, batch_size):
        assert isinstance(model, ClassifierWithLogits)
        if batch_size != 1:
            raise NotImplementedError

        Attack.__init__(self, model, batch_size)

        self.xs_ph = tf.placeholder(model.x_dtype,
                                    (None, ) + self.model.x_shape)
        self.ys_ph = tf.placeholder(model.y_dtype, (None, ))

        logits, self.labels_pred = self.model.logits_and_labels(self.xs_ph)

        # NES uses margin logit loss proposed in C&W.
        logit_mask = tf.one_hot(self.ys_ph, self.model.n_class)
        label_logits = tf.reduce_sum(logit_mask * logits, axis=-1)
        highest_nonlabel_logits = tf.reduce_max(logits - logit_mask * 99999,
                                                axis=-1)
        self.loss = highest_nonlabel_logits - label_logits
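
Although batch_size must be 1 at construction, xs_ph keeps a None batch dimension, so many noisy copies of the one image can be scored per query. A sketch of how an NES-style estimator might evaluate this margin loss with antithetic sampling, where session, attack, the image x, label y, sample count n, and noise scale sigma are all assumptions:

    import numpy as np

    # hypothetical NES loss queries over antithetic noise pairs
    noise = np.random.randn(n, *x.shape).astype(np.float32)
    xs = np.concatenate([x[None] + sigma * noise, x[None] - sigma * noise])
    losses = session.run(attack.loss, feed_dict={
        attack.xs_ph: xs,
        attack.ys_ph: np.repeat(y, 2 * n),
    })
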
Example #4
    def __init__(self, model, batch_size, overshot=0.02):
        Attack.__init__(self, model=model, batch_size=batch_size)
        assert isinstance(self.model, ClassifierDifferentiable)

        self.batch_size = batch_size
        self.overshot = overshot

        x_dim = 1
        for x in self.model.x_shape:
            x_dim *= x

        self.x_dim = x_dim

        self.xs_ph = tf.placeholder(self.model.x_dtype,
                                    shape=(batch_size, x_dim))
        self.ys_ph = tf.placeholder(self.model.y_dtype, shape=(batch_size, ))
        self.xs_var = tf.Variable(tf.zeros_like(self.xs_ph))
        self.ys_var = tf.Variable(tf.zeros_like(self.ys_ph))
        self.xs_adv = tf.Variable(tf.zeros_like(self.xs_ph))

        logits, self.labels = self.model.logits_and_labels(
            xs_ph=tf.reshape(self.xs_adv, (batch_size, ) + self.model.x_shape))

        # (example index, true label k0) pairs for gathering per-example
        # true-class logits and gradients
        k0s = tf.stack((tf.range(self.batch_size), self.ys_var), axis=1)

        grads = [
            tf.gradients(logits[:, i], self.xs_adv)[0]
            for i in range(self.model.n_class)
        ]
        grads = tf.stack(grads, axis=0)
        grads = tf.transpose(grads, (1, 0, 2))

        yk0s = tf.expand_dims(tf.gather_nd(logits, k0s), axis=1)
        gradk0s = tf.expand_dims(tf.gather_nd(grads, k0s), axis=1)

        # DeepFool quantities: per-class logit gaps |f_k - f_k0| and the
        # corresponding gradient differences w_k - w_k0
        self.fs = tf.abs(yk0s - logits)
        self.ws = grads - gradk0s

        self.iteration = None
        self.distance_metric = None
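
fs and ws are exactly the inputs to DeepFool's closed-form step. A numpy sketch of one l_2 update built from them, assuming session and attack exist and that the xs_adv and ys_var variables were already assigned by setup ops defined elsewhere in the class:

    import numpy as np

    # hypothetical single DeepFool l_2 step
    # fs: (batch, n_class), ws: (batch, n_class, x_dim)
    fs, ws = session.run([attack.fs, attack.ws])
    norms = np.linalg.norm(ws, axis=2)
    ratios = fs / np.maximum(norms, 1e-12)
    ratios[fs == 0.0] = np.inf       # mask the true class, where fs is 0
    ls = ratios.argmin(axis=1)       # index of the closest decision boundary
    bs = np.arange(fs.shape[0])
    rs = (fs[bs, ls] / np.maximum(norms[bs, ls] ** 2, 1e-12))[:, None] * ws[bs, ls]
    xs_adv_next = session.run(attack.xs_adv) + (1 + attack.overshot) * rs
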
Example #5
    def __init__(self, model, batch_size, goal, distance_metric, decay_factor):
        assert isinstance(model, ClassifierDifferentiable)
        Attack.__init__(self, model=model, batch_size=batch_size)

        xs_shape = (self.batch_size, np.prod(self.model.x_shape))
        ys_shape = (self.batch_size, )
        model_xs_shape = (self.batch_size, *self.model.x_shape)
        xs_zeros = tf.zeros(xs_shape, dtype=self.model.x_dtype)

        self.xs_ph = get_xs_ph(self.model, self.batch_size)
        self.ys_ph = get_ys_ph(self.model, self.batch_size)
        self.eps_ph = tf.placeholder(tf.float32, (self.batch_size, ))
        self.alpha_ph = tf.placeholder(tf.float32, (self.batch_size, ))

        self.xs_var = tf.Variable(xs_zeros)
        self.ys_var = tf.Variable(tf.zeros(ys_shape, dtype=self.model.y_dtype))
        self.g_var = tf.Variable(xs_zeros)
        self.eps_var = tf.Variable(tf.zeros((batch_size, )))
        self.alpha_var = tf.Variable(tf.zeros((batch_size, )))

        self.xs_adv_var = tf.Variable(xs_zeros)
        self.xs_adv = tf.reshape(self.xs_adv_var, model_xs_shape)

        logits, _ = self.model.logits_and_labels(self.xs_adv)
        loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
            labels=self.ys_var, logits=logits)

        self.grad = tf.gradients(loss, self.xs_adv_var)[0]
        self.grad_l1 = tf.reduce_sum(tf.abs(self.grad), axis=1)
        self.config_setup = [
            self.eps_var.assign(self.eps_ph),
            self.alpha_var.assign(self.alpha_ph),
        ]
        xs_ph_in = tf.reshape(self.xs_ph, xs_shape)
        self.setup = [
            self.xs_var.assign(xs_ph_in),
            self.ys_var.assign(self.ys_ph),
            self.xs_adv_var.assign(xs_ph_in),
            tf.variables_initializer([self.g_var]),
        ]

        eps = tf.expand_dims(self.eps_var, 1)
        alpha = tf.expand_dims(self.alpha_var, 1)

        g_next = decay_factor * self.g_var + \
            self.grad / tf.expand_dims(self.grad_l1, 1)
        self.update_g_step = self.g_var.assign(g_next)

        if goal == 't' or goal == 'tm':
            g = -self.g_var
        elif goal == 'ut':
            g = self.g_var
        else:
            raise NotImplementedError

        if distance_metric == 'l_2':
            g_norm = tf.maximum(1e-12, tf.norm(g, axis=1))
            g_unit = g / tf.expand_dims(g_norm, 1)
            d = self.xs_adv_var + alpha * g_unit - self.xs_var
            xs_next = self.xs_var + tf.clip_by_norm(d, eps, axes=[1])
        elif distance_metric == 'l_inf':
            lo, hi = self.xs_var - eps, self.xs_var + eps
            d = self.xs_adv_var + alpha * tf.sign(g) - self.xs_var
            xs_next = tf.clip_by_value(self.xs_var + d, lo, hi)
        else:
            raise NotImplementedError

        xs_next = tf.clip_by_value(xs_next, self.model.x_min, self.model.x_max)
        self.step = self.xs_adv_var.assign(xs_next)

        self.goal = goal
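
A driver sketch for the full momentum loop: assign per-example eps and step size once via config_setup, load the batch via setup, then alternate the momentum accumulation with the step. session, attack, batches xs (in the model's input shape) and ys, scalars eps and alpha, and the iteration count n_iter are all assumptions:

    import numpy as np

    # hypothetical MIM attack loop
    bs = attack.batch_size
    session.run(attack.config_setup, feed_dict={
        attack.eps_ph: np.full(bs, eps, dtype=np.float32),
        attack.alpha_ph: np.full(bs, alpha, dtype=np.float32),
    })
    session.run(attack.setup, feed_dict={attack.xs_ph: xs, attack.ys_ph: ys})
    for _ in range(n_iter):
        session.run(attack.update_g_step)  # accumulate l_1-normalized gradient
        session.run(attack.step)           # move along the momentum direction
    xs_adv = session.run(attack.xs_adv)
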
Example #6
    def __init__(self, model, batch_size, goal, distance_metric, learning_rate,
                 confidence):
        assert isinstance(model, ClassifierDifferentiable)
        Attack.__init__(self, model, batch_size)

        self.confidence = confidence

        def scale(vec, dst_lo, dst_hi, src_lo, src_hi):
            k = (dst_hi - dst_lo) / (src_hi - src_lo)
            b = dst_lo - k * src_lo
            return k * vec + b

        def scale_to_model(vec):
            return scale(vec, self.model.x_min, self.model.x_max, -1.0, 1.0)

        def scale_to_tanh(vec):
            return scale(vec, 1e-6 - 1, 1 - 1e-6, self.model.x_min,
                         self.model.x_max)

        model_xs_shape = (self.batch_size, *self.model.x_shape)

        xs_shape = (self.batch_size, np.prod(self.model.x_shape))

        xs_zeros = tf.zeros(xs_shape, dtype=self.model.x_dtype)

        self.xs_ph = get_xs_ph(self.model, self.batch_size)
        self.ys_ph = get_ys_ph(self.model, self.batch_size)
        self.cs_ph = tf.placeholder(self.model.x_dtype, (self.batch_size, ))

        xs_var = tf.Variable(xs_zeros)
        ys_var = tf.Variable(tf.zeros_like(self.ys_ph))
        cs_var = tf.Variable(tf.zeros_like(self.cs_ph))

        d_ws = tf.Variable(xs_zeros)
        ws = tf.atanh(scale_to_tanh(xs_var)) + d_ws

        self.xs_adv = scale_to_model(tf.tanh(ws))
        self.xs_adv_output = tf.reshape(self.xs_adv, model_xs_shape)

        logits, _ = self.model.logits_and_labels(self.xs_adv_output)

        ys_one_hot = tf.one_hot(ys_var, self.model.n_class)

        logit_target = tf.reduce_sum(ys_one_hot * logits, 1)
        logit_other = (1 - ys_one_hot) * logits
        logit_other = logit_other - 0.5 * self.model.x_dtype.max * ys_one_hot
        logit_other = tf.reduce_max(logit_other, 1)

        self.setup_xs = xs_var.assign(tf.reshape(self.xs_ph, xs_shape))
        self.setup_ys = ys_var.assign(self.ys_ph)
        self.setup_cs = cs_var.assign(self.cs_ph)
        self.setup_d_ws = d_ws.assign(tf.zeros_like(d_ws))

        if distance_metric == 'l_2':
            dists = tf.reduce_sum(tf.square(self.xs_adv - xs_var), axis=1)
        elif distance_metric == 'l_inf':
            dists = tf.reduce_max(tf.abs(self.xs_adv - xs_var), axis=1)
        else:
            raise NotImplementedError

        if goal == 't' or goal == 'tm':
            score = tf.maximum(0.0, logit_other - logit_target + confidence)
        elif goal == 'ut':
            score = tf.maximum(0.0, logit_target - logit_other + confidence)
        else:
            raise NotImplementedError
        self.goal = goal

        loss = dists + cs_var * score

        optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
        self.optimizer_step = optimizer.minimize(loss, var_list=[d_ws])
        self.setup_optimizer = tf.variables_initializer(optimizer.variables())

        self.score = score
        self.logits = logits
        self.dists = dists
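
The inner optimization for one fixed set of trade-off constants then looks like the sketch below; the outer binary search over cs that C&W prescribes is omitted. session, attack, batches xs and ys, a per-example numpy vector cs, and n_iter are assumptions:

    # hypothetical C&W inner loop for one set of constants cs
    session.run([attack.setup_xs, attack.setup_ys, attack.setup_cs,
                 attack.setup_d_ws, attack.setup_optimizer],
                feed_dict={attack.xs_ph: xs, attack.ys_ph: ys,
                           attack.cs_ph: cs})
    for _ in range(n_iter):
        session.run(attack.optimizer_step)
    score, dists, xs_adv = session.run(
        [attack.score, attack.dists, attack.xs_adv_output])
    # score == 0 means the confidence margin is met; dists ranks candidates
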
Example #7
    def __init__(self,
                 model,
                 batch_size,
                 goal,
                 distance_metric,
                 random_start=False):
        # TODO random_start (accepted but not yet used in this constructor)
        assert isinstance(model, ClassifierDifferentiable)
        Attack.__init__(self, model=model, batch_size=batch_size)

        xs_shape = (self.batch_size, np.prod(self.model.x_shape))
        ys_shape = (self.batch_size, )
        model_xs_shape = (self.batch_size, *self.model.x_shape)
        xs_zeros = tf.zeros(xs_shape, dtype=self.model.x_dtype)

        self.xs_ph = get_xs_ph(self.model, self.batch_size)
        self.ys_ph = get_ys_ph(self.model, self.batch_size)
        self.eps_ph = tf.placeholder(tf.float32, (self.batch_size, ))
        self.alpha_ph = tf.placeholder(tf.float32, (self.batch_size, ))

        self.xs_var = tf.Variable(xs_zeros)
        self.ys_var = tf.Variable(tf.zeros(ys_shape, dtype=self.model.y_dtype))
        self.eps_var = tf.Variable(tf.zeros((batch_size, )))
        self.alpha_var = tf.Variable(tf.zeros((batch_size, )))

        self.xs_adv_var = tf.Variable(xs_zeros)
        self.xs_adv = tf.reshape(self.xs_adv_var, model_xs_shape)

        logits, _ = self.model.logits_and_labels(self.xs_adv)
        loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
            labels=self.ys_var, logits=logits)

        self.grad = tf.gradients(loss, self.xs_adv_var)[0]
        self.config_setup = [
            self.eps_var.assign(self.eps_ph),
            self.alpha_var.assign(self.alpha_ph),
        ]
        xs_ph_in = tf.reshape(self.xs_ph, xs_shape)
        self.setup = [
            self.xs_var.assign(xs_ph_in),
            self.ys_var.assign(self.ys_ph),
            self.xs_adv_var.assign(xs_ph_in),
        ]

        eps = tf.expand_dims(self.eps_var, 1)
        alpha = tf.expand_dims(self.alpha_var, 1)

        if goal == 't' or goal == 'tm':
            grad = -self.grad
        elif goal == 'ut':
            grad = self.grad
        else:
            raise NotImplementedError

        if distance_metric == 'l_2':
            grad_norm = tf.maximum(1e-12, tf.norm(grad, axis=1))
            grad_unit = grad / tf.expand_dims(grad_norm, 1)
            xs_next = self.xs_adv_var - self.xs_var + alpha * grad_unit
            xs_next = self.xs_var + tf.clip_by_norm(xs_next, eps, axes=[1])
        elif distance_metric == 'l_inf':
            lo, hi = self.xs_var - eps, self.xs_var + eps
            xs_next = self.xs_adv_var + alpha * tf.sign(grad)
            xs_next = tf.clip_by_value(xs_next, lo, hi)
        else:
            raise NotImplementedError

        xs_next = tf.clip_by_value(xs_next, self.model.x_min, self.model.x_max)
        self.step = self.xs_adv_var.assign(xs_next)

        self.goal = goal
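
The driver is the same as for the momentum variant above minus the update_g_step call: each step already projects back into the eps-ball and the valid pixel range. eps_batch and alpha_batch below are assumed per-example numpy vectors of shape (batch_size,), with the other names as in the previous sketch:

    # hypothetical BIM/PGD loop
    session.run(attack.config_setup, feed_dict={attack.eps_ph: eps_batch,
                                                attack.alpha_ph: alpha_batch})
    session.run(attack.setup, feed_dict={attack.xs_ph: xs, attack.ys_ph: ys})
    for _ in range(n_iter):
        session.run(attack.step)
    xs_adv = session.run(attack.xs_adv)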