def one_step_attack(adv):
    if not self.USE_FP16:
        logits = model_func(adv)
    else:
        adv16 = tf.cast(adv, tf.float16)
        with custom_getter_scope(fp16_getter):
            logits = model_func(adv16)
            logits = tf.cast(logits, tf.float32)
    # Note we don't add any summaries here when creating losses, because
    # summaries don't work in conditionals.
    losses = tf.nn.sparse_softmax_cross_entropy_with_logits(
        logits=logits, labels=target_label)  # we want to minimize it in targeted attack
    if not self.USE_FP16:
        g, = tf.gradients(losses, adv)
    else:
        """
        We perform loss scaling to prevent underflow:
        https://docs.nvidia.com/deeplearning/sdk/mixed-precision-training/index.html
        (We have not yet tried training without scaling)
        """
        g, = tf.gradients(losses * 128., adv)
        g = g / 128.
    """
    Feature Denoising, Sec 5:
    We use the Projected Gradient Descent (PGD)
    (implemented at https://github.com/MadryLab/cifar10_challenge )
    as the white-box attacker for adversarial training
    """
    # descend the targeted loss (we want to minimize it), staying within the bounds
    adv = tf.clip_by_value(adv - tf.sign(g) * self.step_size, lower_bound, upper_bound)
    return adv
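The single step above is meant to be iterated into a multi-step PGD attack. A minimal, hedged sketch of such a loop follows; the names orig_image, epsilon and num_iter are assumptions, not taken from the snippet, and clipping to the valid pixel range is omitted.

# Hedged sketch, not part of the original snippet: iterate one_step_attack
# for num_iter steps, starting from a random point inside the eps-ball.
lower_bound = orig_image - epsilon
upper_bound = orig_image + epsilon
start = orig_image + tf.random_uniform(tf.shape(orig_image), minval=-epsilon, maxval=epsilon)
adv_final = tf.while_loop(
    cond=lambda adv, i: i < num_iter,
    body=lambda adv, i: (one_step_attack(adv), i + 1),
    loop_vars=[start, tf.constant(0)],
    back_prop=False)[0]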
def backbone_scope(freeze):
    """
    Args:
        freeze (bool): whether to freeze all the variables under the scope
    """
    def nonlin(x):
        x = get_norm()(x)
        return tf.nn.relu(x)

    with argscope([Conv2D, MaxPooling, BatchNorm], data_format='channels_first'), \
            argscope(Conv2D, use_bias=False, activation=nonlin,
                     kernel_initializer=tf.variance_scaling_initializer(
                         scale=2.0, mode='fan_out')), \
            ExitStack() as stack:
        if cfg.BACKBONE.NORM in ['FreezeBN', 'SyncBN']:
            if freeze or cfg.BACKBONE.NORM == 'FreezeBN':
                stack.enter_context(argscope(BatchNorm, training=False))
            else:
                stack.enter_context(argscope(
                    BatchNorm, sync_statistics='nccl' if cfg.TRAINER == 'replicated' else 'horovod'))

        if freeze:
            stack.enter_context(freeze_variables(stop_gradient=False, skip_collection=True))
        else:
            # the layers are not completely frozen, but we may want to only freeze the affine
            if cfg.BACKBONE.FREEZE_AFFINE:
                stack.enter_context(custom_getter_scope(freeze_affine_getter))
        yield
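Functions like backbone_scope end in a bare yield, so they are generator-based context managers. A hedged usage sketch, assuming the definition is wrapped with contextlib.contextmanager and a hypothetical resnet_c4_backbone builder:

# Hedged usage sketch; resnet_c4_backbone is a hypothetical builder function.
from contextlib import contextmanager

backbone_scope = contextmanager(backbone_scope)

with backbone_scope(freeze=True):
    features = resnet_c4_backbone(image)  # layers built here inherit the argscopes / getters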
def backbone_argscope():
    def nonlin(x):
        x = get_norm()(x)
        return tf.nn.relu(x)

    with argscope([Conv2D, MaxPooling, BatchNorm], data_format='channels_first'), \
            argscope(Conv2D, use_bias=False, activation=nonlin), \
            argscope(BatchNorm, training=False), \
            custom_getter_scope(maybe_freeze_affine):
        yield
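The maybe_freeze_affine getter referenced above is not shown in these snippets. A hedged sketch of what such a getter could look like, stopping gradients through BatchNorm's affine parameters while leaving everything else trainable; the name matching and the stop_gradient approach are assumptions, not the actual implementation.

# Hedged sketch in the spirit of maybe_freeze_affine (the real implementation
# is not shown here): hand back BN gamma/beta wrapped in tf.stop_gradient so
# they never receive gradient updates.
def maybe_freeze_affine_sketch(getter, *args, **kwargs):
    var = getter(*args, **kwargs)
    if var.op.name.endswith('/gamma') or var.op.name.endswith('/beta'):
        var = tf.stop_gradient(var)
    return var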
def rename_get_variable(mapping):
    def custom_getter(getter, name, *args, **kwargs):
        splits = name.split('/')
        basename = splits[-1]
        if basename in mapping:
            basename = mapping[basename]
            splits[-1] = basename
            name = '/'.join(splits)
        return getter(name, *args, **kwargs)
    return custom_getter_scope(custom_getter)
def rename_get_variable(mapping):
    """
    Args:
        mapping(dict): an old -> new mapping for variable basename. e.g. {'kernel': 'W'}
    """
    def custom_getter(getter, name, *args, **kwargs):
        splits = name.split('/')
        basename = splits[-1]
        if basename in mapping:
            basename = mapping[basename]
            splits[-1] = basename
            name = '/'.join(splits)
        return getter(name, *args, **kwargs)
    return custom_getter_scope(custom_getter)
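A hedged usage sketch: the mapping lets variables created by tf.layers (which uses 'kernel'/'bias' basenames) come out with tensorpack-style names instead; inputs is an assumed placeholder name.

# Hedged usage sketch: create 'fc/W' and 'fc/b' instead of tf.layers'
# default 'fc/kernel' and 'fc/bias'.
with rename_get_variable({'kernel': 'W', 'bias': 'b'}):
    out = tf.layers.dense(inputs, 256, name='fc')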
def _get_logits(self, image):
    ctx = get_current_tower_context()
    with maybe_freeze_updates(ctx.index > 0):
        network = ConvNetBuilder(
            image, 3, True,
            use_tf_layers=True,
            data_format=self.data_format,
            dtype=tf.float16 if args.use_fp16 else tf.float32,
            variable_dtype=tf.float32)
        with custom_getter_scope(network.get_custom_getter()):
            dataset = lambda: 1  # dummy stand-in object; only its `.name` attribute is used below
            dataset.name = 'imagenet'
            model_conf = model_config.get_model_config('resnet50', dataset)
            model_conf.set_batch_size(args.batch)
            model_conf.add_inference(network)
            return network.affine(1000, activation='linear', stddev=0.001)
def one_step_attack(adv):
    if not self.USE_FP16:
        logits = model_func(adv)
    else:
        adv16 = tf.cast(adv, tf.float16)
        with custom_getter_scope(fp16_getter):
            logits = model_func(adv16)
            logits = tf.cast(logits, tf.float32)
    # Note we don't add any summaries here when creating losses, because
    # summaries don't work in conditionals.
    if self.targetted:
        target_label = self._create_random_target(label)
        losses = tf.nn.sparse_softmax_cross_entropy_with_logits(
            logits=logits, labels=target_label)  # we want to minimize it in targeted attack
    else:
        target_label = tf.argmax(logits, axis=1)
        losses = -tf.nn.sparse_softmax_cross_entropy_with_logits(
            logits=logits, labels=target_label)
    if not self.USE_FP16:
        g, = tf.gradients(losses, adv)
    else:
        """
        We perform loss scaling to prevent underflow:
        https://docs.nvidia.com/deeplearning/sdk/mixed-precision-training/index.html
        (We have not yet tried training without scaling)
        """
        g, = tf.gradients(losses * 128., adv)
        g = g / 128.
    """
    Feature Denoising, Sec 5:
    We use the Projected Gradient Descent (PGD)
    (implemented at https://github.com/MadryLab/cifar10_challenge )
    as the white-box attacker for adversarial training
    """
    if not self.l2:  # linf
        adv = tf.clip_by_value(adv - tf.sign(g) * self.step_size, lower_bound, upper_bound)
    else:  # l2
        mask = tf.norm(tf.reshape(g, (-1, g.shape[1] * g.shape[2] * g.shape[3])), axis=1) < self.epsilon
        idxs = tf.where(mask)
        # reshape to a per-sample mask so it broadcasts over (N, C, H, W)
        mask = tf.reshape(tf.dtypes.cast(mask, tf.float32), (-1, 1, 1, 1))
        normalized = self.epsilon * tf.math.l2_normalize(g, axis=[1, 2, 3])
        g = (1.0 - mask) * g + mask * normalized
        adv = adv - g
    return adv
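Note that the l2 branch above only rescales small gradients and then takes a plain step; it does not project the iterate back onto the epsilon-ball around the clean image. A standard projection step is shown below as a hedged sketch; orig_image is an assumed name for the unperturbed input.

# Hedged sketch, not in the original snippet: project adv back into an L2
# ball of radius epsilon around the clean image.
def project_l2(adv, orig_image, epsilon):
    delta = adv - orig_image
    # per-sample L2 norm over the C, H, W axes
    norms = tf.sqrt(tf.reduce_sum(tf.square(delta), axis=[1, 2, 3], keepdims=True))
    scale = tf.minimum(1.0, epsilon / tf.maximum(norms, 1e-12))
    return orig_image + delta * scale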
def backbone_scope(freeze):
    """
    Args:
        freeze (bool): whether to freeze all the variables under the scope

    Creates the following context:
    Conv, MaxPool and BatchNorm all take (NCHW) input format.
    Conv has no bias and uses Norm -> ReLU as its activation.
    1) If BACKBONE.NORM == FreezeBN, BatchNorm.training=False
    2) If BACKBONE.NORM == SyncBN, BatchNorm.sync_statistics=nccl|horovod
       (gathers the batch from all GPUs to compute mean and var)
    a) If freeze=True, no variables are trained; they are added to
       MODEL_VARIABLES instead of TRAINABLE_VARIABLES
    """
    def nonlin(x):
        x = get_norm()(x)
        return tf.nn.relu(x)

    with argscope([Conv2D, MaxPooling, BatchNorm], data_format='channels_first'), \
            argscope(Conv2D, use_bias=False, activation=nonlin,
                     kernel_initializer=tf.variance_scaling_initializer(
                         scale=2.0, mode='fan_out')), \
            ExitStack() as stack:
        if cfg.BACKBONE.NORM in ['FreezeBN', 'SyncBN']:
            if freeze or cfg.BACKBONE.NORM == 'FreezeBN':
                stack.enter_context(argscope(BatchNorm, training=False))
            else:
                stack.enter_context(argscope(
                    BatchNorm, sync_statistics='nccl' if cfg.TRAINER == 'replicated' else 'horovod'))

        if freeze:
            stack.enter_context(freeze_variables(stop_gradient=False, skip_collection=True))
        else:
            # the layers are not completely frozen, but we may want to only freeze the affine
            if cfg.BACKBONE.FREEZE_AFFINE:
                stack.enter_context(custom_getter_scope(freeze_affine_getter))
        yield
def resnet_argscope():
    with argscope([Conv2D, MaxPooling, BatchNorm], data_format='channels_first'), \
            argscope(Conv2D, use_bias=False), \
            argscope(BatchNorm, training=False), \
            custom_getter_scope(maybe_freeze_affine):
        yield
def resnet_argscope():
    with argscope([Conv2D, MaxPooling, BatchNorm], data_format='NCHW'), \
            argscope(Conv2D, use_bias=False), \
            argscope(BatchNorm, use_local_stat=False), \
            custom_getter_scope(maybe_freeze_affine):
        yield
def build_graph(image, label):
    if USE_FP16:
        image = tf.cast(image, tf.float16)

    def activation(x):
        return tf.nn.leaky_relu(x, alpha=0.1)

    def residual(name, x, chan):
        with tf.variable_scope(name):
            x = Conv2D('res1', x, chan, 3)
            x = BatchNorm('bn1', x)
            x = activation(x)
            x = Conv2D('res2', x, chan, 3)
            x = BatchNorm('bn2', x)
            x = activation(x)
            return x

    def fp16_getter(getter, *args, **kwargs):
        name = args[0] if len(args) else kwargs['name']
        if not USE_FP16 or (not name.endswith('/W') and not name.endswith('/b')):
            # ignore BN's gamma and beta
            return getter(*args, **kwargs)
        else:
            if kwargs['dtype'] == tf.float16:
                kwargs['dtype'] = tf.float32
                ret = getter(*args, **kwargs)
                return tf.cast(ret, tf.float16)
            else:
                return getter(*args, **kwargs)

    with custom_getter_scope(fp16_getter), \
            argscope(Conv2D, activation=tf.identity, use_bias=False), \
            argscope([Conv2D, MaxPooling, BatchNorm], data_format=DATA_FORMAT), \
            argscope(BatchNorm, momentum=0.8):
        with tf.variable_scope('prep'):
            l = Conv2D('conv', image, 64, 3)
            l = BatchNorm('bn', l)
            l = activation(l)

        with tf.variable_scope("layer1"):
            l = Conv2D('conv', l, 128, 3)
            l = MaxPooling('pool', l, 2)
            l = BatchNorm('bn', l)
            l = activation(l)
            l = l + residual('res', l, 128)

        with tf.variable_scope("layer2"):
            l = Conv2D('conv', l, 256, 3)
            l = MaxPooling('pool', l, 2)
            l = BatchNorm('bn', l)
            l = activation(l)

        with tf.variable_scope("layer3"):
            l = Conv2D('conv', l, 512, 3)
            l = MaxPooling('pool', l, 2)
            l = BatchNorm('bn', l)
            l = activation(l)
            l = l + residual('res', l, 512)

        l = tf.reduce_max(l, axis=[2, 3] if DATA_FORMAT == "NCHW" else [1, 2])
        l = FullyConnected('fc', l, 10, use_bias=False)
        logits = tf.cast(l * 0.125, tf.float32, name='logits')

    cost = tf.nn.softmax_cross_entropy_with_logits(labels=label, logits=logits)
    cost = tf.reduce_sum(cost)
    wd_cost = regularize_cost('.*', l2_regularizer(5e-4 * BATCH), name='regularize_loss')
    correct = tf.equal(tf.argmax(logits, axis=1), tf.argmax(label, axis=1), name='correct')
    return tf.add_n([cost, wd_cost], name='cost')
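The fp16_getter above follows the usual mixed-precision "master weights" pattern: parameters are stored in float32 and handed out as float16 casts wherever the graph requests fp16. A hedged, standalone illustration of the same pattern with a plain variable_scope (TF1-style import tensorflow as tf is assumed, names are illustrative):

# Hedged illustration of the master-weight pattern, independent of tensorpack:
# store the variable in float32, return a float16 cast to the caller.
def fp32_master_getter(getter, name, *args, **kwargs):
    if kwargs.get('dtype') == tf.float16:
        kwargs['dtype'] = tf.float32            # create the master weight in fp32
        return tf.cast(getter(name, *args, **kwargs), tf.float16)
    return getter(name, *args, **kwargs)

with tf.variable_scope('demo', custom_getter=fp32_master_getter):
    w = tf.get_variable('w', shape=[3, 3], dtype=tf.float16)
# w is a float16 tensor backed by the float32 variable 'demo/w'.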