Ejemplo n.º 1
0
 def test_xe_smoothing(self):
     """Check the CrossEntropy loss value when label smoothing is 0.1.

     The loss tensor is evaluated twice with the same feed; each result
     must match the mean of the two reference numbers within 1e-6.
     """
     expected = sum([2.10587597, 1.47194624]) / 2.
     feed = {self.x: self.vx, self.y: self.vy}
     loss_tensor = CrossEntropy(self.model, smoothing=0.1).fprop(
         self.x, self.y)
     with tf.Session() as sess:
         first_run = sess.run(loss_tensor, feed_dict=feed)
         second_run = sess.run(loss_tensor, feed_dict=feed)
     for observed in (first_run, second_run):
         self.assertClose(observed, expected, atol=1e-6)
Ejemplo n.º 2
0
 def test_xe(self):
     """Check the CrossEntropy loss value when label smoothing is disabled.

     The loss tensor is evaluated twice with the same feed; each result
     must match the mean of the two reference numbers within 1e-6.
     """
     expected = sum([2.210599660, 1.53666997]) / 2.
     feed = {self.x: self.vx, self.y: self.vy}
     loss_tensor = CrossEntropy(self.model, smoothing=0.).fprop(
         self.x, self.y)
     with tf.Session() as sess:
         first_run = sess.run(loss_tensor, feed_dict=feed)
         second_run = sess.run(loss_tensor, feed_dict=feed)
     for observed in (first_run, second_run):
         self.assertClose(observed, expected, atol=1e-6)
Ejemplo n.º 3
0
def SNNL_example(train_start=0,
                 train_end=60000,
                 test_start=0,
                 test_end=10000,
                 nb_epochs=NB_EPOCHS,
                 batch_size=BATCH_SIZE,
                 learning_rate=LEARNING_RATE,
                 nb_filters=NB_FILTERS,
                 SNNL_factor=SNNL_FACTOR,
                 output_dir=OUTPUT_DIR):
    """
    A simple model trained to minimize Cross Entropy and Maximize Soft Nearest
    Neighbor Loss at each internal layer. This outputs a TSNE of the sign of
    the adversarial gradients of a trained model. A model with a negative
    SNNL_factor will show little or no class clusters, while a model with a
    0 SNNL_factor will have class clusters in the adversarial gradient
    direction.

    :param train_start: index of first training set example
    :param train_end: index of last training set example
    :param test_start: index of first test set example
    :param test_end: index of last test set example
    :param nb_epochs: number of epochs to train model
    :param batch_size: size of training batches; also the number of test
                       examples used for the TSNE plot
    :param learning_rate: learning rate for training
    :param nb_filters: forwarded to `ModelBasicCNN` as its third argument
    :param SNNL_factor: multiplier for Soft Nearest Neighbor Loss. A falsy
                        value (e.g. 0) trains with plain `CrossEntropy`.
    :param output_dir: prefix for the saved PNG. It is joined to the file
                       name by plain string concatenation, so it should
                       end with a path separator.
    :return: an AccuracyReport object
    """

    # Object used to keep track of (and return) key accuracies
    report = AccuracyReport()

    # Set TF random seed to improve reproducibility
    tf.set_random_seed(1234)

    # Set logging level to see debug information
    set_log_level(logging.DEBUG)

    # Create TF session
    sess = tf.Session()

    # Get MNIST data
    mnist = MNIST(train_start=train_start,
                  train_end=train_end,
                  test_start=test_start,
                  test_end=test_end)
    x_train, y_train = mnist.get_set('train')
    x_test, y_test = mnist.get_set('test')

    # Use Image Parameters
    img_rows, img_cols, nchannels = x_train.shape[1:4]
    nb_classes = y_train.shape[1]

    # Define input TF placeholder
    x = tf.placeholder(tf.float32, shape=(None, img_rows, img_cols, nchannels))
    y = tf.placeholder(tf.float32, shape=(None, nb_classes))

    # Train an MNIST model
    train_params = {
        'nb_epochs': nb_epochs,
        'batch_size': batch_size,
        'learning_rate': learning_rate
    }
    eval_params = {'batch_size': batch_size}
    rng = np.random.RandomState([2017, 8, 30])

    def do_eval(preds, x_set, y_set, report_key):
        # Evaluate accuracy on (x_set, y_set) and record it on the report
        # under the attribute named by `report_key`.
        acc = model_eval(sess, x, y, preds, x_set, y_set, args=eval_params)
        setattr(report, report_key, acc)
        print('Test accuracy on legitimate examples: %0.4f' % (acc))

    model = ModelBasicCNN('model', nb_classes, nb_filters)
    preds = model.get_logits(x)
    # The plain cross-entropy loss is always built: it is used for the
    # adversarial gradients below even when training uses the SNNL loss.
    cross_entropy_loss = CrossEntropy(model)
    if not SNNL_factor:
        loss = cross_entropy_loss
    else:
        loss = SNNLCrossEntropy(model,
                                factor=SNNL_factor,
                                optimize_temperature=False)

    def evaluate():
        do_eval(preds, x_test, y_test, 'clean_train_clean_eval')

    train(sess,
          loss,
          x_train,
          y_train,
          evaluate=evaluate,
          args=train_params,
          rng=rng,
          var_list=model.get_params())

    do_eval(preds, x_train, y_train, 'train_clean_train_clean_eval')

    def imscatter(points, images, ax=None, zoom=1, cmap="hot"):
        # Draw images[i] (first channel, min-max normalised) at points[i]
        # on `ax` and return the list of created artists.
        if ax is None:
            ax = plt.gca()
        artists = []
        i = 0
        if not isinstance(cmap, list):
            cmap = [cmap] * len(points)
        for x0, y0 in points:
            transformed = (images[i] - np.min(images[i])) / \
                (np.max(images[i]) - np.min(images[i]))
            im = OffsetImage(transformed[:, :, 0], zoom=zoom, cmap=cmap[i])
            ab = AnnotationBbox(im, (x0, y0), xycoords='data', frameon=False)
            artists.append(ax.add_artist(ab))
            i += 1
        ax.update_datalim(np.column_stack(np.transpose(points)))
        ax.autoscale()
        ax.get_xaxis().set_ticks([])
        ax.get_yaxis().set_ticks([])
        return artists

    # Sign of the cross-entropy gradient w.r.t. the input, computed on the
    # first `batch_size` test examples.
    adv_grads = tf.sign(tf.gradients(cross_entropy_loss.fprop(x, y), x))
    feed_dict = {x: x_test[:batch_size], y: y_test[:batch_size]}
    adv_grads_val = sess.run(adv_grads, feed_dict=feed_dict)
    adv_grads_val = np.reshape(adv_grads_val,
                               (batch_size, img_rows * img_cols))

    X_embedded = TSNE(n_components=2, verbose=0).fit_transform(adv_grads_val)
    plt.figure(num=None,
               figsize=(50, 50),
               dpi=40,
               facecolor='w',
               edgecolor='k')
    # Use the SNNL_factor argument (not the global FLAGS) so the title always
    # matches the model that was actually trained and the saved file name.
    plt.title(
        "TSNE of Sign of Adv Gradients, SNNLCrossEntropy Model, factor:" +
        str(SNNL_factor),
        fontsize=42)
    imscatter(X_embedded, x_test[:batch_size], zoom=2, cmap="Purples")
    plt.savefig(output_dir + 'adversarial_gradients_SNNL_factor_' +
                str(SNNL_factor) + '.png')

    return report
Ejemplo n.º 4
0
# Reset all three feeders before copies of them are handed to inspectors.
for _feeder in (feeder, pred_feeder, adv_feeder):
    _feeder.reset()

inspector_list, inspector_pred_list, inspector_adv_list = [], [], []

# Build one 'pred' inspector and one 'adv' inspector per thread. Each one
# receives its own deep copy of the matching feeder.
for ii in range(FLAGS.num_threads):
    print('Setting feeders for thread #{}...'.format(ii+1))

    pred_inspector = darkon.Influence(
        workspace=os.path.join(workspace_dir, 'pred'),
        feeder=copy.deepcopy(pred_feeder),
        loss_op_train=full_loss.fprop(x=x, y=y),
        loss_op_test=loss.fprop(x=x, y=y),
        x_placeholder=x,
        y_placeholder=y)
    inspector_pred_list.append(pred_inspector)

    adv_inspector = darkon.Influence(
        workspace=os.path.join(workspace_dir, 'adv', FLAGS.attack),
        feeder=copy.deepcopy(adv_feeder),
        loss_op_train=full_loss.fprop(x=x, y=y),
        loss_op_test=loss.fprop(x=x, y=y),
        x_placeholder=x,
        y_placeholder=y)
    inspector_adv_list.append(adv_inspector)

# some optimizations for the darkon influence function implementations
testset_batch_size = 100
Ejemplo n.º 5
0
def train(sess,
          x,
          y,
          model,
          x_train,
          y_train,
          init_all=False,
          evaluate=None,
          feed=None,
          args=None,
          rng=None,
          var_list=None,
          fprop_args=None,
          optimizer=None,
          devices=None,
          x_batch_preprocessor=None,
          use_ema=False,
          ema_decay=.998,
          run_canary=None,
          loss_threshold=1e5,
          dataset_train=None,
          dataset_size=None,
          start_epoch=200,
          checkpoint_path=None):
    """
    Run (optionally multi-replica, synchronous) adversarial training of
    `model`.

    The loss is built internally as `CrossEntropy(model, smoothing=0.1)`.
    Every device batch is first perturbed with a `DDN_tf` attack and the
    model is trained on the perturbed inputs. A checkpoint is written after
    every epoch.

    :param sess: TF session to use when training the graph
    :param x: unused; kept for interface compatibility. Input placeholders
              are created internally, one per device.
    :param y: unused; kept for interface compatibility. Label placeholders
              are created internally, one per device.
    :param model: model passed to `CrossEntropy` and to the `DDN_tf` attack
    :param x_train: numpy array with training inputs or tf Dataset
    :param y_train: numpy array with training outputs or tf Dataset
    :param init_all: (boolean) If set to true, all TF variables in the session
                     are (re)initialized, otherwise only previously
                     uninitialized variables are initialized before training.
    :param evaluate: function that is run after each training iteration
                     (typically to display the test/validation accuracy).
    :param feed: An optional dictionary that is appended to the feeding
                 dictionary before the session runs. Can be used to feed
                 the learning phase of a Keras model for instance.
    :param args: dict or argparse `Namespace` object.
                 Should contain `nb_epochs`, `learning_rate`,
                 `batch_size`
    :param rng: Instance of numpy.random.RandomState
    :param var_list: Optional list of parameters to train.
    :param fprop_args: dict, extra arguments to pass to fprop (loss and model).
    :param optimizer: Optimizer to be used for training
    :param devices: list of device names to use for training
        If None, defaults to: all GPUs, if GPUs are available
                              all devices, if no GPUs are available
    :param x_batch_preprocessor: callable
        Takes a single tensor containing an x_train batch as input
        Returns a single tensor containing an x_train batch as output
        Called to preprocess the data before passing the data to the Loss
    :param use_ema: bool
        If true, uses an exponential moving average of the model parameters
    :param ema_decay: float or callable
        The decay parameter for EMA, if EMA is used
        If a callable rather than a float, this is a callable that takes
        the epoch and batch as arguments and returns the ema_decay for
        the current batch.
    :param run_canary: deprecated. Passing any non-None value only triggers
        a warning; the canary always runs.
    :param loss_threshold: float
        Raise an exception if the loss exceeds this value.
        This is intended to rapidly detect numerical problems.
        Sometimes the loss may legitimately be higher than this value. In
        such cases, raise the value. If needed it can be np.inf.
    :param dataset_train: tf Dataset instance.
        Used as a replacement for x_train, y_train for faster performance.
    :param dataset_size: integer, the size of the dataset_train.
    :param start_epoch: integer, index of the first epoch to run. Epochs
        iterate over `range(start_epoch, args.nb_epochs)`. Defaults to 200,
        the value that used to be hard-coded, so nothing is trained unless
        `nb_epochs` is greater than 200.
    :param checkpoint_path: path prefix given to `Saver.save` after every
        epoch. None selects the previously hard-coded location.
    :return: True if model trained
    :raises ValueError: on missing `nb_epochs` / learning rate, an invalid
        optimizer, a missing `dataset_size`, or an extreme / NaN / Inf loss.
    """

    # Check whether the hardware is working correctly
    canary.run_canary()
    if run_canary is not None:
        warnings.warn("The `run_canary` argument is deprecated. The canary "
                      "is now much cheaper and thus runs all the time. The "
                      "canary now uses its own loss function so it is not "
                      "necessary to turn off the canary when training with "
                      " a stochastic loss. Simply quit passing `run_canary`."
                      "Passing `run_canary` may become an error on or after "
                      "2019-10-16.")

    args = _ArgsWrapper(args or {})
    fprop_args = fprop_args or {}
    # NOTE: the saver is created before the optimizer / EMA variables exist,
    # so it only tracks the variables already in the graph at this point.
    saver = tf.train.Saver()

    if checkpoint_path is None:
        # Legacy default kept so existing callers still write to the same
        # place.
        checkpoint_path = (
            '/nfs/nas4/data-hanwei/data-hanwei/DATA/models/cnn/ddn_adv_iter')

    # Check that necessary arguments were given (see doc above)
    # Be sure to support 0 epochs for debugging purposes
    if args.nb_epochs is None:
        raise ValueError("`args` must specify number of epochs")
    if optimizer is None:
        if args.learning_rate is None:
            raise ValueError("Learning rate was not given in args dict")
    assert args.batch_size, "Batch size was not given in args dict"

    if rng is None:
        rng = np.random.RandomState()

    if optimizer is None:
        optimizer = tf.train.AdamOptimizer(learning_rate=args.learning_rate)
    else:
        if not isinstance(optimizer, tf.train.Optimizer):
            raise ValueError("optimizer object must be from a child class of "
                             "tf.train.Optimizer")

    loss = CrossEntropy(model, smoothing=0.1)
    grads = []
    xs = []
    preprocessed_xs = []
    ys = []
    if dataset_train is not None:
        assert x_train is None and y_train is None and x_batch_preprocessor is None
        if dataset_size is None:
            raise ValueError("You must provide a dataset size")
        data_iterator = dataset_train.make_one_shot_iterator().get_next()
        # Pull one batch so the placeholders below can copy its dtype/shape.
        x_train, y_train = sess.run(data_iterator)

    devices = infer_devices(devices)
    for device in devices:
        with tf.device(device):
            # Per-device placeholders. Distinct local names are used so the
            # `x` / `y` arguments are not silently overwritten.
            x_ph = tf.placeholder(x_train.dtype, (None, ) + x_train.shape[1:])
            y_ph = tf.placeholder(y_train.dtype, (None, ) + y_train.shape[1:])
            xs.append(x_ph)
            ys.append(y_ph)

            x_in = x_ph
            if x_batch_preprocessor is not None:
                x_in = x_batch_preprocessor(x_ph)

            # We need to keep track of these so that the canary can feed
            # preprocessed values. If the canary had to feed raw values,
            # stochastic preprocessing could make the canary fail.
            preprocessed_xs.append(x_in)

            loss_value = loss.fprop(x_in, y_ph, **fprop_args)

            grads.append(
                optimizer.compute_gradients(loss_value, var_list=var_list))
    num_devices = len(devices)
    print("num_devices: ", num_devices)

    grad = avg_grads(grads)
    # Trigger update operations within the default graph (such as batch_norm).
    with tf.control_dependencies(tf.get_collection(tf.GraphKeys.UPDATE_OPS)):
        train_step = optimizer.apply_gradients(grad)

    epoch_tf = tf.placeholder(tf.int32, [])
    batch_tf = tf.placeholder(tf.int32, [])

    if use_ema:
        if callable(ema_decay):
            ema_decay = ema_decay(epoch_tf, batch_tf)
        ema = tf.train.ExponentialMovingAverage(decay=ema_decay)
        with tf.control_dependencies([train_step]):
            train_step = ema.apply(var_list)
        # Get pointers to the EMA's running average variables
        avg_params = [ema.average(param) for param in var_list]
        # Make temporary buffers used for swapping the live and running average
        # parameters
        tmp_params = [
            tf.Variable(param, trainable=False) for param in var_list
        ]
        # Define the swapping operation
        param_to_tmp = [
            tf.assign(tmp, param)
            for tmp, param in safe_zip(tmp_params, var_list)
        ]
        with tf.control_dependencies(param_to_tmp):
            avg_to_param = [
                tf.assign(param, avg)
                for param, avg in safe_zip(var_list, avg_params)
            ]
        with tf.control_dependencies(avg_to_param):
            tmp_to_avg = [
                tf.assign(avg, tmp)
                for avg, tmp in safe_zip(avg_params, tmp_params)
            ]
        swap = tmp_to_avg

    batch_size = args.batch_size

    assert batch_size % num_devices == 0
    device_batch_size = batch_size // num_devices

    if init_all:
        sess.run(tf.global_variables_initializer())
    else:
        initialize_uninitialized_global_variables(sess)

    # NOTE(review): the attack batch shape is fixed to 32x32x3 inputs
    # (presumably CIFAR-10-sized images) -- confirm before training on data
    # with a different shape.
    batch_shape = (device_batch_size, 32, 32, 3)
    attack = DDN_tf(model, batch_shape, 100, False)

    for epoch in range(start_epoch, args.nb_epochs):
        if dataset_train is not None:
            nb_batches = int(math.ceil(float(dataset_size) / batch_size))
        else:
            # Indices to shuffle training set
            index_shuf = list(range(len(x_train)))
            # Randomly repeat a few training examples each epoch to avoid
            # having a too-small batch
            while len(index_shuf) % batch_size != 0:
                index_shuf.append(rng.randint(len(x_train)))
            nb_batches = len(index_shuf) // batch_size
            rng.shuffle(index_shuf)
            # Shuffling here versus inside the loop doesn't seem to affect
            # timing very much, but shuffling here makes the code slightly
            # easier to read
            x_train_shuffled = x_train[index_shuf]
            y_train_shuffled = y_train[index_shuf]

        prev = time.time()
        for batch in range(nb_batches):
            if dataset_train is not None:
                x_train_shuffled, y_train_shuffled = sess.run(data_iterator)
                start, end = 0, batch_size
            else:
                # Compute batch start and end indices
                start = batch * batch_size
                end = (batch + 1) * batch_size
                # Perform one training step
                diff = end - start
                assert diff == batch_size

            feed_dict = {epoch_tf: epoch, batch_tf: batch}
            for dev_idx in range(num_devices):
                cur_start = start + dev_idx * device_batch_size
                cur_end = start + (dev_idx + 1) * device_batch_size
                # Train on adversarially perturbed inputs with clean labels.
                x_train_adv = attack.attack(
                    sess, x_train_shuffled[cur_start:cur_end],
                    y_train_shuffled[cur_start:cur_end])
                feed_dict[xs[dev_idx]] = x_train_adv
                feed_dict[ys[dev_idx]] = y_train_shuffled[cur_start:cur_end]
            if cur_end != end and dataset_train is None:
                msg = ("batch_size (%d) must be a multiple of num_devices "
                       "(%d).\nCUDA_VISIBLE_DEVICES: %s"
                       "\ndevices: %s")
                # Use .get() so an unset environment variable cannot mask
                # this error with a KeyError. A separate name avoids
                # rebinding `args`.
                msg_args = (batch_size, num_devices,
                            os.environ.get('CUDA_VISIBLE_DEVICES'),
                            str(devices))
                raise ValueError(msg % msg_args)
            if feed is not None:
                feed_dict.update(feed)

            # `loss_value` is the loss tensor of the last device in the loop
            # above.
            _, loss_numpy = sess.run([train_step, loss_value],
                                     feed_dict=feed_dict)

            if np.abs(loss_numpy) > loss_threshold:
                raise ValueError("Extreme loss during training: ", loss_numpy)
            if np.isnan(loss_numpy) or np.isinf(loss_numpy):
                raise ValueError("NaN/Inf loss during training")
        assert (dataset_train is not None
                or end == len(index_shuf))  # Check that all examples were used
        cur = time.time()
        _logger.info("Epoch " + str(epoch) + " took " + str(cur - prev) +
                     " seconds")
        saver.save(sess, checkpoint_path, global_step=epoch)
        if evaluate is not None:
            if use_ema:
                # Before running evaluation, load the running average
                # parameters into the live slot, so we can see how well
                # the EMA parameters are performing
                sess.run(swap)
            evaluate()
            if use_ema:
                # Swap the parameters back, so that we continue training
                # on the live parameters
                sess.run(swap)
    if use_ema:
        # When training is done, swap the running average parameters into
        # the live slot, so that we use them when we deploy the model
        sess.run(swap)

    return True
Ejemplo n.º 6
0
def main(args):
  """Adversarially train an InceptionV3 classifier and report accuracies.

  Builds train/validation/test datasets that share one re-initializable
  iterator, trains with a `CrossEntropy` loss configured with a
  `FastGradientMethod` attack, clips gradients element-wise, and saves the
  model each time the validation accuracy improves. The test accuracy is
  logged once training has finished.

  :param args: parsed command-line namespace. The attributes read here are
      `run_name`, `train_dir`, `batch_size`, `eps`, `ord`, `initial_lr`,
      `lr_decay_steps`, `lr_decay_factor`, `label_smth`, `adv_coeff`,
      `opt_decay`, `grad_clip`, `load_model`, `restore_path`, `num_epochs`
      and `model_path`.
  """
  logger = init_logger(args.run_name)

  # Datasets
  img_height, img_width, _ = InceptionV3.SHAPE

  def prep_func(f, x, y):
    # Load, decode and resize one image; filename and label pass through.
    x = read_image(x)
    x = decode_png(x)
    x = resize(x, img_height, img_width)
    return f, x, y

  # NOTE(review): the validation and test datasets below are also built from
  # `args.train_dir` -- confirm whether dedicated directories were intended.
  trn_ds = make_dataset(args.train_dir, args.batch_size, prep_func,
                        shuffle=True, repeat=True, add_filenames=True)
  val_ds = make_dataset(args.train_dir, args.batch_size, prep_func,
                        shuffle=False, repeat=False, add_filenames=True)
  tst_ds = make_dataset(args.train_dir, args.batch_size, prep_func,
                        shuffle=False, repeat=False, add_filenames=True)

  num_classes = len(trn_ds.labels_map)

  # One iterator shared by all three datasets; the *_init_op ops below
  # switch which dataset it reads from.
  it = tf.data.Iterator.from_structure(
    trn_ds.dataset.output_types, trn_ds.dataset.output_shapes)

  num_trn_batches = int(math.ceil(float(trn_ds.size) / args.batch_size))
  num_val_batches = int(math.ceil(float(val_ds.size) / args.batch_size))
  num_tst_batches = int(math.ceil(float(tst_ds.size) / args.batch_size))

  trn_init_op = it.make_initializer(trn_ds.dataset)
  val_init_op = it.make_initializer(val_ds.dataset)
  tst_init_op = it.make_initializer(tst_ds.dataset)

  # Filename, input image and corresponding one hot encoded label
  f, x, y = it.get_next()

  sess = tf.Session()

  # Model and logits
  is_training = tf.placeholder(dtype=tf.bool)
  model = InceptionV3(nb_classes=num_classes, is_training=is_training)
  logits = model.get_logits(x)

  attacks_ord = {
    'inf': np.inf,
    '1': 1,
    '2': 2
  }

  # FGM attack
  attack_params = {
    'eps': args.eps,
    'clip_min': 0.0,
    'clip_max': 1.0,
    'ord': attacks_ord[args.ord],
  }
  attack = FastGradientMethod(model, sess)

  # Learning rate with exponential decay
  global_step = tf.Variable(0, trainable=False)
  global_step_update_op = tf.assign(global_step, tf.add(global_step, 1))
  lr = tf.train.exponential_decay(
    args.initial_lr, global_step, args.lr_decay_steps,
    args.lr_decay_factor, staircase=True)

  cross_entropy = CrossEntropy(model, attack=attack,
                               smoothing=args.label_smth,
                               attack_params=attack_params,
                               adv_coeff=args.adv_coeff)
  loss = cross_entropy.fprop(x, y)

  # Gradients clipping
  opt = tf.train.RMSPropOptimizer(learning_rate=lr, decay=args.opt_decay,
                                  epsilon=1.0)
  gvs = opt.compute_gradients(loss)
  clip_min, clip_max = -args.grad_clip, args.grad_clip

  # Variables with no gradient get an explicit zero gradient.
  capped_gvs = []
  for g, v in gvs:
    capped_g = tf.clip_by_value(g, clip_min, clip_max) \
      if g is not None else tf.zeros_like(v)
    capped_gvs.append((capped_g, v))

  train_op = opt.apply_gradients(capped_gvs)

  saver = tf.train.Saver()
  global_init_op = tf.global_variables_initializer()

  with sess.as_default():
    sess.run(global_init_op)

    # Restore only AFTER the global initializer has run. Initializing
    # afterwards would overwrite the restored weights with fresh values.
    if args.load_model and args.restore_path:
      saver.restore(sess, args.restore_path)
      logger.info("Model restored from: {}".format(args.restore_path))

    best_val_acc = -1
    for epoch in range(args.num_epochs):
      logger.info("Epoch: {:04d}/{:04d}".format(epoch + 1, args.num_epochs))
      sess.run(trn_init_op)

      for batch in range(num_trn_batches):
        loss_np, lr_np, _ = sess.run([loss, lr, train_op],
                                     feed_dict={is_training: True})
        logger.info("Batch: {:04d}/{:04d}, loss: {:.05f}, lr: {:.05f}"
          .format(batch + 1, num_trn_batches, loss_np, lr_np))

      logger.info("Epoch completed...")

      # global_step advances once per epoch, so the learning-rate schedule
      # is driven by epochs rather than batches.
      sess.run(global_step_update_op)
      val_acc = eval_acc(sess, logits, y, num_val_batches,
                         is_training, val_init_op)
      logger.info("Validation set accuracy: {:.05f}".format(val_acc))

      # Keep only the best model seen so far.
      if best_val_acc < val_acc:
        output_path = saver.save(sess, args.model_path)
        logger.info("Model was successfully saved: {}".format(output_path))
        best_val_acc = val_acc

    tst_acc = eval_acc(sess, logits, y, num_tst_batches,
                       is_training, tst_init_op)
    logger.info("Test set accuracy: {:.05f}".format(tst_acc))