Example #1
def build_eval_graph(x, y, scope):
    losses = {}

    with tf.variable_scope(scope, reuse=True):
        with tf.name_scope('Logits'):
            logits = train_utils.forward(x, create_summaries=False)

        # Create an op for the loss
        with tf.name_scope('Cross-Entropy-Loss'):
            ce_loss = layers.cross_entropy_loss(logits, y)
            losses['LOSS'] = ce_loss

        # Create op for the accuracy
        with tf.name_scope('Accuracy'):
            acc = layers.accuracy(logits, y)
            losses['ACC'] = acc
            losses['ERR'] = 1.0 - acc

        # Create op for the accuracy against FGSM attacker
        with tf.name_scope('Adversarial-Accuracy'):
            fgsm_perturbation, loss, raw_grad = train_utils.generate_adversarial_perturbation(x, ord='inf',
                                                                                              epsilon=FLAGS.eps_fgsm)

            # Compute adversarial examples
            fgsm_x_adv_unclipped = x + fgsm_perturbation
            fgsm_x_adv = tf.clip_by_value(fgsm_x_adv_unclipped, clip_value_min=0.0, clip_value_max=1.0)
            fgsm_x_adv = tf.stop_gradient(fgsm_x_adv)  # do not backprop through attack
            adv_logits = train_utils.forward(fgsm_x_adv, create_summaries=False)

            # Collect gradients
            raw_grad = tf.contrib.layers.flatten(raw_grad)
            raw_grad = tf.identity(raw_grad, name='fgsm-grad')
            raw_grad_norm = tf.norm(raw_grad, ord='euclidean')
            raw_grad_norm = tf.identity(raw_grad_norm, name='fgsm-grad-l2norm')
            losses['FGSM-GRAD-NORM'] = raw_grad_norm

            # Collect perturbation
            perturbation = tf.contrib.layers.flatten(fgsm_perturbation)
            perturbation = tf.identity(perturbation, name='fgsm-perturbation')
            tf.summary.histogram(perturbation.op.name, perturbation)
            perturbation_norm = tf.norm(perturbation, ord='euclidean')
            perturbation_norm = tf.identity(perturbation_norm, name='fgsm-perturbation-l2norm')
            losses['FGSM-PERTURBATION-NORM'] = perturbation_norm

            # Collect accuracy
            adv_acc = layers.accuracy(adv_logits, y)
            losses['ADVT-ACC'] = adv_acc
            losses['ADVT-ERR'] = 1.0 - adv_acc

    return losses, fgsm_perturbation, fgsm_x_adv
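A minimal usage sketch (assuming the training graph has already created the model variables under the same scope, and that x, y, FLAGS.eps_fgsm, and a data batch are defined elsewhere; the scope name 'model' is an assumption):

# Hypothetical driver for the eval graph above; names here are illustrative only.
losses, fgsm_perturbation, fgsm_x_adv = build_eval_graph(x, y, scope='model')
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    vals = sess.run(losses, feed_dict={x: batch_images, y: batch_labels})
    print('clean error: %.4f, FGSM error: %.4f' % (vals['ERR'], vals['ADVT-ERR']))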
Example #2
    def __init__(self, input, label, k=1, **kwargs):
        super(Accuracy, self).__init__("accuracy", **kwargs)
        main_program = self.helper.main_program
        if main_program.current_block().idx != 0:
            raise ValueError("You can only invoke Evaluator in root block")

        self.total = self.create_state(dtype='int64', shape=[1], suffix='total')
        self.correct = self.create_state(
            dtype='int64', shape=[1], suffix='correct')
        kwargs = {'main_program': main_program}
        total = self.helper.create_tmp_variable(dtype='int')
        correct = self.helper.create_tmp_variable(dtype='int')
        acc = layers.accuracy(
            input=input,
            label=label,
            k=k,
            total=total,
            correct=correct,
            **kwargs)
        total = layers.cast(x=total, dtype='int64', **kwargs)
        correct = layers.cast(x=correct, dtype='int64', **kwargs)
        layers.sums(input=[self.total, total], out=self.total, **kwargs)
        layers.sums(input=[self.correct, correct], out=self.correct, **kwargs)

        self.metrics.append(acc)
Example #3
  def classifier_graph(self):
    """Constructs classifier graph from inputs to classifier loss.

    * Caches the VatxtInput object in `self.cl_inputs`
    * Caches tensors: `cl_embedded`, `cl_logits`, `cl_loss`

    Returns:
      loss: scalar float.
    """
    inputs = _inputs('train', pretrain=False)
    self.cl_inputs = inputs
    embedded = self.layers['embedding'](inputs.tokens)
    self.tensors['cl_embedded'] = embedded

    _, next_state, logits, loss = self.cl_loss_from_embedding(
        embedded, return_intermediates=True)
    tf.summary.scalar('classification_loss', loss)
    self.tensors['cl_logits'] = logits
    self.tensors['cl_loss'] = loss

    acc = layers_lib.accuracy(logits, inputs.labels, inputs.weights)
    tf.summary.scalar('accuracy', acc)

    adv_loss = (self.adversarial_loss() * tf.constant(
        FLAGS.adv_reg_coeff, name='adv_reg_coeff'))
    tf.summary.scalar('adversarial_loss', adv_loss)

    total_loss = loss + adv_loss
    tf.summary.scalar('total_classification_loss', total_loss)

    with tf.control_dependencies([inputs.save_state(next_state)]):
      total_loss = tf.identity(total_loss)

    return total_loss
Example #4
    def _build(self, num_classifiers, learning_rate):
        # inputs
        self.X = tf.placeholder(tf.float32, [None, 28, 28])
        self.y = tf.placeholder(tf.int32, [None])
        one_hot_y = tf.one_hot(self.y, 10)

        networks = [layers.feedforward(self.X) for _ in range(num_classifiers)]
        self.individual_loss = [
            layers.loss(net, one_hot_y) for net in networks
        ]
        self.individual_accuracy = [
            layers.accuracy(net, one_hot_y) for net in networks
        ]

        logits = tf.reduce_mean(tf.stack(networks, axis=-1), axis=-1)
        # Sum of pairwise L2 distances between the individual networks' outputs;
        # added to the loss with weight 1e-4 below, and hard-coded for num_classifiers == 3.
        l2_distance = tf.add_n([
            tf.norm(networks[0] - networks[1]),
            tf.norm(networks[1] - networks[2]),
            tf.norm(networks[2] - networks[0])
        ])

        cross_entropy = tf.nn.softmax_cross_entropy_with_logits_v2(
            logits=logits, labels=one_hot_y)
        self.loss = tf.reduce_mean(cross_entropy) + 1e-4 * l2_distance
        optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
        self.train_op = optimizer.minimize(self.loss)

        correct_prediction = tf.equal(tf.argmax(logits, axis=1),
                                      tf.argmax(one_hot_y, axis=1))
        self.accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
        self.prediction = tf.argmax(logits, axis=1)
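A minimal driver sketch for the model above (the class name Ensemble, the next_batch helper, and the session setup are assumptions, not part of the example):

# Hypothetical usage of the placeholders and ops created in _build.
model = Ensemble(num_classifiers=3, learning_rate=1e-3)
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    for step in range(1000):
        batch_x, batch_y = next_batch()  # images shaped [batch, 28, 28], integer class labels
        _, acc = sess.run([model.train_op, model.accuracy],
                          feed_dict={model.X: batch_x, model.y: batch_y})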
Example #5
    def __init__(self, input, label, k=1, **kwargs):
        super(Accuracy, self).__init__("accuracy", **kwargs)
        main_program = self.helper.main_program
        if main_program.current_block().idx != 0:
            raise ValueError("You can only invoke Evaluator in root block")

        self.total = self.create_state(dtype='int64',
                                       shape=[1],
                                       suffix='total')
        self.correct = self.create_state(dtype='int64',
                                         shape=[1],
                                         suffix='correct')
        kwargs = {'main_program': main_program}
        total = self.helper.create_tmp_variable(dtype='int')
        correct = self.helper.create_tmp_variable(dtype='int')
        acc = layers.accuracy(input=input,
                              label=label,
                              k=k,
                              total=total,
                              correct=correct,
                              **kwargs)
        total = layers.cast(x=total, dtype='int64', **kwargs)
        correct = layers.cast(x=correct, dtype='int64', **kwargs)
        layers.sums(input=[self.total, total], out=self.total, **kwargs)
        layers.sums(input=[self.correct, correct], out=self.correct, **kwargs)

        self.metrics.append(acc)
Example #6
def build_eval_graph(x, y, ul_x):
    losses = {}
    logit = forward(x, is_training=False, update_batch_stats=False)
    nll_loss = L.ce_loss(logit, y)
    losses['NLL'] = nll_loss
    acc = L.accuracy(logit, y)
    losses['Acc'] = acc
    return losses
def build_test_graph(x, y):
    losses = {}
    logit = adt.forward(x, is_training=False, update_batch_stats=False)
    nll_loss = L.ce_loss(logit, y)
    losses['No_NLL'] = nll_loss
    acc = L.accuracy(logit, y)
    losses['No_Acc'] = acc
    return losses
Example #8
def build_eval_graph(x, y, ul_x):
    losses = {}
    logit = vat.forward(x, is_training=False, update_batch_stats=False)
    nll_loss = L.ce_loss(logit, y)
    losses['NLL'] = nll_loss
    acc = L.accuracy(logit, y)
    losses['Acc'] = acc
    scope = tf.get_variable_scope()
    scope.reuse_variables()
    at_loss = vat.adversarial_loss(x, y, nll_loss, is_training=True)
    losses['AT_loss'] = at_loss
    ul_logit = vat.forward(ul_x, is_training=False, update_batch_stats=False)
    vat_loss = vat.virtual_adversarial_loss(ul_x, ul_logit, is_training=False)
    losses['VAT_loss'] = vat_loss
    return losses
Example #9
    def classifier_graph(self):
        """Constructs classifier graph from inputs to classifier loss.

    * Caches the VatxtInput object in `self.cl_inputs`
    * Caches tensors: `cl_embedded`, `cl_logits`, `cl_loss`


    Returns:
      loss: scalar float.
    """
        inputs = _inputs('train', pretrain=False)
        self.cl_inputs = inputs
        self.tensors['inputs'] = inputs.tokens
        embedded = self.layers['embedding'](inputs.tokens)
        self.tensors['embedding_weight'] = self.layers['embedding'].var
        self.tensors['cl_embedded'] = embedded

        _, next_state, logits, loss = self.cl_loss_from_embedding(
            embedded, return_intermediates=True)
        tf.summary.scalar('classification_loss', loss)
        self.tensors['cl_logits'] = logits
        self.tensors['cl_loss'] = loss
        self.tensors['perturb'] = self.calculate_perturb(embedded, loss)

        if FLAGS.single_label:
            indices = tf.stack([tf.range(FLAGS.batch_size), inputs.length - 1],
                               1)
            labels = tf.expand_dims(tf.gather_nd(inputs.labels, indices), 1)
            weights = tf.expand_dims(tf.gather_nd(inputs.weights, indices), 1)
        else:
            labels = inputs.labels
            weights = inputs.weights

        self.tensors['labels'] = labels
        acc = layers_lib.accuracy(logits, labels, weights)
        tf.summary.scalar('accuracy', acc)

        adv_loss = (self.adversarial_loss() *
                    tf.constant(FLAGS.adv_reg_coeff, name='adv_reg_coeff'))
        tf.summary.scalar('adversarial_loss', adv_loss)

        total_loss = loss + adv_loss

        with tf.control_dependencies([inputs.save_state(next_state)]):
            total_loss = tf.identity(total_loss)
            tf.summary.scalar('total_classification_loss', total_loss)
        return total_loss
Example #10
	def _build(self, num_classifiers, learning_rate):
		# inputs
		self.X = tf.placeholder(tf.float32, [None, 28, 28])
		self.y = tf.placeholder(tf.int32, [None])
		one_hot_y = tf.one_hot(self.y, 10)
		
		networks = [layers.convolutional(self.X) for _ in range(num_classifiers)]
		self.individual_loss = [layers.loss(net, one_hot_y) for net in networks]
		self.individual_accuracy = [layers.accuracy(net, one_hot_y) for net in networks]
		
		logits = layers.linear(tf.concat(networks, axis=-1), 10, bias=False)
		
		cross_entropy = tf.nn.softmax_cross_entropy_with_logits_v2(logits=logits, labels=one_hot_y)
		self.loss = tf.reduce_mean(cross_entropy)
		optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
		self.train_op = optimizer.minimize(self.loss)
		
		correct_prediction = tf.equal(tf.argmax(logits, axis=1), tf.argmax(one_hot_y, axis=1))
		self.accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
		self.prediction = tf.argmax(logits, axis=1)
Example #11
    def classifier_graph(self):
        """Constructs classifier graph from inputs to classifier loss.

        * Caches the VatxtInput objects in `self.cl_inputs`
        * Caches tensors: `cl_embedded` (tuple of forward and reverse), `cl_logits`,
          `cl_loss`

        Returns:
          loss: scalar float.
        """
        inputs = _inputs('train', pretrain=False, bidir=True)
        self.cl_inputs = inputs
        f_inputs, _ = inputs

        # Embed both forward and reverse with a shared embedding
        embedded = [self.layers['embedding'](inp.tokens) for inp in inputs]
        self.tensors['cl_embedded'] = embedded

        _, next_states, logits, loss = self.cl_loss_from_embedding(
            embedded, return_intermediates=True)
        tf.summary.scalar('classification_loss', loss)
        self.tensors['cl_logits'] = logits
        self.tensors['cl_loss'] = loss

        acc = layers_lib.accuracy(logits, f_inputs.labels, f_inputs.weights)
        tf.summary.scalar('accuracy', acc)

        adv_loss = (self.adversarial_loss() *
                    tf.constant(FLAGS.adv_reg_coeff, name='adv_reg_coeff'))
        tf.summary.scalar('adversarial_loss', adv_loss)

        total_loss = loss + adv_loss
        tf.summary.scalar('total_classification_loss', total_loss)

        saves = [
            inp.save_state(state) for (inp, state) in zip(inputs, next_states)
        ]
        with tf.control_dependencies(saves):
            total_loss = tf.identity(total_loss)

        return total_loss
Example #12
  def classifier_graph(self):
    """Constructs classifier graph from inputs to classifier loss.

    * Caches the VatxtInput objects in `self.cl_inputs`
    * Caches tensors: `cl_embedded` (tuple of forward and reverse), `cl_logits`,
      `cl_loss`

    Returns:
      loss: scalar float.
    """
    inputs = _inputs('train', pretrain=False, bidir=True)
    self.cl_inputs = inputs
    f_inputs, _ = inputs

    # Embed both forward and reverse with a shared embedding
    embedded = [self.layers['embedding'](inp.tokens) for inp in inputs]
    self.tensors['cl_embedded'] = embedded

    _, next_states, logits, loss = self.cl_loss_from_embedding(
        embedded, return_intermediates=True)
    tf.summary.scalar('classification_loss', loss)
    self.tensors['cl_logits'] = logits
    self.tensors['cl_loss'] = loss

    acc = layers_lib.accuracy(logits, f_inputs.labels, f_inputs.weights)
    tf.summary.scalar('accuracy', acc)

    adv_loss = (self.adversarial_loss() * tf.constant(
        FLAGS.adv_reg_coeff, name='adv_reg_coeff'))
    tf.summary.scalar('adversarial_loss', adv_loss)

    total_loss = loss + adv_loss

    saves = [inp.save_state(state) for (inp, state) in zip(inputs, next_states)]
    with tf.control_dependencies(saves):
      total_loss = tf.identity(total_loss)
      tf.summary.scalar('total_classification_loss', total_loss)
    return total_loss
Example #13
def training():

    pretrained_weights = './pretrain/vgg16.npy'
    data_dir = './data/cifar10_data/cifar-10-batches-bin'
    train_log_dir = './log/train/'
    val_log_dir = './log/val/'

    with tf.name_scope('input'):
        images_train, labels_train = input_data.read_cifar10(
            data_dir, is_train=True, batch_size=BATCH_SIZE, shuffle=True)
        images_val, labels_val = input_data.read_cifar10(
            data_dir, is_train=False, batch_size=BATCH_SIZE, shuffle=False)

    image_holder = tf.placeholder(tf.float32, shape=[BATCH_SIZE, IMG_W, IMG_H, 3])
    label_holder = tf.placeholder(tf.int32, shape=[BATCH_SIZE, N_CLASSES])

    logits = vgg.VGG16(image_holder, N_CLASSES, 0.8)
    loss = layers.loss(logits, label_holder)
    accuracy = layers.accuracy(logits, label_holder)

    global_steps = tf.Variable(0, name='global_step', trainable=False)
    train_op = layers.optimize(loss, LEARNING_RATE, global_steps)

    saver = tf.train.Saver(tf.global_variables())

    # Reference: https://stackoverflow.com/questions/35413618/tensorflow-placeholder-error-when-using-tf-merge-all-summaries
    summary_op = tf.summary.merge_all()
    # summary_op = tf.summary.merge([loss_summary, accuracy_summary], tf.GraphKeys.SUMMARIES)

    # The main thread
    init = tf.global_variables_initializer()
    sess = tf.InteractiveSession()
    sess.run(init)

    print('########################## Start Training ##########################')

    layers.load_with_skip(pretrained_weights, sess, ['fc6', 'fc7', 'fc8'])

    # Coordinate the relationship between threads
    # Reference: http://wiki.jikexueyuan.com/project/tensorflow-zh/how_tos/threading_and_queues.html
    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(sess=sess, coord=coord)
    train_summary_writer = tf.summary.FileWriter(train_log_dir, graph=sess.graph)
    val_summary_writer = tf.summary.FileWriter(val_log_dir, graph=sess.graph)

    try:
        for step in np.arange(MAX_STEP):
            if coord.should_stop():
                break
            # start_time  = time .time()

            train_images, train_labels = sess.run([images_train, labels_train])
            _, train_loss, train_acc, summary_str = sess.run([train_op, loss, accuracy, summary_op],
                                                feed_dict={image_holder: train_images, label_holder: train_labels})
            # duration = time.time() - start_time

            if step % 50 == 0 or (step + 1) == MAX_STEP:
                print('step %d, loss = %.4f, accuracy = %.4f%%' % (step, train_loss, train_acc))
                #summary_str = sess.run(summary_op)
                train_summary_writer.add_summary(summary_str, step)

            if step % 200 == 0 or (step + 1) == MAX_STEP:
                val_images, val_labels = sess.run([images_val, labels_val])
                val_loss, val_acc = sess.run([loss, accuracy],
                                             feed_dict={image_holder: val_images, label_holder: val_labels})
                print('step %d, val loss = %.2f, val accuracy = %.2f%%' % (step, val_loss, val_acc))

                #summary_str2 = sess.run(summary_op)
                val_summary_writer.add_summary(summary_str, step)

            # Why not use global_step=global_steps instead of step ???
            if step % 2000 == 0 or (step + 1) == MAX_STEP:
                checkpoint_path = os.path.join(train_log_dir, 'model.ckpt')
                saver.save(sess, checkpoint_path, global_step=step)

    except tf.errors.OutOfRangeError:
        coord.request_stop()

    coord.request_stop()
    coord.join(threads)

    sess.close()
Example #14
def accuracy(inp,lab):
	global acc
	acc +=1
	return L.accuracy(inp,lab,'accuracy_'+str(acc))
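The wrapper above relies on a module-level counter to give each accuracy op a unique name; a short sketch of the intended effect (the logits/labels tensors are assumptions):

acc = 0  # module-level counter incremented by the wrapper
train_acc = accuracy(train_logits, train_labels)  # op named 'accuracy_1'
val_acc = accuracy(val_logits, val_labels)        # op named 'accuracy_2'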
Example #15
def main():
    """
    Train a model with the given parameters.

    :return: number of iterations, interval used to measure accuracy, weight mean and standard
        deviation for each iteration and weight, activations in each layer for each iteration
        as mean and standard deviation, accuracies including batch, train and test accuracy
    :rtype: (int, int, {'W_0_mean': numpy.ndarray, ...}, {'h_0_mean': numpy.ndarray, ...},
        {'batch_accuracies': numpy.ndarray, 'test_accuracies': numpy.ndarray, 'train_accuracies': numpy.ndarray})
    """

    # To be sure ...
    tf.reset_default_graph()

    test_images, test_labels = mnist.load('t10k-images.idx3-ubyte', 't10k-labels.idx1-ubyte')
    train_images, train_labels = mnist.load('train-images.idx3-ubyte', 'train-labels.idx1-ubyte')

    test_images = test_images.reshape(test_images.shape[0], test_images.shape[1], test_images.shape[2], 1)
    train_images = train_images.reshape(train_images.shape[0], train_images.shape[1], train_images.shape[2], 1)

    train_dataset = dataset.Dataset(train_images, train_labels, args.batch_size)

    # Used for testing on the training and testing sets, as the GPU does not hold
    # the full training/testing sets in memory.
    test_train_dataset = dataset.Dataset(train_images, train_labels, 1000)
    test_dataset = dataset.Dataset(test_images, test_labels, 1000)

    with tf.device('/gpu:0'):
        tf.set_random_seed(time.time()*1000)

        if args.cnn:
            x = tf.placeholder(tf.float32, shape = [None, 28, 28, 1])
        else:
            x = tf.placeholder(tf.float32, shape = [None, 784])

        if args.normalizer_data:
            n_x = normalizer('n_x', x)
        else:
            n_x = x

        y = tf.placeholder(tf.float32, shape = [None, 10])

        # Two convolutional layers if requested, otherwise fully connected layers.
        if args.cnn:

            kernel_size = [3, 3, 3, 3, 3, 3, 3]

            channels = [16]
            for i in range(1, args.layers):
                channels.append(max(channels[-1]*2, 64))

            # weights and biases
            W = []
            b = []

            # conv, activations, pooling and normalizes
            s = []
            h = []
            p = []
            n = [n_x]

            for i in range(args.layers):

                channels_in = 1
                if i > 0:
                    channels_in = channels[i - 1]

                W.append(initializer('W_conv' + str(i), [kernel_size[i], kernel_size[i], channels_in, channels[i]]))
                b.append(initializers.constant('b_conv' + str(i), [channels[i]], value = args.bias))

                s.append(layers.convolutional('s_' + str(i), n[-1], W[-1], b[-1]))
                h.append(activation('h_' + str(i), s[-1]))
                p.append(layers.pooling('p_' + str(i), h[-1]))
                n.append(normalizer('n_' + str(i), p[-1]))

            shape = n[-1].get_shape().as_list()
            n[-1] = tf.reshape(n[-1], [-1, shape[1]*shape[2]*shape[3]])
        else:

            units = [1000, 1000, 1000, 1000, 1000]

            # weights and biases
            W = []
            b = []

            # linear, activations and normalized
            s = []
            h = []
            n = [n_x]

            for i in range(args.layers):
                units_in = 784
                if i > 0:
                    units_in = units[i - 1]

                W.append(initializer('W_fc' + str(i), [units_in, units[i]]))
                b.append(initializers.constant('b_fc' + str(i), [units[i]], value = args.bias))

                s.append(layers.inner_product('s_' + str(i), n[-1], W[-1], b[-1]))
                h.append(activation('h_' + str(i), s[-1]))
                n.append(normalizer('n_' + str(i), h[-1]))

        W.append(initializer('W_fc3', [n[-1].get_shape().as_list()[1], 100]))
        b.append(initializers.constant('b_fc3', [100], value = args.bias))

        s.append(layers.inner_product('s_3', n[-1], W[-1], b[-1]))
        h.append(activation('h_3', s[-1]))
        n.append(normalizer('n_3', h[-1]))

        W.append(initializer('W_fc4', [100, 10]))
        b.append(initializers.constant('b_fc4', [10], value = args.bias))

        s.append(layers.inner_product('s_4', n[-1], W[-1], b[-1]))
        y_ = layers.softmax('y_', s[-1])

        # Loss definition and optimizer.
        cross_entropy = layers.cross_entropy('cross_entropy', y_, y)

        weights = [v for v in tf.all_variables() if v.name.startswith('W')]
        loss = cross_entropy + args.regularizer_weight*regularizer('regularizer', weights)

        prediction = layers.prediction('prediction', y_)
        label = layers.label('label', y)
        accuracy = layers.accuracy('accuracy', prediction, label)
        optimizer = tf.train.AdamOptimizer(0.001).minimize(loss)

        tformer = transformer.Transformer()
        tformer.convert(np.float32)
        tformer.scale(1./255.)

        # Reshape input if necessary.
        if not args.cnn:
            tformer.reshape([None, 784])

        with tf.Session(config = tf.ConfigProto(log_device_placement = True)) as sess:
            sess.run(tf.initialize_all_variables())

            losses = []
            batch_accuracies = []
            train_accuracies = []
            test_accuracies = []

            n_W = args.layers + 2
            n_h = args.layers + 1

            weight_means = [[] for i in range(n_W)]
            activation_means = [[] for i in range(n_h)]

            iterations = args.epochs*(train_dataset.count//args.batch_size + 1)
            interval = iterations // 20
            for i in range(iterations):

                # If this is the last batch, we reshuffle after this step.
                was_last_batch = train_dataset.next_batch_is_last()

                batch = train_dataset.next_batch()
                res = sess.run([optimizer, cross_entropy, accuracy] + W + h,
                                feed_dict = {x: tformer.process(batch[0]), y: batch[1]})

                for j in range(n_W):
                    weight_means[j].append(np.mean(res[3 + j]))

                # Get the mean of all layer activations after normalization, i.e. after the normalizer layer
                for j in range(n_h):
                    activation_means[j].append(np.mean(res[3 + n_W + j]))

                losses.append(res[1])
                batch_accuracies.append(res[2])

                print('Loss [%d]: %f' % (i, res[1]))
                print('Batch accuracy [%d]: %f' % (i, res[2]))

                if was_last_batch:
                    train_dataset.shuffle()
                    train_dataset.reset()

                if i % interval == 0:

                    # Accuracy on training set.
                    train_accuracy = 0
                    batches = test_train_dataset.count // test_train_dataset.batch_size

                    for j in range(batches):
                        batch = test_train_dataset.next_batch()
                        train_accuracy += sess.run(accuracy, feed_dict = {x: tformer.process(batch[0]), y: batch[1]})

                    train_accuracy = train_accuracy / batches
                    train_accuracies.append(train_accuracy)

                    print('Train accuracy [%d]: %f' % (i, train_accuracy))
                    test_train_dataset.reset()

                    # Accuracy on testing set.
                    test_accuracy = 0
                    batches = test_dataset.count // test_dataset.batch_size

                    for j in range(batches):
                        batch = test_dataset.next_batch()
                        test_accuracy += sess.run(accuracy, feed_dict = {x: tformer.process(batch[0]), y: batch[1]})

                    test_accuracy = test_accuracy / batches
                    test_accuracies.append(test_accuracy)

                    print('Test accuracy [%d]: %f' % (i, test_accuracy))
                    test_dataset.reset()

            sess.close()

            weights = {}
            for j in range(args.layers):
                weights['W_' + str(j)] = np.array(weight_means[j])

            activations = {}
            for j in range(args.layers):
                activations['h_' + str(j)] = np.array(activation_means[j])

            accuracies = {
                'batch_accuracies': batch_accuracies,
                'train_accuracies': train_accuracies,
                'test_accuracies': test_accuracies
            }

            return iterations, interval, weights, activations, accuracies
Example #16
def main():
    """
    Train and evaluate the model with the chosen parameters.
    """

    test_images, test_labels = mnist.load('t10k-images.idx3-ubyte',
                                          't10k-labels.idx1-ubyte')
    train_images, train_labels = mnist.load('train-images.idx3-ubyte',
                                            'train-labels.idx1-ubyte')

    test_images = test_images.reshape(test_images.shape[0],
                                      test_images.shape[1],
                                      test_images.shape[2], 1)
    train_images = train_images.reshape(train_images.shape[0],
                                        train_images.shape[1],
                                        train_images.shape[2], 1)

    train_dataset = dataset.Dataset(train_images, train_labels,
                                    args.batch_size)

    # Used for testing on the training and testing sets, as the GPU does not hold
    # the full training/testing sets in memory.
    test_train_dataset = dataset.Dataset(train_images, train_labels, 1000)
    test_dataset = dataset.Dataset(test_images, test_labels, 1000)

    with tf.device('/gpu:0'):
        if args.cnn:
            x = tf.placeholder(tf.float32, shape=[None, 28, 28, 1])
        else:
            x = tf.placeholder(tf.float32, shape=[None, 784])

        y = tf.placeholder(tf.float32, shape=[None, 10])

        # Two convolutional layers if requested, otherwise fully connected layers.
        if args.cnn:

            kernel_size = [3, 3, 3, 3, 3, 3, 3]

            channels = [16]
            for i in range(1, args.layers):
                channels.append(max(channels[-1] * 2, 64))

            #if args.layers % 2 == 0:
            #    channels[args.layers//2 - 1] = 128
            #    channels[args.layers//2] = 128;

            #    for i in range(args.layers//2 - 1, -1, -1):
            #        channels[i] = channels[i + 1]//2
            #        channels[args.layers - i - 1] = channels[args.layers - i - 2]//2
            #else:
            #    channels[args.layers//2] = 128

            #    for i in range(args.layers//2 - 1, -1, -1):
            #        channels[i] = channels[i + 1]//2
            #        channels[args.layers - i - 1] = channels[args.layers - i - 2]//2

            # weights and biases
            W = []
            b = []

            # conv, activations, pooling and normalizes
            s = []
            h = []
            p = []
            n = [x]

            for i in range(args.layers):

                channels_in = 1
                if i > 0:
                    channels_in = channels[i - 1]

                W.append(
                    initializer('W_conv' + str(i), [
                        kernel_size[i], kernel_size[i], channels_in,
                        channels[i]
                    ]))
                b.append(
                    initializers.constant('b_conv' + str(i), [channels[i]],
                                          value=args.bias))

                s.append(
                    layers.convolutional('s_' + str(i), n[-1], W[-1], b[-1]))
                h.append(activation('h_' + str(i), s[-1]))
                p.append(layers.pooling('p_' + str(i), h[-1]))
                n.append(normalizer('n_' + str(i), p[-1]))

            shape = n[-1].get_shape().as_list()
            n[-1] = tf.reshape(n[-1], [-1, shape[1] * shape[2] * shape[3]])
        else:

            units = [1000, 1000, 1000, 1000, 1000]

            # weights and biases
            W = []
            b = []

            # linear, activations and normalized
            s = []
            h = []
            n = [x]

            for i in range(args.layers):
                units_in = 784
                if i > 0:
                    units_in = units[i - 1]

                W.append(initializer('W_fc' + str(i), [units_in, units[i]]))
                b.append(
                    initializers.constant('b_fc' + str(i), [units[i]],
                                          value=args.bias))

                s.append(
                    layers.inner_product('s_' + str(i), n[-1], W[-1], b[-1]))
                h.append(activation('h_' + str(i), s[-1]))
                n.append(normalizer('n_' + str(i), h[-1]))

        W.append(initializer('W_fc3', [n[-1].get_shape().as_list()[1], 100]))
        b.append(initializers.constant('b_fc3', [100], value=args.bias))

        s.append(layers.inner_product('s_3', n[-1], W[-1], b[-1]))
        h.append(activation('h_3', s[-1]))
        n.append(normalizer('n_3', h[-1]))

        W.append(initializer('W_fc4', [100, 10]))
        b.append(initializers.constant('b_fc4', [10], value=args.bias))

        s.append(layers.inner_product('s_4', n[-1], W[-1], b[-1]))
        y_ = layers.softmax('y_', s[-1])

        # Loss definition and optimizer.
        cross_entropy = layers.cross_entropy('cross_entropy', y_, y)

        weights = [v for v in tf.all_variables() if v.name.startswith('W')]
        loss = cross_entropy + args.regularizer_weight * regularizer(
            'regularizer', weights)

        prediction = layers.prediction('prediction', y_)
        label = layers.label('label', y)
        accuracy = layers.accuracy('accuracy', prediction, label)
        optimizer = tf.train.AdamOptimizer(0.001).minimize(loss)

        tformer = transformer.Transformer()
        tformer.convert(np.float32)
        tformer.scale(1. / 255.)

        # Reshape input if necessary.
        if not args.cnn:
            tformer.reshape([None, 784])

        with tf.Session(config=tf.ConfigProto(
                log_device_placement=True)) as sess:
            sess.run(tf.initialize_all_variables())

            # First run to visualize variables and backpropagated gradients.
            batch = train_dataset.next_batch()
            res = sess.run([optimizer] + W + h,
                           feed_dict={
                               x: tformer.process(batch[0]),
                               y: batch[1]
                           })

            # Get all the weights for the plot.
            weights = {}
            for i in range(args.layers + 1):
                weights['W_' + str(i)] = res[1 + i]

            plots.plot_normalized_histograms(
                weights, 'images/' + name + '_weights_t0.png')

            # Get all the activations for the plot.
            activations = {}
            for i in range(args.layers + 1):
                activations['h_' + str(i)] = res[1 + args.layers + 1 + i]

            plots.plot_normalized_histograms(
                activations, 'images/' + name + '_activations_t0.png')

            losses = []
            batch_accuracies = []
            train_accuracies = []
            test_accuracies = []

            means = [[] for i in range(args.layers)]
            stds = [[] for i in range(args.layers)]

            iterations = args.epochs * (
                train_dataset.count // args.batch_size + 1)
            interval = iterations // 20
            for i in range(iterations):

                # If this is the last batch, we reshuffle after this step.
                was_last_batch = train_dataset.next_batch_is_last()

                batch = train_dataset.next_batch()
                res = sess.run([optimizer, cross_entropy, accuracy] + h,
                               feed_dict={
                                   x: tformer.process(batch[0]),
                                   y: batch[1]
                               })

                # Get the mean and standard deviation of all layer activations after normalization, i.e. after the normalizer layer
                for j in range(args.layers):
                    means[j].append(np.mean(res[3 + j]))
                    stds[j].append(np.std(res[3 + j]))

                losses.append(res[1])
                batch_accuracies.append(res[2])

                print('Loss [%d]: %f' % (i, res[1]))
                print('Batch accuracy [%d]: %f' % (i, res[2]))

                if was_last_batch:
                    train_dataset.shuffle()
                    train_dataset.reset()

                if i % interval == 0:

                    # Accuracy on training set.
                    train_accuracy = 0
                    batches = test_train_dataset.count // test_train_dataset.batch_size

                    for j in range(batches):
                        batch = test_train_dataset.next_batch()
                        train_accuracy += sess.run(accuracy,
                                                   feed_dict={
                                                       x: tformer.process(
                                                           batch[0]),
                                                       y: batch[1]
                                                   })

                    train_accuracy = train_accuracy / batches
                    train_accuracies.append(train_accuracy)

                    print('Train accuracy [%d]: %f' % (i, train_accuracy))
                    test_train_dataset.reset()

                    # Accuracy on testing set.
                    test_accuracy = 0
                    batches = test_dataset.count // test_dataset.batch_size

                    for j in range(batches):
                        batch = test_dataset.next_batch()
                        test_accuracy += sess.run(accuracy,
                                                  feed_dict={
                                                      x: tformer.process(
                                                          batch[0]),
                                                      y: batch[1]
                                                  })

                    test_accuracy = test_accuracy / batches
                    test_accuracies.append(test_accuracy)

                    print('Test accuracy [%d]: %f' % (i, test_accuracy))
                    test_dataset.reset()

                    # Plot loss for each iteration.
                    plots.plot_lines(
                        {'loss': (np.arange(0, i + 1), np.array(losses))},
                        'images/' + name + '_loss.png')

                    statistics = {}
                    for j in range(args.layers):
                        statistics['h_' + str(j) + '_mean'] = (np.arange(
                            0, i + 1), np.array(means[j]))
                        statistics['h_' + str(j) + '_std'] = (np.arange(
                            0, i + 1), np.array(stds[j]))

                    # Plot activations for each iteration.
                    plots.plot_lines(statistics,
                                     'images/' + name + '_activations.png')

                    # Plot measures accuracy every 250th iteration.
                    plots.plot_lines(
                        {
                            'batch_accuracy':
                            (np.arange(0, i + 1), np.array(batch_accuracies)),
                            'train_accuracy': (np.arange(0, i + 1, interval),
                                               np.array(train_accuracies)),
                            'test_accuracy': (np.arange(0, i + 1, interval),
                                              np.array(test_accuracies)),
                        }, 'images/' + name + '_accuracy.png')

            sess.close()
Example #17
def training():
    pretrained_weights = './pretrain/vgg16.npy'

    train_log_dir = './log_dr50000/train/'
    val_log_dir = './log_dr50000/val/'

    with tf.name_scope('input'):
        images_train, labels_train = dr5_input.input_data(True, BATCH_SIZE)
        images_val, labels_val = dr5_input.input_data(False, BATCH_SIZE)

    image_holder = tf.placeholder(tf.float32,
                                  shape=[BATCH_SIZE, IMG_W, IMG_H, 3])
    label_holder = tf.placeholder(tf.int32, shape=[BATCH_SIZE, N_CLASSES])

    logits = vgg.VGG16(image_holder, N_CLASSES, 0.5)
    loss = layers.loss(logits, label_holder)
    accuracy = layers.accuracy(logits, label_holder)

    global_steps = tf.Variable(0, name='global_step', trainable=False)
    LEARNING_RATE = tf.train.exponential_decay(start_rate,
                                               global_steps,
                                               decay_steps,
                                               deacy_rate,
                                               staircase=True)
    train_op = layers.optimize(loss, LEARNING_RATE, global_steps)

    saver = tf.train.Saver(tf.global_variables())

    summary_op = tf.summary.merge_all()

    # The main thread
    init = tf.group(tf.global_variables_initializer(),
                    tf.local_variables_initializer())
    sess = tf.InteractiveSession()
    sess.run(init)

    print(
        '########################## Start Training ##########################')

    layers.load_with_skip(pretrained_weights, sess, ['fc6', 'fc7', 'fc8'])

    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(sess=sess, coord=coord)
    train_summary_writer = tf.summary.FileWriter(train_log_dir,
                                                 graph=sess.graph)
    val_summary_writer = tf.summary.FileWriter(val_log_dir, graph=sess.graph)

    try:
        for step in np.arange(MAX_STEP):
            if coord.should_stop():
                break
            # start_time  = time .time()

            train_images, train_labels = sess.run([images_train, labels_train])
            _, train_loss, train_acc, summary_str = sess.run(
                [train_op, loss, accuracy, summary_op],
                feed_dict={
                    image_holder: train_images,
                    label_holder: train_labels
                })
            # duration = time.time() - start_time

            if step % 50 == 0 or (step + 1) == MAX_STEP:
                print('step %d, loss = %.4f, accuracy = %.4f%%' %
                      (step, train_loss, train_acc))
                train_summary_writer.add_summary(summary_str, step)

            if step % 200 == 0 or (step + 1) == MAX_STEP:
                val_images, val_labels = sess.run([images_val, labels_val])
                val_loss, val_acc = sess.run([loss, accuracy],
                                             feed_dict={
                                                 image_holder: val_images,
                                                 label_holder: val_labels
                                             })
                print('step %d, val loss = %.2f, val accuracy = %.2f%%' %
                      (step, val_loss, val_acc))
                val_summary_writer.add_summary(summary_str, step)

            if step % 2000 == 0 or (step + 1) == MAX_STEP:
                checkpoint_path = os.path.join(train_log_dir, 'model.ckpt')
                saver.save(sess, checkpoint_path, global_step=step)
                lr = sess.run(LEARNING_RATE)
                print("step %d, learning_rate= %f" % (step, lr))

    except tf.errors.OutOfRangeError:
        coord.request_stop()

    coord.request_stop()
    coord.join(threads)

    sess.close()
Example #18
def build_training_graph(is_training, X_l, Y_l, ID_l, X_u, ID_u, K, lr, mom,
                         lgc_alpha):
    CACHE_F = tf.get_variable("cache_f")
    W = tf.get_variable("Affinity_matrix")
    D = tf.get_variable("D")
    assert (K.shape[0] == W.shape[0])
    with tf.variable_scope("CNN", reuse=tf.AUTO_REUSE) as scope:
        losses = {}
        #logit_l = sup_classifier.logit_small(X_l,num_classes = Y_l.shape[1], is_training=is_training, update_batch_stats=False)
        #logit_u = sup_classifier.logit_small(X_u,num_classes = Y_l.shape[1], is_training=is_training, update_batch_stats=is_training)
    logit_l = tf.gather(CACHE_F, ID_l)
    logit_u = tf.gather(CACHE_F, ID_u)

    losses['xent_sup'] = L.ce_loss(logit_l, Y_l)
    losses['mse_sup'] = tf.losses.mean_squared_error(tf.nn.softmax(logit_l),
                                                     Y_l)
    losses['mean_acc'] = L.accuracy(logit_l, Y_l)

    #Concatenate ids and logits
    ids = tf.concat([ID_l, ID_u], 0)
    logits = tf.concat([logit_l, logit_u], 0)
    logits = tf.nn.softmax(logits)
    #assert ids.shape[0] == logits.shape[0]

    if FLAGS.loss_func == "lgc":
        #Unsupervised loss
        K_ids = tf.gather(K, ids, axis=0)  #Get neighbor pairs
        K_ids_i = tf.reshape(K_ids[:, :, 0], [-1])
        K_ids_j = tf.reshape(K_ids[:, :, 1], [-1])
        logits_ids = tf.reshape(
            tf.tile(tf.expand_dims(tf.range(tf.shape(logits)[0]), -1),
                    [1, FLAGS.affmat_k]), [-1])

        #all_F_i = tf.gather(logits,logits_ids,axis=0)
        all_F_i = tf.gather(tf.nn.softmax(CACHE_F), K_ids_i, axis=0)
        all_F_j = tf.gather(tf.nn.softmax(CACHE_F), K_ids_j,
                            axis=0)  #F_j comes from cache
        all_Wij = tf.reshape(tf.gather(W, ids, axis=0), [-1])
        all_Dii = tf.gather(D, K_ids_i, axis=0)
        all_Djj = tf.gather(D, K_ids_j, axis=0)
        all_Dii = tf.tile(all_Dii[:, None], [1, all_F_i.shape[1]])
        all_Djj = tf.tile(all_Djj[:, None], [1, all_F_j.shape[1]])

        all_Fi = tf.multiply(all_Dii, all_F_i)
        all_Fj = tf.multiply(all_Djj, all_F_j)
        LGC_unsupervised_loss = (tf.multiply(
            tf.reduce_sum(tf.square(all_Fi - all_Fj), axis=1), all_Wij))
        #LGC_unsupervised_loss = tf.reduce_sum(tf.square(all_F_i - all_F_j),axis=1)
        losses["lgc_unsupervised_loss"] = tf.reduce_sum(LGC_unsupervised_loss)
        losses["lgc_supervised_loss"] = losses['xent_sup']

        losses["lgc_unsupervised_loss"] = (
            int(K.shape[0]) / int(FLAGS.batch_size + FLAGS.ul_batch_size)
        ) * losses["lgc_unsupervised_loss"]
        losses["lgc_supervised_loss"] = (
            FLAGS.num_labeled / int(FLAGS.batch_size)) * losses['mse_sup']

        lgc_lamb = 1 / lgc_alpha - 1
        losses["lgc_loss"] = losses[
            "lgc_unsupervised_loss"] + lgc_lamb * losses["lgc_supervised_loss"]

        #Assign to cache
        assign_op_l = tf.scatter_update(ref=CACHE_F,
                                        indices=ID_l,
                                        updates=tf.nn.softmax(logit_l))
        assign_op_u = tf.scatter_update(ref=CACHE_F,
                                        indices=ID_u,
                                        updates=tf.nn.softmax(logit_u))
        #assign_to_cache = tf.group(assign_op_l,assign_op_u)
        assign_to_cache = tf.no_op()

        #Get Trainable vars
        tvars = tf.trainable_variables()
        tvars_W = list(
            filter(lambda x: np.char.find(x.name, "Affinity_matrix") != -1,
                   tvars))
        tvars_D = list(filter(lambda x: np.char.find(x.name, "D") != -1,
                              tvars))
        tvars_CNN = list(
            filter(lambda x: np.char.find(x.name, "CNN") != -1, tvars))
        tvars_CACHE = list(
            filter(lambda x: np.char.find(x.name, "cache") != -1, tvars))

        print([var.name for var in tvars_CACHE])

        opt = tf.train.AdamOptimizer(learning_rate=lr, beta1=mom)
        grads_and_vars = opt.compute_gradients(losses['lgc_loss'], tvars_CACHE)
        train_op = opt.apply_gradients(
            grads_and_vars, global_step=tf.train.get_or_create_global_step())
    else:
        assign_to_cache = None
        all_Wij = None  # only computed on the "lgc" branch above; returned as None here
        opt = tf.train.AdamOptimizer(learning_rate=lr, beta1=mom)
        tvars = tf.trainable_variables()  # previously only defined inside the "lgc" branch
        grads_and_vars = opt.compute_gradients(losses['xent_sup'], tvars)
        train_op = opt.apply_gradients(
            grads_and_vars, global_step=tf.train.get_or_create_global_step())

    return losses, train_op, assign_to_cache, logit_l, all_Wij