Example #1
 def prepare_trainer(self, generator_loss, discriminator_loss):
     '''Helper function for init_opt'''
     all_vars = tf.trainable_variables()  # returns all trainable variables
     # All variables are named with a 'g_' or 'd_' prefix to tell G and D variables apart.
     g_vars = [var for var in all_vars if
               var.name.startswith('g_')]
     d_vars = [var for var in all_vars if
               var.name.startswith('d_')]
     # Define the optimization op for G:
     # first create the optimizer
     generator_opt = tf.train.AdamOptimizer(self.generator_lr,
                                            beta1=0.5)
     # G's trainer is now defined: the optimizer, the loss to minimize, and
     # the variables to train (all other variables get no gradients and stay fixed).
     self.generator_trainer =\
         pt.apply_optimizer(generator_opt,
                            losses=[generator_loss],
                            var_list=g_vars)
     discriminator_opt = tf.train.AdamOptimizer(self.discriminator_lr,
                                                beta1=0.5)
     self.discriminator_trainer =\
         pt.apply_optimizer(discriminator_opt,
                            losses=[discriminator_loss],
                            var_list=d_vars)
     self.log_vars.append(("g_learning_rate", self.generator_lr))
     self.log_vars.append(("d_learning_rate", self.discriminator_lr))
 def prepare_trainer(self, generator_loss, discriminator_loss):
     '''Helper function for init_opt'''
     all_vars = tf.trainable_variables()
     g_vars = [var for var in all_vars if var.name.startswith('g_')]
     d_vars = [var for var in all_vars if var.name.startswith('d_')]
     generator_opt = tf.train.AdamOptimizer(self.generator_lr, beta1=0.5)
     self.generator_trainer =\
         pt.apply_optimizer(generator_opt, losses=[generator_loss], var_list=g_vars)
     discriminator_opt = tf.train.AdamOptimizer(self.discriminator_lr,
                                                beta1=0.5)
     self.discriminator_trainer =\
         pt.apply_optimizer(discriminator_opt, losses=[discriminator_loss], var_list=d_vars)
     self.log_vars.append(("g_learning_rate", self.generator_lr))
     self.log_vars.append(("d_learning_rate", self.discriminator_lr))
    def prepare_trainer(self, generator_loss, discriminator_loss, encoder_loss):
        '''Helper function for init_opt'''
        all_vars = tf.trainable_variables()

        if cfg.TRAIN.GENERATOR:
            g_vars = [var for var in all_vars if
                      var.name.startswith('g_')]

            generator_opt = tf.train.AdamOptimizer(self.generator_lr,
                                                   beta1=0.5)
            self.generator_trainer =\
                pt.apply_optimizer(generator_opt,
                                   losses=[generator_loss],
                                   var_list=g_vars)

            self.log_vars.append(("g_learning_rate", self.generator_lr))

        if cfg.TRAIN.SUPERVISED and cfg.TRAIN.ENCODER:
            e_vars = [var for var in all_vars if
                      var.name.startswith('e_')]
            encoder_opt = tf.train.AdamOptimizer(self.encoder_lr,
                                                 beta1=0.5)
            self.encoder_trainer =\
                pt.apply_optimizer(encoder_opt,
                                   losses=[encoder_loss],
                                   var_list=e_vars)

            self.log_vars.append(("e_learning_rate", self.encoder_lr))
            
        if cfg.TRAIN.DISCRIMINATOR:
            d_vars_to_train = []
            if cfg.TRAIN.DISCRIMINATOR_IMAGES:
                d_i_vars = [var for var in all_vars if var.name.startswith('d_i_')]
                d_vars_to_train += d_i_vars
            if cfg.TRAIN.DISCRIMINATOR_LATENTS:
                d_l_vars = [var for var in all_vars if var.name.startswith('d_l_')]
                d_vars_to_train += d_l_vars
            if cfg.TRAIN.DISCRIMINATOR_FUSION:
                d_f_vars = [var for var in all_vars if var.name.startswith('d_f')]
                d_vars_to_train += d_f_vars

            discriminator_opt = tf.train.AdamOptimizer(self.discriminator_lr,
                                                       beta1=0.5)
            self.discriminator_trainer =\
                pt.apply_optimizer(discriminator_opt,
                                   losses=[discriminator_loss],
                                   var_list=d_vars_to_train)

            self.log_vars.append(("d_learning_rate", self.discriminator_lr))
Example #4
def main(_=None):
    # Since we are feeding our data as numpy arrays, we need to create
    # placeholders in the graph.
    # These must then be fed using the feed dict.
    image_placeholder = tf.placeholder(tf.float32, [BATCH_SIZE, 28, 28, 1])
    labels_placeholder = tf.placeholder(tf.float32, [BATCH_SIZE, 10])

    # Create our model.  The result of softmax_classifier is a namedtuple
    # that has members result.loss and result.softmax.
    if FLAGS.model == 'full':
        result = multilayer_fully_connected(image_placeholder,
                                            labels_placeholder)
    elif FLAGS.model == 'conv':
        result = lenet5(image_placeholder, labels_placeholder)
    else:
        raise ValueError('model must be full or conv: %s' % FLAGS.model)

    # For tracking accuracy in evaluation, we need to add an evaluation node.
    # We only include this part of the graph when testing, so we need to specify
    # that in the phase.
    # Some ops have different behaviors in test vs train and these take a phase
    # argument.
    accuracy = result.softmax.evaluate_classifier(
        labels_placeholder, phase=pt.Phase.test)

    # Grab the data as numpy arrays.
    train_images, train_labels = data_utils.mnist(training=True)
    test_images, test_labels = data_utils.mnist(training=False)

    # Create the gradient optimizer and apply it to the graph.
    # pt.apply_optimizer adds regularization losses and sets up a step counter
    # (pt.global_step()) for you.
    optimizer = tf.train.GradientDescentOptimizer(0.01)
    train_op = pt.apply_optimizer(optimizer, losses=[result.loss])

    # We can set a save_path in the runner to automatically checkpoint every so
    # often.  Otherwise at the end of the session, the model will be lost.
    runner = pt.train.Runner(save_path=FLAGS.save_path)
    with tf.Session():
        for epoch in xrange(10):
            # Shuffle the training data.
            train_images, train_labels = data_utils.permute_data(
                (train_images, train_labels))

            runner.train_model(
                train_op,
                result.loss,
                EPOCH_SIZE,
                feed_vars=(image_placeholder, labels_placeholder),
                feed_data=pt.train.feed_numpy(BATCH_SIZE, train_images,
                                              train_labels),
                print_every=100)
            classification_accuracy = runner.evaluate_model(
                accuracy,
                TEST_SIZE,
                feed_vars=(image_placeholder, labels_placeholder),
                feed_data=pt.train.feed_numpy(BATCH_SIZE, test_images,
                                              test_labels))
            print('Accuracy after %d epoch %g%%' %
                  (epoch + 1, classification_accuracy * 100))
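The feed_data argument above expects an iterable that yields one batch per step, aligned with feed_vars. As a rough mental model only (not PrettyTensor's actual implementation), pt.train.feed_numpy behaves like this sketch:

def feed_numpy_sketch(batch_size, *arrays):
    # Yield a list with one batch-sized slice per feed_var for each step.
    count = len(arrays[0])
    for start in range(0, count - batch_size + 1, batch_size):
        yield [a[start:start + batch_size] for a in arrays]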
Example #5
  def test_queues(self):
    qr = FakeQueueRunner()
    tf.train.add_queue_runner(qr)
    runner = local_trainer.Runner()
    with tf.Session():
      optimizer = tf.train.GradientDescentOptimizer(0.5)
      train_op = pt.apply_optimizer(optimizer,
                                    losses=[self.softmax_result.loss])

      runner.train_model(train_op,
                         self.softmax_result.loss,
                         100,
                         (self.input, self.target),
                         self.xor_data,
                         print_every=2)
    with tf.Session():
      with self.assertRaisesRegexp(ValueError, r'.*\bstop_queues\b.*'):
        runner.train_model(train_op,
                           self.softmax_result.loss,
                           100,
                           (self.input, self.target),
                           self.xor_data,
                           print_every=2)

    runner.stop_queues()
    qr.assert_worked(self)
    def __init__(self):
        self.data_directory = os.path.join(FLAGS.working_directory, "MNIST")
        if not os.path.exists(self.data_directory):
            os.makedirs(self.data_directory)
        self.save_path = os.path.join(FLAGS.working_directory, 'save.ckpt')
        self.mnist = read_data_set("/tmp/vae/converted_java.npy")

        self.input_tensor = tf.placeholder(tf.float32, [FLAGS.batch_size, 28 * 28])

        with pt.defaults_scope(activation_fn=tf.nn.elu,
                               batch_normalize=True,
                               learned_moments_update_rate=0.0003,
                               variance_epsilon=0.001,
                               scale_after_normalization=True):
            with pt.defaults_scope(phase=pt.Phase.train):
                with tf.variable_scope("model") as scope:
                    self.output_tensor, self.mean, self.stddev = decoder(encoder(self.input_tensor))

            with pt.defaults_scope(phase=pt.Phase.test):
                with tf.variable_scope("model", reuse=True) as scope:
                    self.sampled_tensor, _, _ = decoder()

        self.vae_loss = get_vae_cost(self.mean, self.stddev)
        self.rec_loss = get_reconstruction_cost(self.output_tensor, self.input_tensor)

        self.loss = self.vae_loss + self.rec_loss

        self.optimizer = tf.train.AdamOptimizer(FLAGS.learning_rate, epsilon=1.0)
        self.train = pt.apply_optimizer(self.optimizer, losses=[self.loss])

        self.init = tf.initialize_all_variables()
        
        self.saver = tf.train.Saver()
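A hypothetical driver loop for the model above, using only the attributes the constructor defines. The class name Model and the next_batch reader call are assumptions about the surrounding project:

model = Model()
with tf.Session() as sess:
    sess.run(model.init)
    for step in range(1000):
        batch = model.mnist.next_batch(FLAGS.batch_size)  # assumed reader API
        _, loss_value = sess.run([model.train, model.loss],
                                 {model.input_tensor: batch})
    model.saver.save(sess, model.save_path)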
Example #7
def main(_=None):
    # Since we are feeding our data as numpy arrays, we need to create
    # placeholders in the graph.
    # These must then be fed using the feed dict.
    image_placeholder = tf.placeholder(tf.float32, [BATCH_SIZE, 28, 28, 1])
    labels_placeholder = tf.placeholder(tf.float32, [BATCH_SIZE, 10])

    # Create our model.  The result of softmax_classifier is a namedtuple
    # that has members result.loss and result.softmax.
    if FLAGS.model == 'full':
        result = multilayer_fully_connected(image_placeholder,
                                            labels_placeholder)
    elif FLAGS.model == 'conv':
        result = lenet5(image_placeholder, labels_placeholder)
    else:
        raise ValueError('model must be full or conv: %s' % FLAGS.model)

    # For tracking accuracy in evaluation, we need to add an evaluation node.
    # We only include this part of the graph when testing, so we need to specify
    # that in the phase.
    # Some ops have different behaviors in test vs train and these take a phase
    # argument.
    accuracy = result.softmax.evaluate_classifier(labels_placeholder,
                                                  phase=pt.Phase.test)

    # Grab the data as numpy arrays.
    train_images, train_labels = data_utils.mnist(training=True)
    test_images, test_labels = data_utils.mnist(training=False)

    # Create the gradient optimizer and apply it to the graph.
    # pt.apply_optimizer adds regularization losses and sets up a step counter
    # (pt.global_step()) for you.
    optimizer = tf.train.GradientDescentOptimizer(0.01)
    train_op = pt.apply_optimizer(optimizer, losses=[result.loss])

    # We can set a save_path in the runner to automatically checkpoint every so
    # often.  Otherwise at the end of the session, the model will be lost.
    runner = pt.train.Runner(save_path=FLAGS.save_path)
    with tf.Session():
        for epoch in xrange(10):
            # Shuffle the training data.
            train_images, train_labels = data_utils.permute_data(
                (train_images, train_labels))

            runner.train_model(
                train_op,
                result.loss,
                EPOCH_SIZE,
                feed_vars=(image_placeholder, labels_placeholder),
                feed_data=pt.train.feed_numpy(BATCH_SIZE, train_images,
                                              train_labels),
                print_every=100)
            classification_accuracy = runner.evaluate_model(
                accuracy,
                TEST_SIZE,
                feed_vars=(image_placeholder, labels_placeholder),
                feed_data=pt.train.feed_numpy(BATCH_SIZE, test_images,
                                              test_labels))
            print('Accuracy after %d epoch %g%%' %
                  (epoch + 1, classification_accuracy * 100))
def run_model(result):
    accuracy = result.softmax.evaluate_classifier(labels_placeholder,
                                                  phase=pt.Phase.test)
    train_images, train_labels = data_utils.mnist(training=True)
    test_images, test_labels = data_utils.mnist(training=False)
    optimizer = tf.train.GradientDescentOptimizer(0.01)
    train_op = pt.apply_optimizer(optimizer, losses=[result.loss])
    runner = pt.train.Runner(save_path=FLAGS.save_path)
    with tf.Session():
        for epoch in range(0, 10):
            train_images, train_labels = data_utils.permute_data(
                (train_images, train_labels))
            runner.train_model(train_op,
                               result.loss,
                               EPOCH_SIZE,
                               feed_vars=(image_placeholder,
                                          labels_placeholder),
                               feed_data=pt.train.feed_numpy(BATCH_SIZE,
                                                             train_images,
                                                             train_labels),
                               print_every=100)
            classification_accuracy = runner.evaluate_model(
                accuracy,
                TEST_SIZE,
                feed_vars=(image_placeholder, labels_placeholder),
                feed_data=pt.train.feed_numpy(BATCH_SIZE,
                                              test_images,
                                              test_labels))
        print("epoch", epoch + 1)
        print("accuracy", classification_accuracy)
  def test_queues(self):
    qr = FakeQueueRunner()
    tf.train.add_queue_runner(qr)
    runner = local_trainer.Runner()
    with tf.Session():
      optimizer = tf.train.GradientDescentOptimizer(0.5)
      train_op = pt.apply_optimizer(optimizer,
                                    losses=[self.softmax_result.loss])

      runner.train_model(train_op,
                         self.softmax_result.loss,
                         100,
                         (self.input, self.target),
                         self.xor_data,
                         print_every=2)
    with tf.Session():
      with self.assertRaisesRegexp(ValueError, r'.*\bstop_queues\b.*'):
        runner.train_model(train_op,
                           self.softmax_result.loss,
                           100,
                           (self.input, self.target),
                           self.xor_data,
                           print_every=2)

    runner.stop_queues()
    qr.assert_worked(self)
    def test_eval(self):
        f = os.path.join(self.tmp_file, "checkpoint")
        runner = local_trainer.Runner(save_path=f)
        with tf.Session():
            classification_accuracy = self.softmax_result.softmax.evaluate_classifier(self.target, phase=pt.Phase.test)

            optimizer = tf.train.GradientDescentOptimizer(0.2)
            train_op = pt.apply_optimizer(optimizer, losses=[self.softmax_result.loss])

            runner.train_model(
                train_op, self.softmax_result.loss, 100, (self.input, self.target), self.xor_data, print_every=50
            )
            self.assertTrue(runner._last_init)
            save_paths = list(runner._saver.last_checkpoints)

            # The accuracy should be 50% right now since model is consistently
            # generated.
            accuracy = runner.evaluate_model(classification_accuracy, 1, (self.input, self.target), self.xor_data)
            self.assertEquals(runner._saver.last_checkpoints, save_paths, "No additional paths should have been saved.")
            self.assertFalse(runner._last_init)
            self.assertEqual(accuracy, 0.5)

            # Train the model to 100% accuracy.
            runner.train_model(
                train_op, self.softmax_result.loss, 2000, (self.input, self.target), self.xor_data, print_every=1000
            )
            accuracy = runner.evaluate_model(classification_accuracy, 1, (self.input, self.target), self.xor_data)
            self.assertFalse(runner._last_init)

            # Make sure that the previous computation didn't impact this eval.
            self.assertEqual(accuracy, 1.0)
Example #11
 def define_one_trainer(self, loss, learning_rate, key_word):
     '''Helper function for init_opt'''
     all_vars = tf.trainable_variables()
     train_vars = [var for var in all_vars if var.name.startswith(key_word)]
     opt = tf.train.AdamOptimizer(learning_rate, beta1=0.5)
     trainer = pt.apply_optimizer(opt, losses=[loss], var_list=train_vars)
     return trainer
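A hedged usage sketch for the helper above, assuming the caller defines the loss tensors and learning rates (as in the prepare_trainer examples earlier on this page):

     # Inside the enclosing model class (loss tensors and learning rates assumed):
     self.generator_trainer = self.define_one_trainer(generator_loss,
                                                      self.generator_lr, 'g_')
     self.discriminator_trainer = self.define_one_trainer(discriminator_loss,
                                                          self.discriminator_lr, 'd_')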
def train_neural_network(X, Y):
    '''
    predict = convolutional_neural_network(X)
    cost_func = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=predict, labels=Y))
    optimizer = tf.train.AdamOptimizer().minimize(cost_func)  # learning rate defaults to 0.001
    correct = tf.equal(tf.argmax(predict, 1), tf.argmax(Y, 1))
    accuracy = tf.reduce_mean(tf.cast(correct, 'float'))
    '''

    # Use PrettyTensor
    input_tensor = tf.reshape(X, [-1, 28, 28, 1])
    pretty_input = pt.wrap(input_tensor)
    predict, cost_func = (pretty_input
                          .conv2d(kernel=5, depth=32, name='layer_conv1')
                          .max_pool(kernel=2, stride=2)
                          .conv2d(kernel=5, depth=64, name='layer_conv2')
                          .max_pool(kernel=2, stride=2)
                          .flatten()
                          .fully_connected(size=1024, name='layer_fc1')
                          .softmax_classifier(num_classes=n_output_layer,
                                              labels=Y))
    accuracy = predict.evaluate_classifier(Y)
    optimizer_ = tf.train.GradientDescentOptimizer(0.1)  # learning rate
    optimizer = pt.apply_optimizer(optimizer_, losses=[cost_func])

    epochs = 1
    with tf.Session() as session:
        session.run(tf.global_variables_initializer())
        epoch_loss = 0
        for epoch in range(epochs):
            for step in range(mnist.train.num_examples // batch_size):
                x, y = mnist.train.next_batch(batch_size)
                _, c = session.run([optimizer, cost_func],
                                   feed_dict={
                                       X: x,
                                       Y: y
                                   })
                # epoch_loss += c
                # print(epoch, ' : ', epoch_loss)
                if step % 20 == 0:
                    print(
                        'epoch', epoch, '\t|', 'step', step, '\t|',
                        '\033[1;35m train acc \033[0m',
                        accuracy.eval({
                            X: x,
                            Y: y
                        }), '\t|'
                        '\033[1;35m test acc \033[0m', '\033[1;34m ' + str(
                            accuracy.eval({
                                X: mnist.test.images,
                                Y: mnist.test.labels
                            })) + '\033[0m')

        print('accuracy: ',
              accuracy.eval({
                  X: mnist.test.images,
                  Y: mnist.test.labels
              }))
Example #13
    def define_one_trainer(self, loss, learning_rate, key_word):
        '''Helper function for init_opt'''
        all_vars = tf.trainable_variables()
        train_vars = [var for var in all_vars if
                      var.name.startswith(key_word)]

        opt = tf.train.AdamOptimizer(learning_rate, beta1=0.5)
        trainer = pt.apply_optimizer(opt, losses=[loss], var_list=train_vars)
        return trainer
    def test_run(self):
        runner = local_trainer.Runner()
        with tf.Session():
            optimizer = tf.train.GradientDescentOptimizer(0.5)
            train_op = pt.apply_optimizer(optimizer, losses=[self.softmax_result.loss])

            runner.train_model(
                train_op, self.softmax_result.loss, 10, (self.input, self.target), self.xor_data, print_every=2
            )
def main(_=None):
  print('Starting Baby Names')
  input_placeholder = tf.placeholder(tf.int32, [BATCH_SIZE, TIMESTEPS])
  output_placeholder = tf.placeholder(tf.float32, [BATCH_SIZE, SEXES])
  inp = lstm_func.reshape_data_to_lstm_format(input_placeholder)

  # Create a label for each timestep.
  labels_1 = tf.reshape(tf.tile(output_placeholder, [1, TIMESTEPS]), [BATCH_SIZE, TIMESTEPS, SEXES])
  labels = lstm_func.reshape_data_to_lstm_format(labels_1, per_example_length=2)

  length_placeholder = tf.placeholder(tf.int32, [BATCH_SIZE, 1])
  t = tf.concat(1, [tf.constant(numpy.arange(BATCH_SIZE).reshape((BATCH_SIZE, 1)), dtype=tf.int32), length_placeholder])
  per_example_weights = lstm_func.reshape_data_to_lstm_format(tf.sparse_to_dense(t, [BATCH_SIZE, TIMESTEPS], 1.0, default_value=0.0)).squeeze()

  with tf.variable_scope('baby_names'):
    result = create_model(inp, labels, TIMESTEPS, per_example_weights)
  with tf.variable_scope('baby_names', reuse=True):
    test_result = create_model(inp, labels, TIMESTEPS, per_example_weights, phase=pt.Phase.test)

  accuracy = test_result.softmax.evaluate_classifier(labels, phase=pt.Phase.test, per_example_weights=per_example_weights)
  batch_accuracy = result.softmax.evaluate_classifier(labels, phase=pt.Phase.train, per_example_weights=per_example_weights)

  names, sex, lengths = data_baby_names.baby_names(TIMESTEPS)

  epoch_size = len(names) // BATCH_SIZE
  optimizer = tf.train.AdagradOptimizer(tf.train.exponential_decay(1.0, pt.global_step(), epoch_size, 0.95, staircase=True))
  train_op = pt.apply_optimizer(optimizer, losses=[result.loss])

  # We can set a save_path in the runner to automatically checkpoint every so
  # often.  Otherwise at the end of the session, the model will be lost.
  runner = pt.train.Runner(save_path=FLAGS.save_path)
  
  gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.25)
  sess = tf.InteractiveSession(config=tf.ConfigProto(gpu_options=gpu_options))

  # with tf.Session():
  for epoch in xrange(100):
    # Shuffle the training data.
    names, sex, lengths = permute.permute_data((names, sex, lengths))

    runner.train_model(
        train_op,
        [result.loss, batch_accuracy],
        epoch_size,
        feed_vars=(input_placeholder, output_placeholder, length_placeholder),
        feed_data=pt.train.feed_numpy(BATCH_SIZE, names, sex, lengths),
        print_every=100)
    classification_accuracy = runner.evaluate_model(
        accuracy,
        epoch_size,
        print_every=0,
        feed_vars=(input_placeholder, output_placeholder, length_placeholder),
        feed_data=pt.train.feed_numpy(BATCH_SIZE, names, sex, lengths))

    print('Accuracy after epoch %d: %g%%' % (
        epoch + 1, classification_accuracy * 100))
Example #16
    def train_op_loss(self, input_placeholder, labels, reuse=None):
        # Training and eval graph
        with tf.variable_scope(self.name, reuse=reuse):
            # Core train graph
            result = self.create(input_placeholder,
                                 pt.Phase.train).softmax(labels)

            train_op = pt.apply_optimizer(tf.train.AdagradOptimizer(0.5),
                                          losses=[result.loss])
            return train_op, result.loss
Example #17
def main(_=None):
    image_placeholder = tf.placeholder(tf.float32, [BATCH_SIZE, 22, 95])
    labels_placeholder = tf.placeholder(tf.float32, [BATCH_SIZE, 2])

    # Create our model.  The result of softmax_classifier is a namedtuple
    # that has members result.loss and result.softmax.
    images = pt.wrap(tf.expand_dims(image_placeholder, -1))
    with pt.defaults_scope(activation_fn=tf.nn.relu, l2loss=0.00001):
        result = (images
                    .conv2d(5, 20)
                    # .max_pool(2, 2)
                    .conv2d(5, 50)
                    # .max_pool(2, 2)
                    .flatten()
                    .fully_connected(500)
                    .dropout(0.5)
                    .softmax_classifier(2, labels_placeholder))

    accuracy = result.softmax.evaluate_classifier(labels_placeholder,
                                                  phase=pt.Phase.test)

    # Grab the data as numpy arrays.
    train_images, train_labels, test_images, test_labels = prepare_data()

    # Create the gradient optimizer and apply it to the graph.
    # pt.apply_optimizer adds regularization losses and sets up a step counter
    # (pt.global_step()) for you.
    optimizer = tf.train.AdamOptimizer()
    train_op = pt.apply_optimizer(optimizer, losses=[result.loss])

    # We can set a save_path in the runner to automatically checkpoint every so
    # often.  Otherwise at the end of the session, the model will be lost.
    runner = pt.train.Runner(save_path=FLAGS.save_path)
    with tf.Session():
        print('Initializing')
        tf.initialize_all_variables().run()
        for epoch in xrange(EPOCHS):
            # Shuffle the training data.
            train_images, train_labels = permute_data(
                (train_images, train_labels))

            runner.train_model(
                train_op,
                result.loss,
                len(train_images),
                feed_vars=(image_placeholder, labels_placeholder),
                feed_data=pt.train.feed_numpy(BATCH_SIZE, train_images, train_labels),
                print_every=100)
            classification_accuracy = runner.evaluate_model(
                accuracy,
                len(test_images),
                feed_vars=(image_placeholder, labels_placeholder),
                feed_data=pt.train.feed_numpy(BATCH_SIZE, test_images, test_labels))
            print('Accuracy after %d epoch %g%%' % (
                epoch + 1, classification_accuracy * 100))
Example #18
    def test_run(self):
        runner = local_trainer.Runner()
        with tf.Session():
            optimizer = tf.train.GradientDescentOptimizer(0.5)
            train_op = pt.apply_optimizer(optimizer,
                                          losses=[self.softmax_result.loss])

            runner.train_model(train_op,
                               self.softmax_result.loss,
                               10, (self.input, self.target),
                               self.xor_data,
                               print_every=2)
Example #19
    def initialize(self,
                   n_iter=1000,
                   n_data=None,
                   n_print=100,
                   optimizer=None,
                   sess=None):
        """
        Initialize inference algorithm.

        Parameters
        ----------
        n_iter : int, optional
            Number of iterations for optimization.
        n_data : int, optional
            Number of samples for data subsampling. Default is to use all
            the data.
        n_print : int, optional
            Number of iterations for each print progress.
        optimizer : str, optional
            Which optimizer path to use: if None, a plain TensorFlow
            optimizer with a decaying learning rate; otherwise
            PrettyTensor's apply_optimizer. Defaults to TensorFlow.
        sess : tf.Session, optional
            TensorFlow session for computation.
        """
        self.n_iter = n_iter
        self.n_data = n_data
        self.n_print = n_print

        self.loss = tf.constant(0.0)

        loss = self.build_loss()
        if optimizer is None:
            # Use ADAM with a decaying scale factor
            global_step = tf.Variable(0, trainable=False)
            starter_learning_rate = 0.1
            learning_rate = tf.train.exponential_decay(starter_learning_rate,
                                                       global_step,
                                                       100,
                                                       0.9,
                                                       staircase=True)
            optimizer = tf.train.AdamOptimizer(learning_rate)
            self.train = optimizer.minimize(loss, global_step=global_step)
        else:
            optimizer = tf.train.AdamOptimizer(0.01, epsilon=1.0)
            self.train = pt.apply_optimizer(optimizer, losses=[loss])

        init = tf.initialize_all_variables()
        if sess is None:
            sess = tf.Session()

        sess.run(init)
        return sess
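A hypothetical call pattern for the initialize() above; the inference object and its build_loss() are assumed to be supplied by the enclosing class:

# 'inference' is assumed to be an instance of the enclosing class.
sess = inference.initialize(n_iter=500)
for t in range(inference.n_iter):
    sess.run(inference.train)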
Example #20
    def initialize(self, n_iter=1000, n_data=None, n_print=100,
        optimizer=None, scope=None):
        """Initialize variational inference algorithm.

        Set up ``tf.train.AdamOptimizer`` with a decaying scale factor.

        Initialize all variables

        Parameters
        ----------
        n_iter : int, optional
            Number of iterations for optimization.
        n_data : int, optional
            Number of samples for data subsampling. Default is to use all
            the data.
        n_print : int, optional
            Number of iterations for each print progress. To suppress print
            progress, then specify None.
        optimizer : str, optional
            Which optimizer path to use: if None, a plain TensorFlow
            optimizer with a decaying learning rate; otherwise
            PrettyTensor's apply_optimizer. Defaults to TensorFlow.
        scope : str, optional
            Scope of TensorFlow variable objects to optimize over.
        """
        self.n_iter = n_iter
        self.n_data = n_data
        self.n_print = n_print

        self.loss = tf.constant(0.0)

        loss = self.build_loss()
        if optimizer is None:
            var_list = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES,
                                         scope=scope)
            # Use ADAM with a decaying scale factor
            global_step = tf.Variable(0, trainable=False)
            starter_learning_rate = 0.1
            learning_rate = tf.train.exponential_decay(starter_learning_rate,
                                                global_step,
                                                100, 0.9, staircase=True)
            optimizer = tf.train.AdamOptimizer(learning_rate)
            self.train = optimizer.minimize(loss, global_step=global_step,
                                            var_list=var_list)
        else:
            if scope is not None:
                raise NotImplementedError("PrettyTensor optimizer does not accept a variable scope.")

            optimizer = tf.train.AdamOptimizer(0.01, epsilon=1.0)
            self.train = pt.apply_optimizer(optimizer, losses=[loss])

        init = tf.initialize_all_variables()
        init.run()
Example #21
def main(_=None):
    image_shape = inp.get_image_shape(FLAGS.input_folder)
    batch_shape = (BATCH_SIZE,) + image_shape

    print('>>', image_shape, batch_shape)

    image_placeholder  = tf.placeholder(tf.float32, [BATCH_SIZE, 28, 28, 1])
    labels_placeholder = tf.placeholder(tf.float32, [BATCH_SIZE, 10])

    if FLAGS.model == 'full':
        print('fully connected network')
        result = multilayer_fully_connected(image_placeholder, labels_placeholder)
    elif FLAGS.model == 'conv':
        print('conv network')
        result = lenet5(image_placeholder, labels_placeholder)

    accuracy = result.softmax.evaluate_classifier(labels_placeholder,
                                                  phase=pt.Phase.test)

    # Grab the data as numpy arrays.
    train_images, train_labels = data_utils.mnist(training=True)
    test_images,  test_labels  = data_utils.mnist(training=False)

    print(train_images.shape)
    print(train_labels.shape)

    optimizer = tf.train.GradientDescentOptimizer(0.01)
    train_op = pt.apply_optimizer(optimizer, losses=[result.loss])

    runner = pt.train.Runner(save_path=FLAGS.save_path)
    with tf.Session():
        for epoch in xrange(20):
            # Shuffle the training data.
            train_images, train_labels = data_utils.permute_data(
                (train_images, train_labels))
            train_images = inp.get_images(FLAGS.input_folder)

            runner.train_model(
                train_op,
                result.loss,
                _epoch_size,
                feed_vars=(image_placeholder, labels_placeholder),
                feed_data=pt.train.feed_numpy(BATCH_SIZE, train_images, train_labels),
                print_every=100)
            classification_accuracy = runner.evaluate_model(
                accuracy,
                _test_size,
                feed_vars=(image_placeholder, labels_placeholder),
                feed_data=pt.train.feed_numpy(BATCH_SIZE, test_images, test_labels))
            print('Accuracy after %d epoch %g%%' % (
                epoch + 1, classification_accuracy * 100))
Example #22
    def prepare_trainer(self, generator_loss, discriminator_loss):
        '''Helper function for init_opt'''
        all_vars = tf.trainable_variables()

        g_vars = [var for var in all_vars if
                  var.name.startswith('g_')]
        d_vars = [var for var in all_vars if
                  var.name.startswith('d_')]

        generator_opt = tf.train.AdamOptimizer(self.generator_lr,
                                               beta1=0.5)
        self.generator_trainer =\
            pt.apply_optimizer(generator_opt,
                               losses=[generator_loss],
                               var_list=g_vars)
        discriminator_opt = tf.train.AdamOptimizer(self.discriminator_lr,
                                                   beta1=0.5)
        self.discriminator_trainer =\
            pt.apply_optimizer(discriminator_opt,
                               losses=[discriminator_loss],
                               var_list=d_vars)
        self.log_vars.append(("g_learning_rate", self.generator_lr))
        self.log_vars.append(("d_learning_rate", self.discriminator_lr))
    def test_checkpoint(self):
        f = os.path.join(self.tmp_file, "checkpoint")
        runner = local_trainer.Runner(save_path=f)
        with tf.Session():
            optimizer = tf.train.GradientDescentOptimizer(0.1)
            train_op = pt.apply_optimizer(optimizer, losses=[self.softmax_result.loss])

            runner.train_model(
                train_op, self.softmax_result.loss, 10, (self.input, self.target), self.xor_data, print_every=2
            )
        assert runner._saver.last_checkpoints, "Expected checkpoints."
        for x in runner._saver.last_checkpoints:
            self.assertTrue(os.path.isfile(x), "Promised file not saved: %s" % x)
            self.assertTrue(x.startswith(f), "Name not as expected: %s" % x)
Example #24
    def train(self, epochs, batch_size, learning_rate, save_to=None):

        self.train_step = pt.apply_optimizer(tf.train.AdamOptimizer(learning_rate, epsilon=1), losses=[self.error_function])
        init = tf.initialize_all_variables()
        self.sess.run(init)
        pbar = ProgressBar(widgets=[Percentage(), Bar()], maxval=epochs).start()
        while self.get_epoch() < epochs:
            input_data = self.hdf5reader.next()
            _, loss_value = self.sess.run(
                [self.train_step, self.error_function],
                {
                    self.encoder.input_data: input_data
                }
            )
            pbar.update(self.get_epoch())
        pbar.finish()
Example #25
    def initialize(self, n_data=None):
        # TODO refactor to use VariationalInference's initialize()
        self.n_data = n_data

        # TODO don't fix number of covariates
        self.x = tf.placeholder(tf.float32, [self.n_data, 28 * 28])
        self.losses = tf.constant(0.0)

        loss = self.build_loss()
        optimizer = tf.train.AdamOptimizer(1e-2, epsilon=1.0)
        # TODO move this to not rely on Pretty Tensor
        self.train = pt.apply_optimizer(optimizer, losses=[loss])

        init = tf.initialize_all_variables()
        sess = tf.Session()
        sess.run(init)
        return sess
Example #26
    def initialize(self, n_iter=1000, n_data=None, n_print=100,
        optimizer=None, sess=None):
        """
        Initialize inference algorithm.

        Parameters
        ----------
        n_iter : int, optional
            Number of iterations for optimization.
        n_data : int, optional
            Number of samples for data subsampling. Default is to use all
            the data.
        n_print : int, optional
            Number of iterations for each print progress.
        optimizer : str, optional
            Which optimizer path to use: if None, a plain TensorFlow
            optimizer with a decaying learning rate; otherwise
            PrettyTensor's apply_optimizer. Defaults to TensorFlow.
        sess : tf.Session, optional
            TensorFlow session for computation.
        """
        self.n_iter = n_iter
        self.n_data = n_data
        self.n_print = n_print

        self.loss = tf.constant(0.0)

        loss = self.build_loss()
        if optimizer is None:
            # Use ADAM with a decaying scale factor
            global_step = tf.Variable(0, trainable=False)
            starter_learning_rate = 0.1
            learning_rate = tf.train.exponential_decay(starter_learning_rate,
                                                global_step,
                                                100, 0.9, staircase=True)
            optimizer = tf.train.AdamOptimizer(learning_rate)
            self.train = optimizer.minimize(loss, global_step=global_step)
        else:
            optimizer = tf.train.AdamOptimizer(0.01, epsilon=1.0)
            self.train = pt.apply_optimizer(optimizer, losses=[loss])

        init = tf.initialize_all_variables()
        if sess is None:
            sess = tf.Session()

        sess.run(init)
        return sess
Example #27
    def initialize(self, n_data=None):
        # TODO refactor to use VariationalInference's initialize()
        self.n_data = n_data

        # TODO don't fix number of covariates
        self.x = tf.placeholder(tf.float32, [self.n_data, 28 * 28])
        self.losses = tf.constant(0.0)

        loss = self.build_loss()
        optimizer = tf.train.AdamOptimizer(1e-2, epsilon=1.0)
        # TODO move this to not rely on Pretty Tensor
        self.train = pt.apply_optimizer(optimizer, losses=[loss])

        init = tf.initialize_all_variables()
        sess = tf.Session()
        sess.run(init)
        return sess
    def restore_helper(self, runner):
        with tf.Session():
            classification_accuracy = self.softmax_result.softmax.evaluate_classifier(self.target, phase=pt.Phase.test)

            optimizer = tf.train.GradientDescentOptimizer(0.5)
            train_op = pt.apply_optimizer(optimizer, losses=[self.softmax_result.loss])

            runner.train_model(
                train_op, self.softmax_result.loss, 10, (self.input, self.target), self.xor_data, print_every=2
            )
            self.assertTrue(runner._last_init)
            self.assertFalse(runner._last_restore)
        with tf.Session():
            save_paths = list(runner._saver.last_checkpoints)
            runner.evaluate_model(classification_accuracy, 1, (self.input, self.target), self.xor_data)
            self.assertEquals(runner._saver.last_checkpoints, save_paths, "No additional paths should have been saved.")
            self.assertFalse(runner._last_init)
  def test_checkpoint(self):
    f = os.path.join(self.tmp_file, 'checkpoint')
    runner = local_trainer.Runner(save_path=f)
    with tf.Session():
      optimizer = tf.train.GradientDescentOptimizer(0.1)
      train_op = pt.apply_optimizer(optimizer,
                                    losses=[self.softmax_result.loss])

      runner.train_model(train_op,
                         self.softmax_result.loss,
                         10,
                         (self.input, self.target),
                         self.xor_data,
                         print_every=2)
    assert runner._saver.last_checkpoints, 'Expected checkpoints.'
    for x in runner._saver.last_checkpoints:
      self.assertTrue(os.path.isfile(x), 'Promised file not saved: %s' % x)
      self.assertTrue(x.startswith(f), 'Name not as expected: %s' % x)
  def test_eval(self):
    f = os.path.join(self.tmp_file, 'checkpoint')
    runner = local_trainer.Runner(save_path=f)
    with tf.Session():
      classification_accuracy = self.softmax_result.softmax.evaluate_classifier(
          self.target, phase=pt.Phase.test)

      optimizer = tf.train.GradientDescentOptimizer(0.2)
      train_op = pt.apply_optimizer(optimizer,
                                    losses=[self.softmax_result.loss])

      runner.train_model(train_op,
                         self.softmax_result.loss,
                         100,
                         (self.input, self.target),
                         self.xor_data,
                         print_every=50)
      self.assertTrue(runner._last_init)
      save_paths = list(runner._saver.last_checkpoints)

      # The accuracy should be 50% right now since model is consistently
      # generated.
      accuracy = runner.evaluate_model(classification_accuracy,
                                       1,
                                       (self.input, self.target),
                                       self.xor_data)
      self.assertEquals(runner._saver.last_checkpoints, save_paths,
                        'No additional paths should have been saved.')
      self.assertFalse(runner._last_init)
      self.assertEqual(accuracy, 0.5)

      # Train the model to 100% accuracy.
      runner.train_model(train_op,
                         self.softmax_result.loss,
                         2000,
                         (self.input, self.target),
                         self.xor_data,
                         print_every=1000)
      accuracy = runner.evaluate_model(classification_accuracy, 1,
                                       (self.input, self.target), self.xor_data)
      self.assertFalse(runner._last_init)

      # Make sure that the previous computation didn't impact this eval.
      self.assertEqual(accuracy, 1.0)
Example #31
def main(_=None):
    image_placeholder = tf.placeholder(tf.float32, [BATCH_SIZE, 28 * 28])
    labels_placeholder = tf.placeholder(tf.float32, [BATCH_SIZE, 10])

    if FLAGS.model == 'full':
        result = multilayer_fully_connected(image_placeholder,
                                            labels_placeholder)
    elif FLAGS.model == 'conv':
        result = lenet5(image_placeholder, labels_placeholder)
    else:
        raise ValueError('model must be full or conv: %s' % FLAGS.model)

    accuracy = result.softmax.evaluate_classifier(labels_placeholder,
                                                  phase=pt.Phase.test)
    optimizer = tf.train.GradientDescentOptimizer(0.01)
    train_op = pt.apply_optimizer(optimizer, losses=[result.loss])

    # data
    mnist = data_mnist.read_data_sets(one_hot=True)

    runner = pt.train.Runner(save_path=FLAGS.save_path)

    with tf.Session():
        for epoch in xrange(10):
            # train
            runner.train_model(
                train_op,
                result.loss,
                EPOCH_SIZE,
                feed_vars=(image_placeholder, labels_placeholder),
                feed_data=pt.train.feed_numpy(BATCH_SIZE, mnist.train.images,
                                              mnist.train.labels),
                print_every=100)
            # accuracy
            classification_accuracy = runner.evaluate_model(
                accuracy,
                TEST_SIZE,
                feed_vars=(image_placeholder, labels_placeholder),
                feed_data=pt.train.feed_numpy(BATCH_SIZE, mnist.test.images,
                                              mnist.test.labels))
            print('Accuracy after %d epoch %g%%' %
                  (epoch + 1, classification_accuracy * 100))
Example #32
    def restore_helper(self, runner):
        with tf.Session():
            classification_accuracy = self.softmax_result.softmax.evaluate_classifier(
                self.target, phase=pt.Phase.test)

            optimizer = tf.train.GradientDescentOptimizer(0.5)
            train_op = pt.apply_optimizer(optimizer,
                                          losses=[self.softmax_result.loss])

            runner.train_model(train_op,
                               self.softmax_result.loss,
                               10, (self.input, self.target),
                               self.xor_data,
                               print_every=2)
            self.assertTrue(runner._last_init)
            self.assertFalse(runner._last_restore)
        with tf.Session():
            save_paths = list(runner._saver.last_checkpoints)
            runner.evaluate_model(classification_accuracy, 1,
                                  (self.input, self.target), self.xor_data)
            self.assertEquals(runner._saver.last_checkpoints, save_paths,
                              'No additional paths should have been saved.')
            self.assertFalse(runner._last_init)
Example #33
def main(_=None):
  image_placeholder = tf.placeholder(tf.float32, [BATCH_SIZE, 28*28])
  labels_placeholder = tf.placeholder(tf.float32, [BATCH_SIZE, 10])

  if FLAGS.model == 'full':
    result = multilayer_fully_connected(image_placeholder, labels_placeholder)
  elif FLAGS.model == 'conv':
    result = lenet5(image_placeholder, labels_placeholder)
  else:
    raise ValueError('model must be full or conv: %s' % FLAGS.model)

  accuracy = result.softmax.evaluate_classifier(labels_placeholder, phase=pt.Phase.test)
  optimizer = tf.train.GradientDescentOptimizer(0.01)
  train_op = pt.apply_optimizer(optimizer, losses=[result.loss])

  # data
  mnist = data_mnist.read_data_sets(one_hot=True)

  runner = pt.train.Runner(save_path=FLAGS.save_path)

  with tf.Session():
    for epoch in xrange(10):
      # train
      runner.train_model(
          train_op,
          result.loss,
          EPOCH_SIZE,
          feed_vars=(image_placeholder, labels_placeholder),
          feed_data=pt.train.feed_numpy(BATCH_SIZE, mnist.train.images, mnist.train.labels),
          print_every=100)
      # accuracy
      classification_accuracy = runner.evaluate_model(
          accuracy,
          TEST_SIZE,
          feed_vars=(image_placeholder, labels_placeholder),
          feed_data=pt.train.feed_numpy(BATCH_SIZE, mnist.test.images, mnist.test.labels))
      print('Accuracy after %d epoch %g%%' % (epoch + 1, classification_accuracy * 100))
Example #34
def main(_=None):
    print('Starting Baby Names')

    # Since we are feeding our data as numpy arrays, we need to create
    # placeholders in the graph.
    # These must then be fed using the feed dict.
    input_placeholder = tf.placeholder(tf.int32, [BATCH_SIZE, TIMESTEPS])
    output_placeholder = tf.placeholder(tf.float32, [BATCH_SIZE, SEXES])

    inp = data_utils.reshape_data(input_placeholder)

    # Create a label for each timestep.
    labels = data_utils.reshape_data(tf.reshape(
        tf.tile(output_placeholder, [1, TIMESTEPS]),
        [BATCH_SIZE, TIMESTEPS, SEXES]),
                                     per_example_length=2)

    # We also need to set per example weights so that the softmax doesn't output a
    # prediction on intermediate nodes.
    length_placeholder = tf.placeholder(tf.int32, [BATCH_SIZE, 1])

    # We need a dense multiplier for the per example weights.  The only place
    # that has a non-zero loss is the first EOS after the last character of the
    # name; the characters in the name and the trailing EOS characters are given a
    # 0 loss by assigning the weight to 0.0 and in the end only one character in
    # each batch has a weight of 1.0.
    # sparse_to_dense does a lookup using the indices from the first Tensor.
    # Because we are filling in a 2D array, the indices need to be 2 dimensional.
    # Since we want to assign 1 value for each row, the first dimension can just
    # be a sequence.
    t = tf.concat(1, [
        tf.constant(numpy.arange(BATCH_SIZE).reshape((BATCH_SIZE, 1)),
                    dtype=tf.int32), length_placeholder
    ])

    # Squeeze removes dimensions that are equal to 1.  per_example_weights must
    # end up as 1 dimensional.
    per_example_weights = data_utils.reshape_data(
        tf.sparse_to_dense(t, [BATCH_SIZE, TIMESTEPS], 1.0,
                           default_value=0.0)).squeeze()

    # We need 2 copies of the graph that share variables.  The first copy runs
    # training and will do dropout if specified and the second will not include
    # dropout.  Dropout is controlled by the phase argument, which sets the mode
    # consistently throughout a graph.
    with tf.variable_scope('baby_names'):
        result = create_model(inp, labels, TIMESTEPS, per_example_weights)

    # Call variable scope by name so we also create a name scope.  This ensures
    # that we share variables and our names are properly organized.
    with tf.variable_scope('baby_names', reuse=True):
        # Some ops have different behaviors in test vs train and these take a phase
        # argument.
        test_result = create_model(inp,
                                   labels,
                                   TIMESTEPS,
                                   per_example_weights,
                                   phase=pt.Phase.test)

    # For tracking accuracy in evaluation, we need to add an evaluation node.
    # We only run this when testing, so we need to specify that in the phase.
    # Some ops have different behaviors in test vs train and these take a phase
    # argument.
    accuracy = test_result.softmax.evaluate_classifier(
        labels, phase=pt.Phase.test, per_example_weights=per_example_weights)

    # We can also compute a batch accuracy to monitor progress.
    batch_accuracy = result.softmax.evaluate_classifier(
        labels, phase=pt.Phase.train, per_example_weights=per_example_weights)

    # Grab the inputs, outputs and lengths as numpy arrays.
    # Lengths could have been calculated from names, but it was easier to
    # calculate inside the utility function.
    names, sex, lengths = data_utils.baby_names(TIMESTEPS)

    epoch_size = len(names) // BATCH_SIZE
    # Create the gradient optimizer and apply it to the graph.
    # pt.apply_optimizer adds regularization losses and sets up a step counter
    # (pt.global_step()) for you.
    # This sequence model does very well with initially high rates.
    optimizer = tf.train.AdagradOptimizer(
        tf.train.exponential_decay(1.0,
                                   pt.global_step(),
                                   epoch_size,
                                   0.95,
                                   staircase=True))
    train_op = pt.apply_optimizer(optimizer, losses=[result.loss])

    # We can set a save_path in the runner to automatically checkpoint every so
    # often.  Otherwise at the end of the session, the model will be lost.
    runner = pt.train.Runner(save_path=FLAGS.save_path)
    with tf.Session():
        for epoch in xrange(100):
            # Shuffle the training data.
            names, sex, lengths = data_utils.permute_data(
                (names, sex, lengths))

            runner.train_model(
                train_op, [result.loss, batch_accuracy],
                epoch_size,
                feed_vars=(input_placeholder, output_placeholder,
                           length_placeholder),
                feed_data=pt.train.feed_numpy(BATCH_SIZE, names, sex, lengths),
                print_every=100)
            classification_accuracy = runner.evaluate_model(
                accuracy,
                epoch_size,
                print_every=0,
                feed_vars=(input_placeholder, output_placeholder,
                           length_placeholder),
                feed_data=pt.train.feed_numpy(BATCH_SIZE, names, sex, lengths))

            print('Accuracy after epoch %d: %g%%' % (
                epoch + 1, classification_accuracy * 100))
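To make the per-example-weights trick above concrete, here is a small worked case (the values are assumed for illustration). With BATCH_SIZE = 2, TIMESTEPS = 4 and lengths [[1], [3]]:

# t stacks a row index next to each example's length:
#   t = [[0, 1],
#        [1, 3]]
# sparse_to_dense(t, [2, 4], 1.0, default_value=0.0) then yields
#   [[0., 1., 0., 0.],
#    [0., 0., 0., 1.]]
# so exactly one timestep per example (the first EOS after the name)
# carries loss weight 1.0 and every other timestep is weighted 0.0.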
Example #35
def train_mnist_discrim(prior, lossmetric="KL"):
    '''Train model to output transformation that prevents leaking private info
       using a discriminator to aid producing natural images
    '''
    data_dir = os.path.join(FLAGS.working_directory, "data")
    mnist_dir = os.path.join(data_dir, "mnist")
    model_directory = os.path.join(
        mnist_dir,
        lossmetric + "discrim_privacy_checkpoints" + str(encode_coef))
    input_tensor = tf.placeholder(tf.float32,
                                  [FLAGS.batch_size, FLAGS.input_size])
    output_tensor = tf.placeholder(tf.float32,
                                   [FLAGS.batch_size, FLAGS.output_size])
    private_tensor = tf.placeholder(tf.float32,
                                    [FLAGS.batch_size, FLAGS.private_size])
    rawc_tensor = tf.placeholder(tf.float32, [FLAGS.batch_size])
    prior_tensor = tf.constant(prior, tf.float32, [FLAGS.private_size])

    # Load the data (no extra preprocessing is necessary for the MNIST data).
    mnist = input_data.read_data_sets(mnist_dir, one_hot=True)

    def get_feed(batch_no, training):
        if training:
            x, c = mnist.train.next_batch(FLAGS.batch_size)
        else:
            x, c = mnist.test.next_batch(FLAGS.batch_size)
        rawc = np.argmax(c, axis=1)
        return {
            input_tensor: x,
            output_tensor: x,
            private_tensor: c[:, :FLAGS.private_size],
            rawc_tensor: rawc
        }

    # Instantiate the model
    with pt.defaults_scope(activation_fn=tf.nn.relu,
                           batch_normalize=True,
                           learned_moments_update_rate=3e-4,
                           variance_epsilon=1e-3,
                           scale_after_normalization=True):
        with pt.defaults_scope(phase=pt.Phase.train):
            with tf.variable_scope("encoder") as scope:
                z = dvibcomp.privacy_encoder(input_tensor, private_tensor)
                encode_params = tf.trainable_variables()
                e_param_len = len(encode_params)
            with tf.variable_scope("decoder") as scope:
                xhat, chat, mean, stddev = dvibcomp.mnist_predictor(z)
                all_params = tf.trainable_variables()
                d_param_len = len(all_params) - e_param_len
            with tf.variable_scope("discrim") as scope:
                D1 = dvibcomp.mnist_discriminator(
                    input_tensor)  # positive samples
            with tf.variable_scope("discrim", reuse=True) as scope:
                D2 = dvibcomp.mnist_discriminator(xhat)  # negative samples
                all_params = tf.trainable_variables()
                discrim_len = len(all_params) - (d_param_len + e_param_len)

    # Calculating losses
    _, KLloss = dvibloss.encoding_cost(xhat, chat, input_tensor,
                                       private_tensor, prior_tensor)
    loss2x, loss2c = dvibloss.recon_cost(xhat,
                                         chat,
                                         input_tensor,
                                         private_tensor,
                                         softmax=True)
    loss_g = dvibloss.get_gen_cost(D2)
    loss_d = dvibloss.get_discrim_cost(D1, D2)
    loss_vae = dvibloss.get_vae_cost(mean, stddev)
    # Record losses of the MI approximation and the Sibson MI
    h_c, h_cz, _, _ = dvibloss.MI_approx(input_tensor, private_tensor,
                                         rawc_tensor, xhat, chat, z)
    I_c_cz = tf.abs(h_c - h_cz)
    # use alpha = 3 first, may be tuned
    sibMI_c_cz = dvibloss.sibsonMI_approx(z, chat, 3)
    # Compose losses
    if lossmetric == "KL":
        loss1 = encode_coef * loss_g + KLloss
    if lossmetric == "MI":
        loss1 = encode_coef * loss_g + I_c_cz
    if lossmetric == "sibMI":
        loss1 = encode_coef * loss_g + sibMI_c_cz
    loss2 = decode_coef * loss_g + loss2c
    loss3 = loss_d

    with tf.name_scope('pub_prediction'):
        with tf.name_scope('pub_distance'):
            pub_dist = tf.reduce_mean((xhat - output_tensor)**2)
    with tf.name_scope('sec_prediction'):
        with tf.name_scope('sec_distance'):
            sec_dist = tf.reduce_mean((chat - private_tensor)**2)
            #correct_pred = tf.less(tf.abs(chat - private_tensor), 0.5)
            correct_pred = tf.equal(tf.argmax(chat, axis=1),
                                    tf.argmax(private_tensor, axis=1))
            sec_acc = tf.reduce_mean(tf.cast(correct_pred, tf.float32))

    optimizer = tf.train.AdamOptimizer(FLAGS.learning_rate, epsilon=1.0)
    e_train = pt.apply_optimizer(
        optimizer,
        losses=[loss1],
        regularize=True,
        include_marked=True,
        var_list=encode_params)  # privatizer/encoder training op
    g_train = pt.apply_optimizer(
        optimizer,
        losses=[loss2],
        regularize=True,
        include_marked=True,
        var_list=all_params[e_param_len:])  # generator/decoder training op
    d_train = pt.apply_optimizer(
        optimizer,
        losses=[loss3],
        regularize=True,
        include_marked=True,
        var_list=all_params[e_param_len +
                            d_param_len:])  # discriminator training op
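    # Note: the var_list slices above assume tf.trainable_variables()
    # returns variables in creation order, i.e. [encoder | decoder | discrim].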
    # Logging arrays (per-epoch statistics)
    e_loss_train = np.zeros(FLAGS.max_epoch)
    g_loss_train = np.zeros(FLAGS.max_epoch)
    d_loss_train = np.zeros(FLAGS.max_epoch)
    pub_dist_train = np.zeros(FLAGS.max_epoch)
    sec_dist_train = np.zeros(FLAGS.max_epoch)
    loss2x_train = np.zeros(FLAGS.max_epoch)
    loss2c_train = np.zeros(FLAGS.max_epoch)
    KLloss_train = np.zeros(FLAGS.max_epoch)
    MIloss_train = np.zeros(FLAGS.max_epoch)
    sibMIloss_train = np.zeros(FLAGS.max_epoch)
    sec_acc_train = np.zeros(FLAGS.max_epoch)
    e_loss_val = np.zeros(FLAGS.max_epoch)
    g_loss_val = np.zeros(FLAGS.max_epoch)
    d_loss_val = np.zeros(FLAGS.max_epoch)
    pub_dist_val = np.zeros(FLAGS.max_epoch)
    sec_dist_val = np.zeros(FLAGS.max_epoch)
    loss2x_val = np.zeros(FLAGS.max_epoch)
    loss2c_val = np.zeros(FLAGS.max_epoch)
    KLloss_val = np.zeros(FLAGS.max_epoch)
    MIloss_val = np.zeros(FLAGS.max_epoch)
    sibMIloss_val = np.zeros(FLAGS.max_epoch)
    sec_acc_val = np.zeros(FLAGS.max_epoch)
    xhat_val = []
    # Tensorboard logging
    #tf.summary.scalar('KL', KLloss)
    #tf.summary.scalar('loss_x', loss2x)
    #tf.summary.scalar('loss_c', loss2c)
    #tf.summary.scalar('pub_dist', pub_dist)
    #tf.summary.scalar('sec_dist', sec_dist)

    init = tf.global_variables_initializer()
    saver = tf.train.Saver()
    # Configure the session to allow GPU memory growth
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    #config.gpu_options.per_process_gpu_memory_fraction = 0.8
    config.log_device_placement = False

    sess = tf.Session(config=config)
    sess.run(init)
    #merged = tf.summary.merge_all()
    #train_writer = tf.summary.FileWriter(FLAGS.summary_dir + '/train', sess.graph)
    #test_writer = tf.summary.FileWriter(FLAGS.summary_dir + '/test')

    for epoch in range(FLAGS.max_epoch):
        widgets = ["epoch #%d|" % epoch, Percentage(), Bar(), ETA()]
        pbar = ProgressBar(maxval=FLAGS.updates_per_epoch, widgets=widgets)
        pbar.start()

        pub_loss = 0
        sec_loss = 0
        sec_accv = 0
        e_training_loss = 0
        g_training_loss = 0
        d_training_loss = 0
        KLv = 0
        MIv = 0
        sibMIv = 0
        loss2xv = 0
        loss2cv = 0
        #pdb.set_trace()

        for i in range(FLAGS.updates_per_epoch):
            pbar.update(i)
            feeds = get_feed(i, True)
            #zv, xhatv, chatv, meanv, stddevv, sec_pred = sess.run([z, xhat, chat, mean, stddev, correct_pred], feeds)
            pub_tmp, sec_tmp, sec_acc_tmp, KLtmp, MItmp, sibMItmp, loss2xtmp, loss2ctmp, loss3tmp = sess.run(
                [
                    pub_dist, sec_dist, sec_acc, KLloss, I_c_cz, sibMI_c_cz,
                    loss2x, loss2c, loss_vae
                ], feeds)
            #_, e_loss_value, _, g_loss_value, _, d_loss_value = sess.run([e_train, loss1, g_train, loss2, d_train, loss3], feeds)
            _, e_loss_value = sess.run([e_train, loss1], feeds)
            _, g_loss_value = sess.run([g_train, loss2], feeds)
            _, d_loss_value = sess.run([d_train, loss3], feeds)
            if (np.isnan(e_loss_value) or np.isnan(g_loss_value)
                    or np.isnan(d_loss_value)):
                pdb.set_trace()
                break
            #train_writer.add_summary(summary, i)
            e_training_loss += e_loss_value
            g_training_loss += g_loss_value
            d_training_loss += d_loss_value
            pub_loss += pub_tmp
            sec_loss += sec_tmp
            sec_accv += sec_acc_tmp
            KLv += KLtmp
            MIv += MItmp
            sibMIv += sibMItmp
            loss2xv += loss2xtmp
            loss2cv += loss2ctmp

        e_training_loss /= FLAGS.updates_per_epoch
        g_training_loss /= FLAGS.updates_per_epoch
        d_training_loss /= FLAGS.updates_per_epoch
        pub_loss /= FLAGS.updates_per_epoch
        sec_loss /= FLAGS.updates_per_epoch
        sec_accv /= FLAGS.updates_per_epoch
        loss2xv /= FLAGS.updates_per_epoch
        loss2cv /= FLAGS.updates_per_epoch
        KLv /= FLAGS.updates_per_epoch
        MIv /= FLAGS.updates_per_epoch
        sibMIv /= FLAGS.updates_per_epoch

        print("Loss for E %f, for G %f, for D %f" %
              (e_training_loss, g_training_loss, d_training_loss))
        print('Training public loss at epoch %s: %s' % (epoch, pub_loss))
        print('Training private loss at epoch %s: %s, private accuracy: %s' %
              (epoch, sec_loss, sec_accv))
        e_loss_train[epoch] = e_training_loss
        g_loss_train[epoch] = g_training_loss
        d_loss_train[epoch] = d_training_loss
        pub_dist_train[epoch] = pub_loss
        sec_dist_train[epoch] = sec_loss
        loss2x_train[epoch] = loss2xv
        loss2c_train[epoch] = loss2cv
        KLloss_train[epoch] = KLv
        MIloss_train[epoch] = MIv
        sibMIloss_train[epoch] = sibMIv
        sec_acc_train[epoch] = sec_accv
        # Force garbage collection
        gc.collect()
        # Validation
        if epoch % 10 == 9:
            pub_loss = 0
            sec_loss = 0
            e_val_loss = 0
            g_val_loss = 0
            d_val_loss = 0
            loss2xv = 0
            loss2cv = 0
            KLv = 0
            MIv = 0
            sibMIv = 0
            sec_accv = 0

            test_batches = int(FLAGS.test_dataset_size / FLAGS.batch_size)
            for i in range(test_batches):
                feeds = get_feed(i, False)
                e_val_tmp, g_val_tmp, d_val_tmp, pub_tmp, sec_tmp, MItmp, sibMItmp, KLtmp, loss2xtmp, loss2ctmp, sec_acc_tmp = sess.run(
                    [
                        loss1, loss2, loss3, pub_dist, sec_dist, I_c_cz,
                        sibMI_c_cz, KLloss, loss2x, loss2c, sec_acc
                    ], feeds)
                if (epoch >= FLAGS.max_epoch - 10):
                    xhat_val.extend(sess.run(xhat, feeds))
                #test_writer.add_summary(summary, i)
                e_val_loss += e_val_tmp
                g_val_loss += g_val_tmp
                d_val_loss += d_val_tmp
                pub_loss += pub_tmp
                sec_loss += sec_tmp
                sec_accv += sec_acc_tmp
                KLv += KLtmp
                MIv += MItmp
                sibMIv += sibMItmp
                loss2xv += loss2xtmp
                loss2cv += loss2ctmp

            pub_loss /= test_batches
            sec_loss /= test_batches
            e_val_loss /= test_batches
            g_val_loss /= test_batches
            d_val_loss /= test_batches
            loss2xv /= test_batches
            loss2cv /= test_batches
            KLv /= test_batches
            MIv /= test_batches
            sibMIv /= test_batches
            sec_accv /= test_batches

            print('Test public loss at epoch %s: %s' % (epoch, pub_loss))
            print('Test private loss at epoch %s: %s' % (epoch, sec_loss))
            e_loss_val[epoch] = e_val_loss
            g_loss_val[epoch] = g_val_loss
            d_loss_val[epoch] = d_val_loss
            pub_dist_val[epoch] = pub_loss
            sec_dist_val[epoch] = sec_loss
            loss2x_val[epoch] = loss2xv
            loss2c_val[epoch] = loss2cv
            KLloss_val[epoch] = KLv
            MIloss_val[epoch] = MIv
            sibMIloss_val[epoch] = sibMIv
            sec_acc_val[epoch] = sec_accv

            if not (np.isnan(e_val_loss) or np.isnan(g_val_loss)
                    or np.isnan(d_val_loss)):
                savepath = saver.save(sess,
                                      model_directory + '/mnist_privacy',
                                      global_step=epoch)
                print('Model saved at epoch %s, path is %s' %
                      (epoch, savepath))
                gc.collect()

    np.savez(os.path.join(model_directory, 'synth_trainstats'),
             e_loss_train=e_loss_train,
             g_loss_train=g_loss_train,
             d_loss_train=d_loss_train,
             pub_dist_train=pub_dist_train,
             sec_dist_train=sec_dist_train,
             loss2x_train=loss2x_train,
             loss2c_train=loss2c_train,
             KLloss_train=KLloss_train,
             MIloss_train=MIloss_train,
             sibMIloss_train=sibMIloss_train,
             sec_acc_train=sec_acc_train,
             e_loss_val=e_loss_val,
             g_loss_val=g_loss_val,
             d_loss_val=d_loss_val,
             pub_dist_val=pub_dist_val,
             sec_dist_val=sec_dist_val,
             loss2x_val=loss2x_val,
             loss2c_val=loss2c_val,
             KLloss_val=KLloss_val,
             MIloss_val=MIloss_val,
             sibMIloss_val=sibMIloss_val,
             sec_acc_val=sec_acc_val,
             xhat_val=xhat_val)

    sess.close()
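# A minimal sketch (not from the original example) of the alternating
# per-batch update pattern used above; each step optimizes one loss while
# var_list keeps the other sub-networks fixed:
#   for i in range(FLAGS.updates_per_epoch):
#       sess.run(e_train, feeds)  # encoder step on loss1
#       sess.run(g_train, feeds)  # decoder step on loss2
#       sess.run(d_train, feeds)  # discriminator step on loss3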
Exemple #36
0
def train_ferg(prior, lossmetric="KL", order=1.01):
    '''Train a model to output a transformation that prevents leaking
    private information.
    '''
    data_dir = os.path.join(FLAGS.working_directory, "data")
    dataset_dir = os.path.join(data_dir, "ferg")
    model_directory = os.path.join(
        dataset_dir, lossmetric + "privacy_checkpoints" + str(encode_coef) +
        '_' + str(decode_coef) + '_' + str(order))
    input_tensor = tf.placeholder(tf.float32,
                                  [FLAGS.batch_size, FLAGS.input_size])
    output_tensor = tf.placeholder(tf.float32,
                                   [FLAGS.batch_size, FLAGS.output_size])
    private_tensor = tf.placeholder(tf.float32,
                                    [FLAGS.batch_size, FLAGS.private_size])
    prior_tensor = tf.constant(prior, tf.float32, [FLAGS.private_size])
    rawc_tensor = tf.placeholder(tf.float32, [FLAGS.batch_size])
    rawy_tensor = tf.placeholder(tf.float32, [FLAGS.batch_size])

    # Load the FERG data, formatted as vectors of real values between 0 and 1
    # (the commented block below shuffles the dataset, saves it, and reloads it)
    fergdata = np.load(os.path.join(dataset_dir, "ferg256.npz"))

    #fergdataindices = np.random.permutation(FLAGS.dataset_size+FLAGS.test_dataset_size)
    #fergdataimgs = fergdata['imgs'][fergdataindices]
    #fergdataidentity = fergdata['identity'][fergdataindices]
    #fergdataexpression = fergdata['expression'][fergdataindices]
    #np.savez(os.path.join(dataset_dir, "ferg256.npz"),
    #        imgs = fergdataimgs,
    #        identity = fergdataidentity,
    #        expression = fergdataexpression)
    #fergdata = np.load(os.path.join(dataset_dir, "ferg256.npz"))

    def get_feed(batch_no, training, ferg):
        if training:
            x = ferg['imgs'][batch_no * FLAGS.batch_size:(batch_no + 1) *
                             FLAGS.batch_size]
            c = ferg['identity'][batch_no * FLAGS.batch_size:(batch_no + 1) *
                                 FLAGS.batch_size]
            y = ferg['expression'][batch_no * FLAGS.batch_size:(batch_no + 1) *
                                   FLAGS.batch_size]
        else:
            x = ferg['imgs'][batch_no * FLAGS.batch_size +
                             FLAGS.dataset_size:(batch_no + 1) *
                             FLAGS.batch_size + FLAGS.dataset_size]
            c = ferg['identity'][batch_no * FLAGS.batch_size +
                                 FLAGS.dataset_size:(batch_no + 1) *
                                 FLAGS.batch_size + FLAGS.dataset_size]
            y = ferg['expression'][batch_no * FLAGS.batch_size +
                                   FLAGS.dataset_size:(batch_no + 1) *
                                   FLAGS.batch_size + FLAGS.dataset_size]
        x = x.reshape([FLAGS.batch_size, FLAGS.input_size])
        # convert labels to one hot encoding
        cs = np.zeros((FLAGS.batch_size, FLAGS.private_size))
        cs[np.arange(FLAGS.batch_size), c] = 1
        ys = np.zeros((FLAGS.batch_size, FLAGS.output_size))
        ys[np.arange(FLAGS.batch_size), y] = 1
        return {
            input_tensor: x,
            output_tensor: ys,
            private_tensor: cs,
            rawc_tensor: c,
            rawy_tensor: y
        }
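    # Note: the one-hot conversion in get_feed is equivalent to the
    # (illustrative) shorthand cs = np.eye(FLAGS.private_size)[c] and
    # ys = np.eye(FLAGS.output_size)[y] for integer label arrays c and y.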

    #instantiate model
    with pt.defaults_scope(activation_fn=tf.nn.relu,
                           batch_normalize=True,
                           learned_moments_update_rate=3e-4,
                           variance_epsilon=1e-3,
                           scale_after_normalization=True):
        with pt.defaults_scope(phase=pt.Phase.train):
            with tf.variable_scope("encoder") as scope:
                z = dvibcomp.ferg_encoder(input_tensor)
                encode_params = tf.trainable_variables()
                e_param_len = len(encode_params)
            with tf.variable_scope("decoder") as scope:
                yhat, chat, mean, stddev = dvibcomp.ferg_twotask_predictor(z)
                all_params = tf.trainable_variables()
                d_param_len = len(all_params) - e_param_len

    # Calculating losses
    _, KLloss = dvibloss.encoding_cost(yhat,
                                       chat,
                                       output_tensor,
                                       private_tensor,
                                       prior_tensor,
                                       xmetric="CE",
                                       independent=False)
    loss2x, loss2c = dvibloss.recon_cost(yhat,
                                         chat,
                                         output_tensor,
                                         private_tensor,
                                         softmax=True,
                                         xmetric="CE")
    # Record losses of the MI approximation and the Sibson MI
    h_c, h_cz, _, _ = dvibloss.MI_approx(input_tensor, private_tensor,
                                         rawc_tensor, yhat, chat, z)
    I_c_cz = tf.abs(h_c - h_cz)
    # Sibson MI of order alpha (the order argument); this may be tuned
    sibMI_c_cz = dvibloss.sibsonMI_approx(z, chat, order, independent=False)
    # Compose losses
    if lossmetric == "KL":
        loss1 = encode_coef * loss2x + KLloss
    elif lossmetric == "MI":
        loss1 = encode_coef * loss2x + I_c_cz
    elif lossmetric == "sibMI":
        loss1 = encode_coef * loss2x + sibMI_c_cz
    loss2 = decode_coef * loss2x + loss2c
    loss3 = dvibloss.get_vae_cost(mean, stddev)
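    # loss1 trains the encoder and loss2 the decoder (see the apply_optimizer
    # calls below); loss3, the VAE cost, is evaluated each batch but never
    # optimized in this example.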

    with tf.name_scope('pub_prediction'):
        with tf.name_scope('pub_distance'):
            pub_dist = tf.reduce_mean((yhat - output_tensor)**2)
            correct_predpub = tf.equal(tf.argmax(yhat, axis=1),
                                       tf.argmax(output_tensor, axis=1))
            pub_acc = tf.reduce_mean(tf.cast(correct_predpub, tf.float32))
    with tf.name_scope('sec_prediction'):
        with tf.name_scope('sec_distance'):
            sec_dist = tf.reduce_mean((chat - private_tensor)**2)
            #correct_pred = tf.less(tf.abs(chat - private_tensor), 0.5)
            correct_pred = tf.equal(tf.argmax(chat, axis=1),
                                    tf.argmax(private_tensor, axis=1))
            sec_acc = tf.reduce_mean(tf.cast(correct_pred, tf.float32))

    optimizer = tf.train.AdamOptimizer(FLAGS.learning_rate, epsilon=1.0)
    e_train = pt.apply_optimizer(optimizer,
                                 losses=[loss1],
                                 regularize=True,
                                 include_marked=True,
                                 var_list=encode_params)
    d_train = pt.apply_optimizer(optimizer,
                                 losses=[loss2],
                                 regularize=True,
                                 include_marked=True,
                                 var_list=all_params[e_param_len:])
    # Logging arrays (per-epoch statistics)
    e_loss_train = np.zeros(FLAGS.max_epoch)
    d_loss_train = np.zeros(FLAGS.max_epoch)
    pub_dist_train = np.zeros(FLAGS.max_epoch)
    sec_dist_train = np.zeros(FLAGS.max_epoch)
    loss2x_train = np.zeros(FLAGS.max_epoch)
    loss2c_train = np.zeros(FLAGS.max_epoch)
    KLloss_train = np.zeros(FLAGS.max_epoch)
    MIloss_train = np.zeros(FLAGS.max_epoch)
    sibMIloss_train = np.zeros(FLAGS.max_epoch)
    pub_acc_train = np.zeros(FLAGS.max_epoch)
    sec_acc_train = np.zeros(FLAGS.max_epoch)
    e_loss_val = np.zeros(FLAGS.max_epoch)
    d_loss_val = np.zeros(FLAGS.max_epoch)
    pub_dist_val = np.zeros(FLAGS.max_epoch)
    sec_dist_val = np.zeros(FLAGS.max_epoch)
    loss2x_val = np.zeros(FLAGS.max_epoch)
    loss2c_val = np.zeros(FLAGS.max_epoch)
    KLloss_val = np.zeros(FLAGS.max_epoch)
    MIloss_val = np.zeros(FLAGS.max_epoch)
    sibMIloss_val = np.zeros(FLAGS.max_epoch)
    pub_acc_val = np.zeros(FLAGS.max_epoch)
    sec_acc_val = np.zeros(FLAGS.max_epoch)
    yhat_val = []
    # Tensorboard logging
    #tf.summary.scalar('e_loss', loss1)
    #tf.summary.scalar('KL', KLloss)
    #tf.summary.scalar('loss_x', loss2x)
    #tf.summary.scalar('loss_c', loss2c)
    #tf.summary.scalar('pub_dist', pub_dist)
    #tf.summary.scalar('sec_dist', sec_dist)

    init = tf.global_variables_initializer()
    saver = tf.train.Saver()
    # Configure the session (GPU memory options left commented out)
    config = tf.ConfigProto()
    #config.gpu_options.allow_growth = True
    #config.gpu_options.per_process_gpu_memory_fraction = 0.8
    config.log_device_placement = False

    sess = tf.Session(config=config)
    sess.run(init)
    #merged = tf.summary.merge_all()
    #train_writer = tf.summary.FileWriter(FLAGS.summary_dir + '/train', sess.graph)
    #test_writer = tf.summary.FileWriter(FLAGS.summary_dir + '/test')
    # pdb.set_trace()  # debug breakpoint disabled so training runs unattended

    for epoch in range(FLAGS.max_epoch):
        widgets = ["epoch #%d|" % epoch, Percentage(), Bar(), ETA()]
        pbar = ProgressBar(maxval=FLAGS.updates_per_epoch, widgets=widgets)
        pbar.start()

        pub_loss = 0
        sec_loss = 0
        pub_accv = 0
        sec_accv = 0
        e_training_loss = 0
        d_training_loss = 0
        KLv = 0
        MIv = 0
        sibMIv = 0
        loss2xv = 0
        loss2cv = 0

        for i in range(FLAGS.updates_per_epoch):
            pbar.update(i)
            feeds = get_feed(i, True, fergdata)
            zv, yhatv, chatv, meanv, stddevv, sec_pred = sess.run(
                [z, yhat, chat, mean, stddev, correct_pred], feeds)
            pub_tmp, sec_tmp, pub_acc_tmp, sec_acc_tmp = sess.run(
                [pub_dist, sec_dist, pub_acc, sec_acc], feeds)
            MItmp, sibMItmp, KLtmp, loss2xtmp, loss2ctmp, loss3tmp = sess.run(
                [I_c_cz, sibMI_c_cz, KLloss, loss2x, loss2c, loss3], feeds)
            _, e_loss_value = sess.run([e_train, loss1], feeds)
            _, d_loss_value = sess.run([d_train, loss2], feeds)
            if (np.isnan(e_loss_value) or np.isnan(d_loss_value)):
                pdb.set_trace()
                break
            #train_writer.add_summary(summary, i)
            e_training_loss += e_loss_value
            d_training_loss += d_loss_value
            pub_loss += pub_tmp
            sec_loss += sec_tmp
            pub_accv += pub_acc_tmp
            sec_accv += sec_acc_tmp
            KLv += KLtmp
            MIv += MItmp
            sibMIv += sibMItmp
            loss2xv += loss2xtmp
            loss2cv += loss2ctmp

        e_training_loss /= FLAGS.updates_per_epoch
        d_training_loss /= FLAGS.updates_per_epoch
        pub_loss /= FLAGS.updates_per_epoch
        sec_loss /= FLAGS.updates_per_epoch
        pub_accv /= FLAGS.updates_per_epoch
        sec_accv /= FLAGS.updates_per_epoch
        loss2xv /= FLAGS.updates_per_epoch
        loss2cv /= FLAGS.updates_per_epoch
        KLv /= FLAGS.updates_per_epoch
        MIv /= FLAGS.updates_per_epoch
        sibMIv /= FLAGS.updates_per_epoch

        print("Loss for E %f, and for D %f" %
              (e_training_loss, d_training_loss))
        print('Training public loss at epoch %s: %s, public accuracy: %s' %
              (epoch, pub_loss, pub_accv))
        print('Training private loss at epoch %s: %s, private accuracy: %s' %
              (epoch, sec_loss, sec_accv))
        print('Training KL loss at epoch %s: %s' % (epoch, KLv))
        e_loss_train[epoch] = e_training_loss
        d_loss_train[epoch] = d_training_loss
        pub_dist_train[epoch] = pub_loss
        sec_dist_train[epoch] = sec_loss
        loss2x_train[epoch] = loss2xv
        loss2c_train[epoch] = loss2cv
        KLloss_train[epoch] = KLv
        MIloss_train[epoch] = MIv
        sibMIloss_train[epoch] = sibMIv
        pub_acc_train[epoch] = pub_accv
        sec_acc_train[epoch] = sec_accv
        # Validation
        if epoch % 10 == 9:
            pub_loss = 0
            sec_loss = 0
            e_val_loss = 0
            d_val_loss = 0
            loss2xv = 0
            loss2cv = 0
            KLv = 0
            MIv = 0
            sibMIv = 0
            pub_accv = 0
            sec_accv = 0
            test_batches = int(FLAGS.test_dataset_size / FLAGS.batch_size)
            for i in range(test_batches):
                feeds = get_feed(i, False, fergdata)
                pub_loss += sess.run(pub_dist, feeds)
                sec_loss += sess.run(sec_dist, feeds)
                e_val_loss += sess.run(loss1, feeds)
                d_val_loss += sess.run(loss2, feeds)
                zv, yhatv, chatv, meanv, stddevv, sec_pred = sess.run(
                    [z, yhat, chat, mean, stddev, correct_pred], feeds)
                MItmp, sibMItmp, KLtmp, loss2xtmp, loss2ctmp, pub_acc_tmp, sec_acc_tmp = sess.run(
                    [
                        I_c_cz, sibMI_c_cz, KLloss, loss2x, loss2c, pub_acc,
                        sec_acc
                    ], feeds)
                if (epoch >= FLAGS.max_epoch - 10):
                    yhat_val.extend(sess.run(yhat, feeds))
                #test_writer.add_summary(summary, i)
                pub_accv += pub_acc_tmp
                sec_accv += sec_acc_tmp
                KLv += KLtmp
                MIv += MItmp
                sibMIv += sibMItmp
                loss2xv += loss2xtmp
                loss2cv += loss2ctmp

            pub_loss /= test_batches
            sec_loss /= test_batches
            e_val_loss /= test_batches
            d_val_loss /= test_batches
            loss2xv /= test_batches
            loss2cv /= test_batches
            KLv /= test_batches
            MIv /= test_batches
            sibMIv /= test_batches
            pub_accv /= test_batches
            sec_accv /= test_batches

            print('Test public loss at epoch %s: %s, public accuracy: %s' %
                  (epoch, pub_loss, pub_accv))
            print('Test private loss at epoch %s: %s, private accuracy: %s' %
                  (epoch, sec_loss, sec_accv))
            e_loss_val[epoch] = e_val_loss
            d_loss_val[epoch] = d_val_loss
            pub_dist_val[epoch] = pub_loss
            sec_dist_val[epoch] = sec_loss
            loss2x_val[epoch] = loss2xv
            loss2c_val[epoch] = loss2cv
            KLloss_val[epoch] = KLv
            MIloss_val[epoch] = MIv
            sibMIloss_val[epoch] = sibMIv
            pub_acc_val[epoch] = pub_accv
            sec_acc_val[epoch] = sec_accv

            if not (np.isnan(e_loss_value) or np.isnan(d_loss_value)):
                savepath = saver.save(sess,
                                      model_directory + '/ferg_privacy',
                                      global_step=epoch)
                print('Model saved at epoch %s, path is %s' %
                      (epoch, savepath))

    np.savez(os.path.join(model_directory, 'ferg_trainstats'),
             e_loss_train=e_loss_train,
             d_loss_train=d_loss_train,
             pub_dist_train=pub_dist_train,
             sec_dist_train=sec_dist_train,
             loss2x_train=loss2x_train,
             loss2c_train=loss2c_train,
             KLloss_train=KLloss_train,
             MIloss_train=MIloss_train,
             sibMIloss_train=sibMIloss_train,
             pub_acc_train=pub_acc_train,
             sec_acc_train=sec_acc_train,
             e_loss_val=e_loss_val,
             d_loss_val=d_loss_val,
             pub_dist_val=pub_dist_val,
             sec_dist_val=sec_dist_val,
             loss2x_val=loss2x_val,
             loss2c_val=loss2c_val,
             KLloss_val=KLloss_val,
             MIloss_val=MIloss_val,
             sibMIloss_val=sibMIloss_val,
             pub_acc_val=pub_acc_val,
             sec_acc_val=sec_acc_val,
             yhat_val=yhat_val)

    sess.close()
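# A minimal sketch (not part of the original) for inspecting the statistics
# saved above; it assumes the ferg_trainstats.npz file written by train_ferg:
#   stats = np.load(os.path.join(model_directory, 'ferg_trainstats.npz'))
#   print(stats['pub_acc_val'][9::10])  # validation accuracy every 10 epochs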
                G = generator()

            with tf.variable_scope("model", reuse=True):
                D2 = discriminator(G)  # generated examples

    D_loss = get_discriminator_loss(D1, D2)
    G_loss = get_generator_loss(D2)

    learning_rate = tf.placeholder(tf.float32, shape=[])
    optimizer = tf.train.AdamOptimizer(learning_rate, epsilon=1.0)
    params = tf.trainable_variables()
    D_params = params[:D_params_num]
    G_params = params[D_params_num:]
#    train_discriminator = optimizer.minimize(loss=D_loss, var_list=D_params)
#    train_generator = optimizer.minimize(loss=G_loss, var_list=G_params)
    train_discriminator = pt.apply_optimizer(optimizer, losses=[D_loss], regularize=True, include_marked=True, var_list=D_params)
    train_generator = pt.apply_optimizer(optimizer, losses=[G_loss], regularize=True, include_marked=True, var_list=G_params)

    init = tf.initialize_all_variables()

    with tf.Session() as sess:
        sess.run(init)
        for epoch in range(FLAGS.max_epoch):

            discriminator_loss = 0.0
            generator_loss = 0.0

            widgets = ["epoch #%d|" % epoch, Percentage(), Bar(), ETA()]
            pbar = ProgressBar(max_value = FLAGS.updates_per_epoch, widgets=widgets)
            pbar.start()
            for i in range(FLAGS.updates_per_epoch):
    def prepare_trainer(self, loss):
        vae_opt = tf.train.AdamOptimizer(self.vae_learning_rate)
        self.vae_trainer = \
            pt.apply_optimizer(vae_opt, losses=[loss])
            with tf.variable_scope("model") as scope:
                output_tensor, mean, stddev = decoder(encoder(input_tensor))
                D = discriminator(ground_truth_tensor)
                D_ = discriminator(tf.add(input_tensor, tf.mul(output_tensor, mask_tensor)), reuse=True)

        with pt.defaults_scope(phase=pt.Phase.test):
            with tf.variable_scope("model", reuse=True) as scope:
                sampled_tensor, _, _ = decoder(encoder(input_tensor))
                restored_tensor = tf.add(tf.mul(input_tensor, tf.sub(tf.ones_like(mask_tensor),mask_tensor)),
                                         tf.mul(sampled_tensor, mask_tensor))
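                # i.e. restored = input * (1 - mask) + sampled * mask: keep
                # the unmasked pixels and fill the masked region from the
                # decoder sample (tf.mul/tf.sub are pre-1.0 names for
                # tf.multiply/tf.subtract).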
    # Restorer reconstruction loss
    rec_loss = get_reconstruction_cost(output_tensor, ground_truth_tensor,
                                       mask=None, epsilon=1e-12)
    r_loss = rec_loss  # +g_loss
    r_optim = tf.train.AdamOptimizer(FLAGS.r_learning_rate, epsilon=1e-12)
    r_train = pt.apply_optimizer(r_optim, losses=[r_loss])

    # Discriminator
    d_sum = tf.histogram_summary("d", D)
    d__sum = tf.histogram_summary("d_", D_)
    d_loss_real = ops.binary_cross_entropy_with_logits(tf.ones_like(D), D)
    d_loss_fake = ops.binary_cross_entropy_with_logits(tf.zeros_like(D_), D_)
    d_loss_real_sum = tf.scalar_summary("d_loss_real", d_loss_real)
    d_loss_fake_sum = tf.scalar_summary("d_loss_fake", d_loss_fake)
    d_loss = d_loss_real + d_loss_fake
    d_loss_sum = tf.scalar_summary("d_loss", d_loss)
    t_vars = tf.trainable_variables()
    d_vars = [var for var in t_vars if 'd_' in var.name]
    d_optim = tf.train.AdamOptimizer(FLAGS.d_learning_rate, beta1=FLAGS.beta1) \
        .minimize(d_loss, var_list=d_vars)
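    # Note: d_optim uses optimizer.minimize directly, while r_train above
    # goes through pt.apply_optimizer, which also adds PrettyTensor
    # regularization losses and steps pt.global_step().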
Exemple #40
0
def main(_=None):
  print('Starting Shakespeare')

  # Since we are feeding our data as numpy arrays, we need to create
  # placeholders in the graph.
  # These must then be fed using the feed dict.
  input_placeholder = tf.placeholder(tf.int32, [BATCH_SIZE, TIMESTEPS])
  output_placeholder = tf.placeholder(tf.int32, [BATCH_SIZE, TIMESTEPS])

  merged_size = BATCH_SIZE * TIMESTEPS

  inp = data_utils.reshape_data(input_placeholder)

  # We need a dense output to calculate loss and accuracy.
  # sparse_to_dense does a lookup using the indices from the first Tensor.
  # Because we are filling in a 2D array, the indices need to be 2 dimensional.
  t = tf.concat(1,
                [
                    tf.constant(
                        numpy.arange(merged_size).reshape((merged_size, 1)),
                        dtype=tf.int32),
                    data_utils.reshape_data(output_placeholder)
                ])

  labels = tf.sparse_to_dense(t, [merged_size, CHARS], 1.0, 0.0)
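  # E.g. with merged_size = 2 and target characters [3, 1], t becomes
  # [[0, 3], [1, 1]], so labels is a (2 x CHARS) matrix with ones at
  # positions (0, 3) and (1, 1) and zeros elsewhere.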

  # Some ops have different behaviors in test vs train and these take a phase
  # argument.
  with tf.variable_scope('shakespeare'):
    training_logits = create_model(inp, TIMESTEPS, pt.Phase.train)
    # Create the result.  Softmax applies softmax and creates a cross entropy
    # loss.  The result is a namedtuple.
    training_result = training_logits.softmax(labels)

  # Create the gradient optimizer and apply it to the graph.
  # pt.apply_optimizer adds regularization losses and sets up a step counter
  # (pt.global_step()) for you.
  optimizer = tf.train.AdagradOptimizer(0.5)
  train_op = pt.apply_optimizer(optimizer, losses=[training_result.loss])

  # For tracking accuracy in evaluation, we need to add an evaluation node.
  # We only run this when testing, so we need to specify that in the phase.
  # We also want to disable dropout, so we pass the phase to create_model.

  # Call variable scope by name so we also create a name scope.  This ensures
  # that we share variables and our names are properly organized.
  with tf.variable_scope('shakespeare', reuse=True):
    test_logits = create_model(inp, TIMESTEPS, pt.Phase.test)
    test_result = test_logits.softmax(labels)

  # Accuracy creates variables, so make it outside of the above scope.
  accuracy = test_result.softmax.evaluate_classifier(labels,
                                                     phase=pt.Phase.test)

  # Create an inference model so that we can sample.  The big difference is
  # that the input is a single character and it requires reset nodes.
  with tf.variable_scope('shakespeare', reuse=True):
    inference_input = tf.placeholder(tf.int32, [])
    # Needs to be 2 dimensional so that it matches the dims of the other models.
    reshaped = pt.wrap(inference_input).reshape([1, 1])
    inference_logits = create_model(reshaped, 1, pt.Phase.infer)

  # Grab the data as numpy arrays.
  shakespeare = data_utils.shakespeare(TIMESTEPS + 1)
  shakespeare_in = shakespeare[:, :-1]
  shakespeare_out = shakespeare[:, 1:]

  # We can set a save_path in the runner to automatically checkpoint every so
  # often.  Otherwise at the end of the session, the model will be lost.
  runner = pt.train.Runner(save_path=FLAGS.save_path)
  with tf.Session():
    for epoch in xrange(FLAGS.epochs):
      # Shuffle the training data.
      shakespeare_in, shakespeare_out = data_utils.permute_data(
          (shakespeare_in, shakespeare_out))

      runner.train_model(train_op,
                         training_result.loss,
                         len(shakespeare_in) / BATCH_SIZE,
                         feed_vars=(input_placeholder, output_placeholder),
                         feed_data=pt.train.feed_numpy(
                             BATCH_SIZE, shakespeare_in, shakespeare_out),
                         print_every=10)
      classification_accuracy = runner.evaluate_model(
          accuracy,
          len(shakespeare_in) / BATCH_SIZE,
          feed_vars=(input_placeholder, output_placeholder),
          feed_data=pt.train.feed_numpy(BATCH_SIZE, shakespeare_in,
                                        shakespeare_out))

      print('Next character accuracy after epoch %d: %g%%' % (
          epoch + 1, classification_accuracy * 100))

      # Use a temperature smaller than 1 because the early stages of the model
      # don't assign much confidence.
      print(sample(inference_input,
                   inference_logits,
                   max_length=128,
                   temperature=0.5))

    # Print a sampling from the model.
    print(sample(inference_input, inference_logits))
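    # Temperature sampling sketch (illustrative; sample() is defined
    # elsewhere): dividing logits by a temperature below 1 sharpens the
    # distribution before drawing, e.g.
    #   probs = np.exp(logits / temperature); probs /= probs.sum()
    #   next_char = np.random.choice(len(probs), p=probs)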

if FLAGS.model == 'full':
    result = multilayer_fully_connected(image_placeholder, labels_placeholder)
elif FLAGS.model == 'conv':
    result = lenet5(image_placeholder, labels_placeholder)
else:
    raise ValueError('model must be full or conv: %s' % FLAGS.model)

accuracy = result.softmax.evaluate_classifier(labels_placeholder,
                                              phase=pt.Phase.test)

train_images, train_labels = data_utils.mnist(training=True)
test_images, test_labels = data_utils.mnist(training=False)
optimizer = tf.train.GradientDescentOptimizer(0.01)
train_op = pt.apply_optimizer(optimizer, losses=[result.loss])
runner = pt.train.Runner(save_path=FLAGS.save_path)

with tf.Session():
    for epoch in range(10):
        train_images, train_labels = data_utils.permute_data(
            (train_images, train_labels))

        runner.train_model(train_op,
                           result.loss,
                           EPOCH_SIZE,
                           feed_vars=(image_placeholder, labels_placeholder),
                           feed_data=pt.train.feed_numpy(
                               BATCH_SIZE, train_images, train_labels),
                           print_every=100)
        classification_accuracy = runner.evaluate_model(
Exemple #42
0
    def initialize(self,
                   optimizer=None,
                   var_list=None,
                   use_prettytensor=False,
                   global_step=None,
                   *args,
                   **kwargs):
        """Initialize variational inference.

    Parameters
    ----------
    optimizer : str or tf.train.Optimizer, optional
      A TensorFlow optimizer, to use for optimizing the variational
      objective. Alternatively, one can pass in the name of a
      TensorFlow optimizer, and default parameters for the optimizer
      will be used.
    var_list : list of tf.Variable, optional
      List of TensorFlow variables to optimize over. Default is all
      trainable variables that ``latent_vars`` and ``data`` depend on,
      excluding those that are only used in conditionals in ``data``.
    use_prettytensor : bool, optional
      ``True`` to use the PrettyTensor optimizer (when using PrettyTensor),
      or ``False`` to use the TensorFlow optimizer. Defaults to the
      TensorFlow optimizer.
    global_step : tf.Variable, optional
      A TensorFlow variable to hold the global step.
    """
        super(VariationalInference, self).initialize(*args, **kwargs)

        if var_list is None:
            # Traverse random variable graphs to get default list of variables.
            var_list = set()
            trainables = tf.trainable_variables()
            for z, qz in six.iteritems(self.latent_vars):
                if isinstance(z, RandomVariable):
                    var_list.update(get_variables(z, collection=trainables))

                var_list.update(get_variables(qz, collection=trainables))

            for x, qx in six.iteritems(self.data):
                if isinstance(x, RandomVariable) and \
                        not isinstance(qx, RandomVariable):
                    var_list.update(get_variables(x, collection=trainables))

            var_list = list(var_list)

        self.loss, grads_and_vars = self.build_loss_and_gradients(var_list)

        if self.logging:
            summary_key = 'summaries_' + str(id(self))
            tf.summary.scalar("loss", self.loss, collections=[summary_key])
            for grad, var in grads_and_vars:
                # replace colons, which are invalid characters in summary names
                tf.summary.histogram("gradient/" + var.name.replace(':', '/'),
                                     grad,
                                     collections=[summary_key])
                tf.summary.scalar("gradient_norm/" +
                                  var.name.replace(':', '/'),
                                  tf.norm(grad),
                                  collections=[summary_key])

            self.summarize = tf.summary.merge_all(key=summary_key)

        if optimizer is None and global_step is None:
            # Default optimizer always uses a global step variable.
            global_step = tf.Variable(0, trainable=False, name="global_step")

        if isinstance(global_step, tf.Variable):
            starter_learning_rate = 0.1
            learning_rate = tf.train.exponential_decay(starter_learning_rate,
                                                       global_step,
                                                       100,
                                                       0.9,
                                                       staircase=True)
        else:
            learning_rate = 0.01
            global_step = None

        # Build optimizer.
        if optimizer is None:
            optimizer = tf.train.AdamOptimizer(learning_rate)
        elif isinstance(optimizer, str):
            if optimizer == 'gradientdescent':
                optimizer = tf.train.GradientDescentOptimizer(learning_rate)
            elif optimizer == 'adadelta':
                optimizer = tf.train.AdadeltaOptimizer(learning_rate)
            elif optimizer == 'adagrad':
                optimizer = tf.train.AdagradOptimizer(learning_rate)
            elif optimizer == 'momentum':
                optimizer = tf.train.MomentumOptimizer(learning_rate, 0.9)
            elif optimizer == 'adam':
                optimizer = tf.train.AdamOptimizer(learning_rate)
            elif optimizer == 'ftrl':
                optimizer = tf.train.FtrlOptimizer(learning_rate)
            elif optimizer == 'rmsprop':
                optimizer = tf.train.RMSPropOptimizer(learning_rate)
            else:
                raise ValueError('Optimizer class not found:', optimizer)
        elif not isinstance(optimizer, tf.train.Optimizer):
            raise TypeError(
                "Optimizer must be str, tf.train.Optimizer, or None.")

        scope = "optimizer_" + str(id(self))
        with tf.variable_scope(scope):
            if not use_prettytensor:
                self.train = optimizer.apply_gradients(grads_and_vars,
                                                       global_step=global_step)
            else:
                # Note PrettyTensor optimizer does not accept manual updates;
                # it autodiffs the loss directly.
                self.train = pt.apply_optimizer(optimizer,
                                                losses=[self.loss],
                                                global_step=global_step,
                                                var_list=var_list)

        self.reset.append(
            tf.variables_initializer(
                tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope=scope)))
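# A compact alternative (sketch, same TF 1.x optimizers) to the string
# dispatch in initialize() above would be a lookup table:
#   _OPTIMIZERS = {'adam': tf.train.AdamOptimizer,
#                  'rmsprop': tf.train.RMSPropOptimizer}
#   optimizer = _OPTIMIZERS[name](learning_rate)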
def main(_=None, weight_init=tf.random_normal, activation_f=tf.nn.sigmoid, data_min=0, data_scale=1.0, epochs=50,
         learning_rate=0.01, prefix=None):
    tf.reset_default_graph()
    input_placeholder  = tf.placeholder(tf.float32, [BATCH_SIZE, 28, 28, 1])
    output_placeholder = tf.placeholder(tf.float32, [BATCH_SIZE, 28, 28, 1])

    # Grab the data as numpy arrays.
    train_input, train_output = data_utils.mnist(training=True)
    test_input,  test_output  = data_utils.mnist(training=False)
    train_set = ut.mnist_select_n_classes(train_input, train_output, NUM_CLASSES, min=data_min, scale=data_scale)
    test_set  = ut.mnist_select_n_classes(test_input,  test_output,  NUM_CLASSES, min=data_min, scale=data_scale)
    train_input, train_output = train_set[0], train_set[0]
    test_input,  test_output  = test_set[0],  test_set[0]
    ut.print_info('train (min, max): (%f, %f)' % (np.min(train_set[0]), np.max(train_set[0])))
    visual_inputs, visual_output = train_set[0][0:BATCH_SIZE], train_set[0][0:BATCH_SIZE]

    epoch_reconstruction = []

    EPOCH_SIZE = len(train_input) // BATCH_SIZE
    TEST_SIZE = len(test_input) // BATCH_SIZE

    assert_model(input_placeholder, output_placeholder, test_input, test_output, train_input, train_output, visual_inputs, visual_output)

    with pt.defaults_scope(activation_fn=activation_f,
                           # batch_normalize=True,
                           # learned_moments_update_rate=0.0003,
                           # variance_epsilon=0.001,
                           # scale_after_normalization=True
                           ):
        with pt.defaults_scope(phase=pt.Phase.train):
            with tf.variable_scope("model") as scope:
                output_tensor = decoder(encoder(input_placeholder), weight_init=weight_init)

    pretty_loss = loss(output_tensor, output_placeholder)

    optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
    train = pt.apply_optimizer(optimizer, losses=[pretty_loss])

    init = tf.initialize_all_variables()
    runner = pt.train.Runner(save_path=FLAGS.save_path)

    best_q = 100000
    with tf.Session() as sess:
        sess.run(init)
        for epoch in xrange(epochs):
            # Shuffle the training data.
            additional_info = ''

            if epoch % np.ceil(epochs / 40.0) == 0 or epoch + 1 == epochs:
                reconstruct, loss_value = sess.run([output_tensor, pretty_loss], {input_placeholder: visual_inputs, output_placeholder: visual_output})
                epoch_reconstruction.append(reconstruct)
                additional_info += 'epoch:%d (min, max): (%f %f)' %(epoch, np.min(reconstruct), np.max(reconstruct))

            train_input, train_output = data_utils.permute_data(
                (train_input, train_output))

            runner.train_model(
                train,
                pretty_loss,
                EPOCH_SIZE,
                feed_vars=(input_placeholder, output_placeholder),
                feed_data=pt.train.feed_numpy(BATCH_SIZE, train_input, train_output),
                print_every=None
            )
            accuracy = runner.evaluate_model(
                pretty_loss,
                TEST_SIZE,
                feed_vars=(input_placeholder, output_placeholder),
                feed_data=pt.train.feed_numpy(BATCH_SIZE, test_input, test_output))
            ut.print_time('Accuracy after %2d/%d epoch %.2f; %s' % (epoch + 1, epochs, accuracy, additional_info))
            if best_q > accuracy:
                best_q = accuracy
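            # Note: despite the name, 'accuracy' is the reconstruction loss
            # returned by evaluate_model on pretty_loss, so the minimum is
            # tracked as the best quality.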

        save_params = {'suf': 'mn_basic', 'act': activation_f, 'e': epochs, 'opt': optimizer, 'lr': learning_rate,
                       'init': weight_init, 'acu': int(best_q), 'bs': BATCH_SIZE, 'h': HIDDEN_0_SIZE, 'i':prefix}
        ut.reconstruct_images_epochs(np.asarray(epoch_reconstruction), visual_output, save_params=save_params)

    ut.print_time('Best Quality: %f for %s' % (best_q, ut.to_file_name(save_params)))
    ut.reset_start_time()
    return best_q
Exemple #44
0
    def init_opt(self):
        if self.dataset.name == "mnist":
            shape = [self.dataset.image_dim]
        elif 'FOLDER' in self.dataset.name:
            print "Selected folder image"
            shape = list(self.dataset.output_size)
        else:
            shape = [self.dataset.output_size, self.dataset.output_size, 3]
        self.input_tensor = input_tensor = tf.placeholder(
            tf.float32, [self.batch_size] + shape)

        with pt.defaults_scope(phase=pt.Phase.train):
            self.z_var = self.model.latent_dist.sample_prior(self.batch_size)
            fake_x, _ = self.model.generate(self.z_var)
            self.sample_x, _ = self.model.generate(self.z_var)

            if self.semiSup:
                self.sup_d = self.model.discriminateSup(
                    self.input_tensor, self.dataset.dataObj.getNclasses())
            self.fake_d = self.model.discriminate(fake_x)
            self.real_d = self.model.discriminate(input_tensor)

            self.d_feat_real = self.real_d['features']

            # The generator loss could use the intermediate-layer error (MSE,
            # commented out below); here it starts at 0 and is built from the
            # cross-entropy terms that follow.
            self.d_intermediateLayerGenerated = self.model.calcInterLayer(
                fake_x)
            self.d_intermediateLayerReal = self.model.calcInterLayer(
                input_tensor)

            # generator_loss = tf.reduce_mean(tf.squared_difference(self.d_intermediateLayerGenerated, self.d_intermediateLayerReal))
            generator_loss = 0

            if self.semiSup:
                self.input_labels = tf.placeholder(
                    tf.float32,
                    [self.batch_size,
                     self.dataset.dataObj.getNclasses()])
                discriminator_loss_sup = tf.reduce_mean(
                    tf.nn.softmax_cross_entropy_with_logits(
                        self.sup_d['logits'], self.input_labels))

                discriminator_loss_real = tf.reduce_mean(
                    tf.nn.sigmoid_cross_entropy_with_logits(
                        self.real_d['logits'],
                        tf.zeros_like(self.real_d['logits'])))
                discriminator_loss_fake = tf.reduce_mean(
                    tf.nn.sigmoid_cross_entropy_with_logits(
                        self.fake_d['logits'],
                        tf.ones_like(self.real_d['logits'])))
                discriminator_loss = discriminator_loss_real + discriminator_loss_fake + discriminator_loss_sup
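                # Note: this branch labels real examples with zeros and fakes
                # with ones, the reverse of the convention used in the else
                # branch below.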

                generator_loss += tf.reduce_mean(
                    tf.nn.sigmoid_cross_entropy_with_logits(
                        self.fake_d['logits'],
                        tf.zeros_like(self.fake_d['logits'])))

                self.log_vars.append(
                    ("discriminator_sup_loss", discriminator_loss_sup))
            else:
                discriminator_loss_real = tf.reduce_mean(
                    tf.nn.sigmoid_cross_entropy_with_logits(
                        self.real_d['logits'],
                        tf.ones_like(self.real_d['prob'])))
                discriminator_loss_fake = tf.reduce_mean(
                    tf.nn.sigmoid_cross_entropy_with_logits(
                        self.fake_d['logits'],
                        tf.zeros_like(self.fake_d['prob'])))
                discriminator_loss = discriminator_loss_real + discriminator_loss_fake
                generator_loss += tf.reduce_mean(
                    tf.nn.sigmoid_cross_entropy_with_logits(
                        self.fake_d['logits'],
                        tf.ones_like(self.fake_d['prob'])))

            self.log_vars.append(
                ("discriminator_loss_real", discriminator_loss_real))
            self.log_vars.append(
                ("discriminator_loss_fake", discriminator_loss_fake))
            self.log_vars.append(("discriminator_loss", discriminator_loss))
            self.log_vars.append(("generator_loss", generator_loss))

            real_d_sum = tf.histogram_summary("real_d", self.real_d['prob'])
            fake_d_sum = tf.histogram_summary("fake_d", self.fake_d['prob'])

            if self.model.is_reg:
                reg_z = self.model.reg_z(self.z_var)
                mi_est = tf.constant(0.)
                cross_ent = tf.constant(0.)

                # compute for discrete and continuous codes separately
                # discrete:
                if len(self.model.reg_disc_latent_dist.dists) > 0:
                    disc_reg_z = self.model.disc_reg_z(reg_z)
                    disc_reg_dist_info = self.model.disc_reg_dist_info(
                        self.fake_d['reg_dist_info']
                    )  # Returns a dictionary of activations for each distribution
                    disc_log_q_c_given_x = self.model.reg_disc_latent_dist.logli(
                        disc_reg_z, disc_reg_dist_info)
                    disc_log_q_c = self.model.reg_disc_latent_dist.logli_prior(
                        disc_reg_z)
                    disc_cross_ent = tf.reduce_mean(-disc_log_q_c_given_x)
                    disc_ent = tf.reduce_mean(-disc_log_q_c)
                    disc_mi_est = disc_ent - disc_cross_ent
                    mi_est += disc_mi_est
                    cross_ent += disc_cross_ent
                    self.log_vars.append(("MI_disc", disc_mi_est))
                    self.log_vars.append(("CrossEnt_disc", disc_cross_ent))
                    discriminator_loss -= self.info_reg_coeff * disc_mi_est
                    generator_loss -= self.info_reg_coeff * disc_mi_est

                    real_disc_reg_dist_info = self.model.disc_reg_dist_info(
                        self.real_d['reg_dist_info'])
                    assert len(
                        real_disc_reg_dist_info.keys()
                    ) == 1  # currently support only one categorical distribution
                    self.disc_prob = real_disc_reg_dist_info[
                        real_disc_reg_dist_info.keys()[0]]

                if len(self.model.reg_cont_latent_dist.dists) > 0:
                    cont_reg_z = self.model.cont_reg_z(reg_z)
                    cont_reg_dist_info = self.model.cont_reg_dist_info(
                        self.fake_d['reg_dist_info'])
                    cont_log_q_c_given_x = self.model.reg_cont_latent_dist.logli(
                        cont_reg_z, cont_reg_dist_info)
                    cont_log_q_c = self.model.reg_cont_latent_dist.logli_prior(
                        cont_reg_z)
                    cont_cross_ent = tf.reduce_mean(-cont_log_q_c_given_x)
                    cont_ent = tf.reduce_mean(-cont_log_q_c)
                    cont_mi_est = cont_ent - cont_cross_ent
                    mi_est += cont_mi_est
                    cross_ent += cont_cross_ent
                    self.log_vars.append(("MI_cont", cont_mi_est))
                    self.log_vars.append(("CrossEnt_cont", cont_cross_ent))
                    discriminator_loss -= self.info_reg_coeff * cont_mi_est
                    generator_loss -= self.info_reg_coeff * cont_mi_est

                for idx, dist_info in enumerate(
                        self.model.reg_latent_dist.split_dist_info(
                            self.fake_d['reg_dist_info'])):
                    if "stddev" in dist_info:
                        self.log_vars.append(
                            ("max_std_%d" % idx,
                             tf.reduce_max(dist_info["stddev"])))
                        self.log_vars.append(
                            ("min_std_%d" % idx,
                             tf.reduce_min(dist_info["stddev"])))

                self.log_vars.append(("MI", mi_est))
                self.log_vars.append(("CrossEnt", cross_ent))

            all_vars = tf.trainable_variables()
            d_vars = [var for var in all_vars if var.name.startswith('d_')]
            g_vars = [var for var in all_vars if var.name.startswith('g_')]

            discriminator_optimizer = tf.train.AdamOptimizer(
                self.discriminator_learning_rate, beta1=0.5)
            self.discriminator_trainer = pt.apply_optimizer(
                discriminator_optimizer,
                losses=[discriminator_loss],
                var_list=d_vars)

            generator_optimizer = tf.train.AdamOptimizer(
                self.generator_learning_rate, beta1=0.5)
            self.generator_trainer = pt.apply_optimizer(
                generator_optimizer, losses=[generator_loss], var_list=g_vars)

            for k, v in self.log_vars:
                tf.scalar_summary(k, v)

        if self.model.is_reg and self.dataset.name != 'imagenet':
            if self.model.encoder_dim <= 12:  # Ugly conditioning!!! Fix later
                with pt.defaults_scope(phase=pt.Phase.test):
                    with tf.variable_scope("model", reuse=True) as scope:
                        self.visualize_all_factors()
Exemple #45
0
def main(_=None):
  print('Starting Baby Names')

  # Since we are feeding our data as numpy arrays, we need to create
  # placeholders in the graph.
  # These must then be fed using the feed dict.
  input_placeholder = tf.placeholder(tf.int32, [BATCH_SIZE, TIMESTEPS])
  output_placeholder = tf.placeholder(tf.float32, [BATCH_SIZE, SEXES])

  inp = data_utils.reshape_data(input_placeholder)

  # Create a label for each timestep.
  labels = data_utils.reshape_data(
      tf.reshape(
          tf.tile(output_placeholder, [1, TIMESTEPS]), [BATCH_SIZE, TIMESTEPS,
                                                        SEXES]),
      per_example_length=2)

  # We also need to set per example weights so that the softmax doesn't output a
  # prediction on intermediate nodes.
  length_placeholder = tf.placeholder(tf.int32, [BATCH_SIZE, 1])

  # We need a dense multiplier for the per example weights.  The only place
  # that has a non-zero loss is the first EOS after the last character of the
  # name; the characters in the name and the trailing EOS characters are given a
  # 0 loss by assigning the weight to 0.0 and in the end only one character in
  # each batch has a weight of 1.0.
  # sparse_to_dense does a lookup using the indices from the first Tensor.
  # Because we are filling in a 2D array, the indices need to be 2 dimensional.
  # Since we want to assign 1 value for each row, the first dimension can just
  # be a sequence.
  t = tf.concat_v2(
      [
          tf.constant(
              numpy.arange(BATCH_SIZE).reshape((BATCH_SIZE, 1)),
              dtype=tf.int32), length_placeholder
      ],
      1)

  # Squeeze removes dimensions that are equal to 1.  per_example_weights must
  # end up as 1 dimensional.
  per_example_weights = data_utils.reshape_data(tf.sparse_to_dense(
      t, [BATCH_SIZE, TIMESTEPS], 1.0, default_value=0.0)).squeeze()

  # We need 2 copies of the graph that share variables.  The first copy runs
  # training and will do dropout if specified and the second will not include
  # dropout.  Dropout is controlled by the phase argument, which sets the mode
  # consistently throughout a graph.
  with tf.variable_scope('baby_names'):
    result = create_model(inp, labels, TIMESTEPS, per_example_weights)

  # Call variable scope by name so we also create a name scope.  This ensures
  # that we share variables and our names are properly organized.
  with tf.variable_scope('baby_names', reuse=True):
    # Some ops have different behaviors in test vs train and these take a phase
    # argument.
    test_result = create_model(inp,
                               labels,
                               TIMESTEPS,
                               per_example_weights,
                               phase=pt.Phase.test)

  # For tracking accuracy in evaluation, we need to add an evaluation node.
  # We only run this when testing, so we need to specify that in the phase.
  # Some ops have different behaviors in test vs train and these take a phase
  # argument.
  accuracy = test_result.softmax.evaluate_classifier(
      labels,
      phase=pt.Phase.test,
      per_example_weights=per_example_weights)

  # We can also compute a batch accuracy to monitor progress.
  batch_accuracy = result.softmax.evaluate_classifier(
      labels,
      phase=pt.Phase.train,
      per_example_weights=per_example_weights)

  # Grab the inputs, outputs and lengths as numpy arrays.
  # Lengths could have been calculated from names, but it was easier to
  # calculate inside the utility function.
  names, sex, lengths = data_utils.baby_names(TIMESTEPS)

  epoch_size = len(names) // BATCH_SIZE
  # Create the gradient optimizer and apply it to the graph.
  # pt.apply_optimizer adds regularization losses and sets up a step counter
  # (pt.global_step()) for you.
  # This sequence model does very well with initially high rates.
  optimizer = tf.train.AdagradOptimizer(
      tf.train.exponential_decay(1.0,
                                 pt.global_step(),
                                 epoch_size,
                                 0.95,
                                 staircase=True))
  train_op = pt.apply_optimizer(optimizer, losses=[result.loss])

  # We can set a save_path in the runner to automatically checkpoint every so
  # often.  Otherwise at the end of the session, the model will be lost.
  runner = pt.train.Runner(save_path=FLAGS.save_path)
  with tf.Session():
    for epoch in xrange(100):
      # Shuffle the training data.
      names, sex, lengths = data_utils.permute_data((names, sex, lengths))

      runner.train_model(
          train_op,
          [result.loss, batch_accuracy],
          epoch_size,
          feed_vars=(input_placeholder, output_placeholder, length_placeholder),
          feed_data=pt.train.feed_numpy(BATCH_SIZE, names, sex, lengths),
          print_every=100)
      classification_accuracy = runner.evaluate_model(
          accuracy,
          epoch_size,
          print_every=0,
          feed_vars=(input_placeholder, output_placeholder, length_placeholder),
          feed_data=pt.train.feed_numpy(BATCH_SIZE, names, sex, lengths))

      print('Accuracy after epoch %d: %g%%' % (
          epoch + 1, classification_accuracy * 100))
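
To make the sparse_to_dense trick above concrete, here is a minimal standalone sketch, assuming the same TF 0.12-era API (tf.concat_v2, tf.sparse_to_dense) and hypothetical name lengths; it is an illustration, not part of the example above.

import numpy
import tensorflow as tf

BATCH_SIZE, TIMESTEPS = 4, 6
# Hypothetical lengths; the 1.0 weight lands on the first EOS after each name.
lengths = numpy.array([[2], [4], [1], [5]], dtype=numpy.int32)

rows = tf.constant(numpy.arange(BATCH_SIZE).reshape((BATCH_SIZE, 1)),
                   dtype=tf.int32)
indices = tf.concat_v2([rows, tf.constant(lengths)], 1)
weights = tf.sparse_to_dense(indices, [BATCH_SIZE, TIMESTEPS], 1.0,
                             default_value=0.0)

with tf.Session() as sess:
    print(sess.run(weights))
    # Each row is all zeros except a single 1.0 at column lengths[i].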
    def init_opt(self):
        self.input_tensor = input_tensor = tf.placeholder(tf.float32, [self.batch_size, self.dataset.image_dim])

        with pt.defaults_scope(phase=pt.Phase.train):
            z_var = self.model.latent_dist.sample_prior(self.batch_size)
            self.fake_x, _ = self.model.generate(z_var)
            real_d, _, _, _ = self.model.discriminate(input_tensor)
            fake_d, _, fake_reg_z_dist_info, _ = self.model.discriminate(self.fake_x)

            reg_z = self.model.reg_z(z_var)

            discriminator_loss = - tf.reduce_mean(tf.log(real_d + TINY) + tf.log(1. - fake_d + TINY))
            generator_loss = - tf.reduce_mean(tf.log(fake_d + TINY))

            self.log_vars.append(("discriminator_loss", discriminator_loss))
            self.log_vars.append(("generator_loss", generator_loss))

            mi_est = tf.constant(0.)
            cross_ent = tf.constant(0.)

            # compute for discrete and continuous codes separately
            # discrete:
            if len(self.model.reg_disc_latent_dist.dists) > 0:
                disc_reg_z = self.model.disc_reg_z(reg_z)
                disc_reg_dist_info = self.model.disc_reg_dist_info(fake_reg_z_dist_info)
                disc_log_q_c_given_x = self.model.reg_disc_latent_dist.logli(disc_reg_z, disc_reg_dist_info)
                disc_log_q_c = self.model.reg_disc_latent_dist.logli_prior(disc_reg_z)
                disc_cross_ent = tf.reduce_mean(-disc_log_q_c_given_x)
                disc_ent = tf.reduce_mean(-disc_log_q_c)
                disc_mi_est = disc_ent - disc_cross_ent
                mi_est += disc_mi_est
                cross_ent += disc_cross_ent
                self.log_vars.append(("MI_disc", disc_mi_est))
                self.log_vars.append(("CrossEnt_disc", disc_cross_ent))
                discriminator_loss -= self.info_reg_coeff * disc_mi_est
                generator_loss -= self.info_reg_coeff * disc_mi_est

            if len(self.model.reg_cont_latent_dist.dists) > 0:
                cont_reg_z = self.model.cont_reg_z(reg_z)
                cont_reg_dist_info = self.model.cont_reg_dist_info(fake_reg_z_dist_info)
                cont_log_q_c_given_x = self.model.reg_cont_latent_dist.logli(cont_reg_z, cont_reg_dist_info)
                cont_log_q_c = self.model.reg_cont_latent_dist.logli_prior(cont_reg_z)
                cont_cross_ent = tf.reduce_mean(-cont_log_q_c_given_x)
                cont_ent = tf.reduce_mean(-cont_log_q_c)
                cont_mi_est = cont_ent - cont_cross_ent
                mi_est += cont_mi_est
                cross_ent += cont_cross_ent
                self.log_vars.append(("MI_cont", cont_mi_est))
                self.log_vars.append(("CrossEnt_cont", cont_cross_ent))
                discriminator_loss -= self.info_reg_coeff * cont_mi_est
                generator_loss -= self.info_reg_coeff * cont_mi_est

            for idx, dist_info in enumerate(self.model.reg_latent_dist.split_dist_info(fake_reg_z_dist_info)):
                if "stddev" in dist_info:
                    self.log_vars.append(("max_std_%d" % idx, tf.reduce_max(dist_info["stddev"])))
                    self.log_vars.append(("min_std_%d" % idx, tf.reduce_min(dist_info["stddev"])))

            self.log_vars.append(("MI", mi_est))
            self.log_vars.append(("CrossEnt", cross_ent))

            all_vars = tf.trainable_variables()
            d_vars = [var for var in all_vars if var.name.startswith('d_')]
            g_vars = [var for var in all_vars if var.name.startswith('g_')]

            self.log_vars.append(("max_real_d", tf.reduce_max(real_d)))
            self.log_vars.append(("min_real_d", tf.reduce_min(real_d)))
            self.log_vars.append(("max_fake_d", tf.reduce_max(fake_d)))
            self.log_vars.append(("min_fake_d", tf.reduce_min(fake_d)))

            discriminator_optimizer = tf.train.AdamOptimizer(self.discriminator_learning_rate, beta1=0.5)
            self.discriminator_trainer = pt.apply_optimizer(discriminator_optimizer, losses=[discriminator_loss],
                                                            var_list=d_vars)

            generator_optimizer = tf.train.AdamOptimizer(self.generator_learning_rate, beta1=0.5)
            self.generator_trainer = pt.apply_optimizer(generator_optimizer, losses=[generator_loss], var_list=g_vars)

            for k, v in self.log_vars:
                tf.scalar_summary(k, v)

        with pt.defaults_scope(phase=pt.Phase.test):
            with tf.variable_scope("model", reuse=True) as scope:
                self.visualize_all_factors()
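
The mutual-information terms above are the InfoGAN variational lower bound: MI(c; x) >= H(c) - E[-log q(c|x)], estimated per batch as ent - cross_ent. Below is a minimal sketch for a single categorical code with a uniform prior; the batch size, code size, and the random stand-in for Q(c|x) are assumptions, not taken from the model above.

import math
import tensorflow as tf

BATCH_SIZE, K = 32, 10
TINY = 1e-8

# Stand-ins for sampled codes c ~ p(c) and the recognition output Q(c|x).
c = tf.one_hot(tf.random_uniform([BATCH_SIZE], maxval=K, dtype=tf.int32), K)
q_c_given_x = tf.nn.softmax(tf.random_normal([BATCH_SIZE, K]))

log_q_c_given_x = tf.reduce_sum(c * tf.log(q_c_given_x + TINY), 1)
log_q_c = tf.fill([BATCH_SIZE], math.log(1.0 / K))  # uniform prior over K codes

cross_ent = tf.reduce_mean(-log_q_c_given_x)
ent = tf.reduce_mean(-log_q_c)      # equals log K for the uniform prior
mi_est = ent - cross_ent            # the MI_disc / MI_cont estimate above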
  def initialize(self, optimizer=None, var_list=None, use_prettytensor=False,
                 *args, **kwargs):
    """Initialize variational inference.

    Parameters
    ----------
    optimizer : str or tf.train.Optimizer, optional
      A TensorFlow optimizer, to use for optimizing the variational
      objective. Alternatively, one can pass in the name of a
      TensorFlow optimizer, and default parameters for the optimizer
      will be used.
    var_list : list of tf.Variable, optional
      List of TensorFlow variables to optimize over. Default is all
      trainable variables that ``latent_vars`` and ``data`` depend on,
      excluding those that are only used in conditionals in ``data``.
    use_prettytensor : bool, optional
      ``True`` if aim to use PrettyTensor optimizer (when using
      PrettyTensor) or ``False`` if aim to use TensorFlow optimizer.
      Defaults to TensorFlow.
    """
    super(VariationalInference, self).initialize(*args, **kwargs)

    if var_list is None:
      if self.model_wrapper is None:
        # Traverse random variable graphs to get default list of variables.
        var_list = set([])
        trainables = tf.trainable_variables()
        for z, qz in six.iteritems(self.latent_vars):
          if isinstance(z, RandomVariable):
            var_list.update(get_variables(z, collection=trainables))

          var_list.update(get_variables(qz, collection=trainables))

        for x, qx in six.iteritems(self.data):
          if isinstance(x, RandomVariable) and \
                  not isinstance(qx, RandomVariable):
            var_list.update(get_variables(x, collection=trainables))

        var_list = list(var_list)
      else:
        # Variables may not be instantiated for model wrappers until
        # their methods are first called. For now, hard-code
        # ``var_list`` inside build_losses.
        var_list = None

    self.loss, grads_and_vars = self.build_loss_and_gradients(var_list)

    if optimizer is None:
      # Use ADAM with a decaying scale factor.
      global_step = tf.Variable(0, trainable=False)
      starter_learning_rate = 0.1
      learning_rate = tf.train.exponential_decay(starter_learning_rate,
                                                 global_step,
                                                 100, 0.9, staircase=True)
      optimizer = tf.train.AdamOptimizer(learning_rate)
    elif isinstance(optimizer, str):
      if optimizer == 'gradientdescent':
        optimizer = tf.train.GradientDescentOptimizer(0.01)
      elif optimizer == 'adadelta':
        optimizer = tf.train.AdadeltaOptimizer()
      elif optimizer == 'adagrad':
        optimizer = tf.train.AdagradOptimizer(0.01)
      elif optimizer == 'momentum':
        optimizer = tf.train.MomentumOptimizer(0.01, 0.9)
      elif optimizer == 'adam':
        optimizer = tf.train.AdamOptimizer()
      elif optimizer == 'ftrl':
        optimizer = tf.train.FtrlOptimizer(0.01)
      elif optimizer == 'rmsprop':
        optimizer = tf.train.RMSPropOptimizer(0.01)
      else:
        raise ValueError('Optimizer class not found:', optimizer)

      global_step = None
    elif isinstance(optimizer, tf.train.Optimizer):
      # Custom optimizers have no control over global_step.
      global_step = None
    else:
      raise TypeError()

    if not use_prettytensor:
      self.train = optimizer.apply_gradients(grads_and_vars,
                                             global_step=global_step)
    else:
      # Note PrettyTensor optimizer does not accept manual updates;
      # it autodiffs the loss directly.
      self.train = pt.apply_optimizer(optimizer, losses=[self.loss],
                                      global_step=global_step,
                                      var_list=var_list)
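
The final branch above is the important difference: the TensorFlow path applies precomputed grads_and_vars, while the PrettyTensor path autodiffs the loss itself and so cannot accept manual gradient edits. A toy contrast of the two paths (the variable and loss here are assumptions for illustration):

import prettytensor as pt
import tensorflow as tf

w = tf.Variable(1.0, name='w')
loss = tf.square(w - 3.0)
optimizer = tf.train.GradientDescentOptimizer(0.1)

# TensorFlow path: gradients are materialized first, so they can be
# clipped or rescaled before being applied.
grads_and_vars = optimizer.compute_gradients(loss, var_list=[w])
train_tf = optimizer.apply_gradients(grads_and_vars)

# PrettyTensor path: pt.apply_optimizer differentiates the loss itself.
train_pt = pt.apply_optimizer(optimizer, losses=[loss], var_list=[w])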
Exemple #48
0
def main(_=None):
  print 'Starting Shakespeare'

  # Since we are feeding our data as numpy arrays, we need to create
  # placeholders in the graph.
  # These must then be fed using the feed dict.
  input_placeholder = tf.placeholder(tf.int32, [BATCH_SIZE, TIMESTEPS])
  output_placeholder = tf.placeholder(tf.int32, [BATCH_SIZE, TIMESTEPS])

  merged_size = BATCH_SIZE * TIMESTEPS

  inp = data_utils.reshape_data(input_placeholder)

  # We need a dense output to calculate loss and accuracy.
  # sparse_to_dense does a lookup using the indices from the first Tensor.
  # Because we are filling in a 2D array, the indices need to be 2 dimensional.
  t = tf.concat(1,
                [
                    tf.constant(
                        numpy.arange(merged_size).reshape((merged_size, 1)),
                        dtype=tf.int32),
                    data_utils.reshape_data(output_placeholder)
                ])

  labels = tf.sparse_to_dense(t, [merged_size, CHARS], 1.0, 0.0)

  # Some ops have different behaviors in test vs train and these take a phase
  # argument.
  with tf.variable_scope('shakespeare'):
    training_logits = create_model(inp, TIMESTEPS, pt.Phase.train)
    # Create the result.  Softmax applies softmax and creates a cross entropy
    # loss.  The result is a namedtuple.
    training_result = training_logits.softmax(labels)

  # Create the gradient optimizer and apply it to the graph.
  # pt.apply_optimizer adds regularization losses and sets up a step counter
  # (pt.global_step()) for you.
  optimizer = tf.train.AdagradOptimizer(0.5)
  train_op = pt.apply_optimizer(optimizer, losses=[training_result.loss])

  # For tracking accuracy in evaluation, we need to add an evaluation node.
  # We only run this when testing, so we need to specify that in the phase.
  # We also want to disable dropout, so we pass the phase to create_model.

  # Call variable scope by name so we also create a name scope.  This ensures
  # that we share variables and our names are properly organized.
  with tf.variable_scope('shakespeare', reuse=True):
    test_logits = create_model(inp, TIMESTEPS, pt.Phase.test)
    test_result = test_logits.softmax(labels)

  # Accuracy creates variables, so make it outside of the above scope.
  accuracy = test_result.softmax.evaluate_classifier(labels,
                                                     phase=pt.Phase.test)

  # Create an inference model so that we can sample.  The big difference is
  # that the input is a single character and it requires reset nodes.
  with tf.variable_scope('shakespeare', reuse=True):
    inference_input = tf.placeholder(tf.int32, [])
    # Needs to be 2 dimensional so that it matches the dims of the other models.
    reshaped = pt.wrap(inference_input).reshape([1, 1])
    inference_logits = create_model(reshaped, 1, pt.Phase.infer)

  # Grab the data as numpy arrays.
  shakespeare = data_utils.shakespeare(TIMESTEPS + 1)
  shakespeare_in = shakespeare[:, :-1]
  shakespeare_out = shakespeare[:, 1:]

  # We can set a save_path in the runner to automatically checkpoint every so
  # often.  Otherwise at the end of the session, the model will be lost.
  runner = pt.train.Runner(save_path=FLAGS.save_path)
  with tf.Session():
    for epoch in xrange(FLAGS.epochs):
      # Shuffle the training data.
      shakespeare_in, shakespeare_out = data_utils.permute_data(
          (shakespeare_in, shakespeare_out))

      runner.train_model(train_op,
                         training_result.loss,
                         len(shakespeare_in) / BATCH_SIZE,
                         feed_vars=(input_placeholder, output_placeholder),
                         feed_data=pt.train.feed_numpy(
                             BATCH_SIZE, shakespeare_in, shakespeare_out),
                         print_every=10)
      classification_accuracy = runner.evaluate_model(
          accuracy,
          len(shakespeare_in) / BATCH_SIZE,
          feed_vars=(input_placeholder, output_placeholder),
          feed_data=pt.train.feed_numpy(BATCH_SIZE, shakespeare_in,
                                        shakespeare_out))

      print 'Next character accuracy after epoch %d: %g%%' % (
          epoch + 1, classification_accuracy * 100)

      # Use a temperature smaller than 1 because the early stages of the model
      # don't assign much confidence.
      print sample(inference_input,
                   inference_logits,
                   max_length=128,
                   temperature=0.5)

    # Print a sampling from the model.
    print sample(inference_input, inference_logits)
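
The temperature passed to sample() above rescales the logits before drawing each character; values below 1 sharpen the distribution, which compensates for an under-confident early model. A minimal NumPy sketch of the idea (sample() itself is defined elsewhere; this helper is hypothetical):

import numpy

def sample_char(logits, temperature=0.5):
    # Divide logits by the temperature, then softmax and draw one index.
    scaled = logits / temperature
    probs = numpy.exp(scaled - scaled.max())
    probs /= probs.sum()
    return numpy.random.choice(len(probs), p=probs)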
import tensorflow as tf
import prettytensor as pt
import numpy as np
import cmtf.data.data_mnist as data_mnist

# Data
mnist = data_mnist.read_data_sets(one_hot=True)

x = tf.placeholder(tf.float32, [None, 784])
y = tf.placeholder(tf.float32, [None,10])
pretty_input = pt.wrap(x)
softmax, loss = (
	pretty_input.
	fully_connected(100, activation_fn=tf.nn.relu).
	fully_connected(10, activation_fn=None).
	softmax_classifier(10, labels=y))

accuracy = softmax.evaluate_classifier(y)
optimizer = tf.train.GradientDescentOptimizer(0.01)  # learning rate
train_op = pt.apply_optimizer(optimizer, losses=[loss])

with tf.Session() as sess:
	sess.run(tf.initialize_all_variables())
	# train
	for i in range(2000):
		batch_xs, batch_ys = mnist.train.next_batch(100)
		_, loss_val = sess.run([train_op, loss], feed_dict={x: batch_xs, y: batch_ys})
		if (i+1)%100 == 0:
			print 'index: %d, loss: %f' % (i+1, loss_val)
	# test
	accuracy_value = sess.run(accuracy, feed_dict={x:mnist.test.images, y:mnist.test.labels})
	print 'Accuracy: %g' % accuracy_value
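
The evaluate_classifier call above boils down to comparing argmaxes. A hand-rolled equivalent, reusing softmax and y from the snippet above (an assumption about its semantics, not PrettyTensor's actual implementation):

# Fraction of rows where the predicted class matches the label.
correct = tf.equal(tf.argmax(softmax, 1), tf.argmax(y, 1))
accuracy_manual = tf.reduce_mean(tf.cast(correct, tf.float32))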
Exemple #50
0
    def initialize(self, n_iter=1000, n_minibatch=None, n_print=100,
        optimizer=None, scope=None):
        """Initialize variational inference algorithm.

        Set up ``tf.train.AdamOptimizer`` with a decaying scale factor.

        Initialize all variables.

        Parameters
        ----------
        n_iter : int, optional
            Number of iterations for optimization.
        n_minibatch : int, optional
            Number of samples for data subsampling. Default is to use
            all the data. Subsampling is available only if all data
            passed in are NumPy arrays and the model is not a Stan
            model. For subsampling details, see
            ``tf.train.slice_input_producer`` and ``tf.train.batch``.
        n_print : int, optional
            Number of iterations for each print progress. To suppress print
            progress, specify None.
        optimizer : str, optional
            Whether to use TensorFlow optimizer or PrettyTensor
            optimizer when using PrettyTensor. Defaults to TensorFlow.
        scope : str, optional
            Scope of TensorFlow variable objects to optimize over.
        """
        self.n_iter = n_iter
        self.n_minibatch = n_minibatch
        self.n_print = n_print
        self.loss = tf.constant(0.0)

        if n_minibatch is not None and not isinstance(self.model, StanModel):
            # Re-assign data to batch tensors, with size given by
            # ``n_data``.
            values = list(six.itervalues(self.data))
            slices = tf.train.slice_input_producer(values)
            # By default use as many threads as CPUs.
            batches = tf.train.batch(slices, n_minibatch,
                                     num_threads=multiprocessing.cpu_count())
            if not isinstance(batches, list):
                # ``tf.train.batch`` returns tf.Tensor if ``slices`` is a
                # list of size 1.
                batches = [batches]

            self.data = {key: value for key, value in
                         zip(six.iterkeys(self.data), batches)}

        loss = self.build_loss()
        if optimizer is None:
            var_list = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES,
                                         scope=scope)
            # Use ADAM with a decaying scale factor.
            global_step = tf.Variable(0, trainable=False)
            starter_learning_rate = 0.1
            learning_rate = tf.train.exponential_decay(starter_learning_rate,
                                                global_step,
                                                100, 0.9, staircase=True)
            optimizer = tf.train.AdamOptimizer(learning_rate)
            self.train = optimizer.minimize(loss, global_step=global_step,
                                            var_list=var_list)
        else:
            if scope is not None:
                raise NotImplementedError("PrettyTensor optimizer does not accept a variable scope.")

            optimizer = tf.train.AdamOptimizer(0.01, epsilon=1.0)
            self.train = pt.apply_optimizer(optimizer, losses=[loss])

        init = tf.initialize_all_variables()
        init.run()

        # Start input enqueue threads.
        self.coord = tf.train.Coordinator()
        self.threads = tf.train.start_queue_runners(coord=self.coord)
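
The subsampling block above turns in-memory NumPy arrays into a queue-fed stream of minibatches. A standalone sketch of that pattern with toy data (the arrays and batch size are assumptions):

import multiprocessing
import numpy as np
import tensorflow as tf

x = np.arange(10, dtype=np.float32).reshape(10, 1)
y = np.arange(10, dtype=np.float32)

# slice_input_producer dequeues one (x, y) slice at a time; batch regroups.
slices = tf.train.slice_input_producer([tf.constant(x), tf.constant(y)])
batch_x, batch_y = tf.train.batch(slices, 4,
                                  num_threads=multiprocessing.cpu_count())

with tf.Session() as sess:
    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(coord=coord)
    print(sess.run([batch_x, batch_y]))
    coord.request_stop()
    coord.join(threads)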
Exemple #51
0
    def init_opt(self):
        #self.input_tensor = input_tensor = tf.placeholder(tf.float32, [self.batch_size, self.dataset.image_dim])
        #self.images = tf.placeholder(tf.float32, [self.batch_size, self.image_size,self.image_size,1])

        self.images = tf.placeholder(tf.float32, [self.batch_size, self.dataset.image_size, self.dataset.image_size, self.dataset.c_dim ])


        pstr('self.images', self.images)  # self.input_tensor is commented out above
        
        with pt.defaults_scope(phase=pt.Phase.train):
            z_var = self.model.latent_dist.sample_prior(self.batch_size)
            pstr('0 batch_size',self.batch_size)
            pstr('1 z_var',z_var)
            #print("1 %d | " % z_var )
            fake_x, _ = self.model.generate(z_var)
            pstr('1.1 fake_x',fake_x)
            pstr('1.1 self.images',self.images)

            real_d, _, _, _, real_d_log = self.model.discriminate(self.images)
            fake_d, sample, fake_reg_z_dist_info, reg_dist_flat, fake_d_log  = self.model.discriminate(fake_x)
            
            #d_loss_real = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(real_d_log, tf.ones_like(real_d)))
            #d_loss_fake = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(fake_d_log, tf.zeros_like(fake_d)))
            #discriminator_loss = d_loss_real + d_loss_fake
            #generator_loss = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(fake_d_log, tf.ones_like(fake_d)))


            pstr('1.1.2 sample',sample)
            pstr('1.1.2 fake_reg_z_dist_info',fake_reg_z_dist_info)
            pstr('1.1.2 reg_dist_flat',reg_dist_flat)


            pstr('1.1 fake_d',fake_d)
            pstr('1.5 fake_reg_z_dist_info',fake_reg_z_dist_info)

            reg_z = self.model.reg_z(z_var)
            pstr('2 reg_z',reg_z)

            discriminator_loss = - tf.reduce_mean(tf.log(real_d + TINY) + tf.log(1. - fake_d + TINY))
            generator_loss = - tf.reduce_mean(tf.log(fake_d + TINY))

            self.log_vars.append(("discriminator_loss", discriminator_loss))
            self.log_vars.append(("generator_loss", generator_loss))

            mi_est = tf.constant(0.)
            cross_ent = tf.constant(0.)



            # compute for discrete and continuous codes separately
            # discrete:
            if len(self.model.reg_disc_latent_dist.dists) > 0:
                disc_reg_z = self.model.disc_reg_z(reg_z)
                pstr('3 disc_reg_z',disc_reg_z)
                disc_reg_dist_info = self.model.disc_reg_dist_info(fake_reg_z_dist_info)
                pstr('4 disc_reg_dist_info',disc_reg_dist_info)
                disc_log_q_c_given_x = self.model.reg_disc_latent_dist.logli(disc_reg_z, disc_reg_dist_info)
                pstr('5 disc_log_q_c_given_x',disc_log_q_c_given_x)
                disc_log_q_c = self.model.reg_disc_latent_dist.logli_prior(disc_reg_z)
                pstr('6 disc_log_q_c',disc_log_q_c)
                disc_cross_ent = tf.reduce_mean(-disc_log_q_c_given_x)
                pstr('7 disc_cross_ent',disc_cross_ent)
                disc_ent = tf.reduce_mean(-disc_log_q_c)
                disc_mi_est = disc_ent - disc_cross_ent
                mi_est += disc_mi_est
                cross_ent += disc_cross_ent
                self.log_vars.append(("MI_disc", disc_mi_est))
                self.log_vars.append(("CrossEnt_disc", disc_cross_ent))
                discriminator_loss -= 10 * disc_mi_est
                generator_loss -= self.info_reg_coeff * disc_mi_est

            if len(self.model.reg_cont_latent_dist.dists) > 0:
                cont_reg_z = self.model.cont_reg_z(reg_z)
                pstr('8 cont_reg_z',cont_reg_z)
                cont_reg_dist_info = self.model.cont_reg_dist_info(fake_reg_z_dist_info)
                pstr('9 cont_reg_dist_info', cont_reg_dist_info)
                cont_log_q_c_given_x = self.model.reg_cont_latent_dist.logli(cont_reg_z, cont_reg_dist_info)
                pstr('10 cont_log_q_c_given_x', cont_log_q_c_given_x)
                cont_log_q_c = self.model.reg_cont_latent_dist.logli_prior(cont_reg_z)
                pstr('11 cont_log_q_c',cont_log_q_c)
                cont_cross_ent = tf.reduce_mean(-cont_log_q_c_given_x)
                cont_ent = tf.reduce_mean(-cont_log_q_c)
                cont_mi_est = cont_ent - cont_cross_ent
                mi_est += cont_mi_est
                cross_ent += cont_cross_ent
                self.log_vars.append(("MI_cont", cont_mi_est))
                self.log_vars.append(("CrossEnt_cont", cont_cross_ent))
                discriminator_loss -= 10 * cont_mi_est
                generator_loss -= self.info_reg_coeff * cont_mi_est

            pstr('1.1 generator_loss',generator_loss)

            for idx, dist_info in enumerate(self.model.reg_latent_dist.split_dist_info(fake_reg_z_dist_info)):
                if "stddev" in dist_info:
                    self.log_vars.append(("max_std_%d" % idx, tf.reduce_max(dist_info["stddev"])))
                    self.log_vars.append(("min_std_%d" % idx, tf.reduce_min(dist_info["stddev"])))

            self.log_vars.append(("MI", mi_est))
            self.log_vars.append(("CrossEnt", cross_ent))

            all_vars = tf.trainable_variables()
            d_vars = [var for var in all_vars if var.name.startswith('d_')]
            g_vars = [var for var in all_vars if var.name.startswith('g_')]

            pstr('1.1 g_vars',g_vars)


            self.log_vars.append(("max_real_d", tf.reduce_max(real_d)))
            self.log_vars.append(("min_real_d", tf.reduce_min(real_d)))
            self.log_vars.append(("max_fake_d", tf.reduce_max(fake_d)))
            self.log_vars.append(("min_fake_d", tf.reduce_min(fake_d)))

            discriminator_optimizer = tf.train.AdamOptimizer(self.discriminator_learning_rate, beta1=0.1)
            self.discriminator_trainer = pt.apply_optimizer(discriminator_optimizer, losses=[discriminator_loss],
                                                            var_list=d_vars)

            generator_optimizer = tf.train.AdamOptimizer(self.generator_learning_rate, beta1=0.1, epsilon=1e-8)  # 1e-1024 underflows to 0.0
            self.generator_trainer = pt.apply_optimizer(generator_optimizer, losses=[generator_loss], var_list=g_vars)

            for k, v in self.log_vars:
                tf.scalar_summary(k, v)

        with pt.defaults_scope(phase=pt.Phase.test):
            with tf.variable_scope("model", reuse=True) as scope:
                self.visualize_all_factors()
    def initialize(self,
                   optimizer=None,
                   var_list=None,
                   use_prettytensor=False,
                   *args,
                   **kwargs):
        """Initialize variational inference.

    Parameters
    ----------
    optimizer : str or tf.train.Optimizer, optional
      A TensorFlow optimizer, to use for optimizing the variational
      objective. Alternatively, one can pass in the name of a
      TensorFlow optimizer, and default parameters for the optimizer
      will be used.
    var_list : list of tf.Variable, optional
      List of TensorFlow variables to optimize over. Default is all
      trainable variables that ``latent_vars`` and ``data`` depend on,
      excluding those that are only used in conditionals in ``data``.
    use_prettytensor : bool, optional
      ``True`` if aim to use PrettyTensor optimizer (when using
      PrettyTensor) or ``False`` if aim to use TensorFlow optimizer.
      Defaults to TensorFlow.
    """
        super(VariationalInference, self).initialize(*args, **kwargs)

        if var_list is None:
            if self.model_wrapper is None:
                # Traverse random variable graphs to get default list of variables.
                var_list = set([])
                trainables = tf.trainable_variables()
                for z, qz in six.iteritems(self.latent_vars):
                    if isinstance(z, RandomVariable):
                        var_list.update(get_variables(z,
                                                      collection=trainables))

                    var_list.update(get_variables(qz, collection=trainables))

                for x, qx in six.iteritems(self.data):
                    if isinstance(x, RandomVariable) and \
                            not isinstance(qx, RandomVariable):
                        var_list.update(get_variables(x,
                                                      collection=trainables))

                var_list = list(var_list)
            else:
                # Variables may not be instantiated for model wrappers until
                # their methods are first called. For now, hard-code
                # ``var_list`` inside build_losses.
                var_list = None

        self.loss, grads_and_vars = self.build_loss_and_gradients(var_list)

        if optimizer is None:
            # Use ADAM with a decaying scale factor.
            global_step = tf.Variable(0, trainable=False)
            starter_learning_rate = 0.1
            learning_rate = tf.train.exponential_decay(starter_learning_rate,
                                                       global_step,
                                                       100,
                                                       0.9,
                                                       staircase=True)
            optimizer = tf.train.AdamOptimizer(learning_rate)
        elif isinstance(optimizer, str):
            if optimizer == 'gradientdescent':
                optimizer = tf.train.GradientDescentOptimizer(0.01)
            elif optimizer == 'adadelta':
                optimizer = tf.train.AdadeltaOptimizer()
            elif optimizer == 'adagrad':
                optimizer = tf.train.AdagradOptimizer(0.01)
            elif optimizer == 'momentum':
                optimizer = tf.train.MomentumOptimizer(0.01, 0.9)
            elif optimizer == 'adam':
                optimizer = tf.train.AdamOptimizer()
            elif optimizer == 'ftrl':
                optimizer = tf.train.FtrlOptimizer(0.01)
            elif optimizer == 'rmsprop':
                optimizer = tf.train.RMSPropOptimizer(0.01)
            else:
                raise ValueError('Optimizer class not found:', optimizer)

            global_step = None
        elif isinstance(optimizer, tf.train.Optimizer):
            # Custom optimizers have no control over global_step.
            global_step = None
        else:
            raise TypeError()

        if not use_prettytensor:
            self.train = optimizer.apply_gradients(grads_and_vars,
                                                   global_step=global_step)
        else:
            # Note PrettyTensor optimizer does not accept manual updates;
            # it autodiffs the loss directly.
            self.train = pt.apply_optimizer(optimizer,
                                            losses=[self.loss],
                                            global_step=global_step,
                                            var_list=var_list)
  def initialize(self, optimizer=None, scope=None, use_prettytensor=False,
                 *args, **kwargs):
    """Initialize variational inference algorithm.

    Initialize all variables.

    Parameters
    ----------
    optimizer : str or tf.train.Optimizer, optional
      A TensorFlow optimizer, to use for optimizing the variational
      objective. Alternatively, one can pass in the name of a
      TensorFlow optimizer, and default parameters for the optimizer
      will be used.
    scope : str, optional
      Scope of TensorFlow variables to optimize over. Default is all
      trainable variables.
    use_prettytensor : bool, optional
      ``True`` if aim to use PrettyTensor optimizer (when using
      PrettyTensor) or ``False`` if aim to use TensorFlow optimizer.
      Defaults to TensorFlow.
    """
    super(VariationalInference, self).initialize(*args, **kwargs)
    self.loss = tf.constant(0.0)

    if optimizer is None:
      # Use ADAM with a decaying scale factor.
      global_step = tf.Variable(0, trainable=False)
      starter_learning_rate = 0.1
      learning_rate = tf.train.exponential_decay(starter_learning_rate,
                                                 global_step,
                                                 100, 0.9, staircase=True)
      optimizer = tf.train.AdamOptimizer(learning_rate)
    elif isinstance(optimizer, str):
      if optimizer == 'gradientdescent':
        optimizer = tf.train.GradientDescentOptimizer(0.01)
      elif optimizer == 'adadelta':
        optimizer = tf.train.AdadeltaOptimizer()
      elif optimizer == 'adagrad':
        optimizer = tf.train.AdagradOptimizer(0.01)
      elif optimizer == 'momentum':
        optimizer = tf.train.MomentumOptimizer(0.01, 0.9)
      elif optimizer == 'adam':
        optimizer = tf.train.AdamOptimizer()
      elif optimizer == 'ftrl':
        optimizer = tf.train.FtrlOptimizer(0.01)
      elif optimizer == 'rmsprop':
        optimizer = tf.train.RMSPropOptimizer(0.01)
      else:
        raise ValueError('Optimizer class not found:', optimizer)

      global_step = None
    elif isinstance(optimizer, tf.train.Optimizer):
      # Custom optimizers have no control over global_step.
      global_step = None
    else:
      raise TypeError()

    if getattr(self, 'build_loss_and_gradients', None) is not None:
      self.loss, grads_and_vars = self.build_loss_and_gradients(scope=scope)
    else:
      self.loss = self.build_loss()
      var_list = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES,
                                   scope=scope)
      grads_and_vars = optimizer.compute_gradients(self.loss, var_list=var_list)

    if not use_prettytensor:
      self.train = optimizer.apply_gradients(grads_and_vars,
                                             global_step=global_step)
    else:
      if getattr(self, 'build_loss_and_gradients', None) is not None:
        raise NotImplementedError("PrettyTensor optimizer does not accept "
                                  "manual gradients.")

      self.train = pt.apply_optimizer(optimizer, losses=[self.loss],
                                      global_step=global_step,
                                      var_list=var_list)
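
When an algorithm provides no build_loss_and_gradients, the fallback above collects variables by scope name. A standalone sketch of that filter (the scope names are illustrative):

import tensorflow as tf

with tf.variable_scope('qz'):
    loc = tf.Variable(0.0, name='loc')
with tf.variable_scope('model'):
    w = tf.Variable(1.0, name='w')

# Only trainable variables whose names fall under 'qz' are returned.
qz_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope='qz')
print([v.name for v in qz_vars])  # ['qz/loc:0']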
Exemple #54
0
    vae_loss = get_vae_cost(mean, stddev)
    #rec_loss = get_reconstruction_cost(output_tensor, input_tensor)

    #loss = vae_loss + rec_loss
    loss = get_dvib_cost(mean, stddev, output_tensor, label_tensor)
    with tf.name_scope('accuracy'):
        with tf.name_scope('correct_prediction'):
            correct_prediction = tf.equal(
                tf.sign(output_tensor),
                tf.cast(tf.sign(label_tensor), tf.float32))
        with tf.name_scope('accuracy'):
            accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

    optimizer = tf.train.AdamOptimizer(FLAGS.learning_rate, epsilon=1.0)
    train = pt.apply_optimizer(optimizer, losses=[loss])
    saver = tf.train.Saver()

    init = tf.initialize_all_variables()

    # Config session for memory
    config = tf.ConfigProto()
    #config.gpu_options.allow_growth = True
    config.gpu_options.per_process_gpu_memory_fraction = 0.5
    config.log_device_placement = True

    with tf.Session(config=config) as sess:
        sess.run(init)

        for epoch in range(FLAGS.max_epoch):
            training_loss = 0.0
  def initialize(self, optimizer=None, var_list=None, use_prettytensor=False,
                 global_step=None, *args, **kwargs):
    """Initialize inference algorithm. It initializes hyperparameters
    and builds ops for the algorithm's computation graph.

    Args:
      optimizer: str or tf.train.Optimizer, optional.
        A TensorFlow optimizer, to use for optimizing the variational
        objective. Alternatively, one can pass in the name of a
        TensorFlow optimizer, and default parameters for the optimizer
        will be used.
      var_list: list of tf.Variable, optional.
        List of TensorFlow variables to optimize over. Default is all
        trainable variables that `latent_vars` and `data` depend on,
        excluding those that are only used in conditionals in `data`.
      use_prettytensor: bool, optional.
        `True` if aim to use PrettyTensor optimizer (when using
        PrettyTensor) or `False` if aim to use TensorFlow optimizer.
        Defaults to TensorFlow.
      global_step: tf.Variable, optional.
        A TensorFlow variable to hold the global step.
    """
    super(VariationalInference, self).initialize(*args, **kwargs)

    if var_list is None:
      # Traverse random variable graphs to get default list of variables.
      var_list = set()
      trainables = tf.trainable_variables()
      for z, qz in six.iteritems(self.latent_vars):
        if isinstance(z, RandomVariable):
          var_list.update(get_variables(z, collection=trainables))

        var_list.update(get_variables(qz, collection=trainables))

      for x, qx in six.iteritems(self.data):
        if isinstance(x, RandomVariable) and \
                not isinstance(qx, RandomVariable):
          var_list.update(get_variables(x, collection=trainables))

      var_list = list(var_list)

    self.loss, grads_and_vars = self.build_loss_and_gradients(var_list)

    if self.logging:
      summary_key = 'summaries_' + str(id(self))
      tf.summary.scalar("loss", self.loss, collections=[summary_key])
      for grad, var in grads_and_vars:
        # replace colons which are an invalid character
        tf.summary.histogram("gradient/" +
                             var.name.replace(':', '/'),
                             grad, collections=[summary_key])
        tf.summary.scalar("gradient_norm/" +
                          var.name.replace(':', '/'),
                          tf.norm(grad), collections=[summary_key])

      self.summarize = tf.summary.merge_all(key=summary_key)

    if optimizer is None and global_step is None:
      # Default optimizer always uses a global step variable.
      global_step = tf.Variable(0, trainable=False, name="global_step")

    if isinstance(global_step, tf.Variable):
      starter_learning_rate = 0.1
      learning_rate = tf.train.exponential_decay(starter_learning_rate,
                                                 global_step,
                                                 100, 0.9, staircase=True)
    else:
      learning_rate = 0.01

    # Build optimizer.
    if optimizer is None:
      optimizer = tf.train.AdamOptimizer(learning_rate)
    elif isinstance(optimizer, str):
      if optimizer == 'gradientdescent':
        optimizer = tf.train.GradientDescentOptimizer(learning_rate)
      elif optimizer == 'adadelta':
        optimizer = tf.train.AdadeltaOptimizer(learning_rate)
      elif optimizer == 'adagrad':
        optimizer = tf.train.AdagradOptimizer(learning_rate)
      elif optimizer == 'momentum':
        optimizer = tf.train.MomentumOptimizer(learning_rate, 0.9)
      elif optimizer == 'adam':
        optimizer = tf.train.AdamOptimizer(learning_rate)
      elif optimizer == 'ftrl':
        optimizer = tf.train.FtrlOptimizer(learning_rate)
      elif optimizer == 'rmsprop':
        optimizer = tf.train.RMSPropOptimizer(learning_rate)
      else:
        raise ValueError('Optimizer class not found:', optimizer)
    elif not isinstance(optimizer, tf.train.Optimizer):
      raise TypeError("Optimizer must be str, tf.train.Optimizer, or None.")

    scope = "optimizer_" + str(id(self))
    with tf.variable_scope(scope):
      if not use_prettytensor:
        self.train = optimizer.apply_gradients(grads_and_vars,
                                               global_step=global_step)
      else:
        # Note PrettyTensor optimizer does not accept manual updates;
        # it autodiffs the loss directly.
        self.train = pt.apply_optimizer(optimizer, losses=[self.loss],
                                        global_step=global_step,
                                        var_list=var_list)

    self.reset.append(tf.variables_initializer(
        tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope=scope)))
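
The colon replacement in the summary names above is needed because TensorFlow tensor names such as 'w:0' contain a colon, which is not a legal character in a summary tag. A tiny sketch:

import tensorflow as tf

w = tf.Variable(0.0, name='w')
print(w.name)                    # 'w:0'
print(w.name.replace(':', '/'))  # 'w/0', safe to use as a summary tag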
dataX, dataY = utils.get_data_batches(NUM_DATA_PTS, BATCH_SIZE)


# Set up the neural network using PrettyTensor.
# Use a simple CNN with one hidden layer of 50 neurons.
input_t = tf.placeholder(tf.float32, (BATCH_SIZE, dataX[0].shape[1], dataX[0].shape[2], 1), name="input_t")
labels_t = tf.placeholder(tf.float32, (BATCH_SIZE, NUM_CLASSES), name="labels_t")
input_p = prettytensor.wrap(input_t)
hidden_p = (input_p
	.conv2d(3, 4, edges='VALID')
	.max_pool(2, 2)
	.flatten()
	.fully_connected(50))
softmax_p, loss_p = hidden_p.softmax_classifier(NUM_CLASSES, labels_t)
optimizer = tf.train.GradientDescentOptimizer(LEARNING_RATE)
optim_p = prettytensor.apply_optimizer(optimizer, losses=[loss_p])


# Train and evaluate the neural network.
with tf.Session() as sess:
	tf.initialize_all_variables().run()
	loss_over_time = []
	vloss_over_time = []
	for epoch in range(NUM_EPOCHS):
		# Save 1 batch for validation.
		for i in range(len(dataX)-1):
			loss, _ = sess.run([loss_p, optim_p], {
				input_t: dataX[i],
				labels_t: dataY[i]
			})
			loss_over_time.append(loss)
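		# Hypothetical continuation (not in the original listing): score the
		# held-out final batch once per epoch to populate vloss_over_time.
		vloss = sess.run(loss_p, {
			input_t: dataX[-1],
			labels_t: dataY[-1]
		})
		vloss_over_time.append(vloss)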
def main(_=None, weight_init=None, activation_f=tf.nn.sigmoid, data_min=0, data_scale=1.0, epochs=3,learning_rate=None):
    tf.reset_default_graph()
    input_placeholder  = tf.placeholder(tf.float32, [BATCH_SIZE, 2])
    output_placeholder = tf.placeholder(tf.float32, [BATCH_SIZE, 28, 28, 1])

    # Grab the data as numpy arrays.
    train_input, train_output = data_utils.mnist(training=True)
    test_input,  test_output  = data_utils.mnist(training=False)

    train_set = ut.mnist_select_n_classes(train_input, train_output, NUM_CLASSES, min=data_min, scale=data_scale)
    test_set  = ut.mnist_select_n_classes(test_input,  test_output,  NUM_CLASSES, min=data_min, scale=data_scale)
    train_input, train_output = train_set[1], train_set[0]
    test_input,  test_output  = test_set[1],  test_set[0]

    ut.print_info('train (min, max): (%f, %f)' % (np.min(train_set[0]), np.max(train_set[0])))

    visual_inputs, visual_output = train_set[1][0:BATCH_SIZE], train_set[0][0:BATCH_SIZE]
    epoch_reconstruction = []

    EPOCH_SIZE = len(train_input) // BATCH_SIZE
    TEST_SIZE = len(test_input) // BATCH_SIZE

    ut.print_info('train: %s' % str(train_input.shape))
    ut.print_info('test:  %s' % str(test_input.shape))
    ut.print_info('output shape:  %s' % str(train_output[0].shape))

    assert visual_inputs.shape == input_placeholder.get_shape()
    assert len(train_input.shape) == len(input_placeholder.get_shape())
    assert len(test_input.shape) == len(input_placeholder.get_shape())
    assert visual_output.shape == output_placeholder.get_shape()
    assert len(train_output.shape) == len(output_placeholder.get_shape())
    assert len(test_output.shape) == len(output_placeholder.get_shape())

    with pt.defaults_scope(activation_fn=activation_f,
                           # batch_normalize=True,
                           # learned_moments_update_rate=0.0003,
                           # variance_epsilon=0.001,
                           # scale_after_normalization=True
                           ):
        with pt.defaults_scope(phase=pt.Phase.train):
            with tf.variable_scope("model") as scope:
                output_tensor = decoder(encoder(input_placeholder), weight_init=weight_init)

    pretty_loss = loss(output_tensor, output_placeholder)

    optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
    # optimizer = tf.train.GradientDescentOptimizer(learning_rate=learning_rate)
    train = pt.apply_optimizer(optimizer, losses=[pretty_loss])

    init = tf.initialize_all_variables()
    runner = pt.train.Runner(save_path=FLAGS.save_path)

    best_q = 100000
    with tf.Session() as sess:
        sess.run(init)
        for epoch in xrange(epochs):
            # Shuffle the training data.

            if epoch % np.ceil(epochs / 40.0) == 0 or epoch + 1 == epochs:
                reconstruct, loss_value = sess.run([output_tensor, pretty_loss], {input_placeholder: visual_inputs, output_placeholder: visual_output})
                epoch_reconstruction.append(reconstruct)
                ut.print_info('epoch:%d (min, max): (%f %f)' %(epoch, np.min(reconstruct), np.max(reconstruct)))

            train_input, train_output = data_utils.permute_data(
                (train_input, train_output))

            runner.train_model(
                train,
                pretty_loss,
                EPOCH_SIZE,
                feed_vars=(input_placeholder, output_placeholder),
                feed_data=pt.train.feed_numpy(BATCH_SIZE, train_input, train_output)
            )
            accuracy = runner.evaluate_model(
                pretty_loss,
                TEST_SIZE,
                feed_vars=(input_placeholder, output_placeholder),
                feed_data=pt.train.feed_numpy(BATCH_SIZE, test_input, test_output))
            ut.print_time('Accuracy after epoch %d: %g%%' % (
                epoch + 1, accuracy * 100))
            if best_q > accuracy * 10:
                best_q = accuracy * 10


        ut.reconstruct_images_epochs(np.asarray(epoch_reconstruction), visual_output,
                                     save_params={'suf':'mn_trivs', 'act':activation_f, 'e':epochs, 'opt':optimizer,
                                                  'lr': learning_rate, 'init':weight_init, 'acu': int(best_q)})