예제 #1
0
    def __init__(self, mode='train'):
        """Create a DCGAN configured with the class-level default layout.

        Args:
            mode: The value of the attribute "mode"; passed on unchanged to
                the superclass constructor.

        Raises:
            CustomException: If the shared dataset is flagged as sequence
                data, or if it is not reported as an image dataset.
        """
        super().__init__(mode)

        # No recurrent processing here, hence sequence datasets are refused.
        if shared.data.sequence:
            raise CustomException("The network doesn't support sequence data.")

        # The GAN generates images, so the dataset has to provide images.
        has_image_inputs, _ = shared.data.is_image_dataset()
        if not has_image_inputs:
            raise CustomException("The network doesn't support datasets "
                                  "that do not have images as inputs.")

        # Generator: defaults first, then the layout derived from them.
        self._latent_size = DCGAN._DEFAULT_LATENT_SIZE
        self._num_glayers = DCGAN._DEFAULT_NUM_GLAYERS
        self._compute_generator_layout()

        # Discriminator: default depth, then the layout derived from it.
        self._num_dlayers = DCGAN._DEFAULT_NUM_DLAYERS
        self._compute_discriminator_layout()

        self._use_biases = True
    def __init__(self, mode='train'):
        """Create a CNN classifier configured with the default layout.

        Args:
            mode: The value of the attribute "mode"; passed on unchanged to
                the superclass constructor.

        Raises:
            CustomException: If the shared dataset is sequence data, is not
                a classification dataset, or has an input shape whose
                number of entries is neither 2 nor 3.
        """
        super().__init__(mode)

        # No recurrent processing here, hence sequence datasets are refused.
        if shared.data.sequence:
            raise CustomException("The network doesn't support sequence data.")

        # Only classification problems can be handled.
        if not shared.data.classification:
            raise CustomException("The network does only support "
                                  "classification datasets.")

        # 2D convolutions are used, so the input shape must describe either
        # a 2D input or a 2D input plus a channel dimension.
        num_in_dims = np.size(shared.data.in_shape)
        if num_in_dims not in [2, 3]:
            raise CustomException("The network does only support "
                                  "datasets with an input shape that is "
                                  "either 2D or 3D (with the 3rd "
                                  "dimension representing channels).")

        # Default depth, then the layout derived from it.
        self._num_layers = CNNClassifier._DEFAULT_NUM_LAYERS
        self._compute_network_layout()

        self._use_batchnorm = True
예제 #3
0
    def num_layers(self, value):
        """Change the number of layers (setter for the attribute num_layers).

        Only allowed while the network has not been built.

        An autoencoder typically has a bottleneck, so an odd number of
        layers is usually the better choice.

        Calling this method resets every hidden layer size to 100; assign
        meaningful sizes afterwards. The input layer keeps its current size
        and the output layer is set to that same size.

        Args:
            value: The number of layers in this network (incl. in- and
                output layer).

        Raises:
            CustomException: If the network has already been built or if
                value is smaller than 2.
        """
        if self._is_build:
            raise CustomException('The number of layers in a network can only'
                                  ' be changed if the network has not been '
                                  ' build yet.')

        if value < 2:
            raise CustomException('A network needs at least 2 layers '
                                  '(input and output)')

        self._num_layers = value

        # Rebuild the size list: in/out layers share the current input size,
        # everything in between falls back to 100 units.
        io_size = self._layer_sizes[0]
        self._layer_sizes = [io_size] + [100] * (value - 2) + [io_size]
예제 #4
0
    def test(self):
        """Evaluate the trained network using the whole test set.

        At the moment, this method computes the loss on the test set and
        logs it. If plotting is allowed and the dataset is an image dataset,
        a few example reconstructions are plotted as well.

        If no inference session could be created, an error is logged and the
        method returns without evaluating anything.

        Raises:
            CustomException: If the network is in training mode or has not
                been built yet.
        """
        if self.is_training():
            raise CustomException('Method can only be called in inference ' + \
                                  'mode.')
        if not self._is_build:
            raise CustomException('Network has not been build yet.')

        logger.info('Testing autoencoder ...')

        sess = self._get_inference_session()
        if sess is None:
            # Abort here; every call below would otherwise fail on None.
            logger.error('Could not create session. Testing aborted.')
            return

        test_ins = shared.data.get_test_inputs()
        test_outs = shared.data.get_test_outputs()

        # Feed the complete test set as a single batch.
        test_handle = sess.run(self._test_iter.string_handle())
        sess.run(self._test_iter.initializer,
             feed_dict={self._t_test_raw_in: test_ins,
                        self._t_test_raw_out: test_outs,
                        self._t_test_batch_size: shared.data.num_test_samples})

        # The value of the global step tensor is reported as "epochs".
        ckpt_epoch = tf.train.global_step(sess, self._t_global_step)
        logger.info('The network has been trained for %d epochs.' %
                    ckpt_epoch)

        [loss] = sess.run([self._t_loss],
            feed_dict={self._t_handle: test_handle})

        logger.info('Loss on test dataset is: %f' % loss)

        # Plot example test images.
        img_dataset, _ = shared.data.is_image_dataset()
        if self.allow_plots and img_dataset:
            num_plots = 6

            # We have to reinitialize to change the batch size (seems to be
            # a cleaner solution than processing the whole validation set).
            sess.run(self._test_iter.initializer,
                feed_dict={self._t_test_raw_in: test_ins[:num_plots, :],
                           self._t_test_raw_out: test_outs[:num_plots, :],
                           self._t_test_batch_size: num_plots})
            [inputs, reconstructions, labels] = sess.run(
                [self._t_ds_inputs, self._t_outputs, self._t_ds_outputs],
                feed_dict={self._t_handle: test_handle})

            dplt.plot_ae_images('Reconstructed Test Samples',
                                inputs, reconstructions, sample_outputs=labels,
                                interactive=True)

        logger.info('Testing autoencoder ... Done')
 def use_batchnorm(self, value):
     """Switch batch normalization on or off (setter for use_batchnorm).

     Args:
         value: The new value; stored as given in ``_use_batchnorm``.

     Raises:
         CustomException: If the network has already been built.
     """
     if not self._is_build:
         self._use_batchnorm = value
         return

     raise CustomException('The use_batchnorm attribute can only be '
                           'changed if the network has not been '
                           'build yet.')
예제 #6
0
    def num_dis_layers(self, value):
        """Setter for the attribute num_dis_layers.

        Note, this method can only be called for a network, that has not been
        build yet.

        Note, that this method will override all kernel sizes and filters that
        might have been set for the discriminator already.

        How do we construct a discriminator network with a certain depth?

        Let N be the number of layers in the discriminator. Then there will
        be N-1 convolutional layers followed by a single fully-connected
        layer. The fully-connected layer has a single output. The
        convolutional layers are meant to downsample the input. Therefore,
        they are strided, usually with stride 2 (to halve the input). If a
        layer input is smaller than 4 (in any dimension), then the stride is
        reduced to 1.
        All layers use 'same' padding, such that the output size can always
        be computed as follows (irrespective of the chosen kernel size):
            out_size = ceil(in_size / 2)

        Filter sizes start with 64 and double with every layer.

        The predefined kernel size is 5x5.

        Args:
            value: The number of layers in the discriminator network. Must
                be at least 1, since the final fully-connected layer is
                always present.

        Raises:
            CustomException: If the network has already been built or if
                value is smaller than 1.
        """
        if self._is_build:
            raise CustomException(
                'The number of layers in a network can only' +
                ' be changed if the network has not been ' + ' build yet.')

        # Same lower bound as the generator setter enforces.
        if value < 1:
            raise CustomException('A discriminator needs at least 1 layer.')

        self._num_dlayers = value
        self._compute_discriminator_layout()
예제 #7
0
    def test(self):
        """Evaluate the trained network using the whole test set.

        Logs the real mutual information and the estimate obtained on the
        test set.

        If no inference session could be created, an error is logged and the
        method returns without evaluating anything.

        Raises:
            CustomException: If the network has not been built yet.
        """
        if not self._is_build:
            raise CustomException('Network has not been build yet.')

        logger.info('Testing MI Estimator ...')

        sess = self._get_inference_session()
        if sess is None:
            # Abort here; every call below would otherwise fail on None.
            logger.error('Could not create session. Testing aborted.')
            return

        test_ins = shared.data.get_test_inputs()
        test_outs = shared.data.get_test_outputs()

        # Feed the complete test set as a single batch.
        test_handle = sess.run(self._test_iter.string_handle())
        sess.run(self._test_iter.initializer,
                 feed_dict={
                     self._t_test_raw_in: test_ins,
                     self._t_test_raw_out: test_outs,
                     self._t_test_batch_size: shared.data.num_test_samples
                 })

        # The value of the global step tensor is reported as "epochs".
        ckpt_epoch = tf.train.global_step(sess, self._t_global_step)
        logger.info('The network has been trained for %d epochs.' % ckpt_epoch)

        real_mi, estimated_mi = sess.run([self._t_real_mi, self._t_mi],
                                         feed_dict={
                                             self._t_handle: test_handle,
                                             self._t_mi_known: True
                                         })

        logger.info('Real MI: %f' % real_mi)
        logger.info('Estimated MI on test set: %f' % estimated_mi)

        logger.info('Testing MI Estimator ... Done')
예제 #8
0
    def latent_size(self, value):
        """Store a new latent size (setter for the attribute latent_size).

        The latent size may only be changed while the network is unbuilt.

        Args:
            value: The new latent size; stored as given.

        Raises:
            CustomException: If the network has already been built.
        """
        if not self._is_build:
            self._latent_size = value
            return

        raise CustomException('The latent size can only be changed if the '
                              'network has not been build yet.')
    def test(self):
        """Evaluate the trained network using the whole test set.

        Logs accuracy and loss on the test set. If plotting is allowed, a
        few test samples are plotted together with their labels and the
        predicted output probabilities.

        If no inference session could be created, an error is logged and the
        method returns without evaluating anything.

        Raises:
            CustomException: If the network has not been built yet.
        """
        if not self._is_build:
            raise CustomException('Network has not been build yet.')

        logger.info('Testing CNN Classifier ...')

        sess = self._get_inference_session()
        if sess is None:
            # Abort here; every call below would otherwise fail on None.
            logger.error('Could not create session. Testing aborted.')
            return

        test_ins = shared.data.get_test_inputs()
        test_outs = shared.data.get_test_outputs(use_one_hot=True)

        # Feed the complete test set as a single batch.
        test_handle = sess.run(self._test_iter.string_handle())
        sess.run(self._test_iter.initializer,
                 feed_dict={
                     self._t_test_raw_in: test_ins,
                     self._t_test_raw_out: test_outs,
                     self._t_test_batch_size: shared.data.num_test_samples
                 })

        # The value of the global step tensor is reported as "epochs".
        ckpt_epoch = tf.train.global_step(sess, self._t_global_step)
        logger.info('The network has been trained for %d epochs.' % ckpt_epoch)

        acc, loss = sess.run([self._t_accuracy, self._t_loss],
                             feed_dict={self._t_handle: test_handle})

        logger.info('Test Accuracy: %f' % acc)
        logger.info('Loss on test set: %f' % loss)

        if self.allow_plots:
            num_plots = 8

            # We have to reinitialize to change the batch size (seems to be
            # a cleaner solution than processing the whole validation set).
            sess.run(self._test_iter.initializer,
                     feed_dict={
                         self._t_test_raw_in: test_ins[:num_plots, :],
                         self._t_test_raw_out: test_outs[:num_plots, :],
                         self._t_test_batch_size: num_plots
                     })
            [inps, lbls, preds] = sess.run(
                [self._t_ds_inputs, self._t_ds_outputs, self._t_output_probs],
                feed_dict={self._t_handle: test_handle})

            shared.data.plot_samples('Test Samples',
                                     inps,
                                     outputs=lbls,
                                     predictions=preds,
                                     interactive=True)

        logger.info('Testing CNN Classifier ... Done')
예제 #10
0
    def set_hidden_layer_size(self, layer_ind, layer_size):
        """Assign a new size to one hidden layer.

        Hidden layers are indexed from 1 (first hidden layer) up to
        num_layers-2 (last hidden layer); index 0 and the final index belong
        to the input and output layer and cannot be changed here.

        As with the num_layers setter, this is only allowed while the
        network has not been built.

        Args:
            layer_ind: The index of the hidden layer.
            layer_size: The new size of the layer.

        Raises:
            CustomException: If the network has already been built or if
                layer_ind does not address a hidden layer.
        """
        if self._is_build:
            raise CustomException('The hidden layer size can only be changed '
                                  'if the network has not been build yet.')

        # Highest index that still refers to a hidden layer.
        last_hidden = self.num_layers - 2
        if layer_ind < 1 or layer_ind > last_hidden:
            msg = 'Hidden layers have an index between 1 and '
            msg += str(last_hidden)
            msg += '.'
            raise CustomException(msg)

        self._layer_sizes[layer_ind] = layer_size
예제 #11
0
    def run(self, inputs):
        """Run the network with the given inputs.

        Args:
            inputs: Samples that align with the dataset (2D numpy array).

        Returns:
            The outputs of the network as 2D numpy array, or None if no
            inference session could be created (an error is logged in that
            case).

        Raises:
            CustomException: If the network is in training mode or has not
                been built yet.
        """
        if self.is_training():
            raise CustomException('Method can only be called in inference ' + \
                                  'mode.')
        if not self._is_build:
            raise CustomException('Network has not been build yet.')

        sess = self._get_inference_session()
        if sess is None:
            # Abort here; sess.run below would otherwise fail on None.
            logger.error('Could not create session. Inference aborted.')
            return None

        [outputs] = sess.run([self._t_outputs],
            feed_dict={self._t_inputs: inputs})

        return outputs
예제 #12
0
    def __init__(self, mode='train'):
        """Create a simple autoencoder with the default layer layout.

        The input and output layer both get as many units as the product of
        the dataset's input shape; the hidden sizes come from the class-level
        defaults.

        Args:
            mode: The value of the attribute "mode"; passed on unchanged to
                the superclass constructor.

        Raises:
            CustomException: If the shared dataset is sequence data.
        """
        super().__init__(mode)

        # No recurrent processing here, hence sequence datasets are refused.
        if shared.data.sequence:
            raise CustomException("The network doesn't support sequence data.")

        # Hidden layers plus one input and one output layer.
        self._num_layers = SimpleAE._DEFAULT_NUM_HLAYERS + 2

        flat_in_size = np.prod(shared.data.in_shape)
        io_layer = [flat_in_size]
        self._layer_sizes = io_layer + SimpleAE._DEFAULT_SIZE_HLAYERS + \
            io_layer
예제 #13
0
    def num_layers(self, value):
        """Change the number of layers (setter for the attribute num_layers).

        All layer sizes that were configured before are overwritten, because
        the network layout is recomputed from scratch.

        Default layout, as documented for this network: the first
        fully-connected layer has 100 neurons, the last layer has a single
        output neuron, and the sizes in between shrink linearly.

        Args:
            value: The new number of layers; stored as given.

        Raises:
            CustomException: If the network has already been built.
        """
        if not self._is_build:
            self._num_layers = value
            # Derive the layout for the new depth.
            self._compute_network_layout()
            return

        raise CustomException('The num_layers attribute can only be '
                              'changed if the network has not been '
                              'build yet.')
    def num_layers(self, value):
        """Change the number of layers (setter for the attribute num_layers).

        All layer-specific settings that were configured before are
        overwritten, because the network layout is recomputed from scratch.

        Default layout, as documented for this network: convolutional and
        pooling layers both use "same" padding. Pooling layers have a stride
        of 2; the previous documentation also states a stride of 2 for the
        convolutional layers.
        NOTE(review): that sentence contrasted the two layer types, so the
        convolutional stride may actually differ - confirm against
        _compute_network_layout.

        The default kernel sizes are 5x5 (conv layers) and 2x2 (pool layers).
        The number of filters is 32 in the first layer and doubles with every
        further layer.

        Args:
            value: The new number of layers; stored as given.

        Raises:
            CustomException: If the network has already been built.
        """
        if not self._is_build:
            self._num_layers = value
            # Derive the layout for the new depth.
            self._compute_network_layout()
            return

        raise CustomException('The num_layers attribute can only be '
                              'changed if the network has not been '
                              'build yet.')
예제 #15
0
    def set_scope_name(self, sname):
        """Choose the name used for the tf.name_scope of this network.

        The scope name can later be used to tell different network instances
        apart, for example in Tensorboard.

        This has to happen before the network is built.

        Only the major name scope within a graph is affected. Different
        class instances (and therefore different graphs) are still managed by
        different Tensorboard instances.

        Args:
            sname: A string, defining the named scope of the graph.
                NOTE(review): earlier documentation mentioned a default of
                'graph_scope'; this method itself defines no default.

        Raises:
            CustomException: If the network has already been built.
        """
        # This relies on every subclass maintaining the is_build attribute
        # correctly.
        if not self._is_build:
            self._scope_name = sname
            return

        raise CustomException('The name scope has to be set before the '
                              'network was built.')
예제 #16
0
    def train(self, num_iter=10000, batch_size=32, init_lr=20,
              lr_decay_interval=1000, val_interval=1000, val_bs=1000):
        """Train the network.

        The network is trained via gradient descent with decreasing learning
        rate: the rate starts at init_lr and is halved every
        lr_decay_interval iterations.

        Every val_interval iterations a checkpoint is written and the
        training status is validated. A final checkpoint is written when the
        training loop ends.

        Note, if no validation set is available, the test set will be used.

        Args:
            num_iter: The number of training iterations.
            batch_size: The training batch size.
            init_lr: The initial learning rate.
            lr_decay_interval: After how many iterations the learning rate
                should be halved. If None, the learning rate is never
                decayed.
            val_interval: How often the training status should be validated.
            val_bs: The batch size of the validation set to use.

        Raises:
            CustomException: If the network is not in training mode or has
                not been built yet.
        """
        if not self.is_training():
            raise CustomException('Method can only be called in training ' + \
                                  'mode.')
        if not self._is_build:
            raise CustomException('Network has not been build yet.')

        logger.info('Training autoencoder ...')

        # Current learning rate; fed into the graph at every step.
        lr = init_lr

        with self._graph.as_default() as g:
            summary_writer = tf.summary.FileWriter(
                self._get_summary_dir('train_summary'), g)

            checkpoint_saver = tf.train.Saver(max_to_keep=5,
                                              keep_checkpoint_every_n_hours=3)

            init_op = tf.global_variables_initializer()

            train_step = tf.train.GradientDescentOptimizer(
                learning_rate=self._t_learning_rate).minimize(self._t_loss,
                    global_step=self._t_global_step)

        self._build_validation_graph(val_bs)

        with tf.Session(graph=self._graph, config=self._get_config_proto()) \
                as sess:
            # Restore training if requested.
            iter_start, iter_end = self._init_training(sess, num_iter, init_op,
                self._t_global_step, checkpoint_saver)

            # Initialize training set.
            train_handle = sess.run(self._train_iter.string_handle())
            sess.run(self._train_iter.initializer,
                 feed_dict={
                    self._t_train_raw_in: shared.data.get_train_inputs(),
                    self._t_train_raw_out:
                        shared.data.get_train_outputs(),
                    self._t_train_batch_size: batch_size})

            for i in range(iter_start, iter_end):
                if i % val_interval == 0:
                    checkpoint_saver.save(sess, os.path.join(
                        self._checkpoint_dir, 'model'), global_step=i)

                    self._validate_training(i)

                _, summary = sess.run([train_step, self._t_summaries],
                    feed_dict={self._t_handle: train_handle,
                               self._t_learning_rate: lr})
                summary_writer.add_summary(summary, i)

                # Step-wise learning rate decay (halving).
                if lr_decay_interval is not None and \
                        i > 0 and i % lr_decay_interval == 0:
                    lr /= 2
                    logger.info('Epoch %d: learning rate decayed to: %f' % \
                                (i, lr))

            checkpoint_saver.save(sess, os.path.join(
                self._checkpoint_dir, 'model'), global_step=iter_end)
            logger.info('Training ends after %d iterations.' % iter_end)

        summary_writer.close()

        # Wait until all validation threads are done (so that we don't close
        # the validation summary writer too early).
        for val_thread in self._val_threads:
            val_thread.join()
        self._val_summary_writer.close()

        logger.info('Training autoencoder ... Done')
예제 #17
0
    def test(self):
        """Evaluate the trained network using the whole test set.

        Note, that we sample random latent input for the generator.

        Logs generator and discriminator loss on the test set and, if an
        accuracy tensor exists, the test accuracy. If plotting is allowed, a
        few real and generated samples are plotted together with the
        discriminator outputs.

        If no inference session could be created, an error is logged and the
        method returns without evaluating anything.

        Raises:
            CustomException: If the network has not been built yet.
        """
        if not self._is_build:
            raise CustomException('Network has not been build yet.')

        logger.info('Testing DCGAN ...')

        sess = self._get_inference_session()
        if sess is None:
            # Abort here; every call below would otherwise fail on None.
            logger.error('Could not create session. Testing aborted.')
            return

        test_ins = shared.data.get_test_inputs()
        test_outs = shared.data.get_test_outputs()
        # One latent vector per test sample.
        test_latent_inputs = self.sample_latent(shared.data.num_test_samples)

        # Feed the complete test set as a single batch.
        test_handle = sess.run(self._test_iter.string_handle())
        sess.run(self._test_iter.initializer,
                 feed_dict={
                     self._t_test_raw_in: test_ins,
                     self._t_test_raw_out: test_outs,
                     self._t_test_batch_size: shared.data.num_test_samples
                 })

        # The value of the global step tensor is reported as "epochs".
        ckpt_epoch = tf.train.global_step(sess, self._t_global_step)
        logger.info('The network has been trained for %d epochs.' % ckpt_epoch)

        # Note, that subclasses (such as a WassersteinGan), don't have a
        # meaningful accuracy.
        if self._t_accuracy is None:
            g_loss, d_loss = sess.run([self._g_loss, self._d_loss],
                                      feed_dict={
                                          self._g_inputs: test_latent_inputs,
                                          self._t_handle: test_handle
                                      })
        else:
            acc, g_loss, d_loss = sess.run( \
                [self._t_accuracy, self._g_loss, self._d_loss],
                feed_dict={self._g_inputs: test_latent_inputs,
                           self._t_handle: test_handle})

            logger.info('Test Accuracy: %f' % acc)
        logger.info('Generator loss on test set: %f' % g_loss)
        logger.info('Discriminator loss on test set: %f' % d_loss)

        if self.allow_plots:
            num_plots = min(8, test_latent_inputs.shape[0])

            Z_in = test_latent_inputs[:num_plots, :]

            # We have to reinitialize to change the batch size (seems to be
            # a cleaner solution than processing the whole validation set).
            sess.run(self._test_iter.initializer,
                     feed_dict={
                         self._t_test_raw_in: test_ins[:num_plots, :],
                         self._t_test_raw_out: test_outs[:num_plots, :],
                         self._t_test_batch_size: num_plots
                     })
            real_imgs, real_lbls, fake_imgs, fake_dis_outs, real_dis_outs = \
                sess.run([self._t_ds_inputs, self._t_ds_outputs,
                          self._g_outputs, self._d_outputs_fake,
                          self._d_outputs_real],
                feed_dict={self._g_inputs: Z_in,
                           self._t_handle: test_handle})

            dplt.plot_gan_images('Test Samples',
                                 real_imgs,
                                 fake_imgs,
                                 real_outputs=real_lbls,
                                 real_dis_outputs=real_dis_outs,
                                 fake_dis_outputs=fake_dis_outs,
                                 shuffle=True,
                                 interactive=True,
                                 figsize=(10, 12))

        logger.info('Testing DCGAN ... Done')
    def train(self, num_iter=10000, batch_size=32, learning_rate=0.001, \
              momentum=0.9, val_interval=1000, val_bs=1000):
        """Train the network.

        The network is trained via a Momentum Optimizer.

        Every val_interval iterations a checkpoint is written and the
        training process is validated. A final checkpoint is written when the
        training loop ends.

        Note, if no validation set is available, the test set will be used.

        Args:
            num_iter: The number of training iterations.
            batch_size: The training batch size.
            learning_rate: See docs of "tf.train.MomentumOptimizer".
            momentum: See docs of "tf.train.MomentumOptimizer".
            val_interval: How often the training status should be validated.
            val_bs: The batch size of the validation set to use.

        Raises:
            CustomException: If the network has not been built yet.
        """
        if not self._is_build:
            raise CustomException('Network has not been build yet.')

        logger.info('Training CNN Classifier ...')

        # All training-only ops are added to the network's own graph.
        with self._graph.as_default() as g:
            #print([v.name for v in tf.trainable_variables()])

            # Existing summaries are removed unless training is continued.
            summary_writer = tf.summary.FileWriter( \
                self._get_summary_dir('train_summary',
                    rm_existing=not self.continue_training), g)

            self._init_validation(val_bs)

            optimizer = tf.train.MomentumOptimizer(learning_rate, momentum)

            # We need to add the update_ops for the batchnorm moving averages
            # to the training steps. Otherwise, they won't be executed.
            update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
            with tf.control_dependencies(update_ops):
                train_step = optimizer.minimize(
                    self._t_loss, global_step=self._t_global_step)

            # Created after minimize(), so variables that the optimizer
            # added to the graph are covered by the initializer as well.
            init_op = tf.global_variables_initializer()

            checkpoint_saver = tf.train.Saver(max_to_keep=5, \
                                              keep_checkpoint_every_n_hours=3)

        with tf.Session(graph=self._graph, config=self._get_config_proto()) \
                as sess:
            # Restore training if requested.
            iter_start, iter_end = self._init_training(sess, num_iter, init_op,
                                                       self._t_global_step,
                                                       checkpoint_saver)

            # Initialize training set.
            train_handle = sess.run(self._train_iter.string_handle())
            sess.run(self._train_iter.initializer,
                     feed_dict={
                         self._t_train_raw_in:
                         shared.data.get_train_inputs(),
                         self._t_train_raw_out:
                         shared.data.get_train_outputs(use_one_hot=True),
                         self._t_train_batch_size:
                         batch_size
                     })

            for i in range(iter_start, iter_end):
                # Checkpoint and validate before running iteration i.
                if i % val_interval == 0:
                    checkpoint_saver.save(sess, os.path.join( \
                        self._checkpoint_dir, 'model'), global_step=i)

                    self._validate_training_process(sess, i)

                # Progress message; "epoch" here is the iteration index i.
                elif i % 100 == 0 and i > 0:
                    logger.info('Running training epoch: %d.' % i)

                # NOTE(review): _t_mode=True presumably selects training
                # behavior inside the graph - confirm where it is defined.
                _, summary = sess.run([train_step, self._t_summaries],
                                      feed_dict={
                                          self._t_handle: train_handle,
                                          self._t_mode: True
                                      })
                summary_writer.add_summary(summary, i)

            # Final checkpoint, tagged with the last iteration count.
            checkpoint_saver.save(sess, os.path.join( \
                self._checkpoint_dir, 'model'), global_step=iter_end)
            logger.info('Training ends after %d iterations.' % iter_end)

        summary_writer.close()
        self._val_summary_writer.close()

        logger.info('Training CNN Classifier ... Done')
예제 #19
0
    def num_gen_layers(self, value):
        """Change the generator depth (setter for num_gen_layers).

        Only allowed while the network has not been built. All kernel sizes
        and filters that were already set for the generator are overridden,
        because the generator layout is recomputed.

        Structure: the first layer is treated as a fully-connected layer and
        every following layer is a transpose convolutional (TC) layer.

        How an arbitrarily deep generator is initialized, as documented for
        this network:

        For a dataset with input images of shape [W,H,C], that shape should
        be the output of the final TC layer.

        Number of filters per layer:
            The last TC layer has C filters, the one before 128, the one
            before that 256, ...
        The initial fully-connected layer gets a number of filters assigned
        in the same way, since it also outputs data via many channels.

        Strides and padding: each TC layer should roughly double its input
        size. If the output shape is already smaller than 4 (in width or
        height), the stride is set to 1 and the padding to 'SAME', so the
        size doesn't change. Otherwise the stride is 2. For an even output
        size the padding is 'SAME'; for an odd one it is 'VALID' with a
        fixed kernel size of 3. This follows from
            out_size = s*(in_size-1) + k - 2p
        where s - stride, k - kernel size, p - padding.
        Valid padding means p = 0, which gives (s=2, k=3):
            out_size = 2*(in_size-1) + 3 = 2 * in_size + 1
        Hence, in_size is set to out_size // 2.

        Special case: if width and height have different parity, the padding
        is valid and the even dimension gets a kernel size of 2 while the odd
        one gets a kernel size of 3. This can be verified with the equations
        above.

        As a consequence of this construction, the kernel size can only be
        chosen for layers with 'SAME' padding. The default kernel size in
        that case is 5x5.

        Args:
            value: The number of layers in the generator network.

        Raises:
            CustomException: If the network has already been built or if
                value is smaller than 1.
        """
        if self._is_build:
            raise CustomException(
                'The number of layers in a network can only'
                ' be changed if the network has not been '
                ' build yet.')

        if value < 1:
            raise CustomException('A generator needs at least 1 layer.')

        # Store the depth, then derive the layout for it.
        self._num_glayers = value
        self._compute_generator_layout()
예제 #20
0
    def train(self, num_iter=10000, batch_size=128, learning_rate=0.0002, \
              beta1=0.5, beta2=0.999, val_interval=1000, val_bs=1000):
        """Train the network.

        The network is trained via the Adam optimizer.

        Generator and discriminator are updated together in every iteration.
        Every val_interval iterations a checkpoint is written and the
        training process is validated. A final checkpoint is written when the
        training loop ends.

        Note, if no validation set is available, the test set will be used.

        Args:
            num_iter: The number of training iterations.
            batch_size: The training batch size.
            learning_rate: See docs of "tf.train.AdamOptimizer".
            beta1: See docs of "tf.train.AdamOptimizer".
            beta2: See docs of "tf.train.AdamOptimizer".
            val_interval: How often the training status should be validated.
            val_bs: The batch size of the validation set to use.

        Raises:
            CustomException: If the network has not been built yet.
        """
        if not self._is_build:
            raise CustomException('Network has not been build yet.')

        logger.info('Training DCGAN ...')

        # All training-only ops are added to the network's own graph.
        with self._graph.as_default() as g:
            #print([v.name for v in tf.trainable_variables()])

            # Existing summaries are removed unless training is continued.
            summary_writer = tf.summary.FileWriter( \
                self._get_summary_dir('train_summary',
                                      rm_existing=not self.continue_training),
                                      g)

            self._init_validation(val_bs)

            # TODO Such a dictionary should be part of the arguments of this
            # method to allow for easier choices of the used optimizer.
            op_params = {
                'learning_rate': learning_rate,
                'beta1': beta1,
                'beta2': beta2
            }
            gen_optimizer, dis_optimizer = self._get_optimizers(op_params)

            # We need to add the update_ops for the batchnorm moving averages
            # to the training steps. Otherwise, they won't be executed.
            update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
            with tf.control_dependencies(update_ops):
                # Only the generator step receives the global step, so the
                # counter advances once per training iteration.
                gen_train_step = gen_optimizer.minimize(
                    self._g_loss,
                    global_step=self._t_global_step,
                    var_list=self._g_vars)
                dis_train_step = dis_optimizer.minimize(self._d_loss,
                                                        var_list=self._d_vars)

            # Created after minimize(), so variables that the optimizers
            # added to the graph are covered by the initializer as well.
            init_op = tf.global_variables_initializer()

            checkpoint_saver = tf.train.Saver(max_to_keep=5, \
                                              keep_checkpoint_every_n_hours=3)

        with tf.Session(graph=self._graph, config=self._get_config_proto()) \
                as sess:
            # Restore training if requested.
            iter_start, iter_end = self._init_training(sess, num_iter, init_op,
                                                       self._t_global_step,
                                                       checkpoint_saver)

            # Initialize training set.
            train_handle = sess.run(self._train_iter.string_handle())
            sess.run(self._train_iter.initializer,
                     feed_dict={
                         self._t_train_raw_in: shared.data.get_train_inputs(),
                         self._t_train_raw_out:
                         shared.data.get_train_outputs(),
                         self._t_train_batch_size: batch_size
                     })

            for i in range(iter_start, iter_end):
                # Checkpoint and validate before running iteration i.
                if i % val_interval == 0:
                    checkpoint_saver.save(sess, os.path.join( \
                        self._checkpoint_dir, 'model'), global_step=i)

                    self._validate_training_process(sess, i)

                # Progress message; "epoch" here is the iteration index i.
                elif i % 100 == 0 and i > 0:
                    logger.info('Running training epoch: %d.' % i)

                # Both train steps run in the same session call, with fresh
                # latent samples drawn for every iteration.
                # NOTE(review): _t_mode=True presumably selects training
                # behavior inside the graph - confirm where it is defined.
                _, _, summary = sess.run( \
                    [gen_train_step, dis_train_step, self._t_summaries],
                    feed_dict={self._g_inputs: self.sample_latent(batch_size),
                               self._t_handle: train_handle,
                               self._t_mode: True})
                summary_writer.add_summary(summary, i)

            # Final checkpoint, tagged with the last iteration count.
            checkpoint_saver.save(sess, os.path.join( \
                self._checkpoint_dir, 'model'), global_step=iter_end)
            logger.info('Training ends after %d iterations.' % iter_end)

        summary_writer.close()
        self._val_summary_writer.close()

        logger.info('Training DCGAN ... Done')
def plot_ae_images(title,
                   sample_inputs,
                   reconstructions,
                   sample_outputs=None,
                   num_samples_per_row=3,
                   show=True,
                   filename=None,
                   interactive=False,
                   figsize=(10, 6),
                   outer_space=(0.4, 0.2),
                   inner_space=(0.2, 0.4)):
    """Plot input-output pairs of an autoencoder network.

    Every sample occupies one cell of an outer grid. Each cell is split
    into two side-by-side images: the original input and its
    reconstruction.

    Args:
        title: The title of the whole figure.
        sample_inputs: A 2D numpy array, where each row is an input sample of a
            dataset, that encodes single images as inputs.
        reconstructions: The corresponding outputs of the Autoencoder network.
        sample_outputs (optional): The actual outputs according to the dataset.
            This is only used, if the dataset is a classification dataset, such
            that the class labels can be added to the titles. If omitted, no
            class labels are shown.
        num_samples_per_row (default: 3): Maximum number of samples plotted
            per row in the generated figure.
        show (default: True): Whether the plot should be shown.
        filename (optional): If provided, the figure will be stored under
            this filename.
        interactive (default: False): Turn on interactive mode. We mainly
            use this option to ensure that the program will run in
            background while figure is displayed. The figure will be
            displayed until another one is displayed, the user closes it or
            the program has terminated. If this option is deactivated, the
            program will freeze until the user closes the figure.
            Note, if using the iPython inline backend, this option has no
            effect.
        figsize (default: (10, 6)): A tuple, determining the size of the
            figure in inches.
        outer_space (default: (0.4, 0.2)): A tuple. Defines the outer grid
            spacing for the plot (width, height).
        inner_space (default: (0.2, 0.4)): Same as outer_space, just for the
            inner grid space.

    Raises:
        CustomException: If the current dataset does not have single images
            as inputs (i.e., inputs are no images or the data is sequential).
    """
    data = shared.data

    in_is_img, _ = data.is_image_dataset()

    if not in_is_img or data.sequence:
        raise CustomException('This method can only be called for datasets ' \
                              + 'with single images as inputs.')

    # Reverse one-hot encoding.
    if data.is_one_hot and sample_outputs is not None and \
            sample_outputs.shape[1] == data.num_classes:
        sample_outputs = data._to_one_hot(sample_outputs, True)

    # Labels can only be added to the titles if the caller supplied them;
    # "sample_outputs" is optional.
    show_labels = data.classification and sample_outputs is not None

    num_plots = sample_inputs.shape[0]
    num_cols = int(min(num_plots, num_samples_per_row))
    num_rows = int(np.ceil(num_plots / num_samples_per_row))

    fig = plt.figure(figsize=figsize)
    outer_grid = gridspec.GridSpec(num_rows,
                                   num_cols,
                                   wspace=outer_space[0],
                                   hspace=outer_space[1])

    # The 'y' is a dirty hack to ensure, that the titles are not overlapping.
    plt.suptitle(title, size=20, y=1.1)
    if interactive:
        plt.ion()

    for i in range(num_plots):
        inner_grid = gridspec.GridSpecFromSubplotSpec(
            1,
            2,
            subplot_spec=outer_grid[i],
            wspace=inner_space[0],
            hspace=inner_space[1])

        subtitle = 'Sample %d' % i
        if show_labels:
            # "np.asscalar" has been removed from NumPy; ".item()" is the
            # documented replacement.
            label = int(np.asarray(sample_outputs[i]).item())
            subtitle += ' (Class %d)' % label

        # Invisible axes spanning the whole outer cell; it only carries the
        # per-sample title.
        ax = plt.Subplot(fig, outer_grid[i])
        ax.set_title(subtitle, fontsize=16)
        ax.set_axis_off()
        fig.add_subplot(ax)

        ax = plt.Subplot(fig, inner_grid[0])
        ax.set_axis_off()
        ax.set_title('Original')
        ax.imshow(np.squeeze(np.reshape(sample_inputs[i, :], data.in_shape)))
        fig.add_subplot(ax)

        ax = plt.Subplot(fig, inner_grid[1])
        ax.set_axis_off()
        ax.set_title('Reconstruction')
        ax.imshow(np.squeeze(np.reshape(reconstructions[i, :], data.in_shape)))
        fig.add_subplot(ax)

    # Save before showing: a blocking "plt.show()" may discard the figure
    # once its window is closed, which would leave an empty image on disk.
    if filename is not None:
        fig.savefig(filename, bbox_inches='tight')

    if show:
        plt.show()
def plot_gan_images(title,
                    real_imgs,
                    fake_imgs,
                    real_outputs=None,
                    real_dis_outputs=None,
                    fake_dis_outputs=None,
                    shuffle=False,
                    num_samples_per_row=4,
                    show=True,
                    filename=None,
                    interactive=False,
                    figsize=(10, 6),
                    spacing=(0.4, 0.4)):
    """Plot real and fake samples from a GAN network.

    All real and fake images are drawn into one common grid. Each image is
    titled 'Real' or 'Fake', optionally extended by the class label (real
    images only) and the discriminator output.

    Args:
        title: The title of the whole figure.
        real_imgs: A 2D numpy array, where each row is an input sample from a
            dataset, that encodes single images as inputs.
        fake_imgs: A 2D numpy array, where the images are generated by the
            generator network.
        real_outputs (optional): The actual outputs of the real images
            according to the dataset.
            This is only used, if the dataset is a classification dataset, such
            that the class labels can be added to the titles.
        real_dis_outputs (optional): The output confidences of the
            discriminator network for real images.
        fake_dis_outputs (optional): Same as "real_dis_outputs" for fake
            images.
        shuffle (default: False): Whether the order of images should be
            shuffled randomly. If deactivated, all real images are plotted
            before the fake ones.
        num_samples_per_row (default: 4): Maximum number of samples plotted
            per row in the generated figure.
        show (default: True): Whether the plot should be shown.
        filename (optional): If provided, the figure will be stored under
            this filename.
        interactive (default: False): Turn on interactive mode. We mainly
            use this option to ensure that the program will run in
            background while figure is displayed. The figure will be
            displayed until another one is displayed, the user closes it or
            the program has terminated. If this option is deactivated, the
            program will freeze until the user closes the figure.
            Note, if using the iPython inline backend, this option has no
            effect.
        figsize (default: (10, 6)): A tuple, determining the size of the
            figure in inches.
        spacing (default: (0.4, 0.4)): A tuple. Defines the spacing between
            subplots (width, height).

    Raises:
        CustomException: If the current dataset does not have single images
            as inputs (i.e., inputs are no images or the data is sequential).
    """
    data = shared.data

    in_is_img, _ = data.is_image_dataset()

    if not in_is_img or data.sequence:
        raise CustomException('This method can only be called for datasets ' \
                              + 'with single images as inputs.')

    # Reverse one-hot encoding.
    if data.is_one_hot and real_outputs is not None and \
            real_outputs.shape[1] == data.num_classes:
        real_outputs = data._to_one_hot(real_outputs, True)

    num_plots = real_imgs.shape[0] + fake_imgs.shape[0]
    num_cols = int(min(num_plots, num_samples_per_row))
    num_rows = int(np.ceil(num_plots / num_samples_per_row))

    # One flag per subplot: 1 -> take the next real image, 0 -> take the next
    # fake image. Shuffling the flags mixes the two groups while keeping the
    # order within each group.
    real_or_fake = np.concatenate(
        [np.ones(real_imgs.shape[0]),
         np.zeros(fake_imgs.shape[0])])
    if shuffle:
        np.random.shuffle(real_or_fake)
    real_ind = 0
    fake_ind = 0

    fig = plt.figure(figsize=figsize)

    plt.suptitle(title, size=20)
    plt.subplots_adjust(wspace=spacing[0], hspace=spacing[1])

    if interactive:
        plt.ion()

    for i in range(num_plots):
        ax = fig.add_subplot(num_rows, num_cols, i + 1)

        if real_or_fake[i]:
            img = real_imgs[real_ind, :]
            if real_outputs is None or not data.classification:
                subtitle = 'Real'
            else:
                # "np.asscalar" has been removed from NumPy; ".item()" is
                # the documented replacement.
                label = int(np.asarray(real_outputs[real_ind]).item())
                subtitle = 'Real (Class %d)' % (label)
            if real_dis_outputs is not None:
                subtitle += '\nDiscriminator: %.4f' % \
                    round(np.asarray(real_dis_outputs[real_ind]).item(), 4)
            real_ind += 1
        else:
            img = fake_imgs[fake_ind, :]
            subtitle = 'Fake'
            if fake_dis_outputs is not None:
                subtitle += '\nDiscriminator: %.4f' % \
                    round(np.asarray(fake_dis_outputs[fake_ind]).item(), 4)
            fake_ind += 1

        ax.set_axis_off()
        ax.set_title(subtitle)
        ax.imshow(np.squeeze(np.reshape(img, data.in_shape)))

    # Save before showing: a blocking "plt.show()" may discard the figure
    # once its window is closed, which would leave an empty image on disk.
    if filename is not None:
        fig.savefig(filename, bbox_inches='tight')

    if show:
        plt.show()
예제 #23
0
    def train(self, num_iter=10000, batch_size=64, learning_rate=0.00005, \
              n_critic=5, clip_val=0.01, val_interval=1000, val_bs=1000):
        """Train the network.

        The generator and critic optimizers are obtained from
        "self._get_optimizers" (RMSProp, according to the original
        documentation of this method).

        In every training iteration the critic is updated several times
        (each update followed by weight clipping) before the generator is
        updated once.

        Note, if no validation set is available, the test set will be used.

        Args:
            num_iter: The number of training iterations.
            batch_size: The training batch size.
            learning_rate: See docs of "tf.train.RMSPropOptimizer".
            n_critic: The number of update steps for the critic per iteration.
                Note, that the critic should be trained to convergence before
                updating the generator. Note, that this number is overridden
                by the code in this function: 100 critic steps are used
                during the first 25 iterations and in every 500th iteration.
            clip_val: This implementation applies weight clipping to ensure
                the Lipschitz constraint of the critic (as proposed in the
                original paper). All critic variables are clipped to
                [-clip_val, clip_val] after each critic update.
            val_interval: How often the training status should be validated.
                A checkpoint is written at the same interval.
            val_bs: The batch size of the validation set to use.

        Raises:
            CustomException: If the network has not been built yet.
        """
        if not self._is_build:
            raise CustomException('Network has not been build yet.')

        logger.info('Training WassersteinGAN ...')

        with self._graph.as_default() as g:
            summary_writer = tf.summary.FileWriter( \
                self._get_summary_dir('train_summary',
                                      rm_existing=not self.continue_training),
                                      g)

            self._init_validation(val_bs)

            # We need an additional operation, that clips the weights of the
            # critic after each training step.
            critic_w_clipping_op = \
                [w.assign( \
                        tf.clip_by_value(w, -clip_val, clip_val)) \
                 for w in self._d_vars]

            op_params = {'learning_rate': learning_rate}
            gen_optimizer, dis_optimizer = self._get_optimizers(op_params)

            # We need to add the update_ops for the batchnorm moving averages
            # to the training steps. Otherwise, they won't be executed.
            update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
            with tf.control_dependencies(update_ops):
                # Only the generator step increments the global step, such
                # that one global step corresponds to one loop iteration.
                gen_train_step = gen_optimizer.minimize(
                    self._g_loss,
                    global_step=self._t_global_step,
                    var_list=self._g_vars)
                dis_train_step = dis_optimizer.minimize(self._d_loss,
                                                        var_list=self._d_vars)

            init_op = tf.global_variables_initializer()

            checkpoint_saver = tf.train.Saver(max_to_keep=5, \
                                              keep_checkpoint_every_n_hours=3)

        # Make sure both summary writers are closed (and thereby flushed)
        # even if training is aborted by an exception or an interrupt.
        # NOTE(review): "self._val_summary_writer" is presumably created by
        # "_init_validation" above - confirm.
        try:
            with tf.Session(graph=self._graph,
                            config=self._get_config_proto()) as sess:
                # Restore training if requested.
                iter_start, iter_end = self._init_training(
                    sess, num_iter, init_op, self._t_global_step,
                    checkpoint_saver)

                # Initialize training set.
                train_handle = sess.run(self._train_iter.string_handle())
                sess.run(self._train_iter.initializer,
                         feed_dict={
                             self._t_train_raw_in:
                             shared.data.get_train_inputs(),
                             self._t_train_raw_out:
                             shared.data.get_train_outputs(),
                             self._t_train_batch_size: batch_size
                         })

                for i in range(iter_start, iter_end):
                    if i % val_interval == 0:
                        checkpoint_saver.save(sess, os.path.join(
                            self._checkpoint_dir, 'model'), global_step=i)

                        self._validate_training_process(sess, i)

                    elif i % 100 == 0 and i > 0:
                        logger.info('Running training epoch: %d.' % i)

                    # Train critic until convergence:
                    # This trick is borrowed from the original code:
                    #   https://git.io/fAfst
                    # Allegedly, this should only ensure that the critic
                    # becomes strong very quickly, such that we have nice
                    # critic loss plots (which correspond to the EM
                    # distance).
                    n_critic_used = n_critic
                    if i < 25 or i % 500 == 0:
                        n_critic_used = 100

                    for t in range(n_critic_used):
                        feed_dict = {
                            self._g_inputs: self.sample_latent(batch_size),
                            self._t_handle: train_handle,
                            self._t_mode: True
                        }

                        if t == 0:
                            # Summaries are only recorded for the first
                            # critic step of each iteration.
                            _, summary = sess.run(
                                [dis_train_step, self._t_summaries],
                                feed_dict=feed_dict)
                            summary_writer.add_summary(summary, i)
                        else:
                            sess.run([dis_train_step], feed_dict=feed_dict)

                        # Clip weights.
                        sess.run(critic_w_clipping_op)

                    # Train generator.
                    # FIXME: Why does tf want me to feed real images into
                    # this run?
                    # NOTE(review): possibly because "gen_train_step" is
                    # created under control dependencies on all UPDATE_OPS,
                    # some of which may depend on the real input pipeline -
                    # confirm.
                    sess.run(
                        [gen_train_step],
                        feed_dict={
                            self._g_inputs: self.sample_latent(batch_size),
                            self._t_handle: train_handle,
                            self._t_mode: True
                        })

                checkpoint_saver.save(sess, os.path.join(
                    self._checkpoint_dir, 'model'), global_step=iter_end)
                logger.info('Training ends after %d iterations.' % iter_end)
        finally:
            summary_writer.close()
            self._val_summary_writer.close()

        logger.info('Training WassersteinGAN ... Done')
예제 #24
0
    def train(self, num_iter=10000, batch_size=256, learning_rate=0.0001, \
              beta1=0.5, beta2=0.999, val_interval=1000, val_bs=256):
        """Train the network.

        The network is trained via the Adam optimizer, which minimizes
        "self._t_loss".

        Note, if no validation set is available, the test set will be used.

        Args:
            num_iter: The number of training iterations.
            batch_size: The training batch size.
            learning_rate: See docs of "tf.train.AdamOptimizer".
            beta1: See docs of "tf.train.AdamOptimizer".
            beta2: See docs of "tf.train.AdamOptimizer".
            val_interval: How often the training status should be validated.
                A checkpoint is written at the same interval.
            val_bs: The batch size of the validation set to use.

        Raises:
            CustomException: If the network has not been built yet.
        """
        if not self._is_build:
            raise CustomException('Network has not been build yet.')

        logger.info('Training MI Estimator ...')

        with self._graph.as_default() as g:
            summary_writer = tf.summary.FileWriter( \
                self._get_summary_dir('train_summary',
                                      rm_existing=not self.continue_training),
                                      g)

            self._init_validation(val_bs)

            optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate,
                                               beta1=beta1,
                                               beta2=beta2)
            # NOTE(review): the train step is not made dependent on
            # tf.GraphKeys.UPDATE_OPS. If this network uses batch
            # normalization, its moving averages would not be updated -
            # confirm that this is not needed here.
            train_step = optimizer.minimize(self._t_loss,
                                            global_step=self._t_global_step)

            init_op = tf.global_variables_initializer()

            checkpoint_saver = tf.train.Saver(max_to_keep=5, \
                                              keep_checkpoint_every_n_hours=3)

        # Make sure both summary writers are closed (and thereby flushed)
        # even if training is aborted by an exception or an interrupt.
        # NOTE(review): "self._val_summary_writer" is presumably created by
        # "_init_validation" above - confirm.
        try:
            with tf.Session(graph=self._graph,
                            config=self._get_config_proto()) as sess:
                # Restore training if requested.
                iter_start, iter_end = self._init_training(
                    sess, num_iter, init_op, self._t_global_step,
                    checkpoint_saver)

                # Initialize training set.
                train_handle = sess.run(self._train_iter.string_handle())
                sess.run(self._train_iter.initializer,
                         feed_dict={
                             self._t_train_raw_in:
                             shared.data.get_train_inputs(),
                             self._t_train_raw_out:
                             shared.data.get_train_outputs(),
                             self._t_train_batch_size: batch_size
                         })

                for i in range(iter_start, iter_end):
                    if i % val_interval == 0:
                        checkpoint_saver.save(sess, os.path.join(
                            self._checkpoint_dir, 'model'), global_step=i)

                        self._validate_training_process(sess, i)

                    #elif i % 100 == 0 and i > 0:
                    #    logger.info('Running training epoch: %d.' % i)

                    _, summary = sess.run(
                        [train_step, self._t_summaries],
                        feed_dict={self._t_handle: train_handle,
                                   self._t_mode: True,
                                   self._t_mi_known: True})
                    summary_writer.add_summary(summary, i)

                checkpoint_saver.save(sess, os.path.join(
                    self._checkpoint_dir, 'model'), global_step=iter_end)
                logger.info('Training ends after %d iterations.' % iter_end)
        finally:
            summary_writer.close()
            self._val_summary_writer.close()

        logger.info('Training MI Estimator ... Done')