コード例 #1
0
def train_rgan(gan_model, dataset, n_epochs):
    """Train a recurrent GAN by alternating discriminator and generator steps.

    Args:
        gan_model: Model whose ``layers`` unpack into exactly two entries,
            the recurrent generator followed by the recurrent discriminator.
        dataset: Iterable yielding ``(x_batch, mask_batch)`` pairs. Each
            ``x_batch`` must have a three-entry shape, unpacked here as
            ``(no, seq_len, dim)``.
            NOTE(review): ``mask_batch`` is presumably ``(no, seq_len)`` since
            it is concatenated with such a tensor on axis 1 -- confirm.
        n_epochs: Number of passes over ``dataset``.

    Returns:
        Tuple ``(gan_model, discriminator_losses, generator_losses)`` where the
        two lists hold one epoch-averaged loss per epoch.
    """
    generator_optimizer = keras.optimizers.Adam(
        lr=0.01, beta_1=0.9, beta_2=0.999, clipnorm=1.)
    discriminator_optimizer = keras.optimizers.SGD(
        lr=0.1, momentum=0.9, nesterov=True, clipnorm=1.)
    recurrent_generator, recurrent_discriminator = gan_model.layers

    # Per-epoch averages, kept so the caller can plot them.
    disc_history = []
    gen_history = []

    for epoch in range(n_epochs):
        disc_epoch_mean = Mean()
        gen_epoch_mean = Mean()

        for x_batch, mask_batch in dataset:
            no, seq_len, dim = x_batch.shape
            x_batch = cast(x_batch, float32)

            # --- Phase 1: discriminator update -------------------------------
            noise = noise_generator(no, seq_len, dim)
            fake_batch = recurrent_generator(noise)
            # Generated and real sequences are joined along axis 1, so the
            # labels are seq_len zeros followed by seq_len ones per sample.
            x_fake_and_real = concat([fake_batch, x_batch], axis=1)
            step_labels = constant([[0.]] * seq_len + [[1.]] * seq_len)
            step_labels = cast(
                reshape(step_labels, [seq_len * 2, 1]), float32)
            y1 = tf.broadcast_to(step_labels, [no, seq_len * 2, 1])
            # Generated steps get an all-ones mask.
            mask1 = tf.ones([no, seq_len])
            mask_fake_and_real = concat([mask1, mask_batch], axis=1)
            recurrent_discriminator.trainable = True
            disc_loss, disc_grads = grad(
                recurrent_discriminator, x_fake_and_real, y1,
                mask_fake_and_real)
            discriminator_optimizer.apply_gradients(
                zip(disc_grads, recurrent_discriminator.trainable_variables))

            # --- Phase 2: generator update -----------------------------------
            noise = noise_generator(no, seq_len, dim)
            target_labels = constant([[1.]] * seq_len)
            target_labels = cast(
                reshape(target_labels, [seq_len, 1]), float32)
            y2 = tf.broadcast_to(target_labels, [no, seq_len, 1])
            # Freeze the discriminator so only the generator's variables are
            # listed in gan_model.trainable_variables for this step.
            recurrent_discriminator.trainable = False
            gen_loss, gen_grads = grad(gan_model, noise, y2, mask1)
            generator_optimizer.apply_gradients(
                zip(gen_grads, gan_model.trainable_variables))

            # Accumulate this batch's losses into the epoch averages.
            disc_epoch_mean.update_state(disc_loss)
            gen_epoch_mean.update_state(gen_loss)

        # End of epoch: record the averages.
        disc_history.append(disc_epoch_mean.result())
        gen_history.append(gen_epoch_mean.result())

        if not epoch % 50:
            print("RGAN Epoch {:03d}: Discriminator Loss: {:.3f}".format(
                epoch, disc_epoch_mean.result()), file=sys.stdout)
            print("RGAN Epoch {:03d}: Generator Loss: {:.3f}".format(
                epoch, gen_epoch_mean.result()), file=sys.stdout)

    return gan_model, disc_history, gen_history
コード例 #2
0
class VFIB(keras.Model):
    """Encoder + classifier model with a custom, loss-selectable train step.

    The encoder maps the inputs to ``(mu, sig, z)``; the classifier predicts
    from ``z`` concatenated with the sensitive column split off the inputs.

    ``loss_type`` selects the optimised objective:
      * ``'all'``  -> prediction + KL + MMD
      * ``'kl'``   -> prediction + KL
      * otherwise  -> prediction only

    All three terms are always computed and tracked, whichever are optimised.
    """

    def __init__(self, encoder, predictor, feature_dim, loss_type, **kwargs):
        """Store the sub-models and create one running-mean tracker per loss.

        Args:
            encoder: Callable returning ``(mu, sig, z)`` for a batch of inputs.
            predictor: Callable producing predictions from ``[z, sens]``.
            feature_dim: Accepted for interface compatibility; not used by the
                code in this class.
            loss_type: ``'all'``, ``'kl'`` or any other value (see class doc).
            **kwargs: Forwarded to ``keras.Model.__init__``.
        """
        super(VFIB, self).__init__(**kwargs)
        self.encoder = encoder
        self.classifier = predictor
        self.loss_type = loss_type
        self.total_loss_tracker = Mean(name="total_loss")
        self.prediction_loss_tracker = Mean(name="prediction_loss")
        self.kl_loss_tracker = Mean(name="kl_loss")
        self.mmd_loss_tracker = Mean(name="mmd_loss")

    @property
    def metrics(self):
        """Return the four loss trackers owned by this model."""
        return [
            self.total_loss_tracker,
            self.prediction_loss_tracker,
            self.kl_loss_tracker,
            self.mmd_loss_tracker
        ]

    def call(self, inputs):
        """Encode ``inputs`` and classify from the latent plus sensitive column.

        Returns:
            Tuple ``(mu, sig, z, preds)``.
        """
        # 0 refers to first column with sensitive feature 'Age'
        sens, _ = split_sensitive_X(inputs, 0, 1)
        mu, sig, z = self.encoder(inputs)
        preds = self.classifier(tf.concat([z, sens], 1))
        return mu, sig, z, preds

    def train_step(self, data):
        """Run one optimisation step on ``data = (X, y)``.

        Returns:
            Dict with the running means of the total, classification, KL and
            MMD losses.
        """
        X, y = data
        with tf.GradientTape() as tape:
            z_mean, z_log_sigma, z, preds = self.call(X)

            prediction_loss = neg_log_bernoulli(y, preds)
            kl_loss = KL(z_mean, z_log_sigma)
            # The result must NOT be bound to the name ``mmd_loss``: assigning
            # to it would make ``mmd_loss`` a local variable for the whole
            # method and the call itself would raise UnboundLocalError.
            mmd_loss_value = mmd_loss(X, z)

            if self.loss_type == 'all':
                total_loss = prediction_loss + kl_loss + mmd_loss_value
            elif self.loss_type == 'kl':
                total_loss = prediction_loss + kl_loss
            else:
                total_loss = prediction_loss

        grads = tape.gradient(total_loss, self.trainable_weights)
        self.optimizer.apply_gradients(zip(grads, self.trainable_weights))
        self.total_loss_tracker.update_state(total_loss)
        self.prediction_loss_tracker.update_state(prediction_loss)
        self.kl_loss_tracker.update_state(kl_loss)
        self.mmd_loss_tracker.update_state(mmd_loss_value)
        return {
            "loss": self.total_loss_tracker.result(),
            "classification_loss": self.prediction_loss_tracker.result(),
            "kl_loss": self.kl_loss_tracker.result(),
            "mmd_loss": self.mmd_loss_tracker.result()
        }
コード例 #3
0
class MeanBasedMetric(Metric):
    """Base metric reporting the running mean of an objective function.

    Subclasses supply ``_objective_function``; its output for every update is
    fed into an internal ``Mean`` accumulator, whose value is the result.
    """

    def __init__(self, name, dtype):
        super(MeanBasedMetric, self).__init__(name, dtype=dtype)
        # Accumulator that holds the running mean of the objective values.
        self._mean = Mean(dtype=dtype)

    @abstractmethod
    def _objective_function(self, y_true, y_pred):
        """Compute the values to be averaged; must be overridden."""

    def update_state(self, y_true, y_pred, sample_weight=None):
        """Fold the objective of ``(y_true, y_pred)`` into the running mean."""
        objective = self._objective_function(y_true, y_pred)
        self._mean.update_state(values=objective,
                                sample_weight=sample_weight)

    def result(self):
        """Return the current running mean."""
        running_mean = self._mean.result()
        return running_mean

    def reset_states(self):
        """Clear the accumulated mean."""
        self._mean.reset_states()
コード例 #4
0
class StandardVarianceBasedMetric(Metric):
    """Base metric reporting the standard deviation of an objective function.

    Two running means are kept -- of the objective values and of their
    squares -- and the result is ``sqrt(E[v^2] - E[v]^2)``. Subclasses supply
    ``_objective_function``.
    """

    def __init__(self, name, dtype):
        super().__init__(name, dtype=dtype)
        self._mean = Mean(dtype=dtype)
        self._square_mean = Mean(dtype=dtype)

    @abstractmethod
    def _objective_function(self, y_true, y_pred):
        """Compute the values whose spread is measured; must be overridden."""
        pass

    def update_state(self, y_true, y_pred, sample_weight=None):
        """Fold the objective values and their squares into the accumulators.

        ``sample_weight`` is forwarded unchanged to both running means.
        """
        values = self._objective_function(y_true, y_pred)
        self._mean.update_state(values=values, sample_weight=sample_weight)
        self._square_mean.update_state(values=tf.square(values),
                                       sample_weight=sample_weight)

    def result(self):
        """Return the standard deviation of the values seen so far."""
        variance = (self._square_mean.result() -
                    tf.square(self._mean.result()))
        # E[v^2] - E[v]^2 can come out marginally negative through
        # floating-point cancellation when the true variance is ~0; clamp it
        # so the square root yields 0 instead of NaN.
        variance = tf.maximum(variance, tf.zeros_like(variance))
        return tf.sqrt(variance)

    def reset_states(self):
        """Clear both accumulators."""
        self._mean.reset_states()
        self._square_mean.reset_states()
コード例 #5
0
def gain_train_step(dataset, gain, n_epochs):
    """Train a GAIN model by alternating discriminator and generator steps.

    Args:
        dataset: Iterable yielding ``(x_batch, mask_batch)`` pairs; both are
            cast to ``float32`` before use.
        gain: Model whose ``layers`` unpack into exactly two entries, the
            generator followed by the discriminator.
        n_epochs: Number of passes over ``dataset``.

    Returns:
        Tuple ``(gain, discriminator_losses, generator_losses)`` where the two
        lists hold one epoch-averaged loss per epoch.
    """
    generator, discriminator = gain.layers
    discriminator_optimizer = keras.optimizers.SGD(momentum=0.9,
                                                   nesterov=True)
    generator_optimizer = keras.optimizers.Adam()

    # Per-epoch averages, kept so the caller can plot them.
    disc_history = []
    gen_history = []

    for epoch in range(n_epochs):
        disc_epoch_mean = Mean()
        gen_epoch_mean = Mean()

        for x_batch, mask_batch in dataset:
            x_batch = cast(x_batch, float32)
            mask_batch = cast(mask_batch, float32)

            # Phase 1: update the discriminator on freshly generated samples.
            hint = hint_generator(x_batch, mask_batch)
            generator_input = concat([hint, mask_batch], axis=1)
            generated_samples = generator(generator_input)
            discriminator.trainable = True
            disc_loss, disc_grads = gain_grad(
                discriminator, generated_samples, mask_batch)
            discriminator_optimizer.apply_gradients(
                zip(disc_grads, discriminator.trainable_variables))

            # Phase 2: update the generator through the full model with the
            # discriminator frozen.
            hint = hint_generator(x_batch, mask_batch)
            discriminator.trainable = False
            generator_input = concat([hint, mask_batch], axis=1)
            gen_loss, gen_grads = gain_grad(gain, generator_input, mask_batch)
            generator_optimizer.apply_gradients(
                zip(gen_grads, gain.trainable_variables))

            # Accumulate this batch's losses into the epoch averages.
            disc_epoch_mean.update_state(disc_loss)
            gen_epoch_mean.update_state(gen_loss)

        # End of epoch: record the averages.
        disc_history.append(disc_epoch_mean.result())
        gen_history.append(gen_epoch_mean.result())

        if not epoch % 50:
            print("GAIN Epoch {:03d}: Discriminator Loss: {:.3f}".format(
                epoch, disc_epoch_mean.result()), file=sys.stdout)
            print("GAIN Epoch {:03d}: Generator Loss: {:.3f}".format(
                epoch, gen_epoch_mean.result()), file=sys.stdout)

    return gain, disc_history, gen_history
コード例 #6
0
ファイル: hider.py プロジェクト: csetraynor/guanyar
def train_wgain(dataset, gain, n_epoch, n_critic, alpha):
    '''Train a GAIN model with a critic/generator/reconstruction schedule.

    For every batch the critic (discriminator) is updated ``n_critic`` times,
    then the generator receives one adversarial update followed by one
    reconstruction update.

    Args:
      - dataset: iterable yielding ``(x_batch, mask_batch)`` pairs; each
        ``x_batch`` must have a two-entry shape.
      - gain: a gain model whose ``layers`` unpack into
        ``(generator, discriminator)``.
      - n_epoch: number of passes over ``dataset``.
      - n_critic: number of critic updates per batch; must be >= 1.
      - alpha: hyper-parameter forwarded to ``rec_grad``.

    Returns:
      - gain: Trained model
      - critic loss, generator loss and reconstruction loss for monitoring
        (one epoch-averaged value per epoch each).

    Raises:
      - ValueError: if ``n_critic`` is smaller than 1.
    '''
    # Without at least one critic update ``d_loss`` would never be bound and
    # the loss tracking below would fail with a NameError mid-training.
    if n_critic < 1:
        raise ValueError(
            "n_critic must be >= 1, got {}".format(n_critic))

    generator, discriminator = gain.layers
    d_optimizer = keras.optimizers.RMSprop(lr=0.00005)
    g_optimizer = keras.optimizers.Adam()
    # Keep results for plotting
    train_d_loss_results = []
    train_g_loss_results = []
    train_rec_loss_results = []

    for epoch in range(n_epoch):
        epoch_d_loss_avg = Mean()
        epoch_g_loss_avg = Mean()
        epoch_rec_loss_avg = Mean()
        for x_batch, mask_batch in dataset:
            # The values are unused; the unpacking only fails fast when the
            # batch shape does not have exactly two entries.
            batch_size, dim = x_batch.shape
            # phase 1: train discriminator
            for _ in range(n_critic):
                hint = hint_generator(x_batch, mask_batch)
                generated_samples = generator(hint, training=True)
                discriminator.trainable = True
                d_loss, d_grads = discriminator_grad(
                    discriminator, generated_samples, mask_batch[:, 1:])
                d_optimizer.apply_gradients(
                    zip(d_grads, discriminator.trainable_variables))
            # phase 2 - training the generator
            hint = hint_generator(x_batch, mask_batch)
            discriminator.trainable = False
            g_loss, g_grads = gain_grad(gain, hint)
            # NOTE(review): the adversarial generator step is applied with
            # d_optimizer (RMSprop), not g_optimizer -- confirm this is
            # intentional and not a copy/paste slip.
            d_optimizer.apply_gradients(
                zip(g_grads, gain.trainable_variables))
            hint = hint_generator(x_batch, mask_batch)
            rec_loss, rec_grads = rec_grad(generator, hint, mask_batch, alpha)
            g_optimizer.apply_gradients(
                zip(rec_grads, gain.trainable_variables))
            # Track progress: Add current batch loss (for the critic this is
            # the loss of the last of the n_critic updates).
            epoch_d_loss_avg.update_state(d_loss)
            epoch_g_loss_avg.update_state(g_loss)
            epoch_rec_loss_avg.update_state(rec_loss)
        # End epoch
        train_d_loss_results.append(epoch_d_loss_avg.result())
        train_g_loss_results.append(epoch_g_loss_avg.result())
        train_rec_loss_results.append(epoch_rec_loss_avg.result())

    return gain, train_d_loss_results, train_g_loss_results, train_rec_loss_results
コード例 #7
0
ファイル: pix2pose.py プロジェクト: oarriaga/paz
class Pix2Pose(Model):
    """GAN-style model trained with four separate updates per step.

    Each ``train_step`` runs, in order: a weighted-reconstruction update and
    an error-prediction update of the generator, a discriminator update, and
    an adversarial update of the generator. Optimizers, loss callables and
    loss weights are supplied through ``compile``.
    """

    def __init__(self, image_shape, discriminator, generator, latent_dim):
        """Store the sub-models and configuration values.

        Args:
            image_shape: Stored on the instance; not used by the code in this
                class.
            discriminator: Model scoring 3-channel images.
            generator: Model mapping RGB inputs to a multi-channel output
                whose first three channels are treated as RGB.
            latent_dim: Stored on the instance; not used by the code in this
                class.
        """
        super(Pix2Pose, self).__init__()
        self.image_shape = image_shape
        self.discriminator = discriminator
        self.generator = generator
        self.latent_dim = latent_dim

    @property
    def metrics(self):
        """Return every loss tracker created in ``compile``.

        All four trackers are listed so the framework can reset each of them
        between epochs; a tracker missing from this list would keep
        accumulating across epochs.
        """
        return [self.generator_loss, self.discriminator_loss,
                self.reconstruction_loss, self.error_prediction_loss]

    def compile(self, optimizers, losses, loss_weights):
        """Configure optimizers, loss callables, loss weights and trackers.

        Args:
            optimizers: Dict with keys ``'generator'`` and ``'discriminator'``.
            losses: Dict with keys ``'weighted_reconstruction'``,
                ``'error_prediction'`` and ``'discriminator'``.
            loss_weights: Dict with keys ``'weighted_reconstruction'`` and
                ``'error_prediction'``.
        """
        super(Pix2Pose, self).compile()
        self.optimizer_generator = optimizers['generator']
        self.optimizer_discriminator = optimizers['discriminator']
        self.compute_reconstruction_loss = losses['weighted_reconstruction']
        self.compute_error_prediction_loss = losses['error_prediction']
        self.compute_discriminator_loss = losses['discriminator']

        self.generator_loss = Mean(name='generator_loss')
        self.discriminator_loss = Mean(name='discriminator_loss')
        self.reconstruction_loss = Mean(name='weighted_reconstruction')
        self.error_prediction_loss = Mean(name='error_prediction')
        self.reconstruction_weight = loss_weights['weighted_reconstruction']
        self.error_prediction_weight = loss_weights['error_prediction']

    def _build_discriminator_labels(self, batch_size):
        """Return ``(2 * batch_size, 1)`` labels: ones first, then zeros.

        The ones line up with the generated images and the zeros with the
        real images, matching the concatenation order used in
        ``_train_discriminator``.
        """
        # The shape must be passed as one tuple: the second positional
        # argument of tf.ones/tf.zeros is the dtype, not a second dimension.
        # (batch_size, 1) also matches the labels built in _train_generator.
        return tf.concat(
            [tf.ones((batch_size, 1)), tf.zeros((batch_size, 1))], 0)

    def _add_noise_to_labels(self, labels):
        """Add uniform random noise scaled by 0.05 to ``labels``."""
        noise = tf.random.uniform(tf.shape(labels))
        labels = labels + 0.05 * noise
        return labels

    def _get_batch_size(self, values):
        """Return the dynamic size of the first axis of ``values``."""
        return tf.shape(values)[0]

    def _train_discriminator(self, RGB_inputs, RGBA_true):
        """Update the discriminator on generated vs. real RGB images.

        The generator forward pass happens outside the gradient tape, so only
        the discriminator's weights receive gradients.

        Returns:
            The discriminator loss for this batch.
        """
        RGB_true = RGBA_true[:, :, :, 0:3]
        RGB_fake = self.generator(RGB_inputs)[:, :, :, 0:3]
        RGB_fake_true = tf.concat([RGB_fake, RGB_true], axis=0)

        batch_size = self._get_batch_size(RGB_inputs)
        y_true = self._build_discriminator_labels(batch_size)
        y_true = self._add_noise_to_labels(y_true)

        with tf.GradientTape() as tape:
            y_pred = self.discriminator(RGB_fake_true)
            discriminator_loss = self.compute_discriminator_loss(
                y_true, y_pred)
        gradients = tape.gradient(discriminator_loss,
                                  self.discriminator.trainable_weights)
        self.optimizer_discriminator.apply_gradients(
            zip(gradients, self.discriminator.trainable_weights))
        return discriminator_loss

    def _train_generator(self, RGB_inputs):
        """Adversarial generator update.

        Generated images are scored against all-zero labels, i.e. the label
        the discriminator is trained to give real images.

        Returns:
            The adversarial generator loss for this batch.
        """
        batch_size = tf.shape(RGB_inputs)[0]
        y_misleading = tf.zeros((batch_size, 1))
        with tf.GradientTape() as tape:
            RGBE_preds = self.generator(RGB_inputs)
            y_pred = self.discriminator(RGBE_preds[..., 0:3])
            generator_loss = self.compute_discriminator_loss(
                y_misleading, y_pred)
        gradients = tape.gradient(generator_loss,
                                  self.generator.trainable_weights)
        self.optimizer_generator.apply_gradients(
            zip(gradients, self.generator.trainable_weights))
        return generator_loss

    def _train_reconstruction(self, RGB_inputs, RGBA_true):
        """Generator update on the weighted reconstruction loss.

        Returns:
            The reconstruction loss after multiplication by its weight.
        """
        with tf.GradientTape() as tape:
            RGBE_pred = self.generator(RGB_inputs)
            reconstruction_loss = self.compute_reconstruction_loss(
                RGBA_true, RGBE_pred)
            reconstruction_loss = (self.reconstruction_weight *
                                   reconstruction_loss)
        gradients = tape.gradient(reconstruction_loss,
                                  self.generator.trainable_weights)
        self.optimizer_generator.apply_gradients(
            zip(gradients, self.generator.trainable_weights))
        return reconstruction_loss

    def _train_error_prediction(self, RGB_inputs, RGBA_true):
        """Generator update on the error-prediction loss.

        Returns:
            The error-prediction loss after multiplication by its weight.
        """
        with tf.GradientTape() as tape:
            RGBE_pred = self.generator(RGB_inputs)
            error_prediction_loss = self.compute_error_prediction_loss(
                RGBA_true, RGBE_pred)
            error_prediction_loss = (self.error_prediction_weight *
                                     error_prediction_loss)
        gradients = tape.gradient(error_prediction_loss,
                                  self.generator.trainable_weights)
        self.optimizer_generator.apply_gradients(
            zip(gradients, self.generator.trainable_weights))
        return error_prediction_loss

    def train_step(self, data):
        """Run the four updates for one batch and report running mean losses.

        Args:
            data: Pair of dicts; the inputs are read from
                ``data[0]['RGB_input']`` and the targets from
                ``data[1]['RGB_with_error']``.

        Returns:
            Dict with the running means of the four losses.
        """
        RGB_inputs, RGBA_true = data[0]['RGB_input'], data[1]['RGB_with_error']

        reconstruction_loss = self._train_reconstruction(RGB_inputs, RGBA_true)
        self.reconstruction_loss.update_state(reconstruction_loss)

        error_loss = self._train_error_prediction(RGB_inputs, RGBA_true)
        self.error_prediction_loss.update_state(error_loss)

        discriminator_loss = self._train_discriminator(RGB_inputs, RGBA_true)
        self.discriminator_loss.update_state(discriminator_loss)

        generator_loss = self._train_generator(RGB_inputs)
        self.generator_loss.update_state(generator_loss)

        return {
            'discriminator_loss': self.discriminator_loss.result(),
            'generator_loss': self.generator_loss.result(),
            'reconstruction_loss': self.reconstruction_loss.result(),
            'error_prediction_loss': self.error_prediction_loss.result()
        }
コード例 #8
0
ファイル: adain.py プロジェクト: Jephthia/NNs
class StyleTransfer(Model):
    """Style-transfer model: frozen encoder, AdaIN, trainable decoder.

    Content and style inputs are pushed through the encoder layer by layer,
    merged with ``AdaIN`` and decoded into images. Only the decoder's
    variables receive gradient updates; the decoder carries its own optimizer.
    Relies on module-level names defined elsewhere in the file (``vgg19``,
    ``STYLE_LAYERS``, ``DECODER_LAYERS``, ``LOSS_LAYERS``, ``BATCH_SIZE``,
    ``STYLE_WEIGHT``, ``AdaIN``, ``Conv2DReflectivePadding``).
    """

    def __init__(self, *args, encoder=None, decoder=None, **kwargs):
        """Build (or adopt) the encoder and decoder and create the metrics.

        Args:
            *args: Forwarded to ``Model.__init__``.
            encoder: Optional pre-built encoder; built from ``vgg19`` if None.
            decoder: Optional pre-built decoder; built from the encoder's
                output shape if None.
            **kwargs: Forwarded to ``Model.__init__``.
        """
        super().__init__(*args, **kwargs)
        self.init_encoder(encoder)
        self.init_decoder(decoder)
        self.build_metrics()

    def init_encoder(self, encoder):
        """Adopt ``encoder`` or assemble one from the ``STYLE_LAYERS`` of vgg19."""
        if encoder is not None:
            self.encoder = encoder
            return

        self.encoder = Sequential([Input((224, 224, 3))])
        self.encoder.trainable = False

        for layer_name in STYLE_LAYERS:
            layer = vgg19.get_layer(layer_name)
            self.encoder.add(layer)

        self.encoder.compile()

    def init_decoder(self, decoder):
        """Adopt ``decoder`` or build one from ``DECODER_LAYERS``."""
        if decoder is not None:
            self.decoder = decoder
            return

        # Build the decoder if it wasn't provided
        input_shape = self.encoder.layers[-1].output_shape[1:]
        self.decoder = Sequential([Input(input_shape)])

        # The decoder is the trimmed inverse of the encoder
        for layer_name in DECODER_LAYERS:
            layer = vgg19.get_layer(layer_name)
            # Add the upsampling to double the image size
            if 'pool' in layer.name:
                block_name = layer.name.split("_")[0]
                self.decoder.add(UpSampling2D(name=f'{block_name}_upsampling'))
            # Add some reflective padding followed by a Conv2D layer
            elif 'conv' in layer.name:
                self.decoder.add(
                    Conv2DReflectivePadding(filters=layer.output_shape[-1],
                                            kernel_size=layer.kernel_size,
                                            strides=layer.strides,
                                            activation='relu',
                                            name=layer.name))

        # Add one final Conv2D to reduce the feature maps to 3 (N,W,H,3)
        self.decoder.add(
            Conv2DReflectivePadding(3, (3, 3), name='output_conv1'))

    def build_metrics(self):
        """Create the running-mean trackers for content, style and total loss."""
        self.c_loss_metric = Mean(name='c_loss')
        self.s_loss_metric = Mean(name='s_loss')
        self.loss_metric = Mean(name='loss')

    def compile(self, optimizer, content_loss, style_loss, **kwargs):
        """Compile the model and attach the optimizer to the decoder.

        The decoder is compiled with ``optimizer`` unless the instance is
        flagged ``decoder_compiled`` (set by ``load`` when a saved decoder was
        restored).

        Args:
            optimizer: Optimizer handed to ``self.decoder.compile``.
            content_loss: Callable used for the content loss.
            style_loss: Callable used for the style loss.
            **kwargs: Forwarded to ``Model.compile``.
        """
        super().compile(**kwargs)
        if not getattr(self, 'decoder_compiled', False):
            self.decoder.compile(optimizer=optimizer)
        self.content_loss = content_loss
        self.style_loss = style_loss
        self.adain = AdaIN()

    @tf.function
    def train_step(self, data, training=True):
        """Compute the losses for one batch and, if ``training``, update the decoder.

        Args:
            data: Pair ``(content_batch, style_batch)`` fed to the encoder.
            training: When False the tape does not watch variables and no
                gradient step is applied.

        Returns:
            Dict mapping each metric name to its current running mean.
        """
        c_encoded_outputs, s_encoded_outputs = data

        # The outputs of the encoded style images and the encoded generated images
        # Retrieved from the selected encoder layers used for the loss
        s_loss_outputs = []
        g_loss_outputs = []

        with tf.GradientTape(watch_accessed_variables=training) as tape:
            # 1. Encode the content and style image
            for layer in self.encoder.layers:
                # Encode the content image
                c_encoded_outputs = layer(c_encoded_outputs)
                # Encode the style image
                s_encoded_outputs = layer(s_encoded_outputs)

                # If this layer is used to calculate the loss save its outputs
                if layer.name in LOSS_LAYERS:
                    s_loss_outputs.append(s_encoded_outputs)

            # 2. Adaptive Instance Normalization
            adain_outputs = self.adain(c_encoded_outputs, s_encoded_outputs)

            # 3. Decode the feature maps generated by AdaIN to get the final generated image
            generated_imgs = self.decoder(adain_outputs, training=training)

            # 4. Encode the generated image to calculate the loss
            g_encoded_outputs = generated_imgs
            for layer in self.encoder.layers:
                # Encode the generated image
                g_encoded_outputs = layer(g_encoded_outputs)

                # If this layer is used to calculate the loss save its outputs
                if layer.name in LOSS_LAYERS:
                    g_loss_outputs.append(g_encoded_outputs)

            # 5. Calculate the content loss
            c_per_replica_loss = self.content_loss(g_encoded_outputs,
                                                   adain_outputs)  # (N,W,H)
            # Reduce the loss (we do this ourselves in order to be compatible with distributed training)
            global_c_loss_size = tf.size(
                c_per_replica_loss
            ) * self.distribute_strategy.num_replicas_in_sync
            global_c_loss_size = tf.cast(global_c_loss_size, dtype=tf.float32)
            c_loss = tf.nn.compute_average_loss(
                c_per_replica_loss, global_batch_size=global_c_loss_size)

            assert len(g_loss_outputs) == len(s_loss_outputs)

            # 6. Calculate style loss
            s_loss = 0
            for i in range(len(g_loss_outputs)):
                s_per_replica_loss = self.style_loss(g_loss_outputs[i],
                                                     s_loss_outputs[i])  # (N,)
                # Reduce the loss (we do this ourselves in order to be compatible with distributed training)
                global_s_loss_size = BATCH_SIZE * self.distribute_strategy.num_replicas_in_sync
                s_loss += tf.nn.compute_average_loss(
                    s_per_replica_loss, global_batch_size=global_s_loss_size)

            # 7. Calculate the loss
            loss = c_loss + s_loss * STYLE_WEIGHT

        # 8. Apply gradient descent (decoder variables only)
        if training:
            gradients = tape.gradient(loss, self.decoder.trainable_variables)
            self.decoder.optimizer.apply_gradients(
                zip(gradients, self.decoder.trainable_variables))

        # 9. Update the metrics
        self.c_loss_metric.update_state(c_loss)
        self.s_loss_metric.update_state(s_loss)
        self.loss_metric.update_state(loss)

        return {m.name: m.result() for m in self.metrics}

    @tf.function
    def test_step(self, data):
        """Compute the losses for one batch without any gradient update.

        Args:
            data: Pair ``(content_batch, style_batch)`` fed to the encoder.

        Returns:
            Dict mapping each metric name to its current running mean.
        """
        c_encoded_outputs, s_encoded_outputs = data

        # The outputs of the encoded style images and the encoded generated images
        # Retrieved from the selected encoder layers used for the loss
        s_loss_outputs = []
        g_loss_outputs = []

        # 1. Encode the content and style image
        for layer in self.encoder.layers:
            # Encode the content image
            c_encoded_outputs = layer(c_encoded_outputs, training=False)
            # Encode the style image
            s_encoded_outputs = layer(s_encoded_outputs, training=False)

            # If this layer is used to calculate the loss save its outputs
            if layer.name in LOSS_LAYERS:
                s_loss_outputs.append(s_encoded_outputs)

        # 2. Adaptive Instance Normalization
        adain_outputs = self.adain(c_encoded_outputs, s_encoded_outputs)

        # 3. Decode the feature maps generated by AdaIN to get the final generated image
        generated_imgs = self.decoder(adain_outputs, training=False)

        # 4. Encode the generated image to calculate the loss
        g_encoded_outputs = generated_imgs
        for layer in self.encoder.layers:
            # Encode the generated image
            g_encoded_outputs = layer(g_encoded_outputs, training=False)

            # If this layer is used to calculate the loss save its outputs
            if layer.name in LOSS_LAYERS:
                g_loss_outputs.append(g_encoded_outputs)

        # 5. Calculate the content loss
        c_per_replica_loss = self.content_loss(g_encoded_outputs,
                                               adain_outputs)  # (N,W,H)
        # Reduce the loss (we do this ourselves in order to be compatible with distributed training)
        global_c_loss_size = tf.size(
            c_per_replica_loss) * self.distribute_strategy.num_replicas_in_sync
        global_c_loss_size = tf.cast(global_c_loss_size, dtype=tf.float32)
        c_loss = tf.nn.compute_average_loss(
            c_per_replica_loss, global_batch_size=global_c_loss_size)

        assert len(g_loss_outputs) == len(s_loss_outputs)

        # 6. Calculate style loss
        s_loss = 0
        for i in range(len(g_loss_outputs)):
            s_per_replica_loss = self.style_loss(g_loss_outputs[i],
                                                 s_loss_outputs[i])  # (N,)
            # Reduce the loss (we do this ourselves in order to be compatible with distributed training)
            global_s_loss_size = BATCH_SIZE * self.distribute_strategy.num_replicas_in_sync
            s_loss += tf.nn.compute_average_loss(
                s_per_replica_loss, global_batch_size=global_s_loss_size)

        # 7. Calculate the loss
        loss = c_loss + s_loss * STYLE_WEIGHT

        # 8. Update the metrics
        self.c_loss_metric.update_state(c_loss)
        self.s_loss_metric.update_state(s_loss)
        self.loss_metric.update_state(loss)

        return {m.name: m.result() for m in self.metrics}

    @tf.function
    def predict_step(self, data):
        """Generate stylised images for a batch of content/style images.

        Args:
            data: Sequence whose first element is the pair
                ``(content_imgs, style_imgs)``; both must be rank-4 batches.

        Returns:
            The generated images after ``deprocess_vgg19`` (``uint8``).
        """
        content_imgs, style_imgs = data[0]

        # Ensure these are batched
        assert len(content_imgs.shape) == 4
        assert len(style_imgs.shape) == 4

        content_imgs = vgg19_preprocess_input(content_imgs) / 255.0
        style_imgs = vgg19_preprocess_input(style_imgs) / 255.0

        c_encoded = content_imgs
        s_encoded = style_imgs

        # Encode the contents and styles
        for layer in self.encoder.layers:
            c_encoded = layer(c_encoded)
            s_encoded = layer(s_encoded)

        # Apply adaptive instance normalization. A fresh AdaIN is created
        # here rather than using self.adain, which only exists after compile().
        adain_outputs = AdaIN()(c_encoded, s_encoded)

        # Decode the images to generate them
        generated_imgs = self.decoder(adain_outputs)
        generated_imgs = self.deprocess_vgg19(generated_imgs)

        return generated_imgs

    @property
    def metrics(self):
        """Return the total, content and style loss trackers."""
        return [self.loss_metric, self.c_loss_metric, self.s_loss_metric]

    def deprocess_vgg19(self, imgs):
        """Undo the preprocessing applied in ``predict_step``.

        Scales by 255, adds the per-channel means, reverses the channel
        order, clips to ``[0, 255]`` and casts to ``uint8``.
        """
        # Ensure they are batched
        assert len(imgs.shape) == 4

        # Put back to 0...255
        imgs *= 255.0
        # Add mean
        imgs += [103.939, 116.779, 123.68]
        # BGR to RGB
        imgs = imgs[..., ::-1]
        # Clip
        imgs = tf.clip_by_value(imgs, 0.0, 255.0)
        # Cast
        imgs = tf.cast(imgs, tf.uint8)

        return imgs

    def save_architecture(self, log_dir):
        """Write ``self.to_json()`` to ``style_transfer_architecture.json`` in ``log_dir``."""
        # Ensure the log_dir exists
        pathlib.Path(log_dir).mkdir(parents=True, exist_ok=True)

        with GFile(os.path.join(log_dir, 'style_transfer_architecture.json'),
                   'w') as f:
            f.write(self.to_json())

    def save_encoder(self, log_dir):
        """Save the encoder model to ``log_dir``."""
        self.encoder.save(log_dir)

    def save_model(self, log_dir, **kwargs):
        """Save the decoder model to ``log_dir``; ``kwargs`` go to ``save``."""
        self.decoder.save(log_dir, **kwargs)

    @classmethod
    def load(cls, log_dir=None):
        """Restore a model from ``log_dir`` or create a fresh one.

        If ``log_dir`` holds no architecture file, a new model is created (and
        its architecture and encoder are saved when ``log_dir`` is given).
        Otherwise the architecture and encoder are loaded and, if any entry
        exists under ``<log_dir>/weights``, the most recently modified one is
        loaded as the decoder and the instance is flagged so ``compile`` does
        not re-compile it.

        Returns:
            The created or restored model.
        """
        model_found = bool(log_dir) and pathlib.Path(
            os.path.join(log_dir,
                         'style_transfer_architecture.json')).is_file()

        # If there isn't already a model create one from scratch and save it
        if not model_found:
            model = cls()
            if log_dir:
                model.save_architecture(log_dir)
                model.save_encoder(os.path.join(log_dir, 'encoder'))
            return model

        # Load the model's architecture
        with tf.keras.utils.custom_object_scope({
                'StyleTransfer':
                cls,
                'Conv2DReflectivePadding':
                Conv2DReflectivePadding
        }):
            # Read through a context manager so the file handle is closed
            # deterministically instead of being left to garbage collection.
            with GFile(
                    os.path.join(log_dir, 'style_transfer_architecture.json'),
                    'r') as f:
                saved_json = f.read()
            model = tf.keras.models.model_from_json(saved_json)
            model.encoder = tf.keras.models.load_model(
                os.path.join(log_dir, 'encoder'))

        # Load the decoder's latest weights if there are any
        ckpts = glob.glob(os.path.join(log_dir, 'weights', '*'))
        if ckpts:
            latest_ckpt = max(ckpts, key=os.path.getmtime)
            print('Loading Checkpoint:', latest_ckpt)
            model.decoder = tf.keras.models.load_model(latest_ckpt)
            model.decoder_compiled = True

        return model

    def get_config(self):
        """Return the encoder and decoder under the keys ``from_config`` reads."""
        return {'encoder': self.encoder, 'decoder': self.decoder}

    @classmethod
    def from_config(cls, config, **kwargs):
        """Rebuild a model from ``config``.

        The ``'encoder'`` and ``'decoder'`` entries are popped from ``config``,
        re-encoded as JSON and rebuilt with ``model_from_json``.
        """
        encoder = tf.keras.models.model_from_json(
            json.dumps(config.pop('encoder')))
        decoder = tf.keras.models.model_from_json(
            json.dumps(config.pop('decoder')))

        style_transfer = cls(encoder=encoder, decoder=decoder)
        return style_transfer
コード例 #9
0
ファイル: vae.py プロジェクト: leandergrech/RL_VAE
class VAE(Model):
    """Auto-encoder with a deterministic and a variational training step.

    The encoder and decoder are produced by ``vae_utils``. They are also
    chained into an inner functional model, ``self.vae``, which ``call``
    delegates to. Three ``Mean`` trackers report the total, reconstruction
    and KL losses.
    """

    def __init__(self, input_dim, encoder_kwargs, decoder_kwargs, warmup_steps,
                 **kwargs):
        """Build the sub-networks, the inner model and the loss trackers.

        :param input_dim: size of the flat input vector
        :param encoder_kwargs: keyword arguments for
            ``vae_utils.generate_encoder``; must contain ``'latent_dim'`` and
            may contain ``'is_variational'`` (default True)
        :param decoder_kwargs: keyword arguments for
            ``vae_utils.generate_decoder``
        :param warmup_steps: number of training steps during which the
            variational step replaces the KL term by 0.0
        :param kwargs: forwarded to the ``Model`` constructor
        """
        super(VAE, self).__init__(**kwargs)
        self.input_dim = input_dim

        # Generate the encoder and decoder networks
        self.encoder = vae_utils.generate_encoder(**encoder_kwargs)
        self.decoder = vae_utils.generate_decoder(**decoder_kwargs)

        self.latent_dim = encoder_kwargs['latent_dim']

        # Combine encoder and decoder to create VAE structure
        self.input_tensor = Input(shape=(input_dim, ))

        # A variational encoder yields several outputs; the last one is used
        # as the latent tensor. Otherwise the single output is the latent.
        if encoder_kwargs.get('is_variational', True):
            self.latent_tensor = self.encoder(self.input_tensor)[-1]
        else:
            self.latent_tensor = self.encoder(self.input_tensor)

        self.output_tensor = self.decoder(self.latent_tensor)

        self.vae = Model(inputs=self.input_tensor,
                         outputs=[self.output_tensor, self.latent_tensor],
                         name='vae')

        # call() forwards to self.vae, so compiling this object is enough
        self.compile(optimizer=Adam(learning_rate=1e-3))

        # Logging
        self.total_loss_tracker = Mean(name='total_loss')
        self.reconstruction_loss_tracker = Mean(name='reconstruction_loss')
        self.kl_loss_tracker = Mean(name="kl_loss")

        # Hyper-parameters
        self.warmup_steps = tf.constant(warmup_steps, dtype=tf.int32)
        # Step counter, incremented once per training step
        self.it = tf.Variable(0, dtype=tf.int32, trainable=False)

    def call(self, inputs, training=None, mask=None):
        """Run ``inputs`` through the inner ``self.vae`` model.

        ``training`` and ``mask`` are accepted but not forwarded.
        """
        return self.vae(inputs)

    @property
    def metrics(self):
        """The three loss trackers: total, reconstruction and KL."""
        return [
            self.total_loss_tracker,
            self.reconstruction_loss_tracker,
            self.kl_loss_tracker,
        ]

    def train_step(self, data):
        """Run one optimization step; currently always the deterministic one.

        NOTE(review): the constructor reads ``is_variational`` but this
        dispatch ignores it - confirm which step should run for a variational
        encoder.
        """
        return self.train_step_deterministic(data)

    def train_step_deterministic(self, data):
        """Train on the mean squared reconstruction error only.

        :param data: batch fed to the encoder and compared to the decoder
            output
        :return: dict with the running ``loss``, ``reconstruction_loss`` and
            ``kl_loss`` averages (the KL tracker is not updated here)
        """
        with tf.GradientTape() as tape:
            # Get encoder outputs
            z = self.encoder(data)

            # How good are we at reconstruction?
            reconstruction = self.decoder(z)
            reconstruction_loss = tf.reduce_mean(
                tf.square(data - reconstruction))

            self.it.assign_add(1)

        # Debug output: mean signed reconstruction error along axis 0
        tf.print(tf.reduce_mean(data - reconstruction, axis=0))

        # Gradients w.r.t. the reconstruction loss in the GradientTape
        grads = tape.gradient(reconstruction_loss, self.trainable_weights)

        # Update the network parameters
        self.optimizer.apply_gradients(zip(grads, self.trainable_weights))

        # Logging: without a KL term the total loss is the reconstruction
        # loss, so the "loss" entry reflects what is actually optimized.
        self.total_loss_tracker.update_state(reconstruction_loss)
        self.reconstruction_loss_tracker.update_state(reconstruction_loss)

        return {
            "loss": self.total_loss_tracker.result(),
            "reconstruction_loss": self.reconstruction_loss_tracker.result(),
            "kl_loss": self.kl_loss_tracker.result(),
        }

    def train_step_variational(self, data):
        """Train on reconstruction error plus a warm-up gated KL term.

        The encoder must return ``(z_mean, z_log_var, z)``. The KL term is
        replaced by 0.0 until ``self.it`` reaches ``self.warmup_steps`` and
        enters the total loss divided by 10.

        :param data: batch fed to the encoder and compared to the decoder
            output
        :return: dict with the running ``loss``, ``reconstruction_loss`` and
            ``kl_loss`` averages
        """
        with tf.GradientTape() as tape:
            # Get encoder outputs
            z_mean, z_log_var, z = self.encoder(data)

            # How good are we at reconstruction?
            reconstruction = self.decoder(z)
            reconstruction_loss = tf.reduce_mean(
                tf.square(data - reconstruction))

            # How much regularized is our latent space?
            kl_loss = -0.5 * (1 + z_log_var - tf.square(z_mean) -
                              tf.exp(z_log_var))
            kl_loss = tf.reduce_mean(tf.reduce_sum(kl_loss, axis=1))

            # Mask kl_loss during warm-up phase
            kl_loss = tf.cond(
                tf.math.greater_equal(self.it, self.warmup_steps),
                lambda: kl_loss, lambda: 0.0)

            # We will minimize this loss
            total_loss = reconstruction_loss + kl_loss / 10.0

            self.it.assign_add(1)

        # Gradients w.r.t. the total_loss in the GradientTape
        grads = tape.gradient(total_loss, self.vae.trainable_weights)

        # Update the network parameters
        self.optimizer.apply_gradients(zip(grads, self.vae.trainable_weights))

        # Logging
        self.total_loss_tracker.update_state(total_loss)
        self.reconstruction_loss_tracker.update_state(reconstruction_loss)
        self.kl_loss_tracker.update_state(kl_loss)

        return {
            "loss": self.total_loss_tracker.result(),
            "reconstruction_loss": self.reconstruction_loss_tracker.result(),
            "kl_loss": self.kl_loss_tracker.result(),
        }
コード例 #10
0
ファイル: hider.py プロジェクト: csetraynor/guanyar
            # train Dz
            for _ in range(n_critic):
                random_noise = generate_noise(no, latent_dim) # z
                generated_samples = rnn_generator(random_noise, training = True) # x_hat
                encodings = rnn_encoder(x_batch, training = True) # z_hat
                encoded_noise = rnn_encoder(generated_samples, training = True) # z'
                rnn_ae_critc.trainable = True
                ae_loss1, ae_grads1 = critic_loss(rnn_ae_critc, encoded_noise, encodings, y1)
                rmsprop_optimizer.apply_gradients(zip(ae_grads1, rnn_ae_critc.trainable_variables))
            # update generator via Dz critic's error
            random_noise = generate_noise(no, latent_dim) # z
            rnn_ae_critc.trainable = False
            ae_loss2, ae_grads2 = generator_loss(gan_model2, random_noise, y2)
            rmsprop_optimizer.apply_gradients(zip(ae_grads2, gan_model2.trainable_variables))
            # Track progress: Add current batch loss
            epoch_dx_loss_avg.update_state(d_loss1)
            epoch_dz_loss_avg.update_state(ae_loss1)
            epoch_rec_loss_avg.update_state(rec_loss1)

        # End epoch
        train_dx_loss_results.append(epoch_dx_loss_avg.result())
        train_dz_loss_results.append(epoch_dz_loss_avg.result())
        train_rec_loss_results.append(epoch_rec_loss_avg.result())

    return rnn_encogen, train_dx_loss_results, train_dz_loss_results, train_rec_loss_results

def rae_wgan(input_dat, seq_times, padding_mask, batch_size, n_epoch, n_critic, latent_dim):
    '''
    Call to RAE GAN
    args:
      input_dat: imputed data, inputs to RAE WGAN.
コード例 #11
0
class build_prob_u_net(Model):
    """Keras model bundling a prior net, a posterior net and a U-Net.

    ``self.prior`` and ``self.posterior`` both output ``[z, mu, sigma]``;
    ``self.det_unet`` takes an image plus a latent sample and outputs the
    segmentation. Training and evaluation are driven by the custom
    ``train_step`` / ``test_step`` below, each sub-model having its own
    optimizer (set in ``compile``).
    """
    def __init__(self, num_classes, activation, latent_dim=6, resolution_lvl=5,
                 img_shape=(None, None, 1), seg_shape=(None, None, 1),
                 num_filters=(32, 64, 128, 256, 512), downsample_signal=(2,2,2,2,2)):
        """Store the hyper-parameters and build the three sub-models.

        :param num_classes: number of filters of the final 1x1 convolution
        :param activation: activation of the final 1x1 convolution
        :param latent_dim: channel count of the latent sample input
        :param resolution_lvl: number of encoder stages
        :param img_shape: shape of the image input (without batch axis)
        :param seg_shape: shape of the segmentation input (without batch axis)
        :param num_filters: filters per stage, indexed by stage number
        :param downsample_signal: pool / up-sampling size per stage
        """

        super(build_prob_u_net, self).__init__()
        self.num_classes = num_classes
        self.latent_dim = latent_dim
        self.activation = activation
        self.num_filters = num_filters
        self.resolution_lvl = resolution_lvl
        self.downsample_signal = downsample_signal

        # seg_shape=None selects the prior variant (image input only)
        self.prior = self.latent_space_net(img_shape, None)
        self.posterior = self.latent_space_net(img_shape, seg_shape)
        self.det_unet = self.unet(img_shape)

    def latent_space_net(self, img_shape, seg_shape):
        """Build the prior (``seg_shape is None``) or posterior latent net.

        The posterior concatenates image and segmentation inputs; the prior
        uses the image alone. Returns a ``Model`` whose outputs are
        ``[z, mu, sigma]`` as produced by ``z_mu_sigma``.
        """
        if seg_shape is not None:
            # Posterior inputs
            inputs = [Input(shape=img_shape), Input(shape=seg_shape)]
            input_ = Concatenate(name='input_con') (inputs)
            name = 'prob_unet_posterior'
        else:
            # Prior input
            inputs = Input(shape=img_shape)
            input_ = inputs
            name = 'prob_unet_prior'

        # Encoder blocks: stage 0 works on the input, later stages pool first
        for i in range(self.resolution_lvl):
            if i == 0:
                x = conv_block(self.num_filters[i], 0, i, amount=2, type_block='encoder_latent') (input_)
            else:
                x = MaxPool2D(pool_size=self.downsample_signal[i],
                              name='encoder_latent_stage0-{}_pool'.format(i)) (x)
                x = conv_block(self.num_filters[i], 0, i, amount=2, type_block='encoder_latent') (x)

        # Z sample
        z, mu, sigma = z_mu_sigma(self.latent_dim, 0, self.resolution_lvl+1) (x)
        return Model(inputs, [z, mu, sigma], name=name)

    def unet(self, img_shape):
        """Build the U-Net that maps ``[image, z_sample]`` to a segmentation.

        The latent sample is tiled spatially and concatenated with the last
        decoder output before the final convolutions.
        """
        # NOTE(review): the tiling factor is 2 ** (resolution_lvl - 1) whatever
        # downsample_signal holds - presumably assumes every pool size is 2;
        # confirm.
        lvl_div = np.power(2, self.resolution_lvl-1)
        z_sample = Input(shape=(None, None, self.latent_dim))
        inputs = Input(shape=img_shape)
        skip_connections = [None] * self.resolution_lvl

        # Encoder blocks
        for i in range(self.resolution_lvl):
            if i == 0:
                x = conv_block(self.num_filters[i], 0, i, amount=2, type_block='encoder') (inputs)
            else:
                x = MaxPool2D(pool_size=self.downsample_signal[i],
                              name='encoder_stage0-{}_pool'.format(i)) (x)
                x = conv_block(self.num_filters[i], 0, i, amount=2, type_block='encoder') (x)

            skip_connections[i] = x
        # The deepest stage is the bottleneck, not a skip connection
        skip_connections = skip_connections[:-1]

        # Decoder blocks
        for i in reversed(range(self.resolution_lvl-1)):
            x = UpSampling2D(size=self.downsample_signal[i],
                            name='decoder_stage0-{}_up'.format(i)) (x)
            x = Concatenate(name='decoder_stage0-{}_con'.format(i)) ([x, skip_connections[i]])
            x = conv_block(self.num_filters[i], 0, i, amount=2, type_block='decoder') (x)

        # Concatenate U-Net and Z sample
        broadcast_z = tf.tile(z_sample, (1, lvl_div, lvl_div, 1))

        x = Concatenate(name='final_con') ([x, broadcast_z])
        # `i` is the value left over from the last loop that ran
        x = conv_block(self.num_filters[0], 0, i, amount=2, type_block='final') (x)
        x = Conv2D(self.num_classes, kernel_size=1, padding='same',
                   activation=self.activation, name='final_conv') (x)
        return Model([inputs, z_sample], x, name='prob_unet_det')

    def kl_score(self, mu0, sigma0, mu1, sigma1):
        """Return the KL-style score between (mu0, sigma0) and (mu1, sigma1).

        Both sigmas are squared before use, so they are presumably standard
        deviations - TODO confirm against ``z_mu_sigma``. All inputs are
        flattened with ``K.flatten`` before the sum; 1e-10 guards the logs
        and the division.
        """
        # Calculate kl loss
        sigma0_f = K.square(K.flatten(sigma0))
        sigma1_f = K.square(K.flatten(sigma1))
        logsigma0 = K.log(sigma0_f + 1e-10)
        logsigma1 = K.log(sigma1_f + 1e-10)
        mu0_f = K.flatten(mu0)
        mu1_f = K.flatten(mu1)

        return tf.reduce_mean(
            0.5*tf.reduce_sum(tf.divide(sigma0_f + tf.square(mu1_f - mu0_f), sigma1_f + 1e-10)
            + logsigma1 - logsigma0 - 1, axis=-1))

    def compile(self, prior_opt, posterior_opt, unet_opt, loss, metric, beta=1):
        """Store one optimizer per sub-model plus the loss, metric and beta.

        :param prior_opt: optimizer applied to the prior net
        :param posterior_opt: optimizer applied to the posterior net
        :param unet_opt: optimizer applied to the U-Net
        :param loss: callable ``loss(seg, reconstruction)``
        :param metric: callable ``metric(seg, reconstruction)``
        :param beta: weight of the KL term in the reported total loss
        """
        super(build_prob_u_net, self).compile()
        self.posterior_opt = posterior_opt
        self.prior_opt = prior_opt
        self.unet_opt = unet_opt
        self.beta = beta

        self.metric = metric
        # Replaces the attribute of the same name set by the base compile()
        self.compiled_loss = loss
        self.metric_tracker = Mean(name='metric')
        self.kl_loss_tracker = Mean(name="kl_loss")
        self.total_loss_tracker = Mean(name='total_loss')
        self.compiled_loss_tracker = Mean(name='compiled_loss')

    @property
    def metrics(self):
        """The four running-mean trackers updated by the train/test steps."""
        return [
            self.compiled_loss_tracker,
            self.kl_loss_tracker,
            self.total_loss_tracker,
            self.metric_tracker
        ]

    def train_step(self, data):
        """Run one training step on an ``(img, seg)`` batch.

        The reconstruction uses the latent sample of the posterior. The prior
        and posterior are updated with the gradient of the KL score, the
        U-Net with the gradient of the reconstruction loss.

        :return: dict with the running means of the four trackers
        """
        img, seg = data
        # persistent=True because the tape is queried three times below
        with tf.GradientTape(persistent=True) as tape:
            # Prior and Posterior
            _, mu_prior, sigma_prior = self.prior(img, training=True)
            z_posterior, mu_posterior, sigma_posterior = self.posterior([img, seg], training=True)

            # U-Net
            reconstruction = self.det_unet([img, z_posterior], training=True)

            # Calculate losses and metric
            kl_loss = self.kl_score(mu_posterior, sigma_posterior, mu_prior, sigma_prior)
            reconstruction_loss = self.compiled_loss(seg, reconstruction)
            total_loss = reconstruction_loss + self.beta * kl_loss
            dsc_score = self.metric(seg, reconstruction)

        # Update weights
        # NOTE(review): none of the gradients below is taken w.r.t. total_loss,
        # so self.beta only affects the reported value - confirm this is
        # intended.
        grad_prior = tape.gradient(kl_loss, self.prior.trainable_weights)
        self.prior_opt.apply_gradients(zip(grad_prior, self.prior.trainable_weights))

        grad_posterior = tape.gradient(kl_loss, self.posterior.trainable_weights)
        self.posterior_opt.apply_gradients(zip(grad_posterior, self.posterior.trainable_weights))

        grad_unet = tape.gradient(reconstruction_loss, self.det_unet.trainable_weights)
        self.unet_opt.apply_gradients(zip(grad_unet, self.det_unet.trainable_weights))

        self.metric_tracker.update_state(dsc_score)
        self.kl_loss_tracker.update_state(kl_loss)
        self.total_loss_tracker.update_state(total_loss)
        self.compiled_loss_tracker.update_state(reconstruction_loss)
        return {
            "loss": self.compiled_loss_tracker.result(),
            "kl_loss": self.kl_loss_tracker.result(),
            "total_loss": self.total_loss_tracker.result(),
            "dice_coef": self.metric_tracker.result()
        }

    def test_step(self, data):
        """Evaluate one ``(img, seg)`` batch without updating any weights.

        Unlike ``train_step`` the reconstruction uses the latent sample of
        the prior, and the loss and metric values are additionally reduced
        with ``reduce_sum`` followed by ``reduce_mean``.

        :return: dict with the running means of the four trackers
        """
        img, seg = data
        z_prior, mu_prior, sigma_prior = self.prior(img, training=False)
        _, mu_posterior, sigma_posterior = self.posterior([img, seg], training=False)
        reconstruction = self.det_unet([img, z_prior], training=False)

        kl_loss = self.kl_score(mu_posterior, sigma_posterior, mu_prior, sigma_prior)
        reconstruction_loss = tf.reduce_mean(tf.reduce_sum(self.compiled_loss(seg, reconstruction)))
        total_loss = reconstruction_loss + self.beta * kl_loss
        dsc_score = tf.reduce_mean(tf.reduce_sum(self.metric(seg, reconstruction)))

        self.metric_tracker.update_state(dsc_score)
        self.kl_loss_tracker.update_state(kl_loss)
        self.total_loss_tracker.update_state(total_loss)
        self.compiled_loss_tracker.update_state(reconstruction_loss)
        return {
            "loss": self.compiled_loss_tracker.result(),
            "kl_loss": self.kl_loss_tracker.result(),
            "total_loss": self.total_loss_tracker.result(),
            "dice_coef": self.metric_tracker.result()
        }
コード例 #12
0
ファイル: model.py プロジェクト: jh88/fbnet
class Trainer():
    """Trains an FBNet super-network: weights with SGD, thetas with Adam.

    Trainable variables whose name contains ``'theta'`` are treated as
    architecture parameters; every other trainable variable is a regular
    weight. Setting ``epoch`` recomputes the sampling temperature through the
    exponential decay schedule.
    """

    def __init__(self,
                 fbnet,
                 input_shape,
                 initial_temperature=5,
                 temperature_decay_rate=0.956,
                 temperature_decay_steps=1,
                 latency_alpha=0.2,
                 latency_beta=0.6,
                 weight_lr=0.01,
                 weight_momentum=0.9,
                 weight_decay=1e-4,
                 theta_lr=1e-3,
                 theta_beta1=0.9,
                 theta_beta2=0.999,
                 theta_decay=5e-4):
        """Build ``fbnet`` and set up optimizers, loss and metrics.

        :param fbnet: the super-network; built here with ``input_shape``
        :param input_shape: shape passed to ``fbnet.build``
        :param initial_temperature: temperature before any decay
        :param temperature_decay_rate: rate given to ``exponential_decay``
        :param temperature_decay_steps: steps given to ``exponential_decay``
        :param latency_alpha: first coefficient given to ``latency_loss``
        :param latency_beta: second coefficient given to ``latency_loss``
        :param weight_lr: SGD learning rate for the weights
        :param weight_momentum: SGD momentum for the weights
        :param weight_decay: passed as ``decay`` to SGD
        :param theta_lr: Adam learning rate for the thetas
        :param theta_beta1: Adam ``beta_1`` for the thetas
        :param theta_beta2: Adam ``beta_2`` for the thetas
        :param theta_decay: passed as ``decay`` to Adam
        """
        self._epoch = 0

        self.initial_temperature = initial_temperature
        self.temperature = initial_temperature
        self.latency_alpha = latency_alpha
        self.latency_beta = latency_beta

        self.exponential_decay = lambda step: exponential_decay(
            initial_temperature, temperature_decay_rate,
            temperature_decay_steps, step)

        fbnet.build(input_shape)
        self.fbnet = fbnet

        # Split the trainable variables by name into thetas and weights
        self.weights = []
        self.thetas = []
        for trainable_weight in fbnet.trainable_weights:
            if 'theta' in trainable_weight.name:
                self.thetas.append(trainable_weight)
            else:
                self.weights.append(trainable_weight)

        # NOTE(review): `decay` on these optimizers is presumably the legacy
        # learning-rate decay rather than L2 weight decay - confirm.
        self.weight_opt = SGD(learning_rate=weight_lr,
                              momentum=weight_momentum,
                              decay=weight_decay)

        self.theta_opt = Adam(learning_rate=theta_lr,
                              beta_1=theta_beta1,
                              beta_2=theta_beta2,
                              decay=theta_decay)

        self.loss_fn = SparseCategoricalCrossentropy(from_logits=True)
        self.accuracy_metric = SparseCategoricalAccuracy()
        self.loss_metric = Mean()

    @property
    def epoch(self):
        """Current epoch; assigning it also updates ``temperature``."""
        return self._epoch

    @epoch.setter
    def epoch(self, epoch):
        self._epoch = epoch
        self.temperature = self.exponential_decay(epoch)

    def reset_metrics(self):
        """Clear the running accuracy and loss metrics."""
        self.accuracy_metric.reset_states()
        self.loss_metric.reset_states()

    def _train(self, x, y, weights, opt, training=True, temperature=None):
        """Run one optimization step on ``weights`` with optimizer ``opt``.

        The loss is the classification loss plus the latency loss computed
        from the sum of ``fbnet.losses``.

        :param temperature: temperature given to the network; defaults to
            ``self.temperature`` when omitted
        """
        if temperature is None:
            temperature = self.temperature

        with tf.GradientTape() as tape:
            y_hat = self.fbnet(x, temperature, training=training)
            loss = self.loss_fn(y, y_hat)
            latency = sum(self.fbnet.losses)
            loss += latency_loss(latency, self.latency_alpha,
                                 self.latency_beta)

        grads = tape.gradient(loss, weights)
        opt.apply_gradients(zip(grads, weights))

        self.accuracy_metric.update_state(y, y_hat)
        self.loss_metric.update_state(loss)

    # A tf.function reads Python attributes only while tracing. The
    # temperature is therefore an explicit argument of the traced steps;
    # reading self.temperature inside them would keep reusing the value seen
    # at the first trace, so the decay applied by the epoch setter would
    # never reach the graph.
    @tf.function
    def _train_weights_step(self, x, y, temperature):
        self._train(x, y, self.weights, self.weight_opt,
                    temperature=temperature)

    @tf.function
    def _train_thetas_step(self, x, y, temperature):
        self._train(x, y, self.thetas, self.theta_opt, training=False,
                    temperature=temperature)

    @tf.function
    def _predict_step(self, x, temperature):
        return self.fbnet(x, temperature, training=False)

    def train_weights(self, x, y):
        """Update the regular weights on one batch at the current temperature."""
        self._train_weights_step(x, y, self.temperature)

    def train_thetas(self, x, y):
        """Update the thetas on one batch (network called with training=False)."""
        self._train_thetas_step(x, y, self.temperature)

    @property
    def training_accuracy(self):
        """Running training accuracy as a NumPy value."""
        return self.accuracy_metric.result().numpy()

    @property
    def training_loss(self):
        """Running training loss as a NumPy value."""
        return self.loss_metric.result().numpy()

    def predict(self, x):
        """Return the network output for ``x`` at the current temperature."""
        return self._predict_step(x, self.temperature)

    def evaluate(self, dataset):
        """Return the sparse categorical accuracy over ``dataset``.

        :param dataset: iterable of ``(x, y)`` batches
        """
        accuracy_metric = SparseCategoricalAccuracy()
        for x, y in dataset:
            y_hat = self.predict(x)

            accuracy_metric.update_state(y, y_hat)

        return accuracy_metric.result().numpy()

    def sample_sequential_config(self):
        """Sample one operation per mixed layer and describe the result.

        ``MixedOperation`` entries of ``fbnet.ops`` are replaced by
        ``op.sample(self.temperature)``; ``Identity`` operations are left out
        of the returned layer list.

        :return: dict with ``'name'`` and ``'layers'`` entries
        """
        ops = [
            op.sample(self.temperature)
            if isinstance(op, MixedOperation) else op for op in self.fbnet.ops
        ]

        sequential_config = {
            'name':
            'sampled_fbnet',
            'layers': [{
                'class_name': type(op).__name__,
                'config': op.get_config()
            } for op in ops if not isinstance(op, Identity)]
        }

        return sequential_config

    def save_weights(self, checkpoint):
        """Save the network weights to ``checkpoint`` in the 'tf' format."""
        self.fbnet.save_weights(checkpoint, save_format='tf')

    def load_weights(self, checkpoint):
        """Load the network weights from ``checkpoint``."""
        self.fbnet.load_weights(checkpoint)
コード例 #13
0
class DualStudent(Model):
    """"
    Dual Student for Automatic Speech Recognition (ASR).

    How to train: 1) set the optimizer by means of compile(), 2) use train()
    How to test: use test()

    Remarks:
    - Do not use fit() by Keras, use train()
    - Do not use evaluate() by Keras, use test()
    - Compiled metrics and loss (i.e. set by means of compile()) are not used

    Original proposal for image classification: https://arxiv.org/abs/1909.01804
    """
    def __init__(self,
                 n_classes,
                 n_hidden_layers=3,
                 n_units=96,
                 consistency_loss='mse',
                 consistency_scale=10,
                 stabilization_scale=100,
                 xi=0.6,
                 padding_value=0.,
                 sigma=0.01,
                 schedule='rampup',
                 schedule_length=5,
                 version='mono_directional'):
        """
        Builds a Dual Student model made of two student networks.

        :param n_classes: number of classes (units of the last layer of each student)
        :param n_hidden_layers: number of hidden (LSTM) layers in each student
        :param n_units: number of units of each hidden layer
        :param consistency_loss: one of 'mse', 'kl'
        :param consistency_scale: maximum weight of the consistency constraint
        :param stabilization_scale: maximum weight of the stabilization constraint
        :param xi: threshold for a stable sample
        :param padding_value: value used to pad input sequences (mask_value of the Masking layers)
        :param sigma: standard deviation of the noisy augmentation
        :param schedule: schedule for the lambdas, one of 'rampup', 'triangular_cycling', 'sinusoidal_cycling'
        :param schedule_length: second argument given to the schedule function, next to the epoch
        :param version: one of:
            - 'mono_directional': both students have mono-directional LSTM layers
            - 'bidirectional': both students have bidirectional LSTM layers
            - 'imbalanced': one student has mono-directional LSTM layers, the other one bidirectional
        :raises ValueError: on an unknown schedule, consistency loss or version
        """
        super(DualStudent, self).__init__()

        # keep the constructor arguments on the instance
        self.n_classes = n_classes
        self.padding_value = padding_value
        self.n_units = n_units
        self.n_hidden_layers = n_hidden_layers
        self.xi = xi
        self.consistency_scale = consistency_scale
        self.stabilization_scale = stabilization_scale
        self.sigma = sigma
        self.version = version
        self.schedule = schedule
        self.schedule_length = schedule_length
        # the lambdas are set at the start of every training epoch
        self._lambda1 = None
        self._lambda2 = None

        # schedule for lambdas
        schedule_functions = {
            'rampup': sigmoid_rampup,
            'triangular_cycling': triangular_cycling,
            'sinusoidal_cycling': sinusoidal_cycling,
        }
        try:
            self.schedule_fn = schedule_functions[schedule]
        except (KeyError, TypeError):
            raise ValueError('Invalid schedule') from None

        # losses: classification, stabilization, then consistency
        self._loss_cls = SparseCategoricalCrossentropy()
        self._loss_sta = MeanSquaredError()
        consistency_losses = {'mse': MeanSquaredError, 'kl': KLDivergence}
        try:
            consistency_cls = consistency_losses[consistency_loss]
        except (KeyError, TypeError):
            raise ValueError('Invalid consistency metric') from None
        self._loss_con = consistency_cls()

        # metrics for training: per-batch losses are averaged with Mean
        for metric_name in ('loss1', 'loss2', 'loss1_cls', 'loss2_cls',
                            'loss1_con', 'loss2_con', 'loss1_sta',
                            'loss2_sta'):
            setattr(self, '_' + metric_name, Mean(name=metric_name))
        self._acc1 = SparseCategoricalAccuracy(name='acc1')
        self._acc2 = SparseCategoricalAccuracy(name='acc2')

        # metrics for testing, one of each kind per student
        test_metric_kinds = (
            ('test_loss{}', Mean),
            ('test_acc{}_train_phones', SparseCategoricalAccuracy),
            ('test_acc{}', Accuracy),
            ('test_per{}', PhoneErrorRate),
        )
        for name_template, metric_cls in test_metric_kinds:
            for student_index in (1, 2):
                metric_name = name_template.format(student_index)
                setattr(self, '_' + metric_name, metric_cls(name=metric_name))

        # compose students
        student_layouts = {
            'mono_directional': ('mono_directional', 'mono_directional'),
            'bidirectional': ('bidirectional', 'bidirectional'),
            'imbalanced': ('mono_directional', 'bidirectional'),
        }
        try:
            first_type, second_type = student_layouts[version]
        except (KeyError, TypeError):
            raise ValueError('Invalid student version') from None
        self.student1 = self._get_student('student1', first_type)
        self.student2 = self._get_student('student2', second_type)

        # masking layer (just to use compute_mask and remove padding)
        self.mask = Masking(mask_value=self.padding_value)

    def _get_student(self, name, lstm_type):
        """
        Builds one student: Masking, the hidden LSTM stack, softmax Dense.

        :param name: name of the Sequential model
        :param lstm_type: 'mono_directional' or 'bidirectional'
        :return: the Sequential student model
        :raises ValueError: if lstm_type is not one of the two values above
        """
        network = Sequential(name=name)
        network.add(Masking(mask_value=self.padding_value))

        if lstm_type not in ('mono_directional', 'bidirectional'):
            raise ValueError('Invalid LSTM version')

        wrap_bidirectional = lstm_type == 'bidirectional'
        for _ in range(self.n_hidden_layers):
            recurrent = LSTM(units=self.n_units, return_sequences=True)
            if wrap_bidirectional:
                recurrent = Bidirectional(recurrent)
            network.add(recurrent)

        network.add(Dense(units=self.n_classes, activation="softmax"))
        return network

    def _noisy_augment(self, x):
        """
        Adds zero-mean Gaussian noise with standard deviation sigma to x.

        The noise shape is taken from tf.shape(x), i.e. the runtime shape, so
        this also works in graph mode when a dimension of x is statically
        unknown (x.shape would contain None there).

        :param x: tensor to perturb
        :return: tensor of the same shape as x
        """
        noise = tf.random.normal(shape=tf.shape(x), stddev=self.sigma)
        return x + noise

    def call(self, inputs, training=False, student='student1', **kwargs):
        """
        Feed-forwards inputs to one of the students.

        This function is called internally by __call__(). Do not use it directly, use the model as callable. You may
        prefer to use pad_and_predict() instead of this, because it pads the sequences and splits in batches. For a big
        dataset, it is strongly suggested that you use pad_and_predict().

        :param inputs: tensor of shape (batch_size, n_frames, n_features)
        :param training: boolean, whether the call is in inference mode or training mode
        :param student: one of 'student1', 'student2'
        :return: tensor of shape (batch_size, n_frames, n_classes), softmax activations (probabilities)
        :raises ValueError: if student is neither 'student1' nor 'student2'
        """
        if student == 'student1':
            return self.student1(inputs, training=training)
        # Compare against the second name explicitly: testing
        # `student != 'student1'` would send every unknown name to student2
        # and make the error below unreachable.
        if student == 'student2':
            return self.student2(inputs, training=training)
        raise ValueError('Invalid student')

    def build(self, input_shape):
        """
        Builds the model itself and then both students with input_shape.

        :param input_shape: shape handed to every build() call
        """
        super(DualStudent, self).build(input_shape)
        for student in (self.student1, self.student2):
            student.build(input_shape)

    def train(self,
              x_labeled,
              x_unlabeled,
              y_labeled,
              x_val=None,
              y_val=None,
              n_epochs=10,
              batch_size=32,
              shuffle=True,
              evaluation_mapping=None,
              logs_path=None,
              checkpoints_path=None,
              initial_epoch=0,
              seed=None):
        """
        Semi-supervised training of both students on labeled and unlabeled data.

        Every batch mixes labeled and unlabeled samples in proportion to the sizes of the two sets. Batches are padded
        individually, so their shapes can differ from one step to the next.

        :param x_labeled: numpy array of numpy arrays (n_frames, n_features), features matching y_labeled.
            'n_frames' can vary, padding is added per batch.
        :param x_unlabeled: numpy array of numpy arrays (n_frames, n_features), features without labels.
            'n_frames' can vary, padding is added per batch. Shuffled in place when shuffle is True.
        :param y_labeled: numpy array of numpy arrays (n_frames,), labels matching x_labeled. Padded with -1.
        :param x_val: like x_labeled, for the validation set
        :param y_val: like y_labeled, for the validation set
        :param n_epochs: integer, epoch index at which training stops
        :param batch_size: integer, total batch size (labeled plus unlabeled)
        :param shuffle: boolean, whether to shuffle at the start of each epoch
        :param evaluation_mapping: dictionary {training label -> test label}, forwarded to test()
        :param logs_path: path where TensorBoard logs are written, or None
        :param checkpoints_path: path to a directory. If it already contains checkpoints, the latest one is restored;
            a new checkpoint is saved after every epoch.
        :param initial_epoch: int, first epoch index; use it with checkpoints_path to resume a previous run
        :param seed: seed for the numpy and TensorFlow random number generators
        """
        # set seed
        if seed is not None:
            np.random.seed(seed)
            tf.random.set_seed(seed)

        # build the students and show their summaries
        self.build(input_shape=(None, ) + x_labeled[0].shape)
        for student in (self.student1, self.student2):
            student.summary()

        # setup for logs
        if logs_path is not None:
            train_summary_writer = tf.summary.create_file_writer(logs_path)
        else:
            train_summary_writer = None

        # setup for checkpoints: restore the latest one, then save to 'ckpt'
        checkpoint = None
        if checkpoints_path is not None:
            checkpoint = tf.train.Checkpoint(optimizer=self.optimizer,
                                             model=self)
            latest_checkpoint = tf.train.latest_checkpoint(checkpoints_path)
            if latest_checkpoint is not None:
                checkpoint.restore(latest_checkpoint)
            checkpoint_prefix = str(Path(checkpoints_path) / 'ckpt')

        # split the batch between labeled and unlabeled samples
        n_labeled = len(x_labeled)
        n_unlabeled = len(x_unlabeled)
        labeled_batch_size = ceil(
            n_labeled / (n_unlabeled + n_labeled) * batch_size)
        unlabeled_batch_size = batch_size - labeled_batch_size
        n_batches = min(ceil(n_unlabeled / unlabeled_batch_size),
                        ceil(n_labeled / labeled_batch_size))

        # training metrics, in the order they are reported
        train_trackers = (
            self._loss1, self._loss2,
            self._loss1_cls, self._loss2_cls,
            self._loss1_con, self._loss2_con,
            self._loss1_sta, self._loss2_sta,
            self._acc1, self._acc2,
        )

        def feature_batch(samples, index, size):
            # select, pad with the padding value and convert one feature batch
            batch = select_batch(samples, index, size)
            batch = pad_sequences(batch,
                                  padding='post',
                                  value=self.padding_value,
                                  dtype='float32')
            return tf.convert_to_tensor(batch)

        def label_batch(samples, index, size):
            # labels are padded with -1 instead of the feature padding value
            batch = select_batch(samples, index, size)
            batch = pad_sequences(batch, padding='post', value=-1)
            return tf.convert_to_tensor(batch)

        # training loop
        for epoch in trange(initial_epoch, n_epochs, desc='epochs'):
            # ramp up lambda1 and lambda2
            self._lambda1 = self.consistency_scale * self.schedule_fn(
                epoch, self.schedule_length)
            self._lambda2 = self.stabilization_scale * self.schedule_fn(
                epoch, self.schedule_length)

            # shuffle training set
            if shuffle:
                # one permutation keeps features and labels aligned
                order = np.arange(len(x_labeled))
                np.random.shuffle(order)
                x_labeled, y_labeled = x_labeled[order], y_labeled[order]
                np.random.shuffle(x_unlabeled)

            for i in trange(n_batches, desc='batches'):
                x_labeled_batch = feature_batch(x_labeled, i,
                                                labeled_batch_size)
                x_unlabeled_batch = feature_batch(x_unlabeled, i,
                                                  unlabeled_batch_size)
                y_labeled_batch = label_batch(y_labeled, i,
                                              labeled_batch_size)

                # train step
                self._train_step(x_labeled_batch, x_unlabeled_batch,
                                 y_labeled_batch)

            # collect metrics per dataset (easy management)
            metrics = {
                'train': {
                    tracker.name: tracker.result()
                    for tracker in train_trackers
                }
            }

            # test on validation set
            if x_val is not None and y_val is not None:
                metrics['val'] = self.test(
                    x_val, y_val, evaluation_mapping=evaluation_mapping)

            # print metrics, one line per dataset
            for dataset, metrics_ in metrics.items():
                header = f'Epoch {epoch + 1} - {dataset} - '
                values = ''.join(f'{k}: {v}, ' for k, v in metrics_.items())
                print(header + values)

            # save logs
            if train_summary_writer is not None:
                with train_summary_writer.as_default():
                    for metrics_ in metrics.values():
                        for k, v in metrics_.items():
                            tf.summary.scalar(k, v, step=epoch)

            # save checkpoint
            if checkpoint is not None:
                checkpoint.save(file_prefix=checkpoint_prefix)

            # reset metrics
            for tracker in train_trackers:
                tracker.reset_states()

    """
    If you want to use graph execution, pad the whole dataset externally and uncomment the decorator below.
    If you uncomment the decorator without padding the dataset, the graph will be compiled for each batch, 
    because train() pads at batch level and so the batches have different shapes. This would result in worse
    performance compared to eager execution.
    """

    # @tf.function
    def _train_step(self, x_labeled, x_unlabeled, y_labeled):
        """
        Runs one optimization step of both students and updates the training metrics.

        For each student the loss is: classification loss on the labeled frames, plus self._lambda1 times a
        consistency loss between its predictions on two augmented views, plus self._lambda2 times a
        stabilization loss on the unlabeled frames. Student 1 is updated with the gradients of its own loss
        only, and likewise student 2.

        :param x_labeled: padded batch of labeled features
        :param x_unlabeled: padded batch of unlabeled features
        :param y_labeled: padded batch of labels corresponding to x_labeled
        """
        # two views of each batch, each produced by its own call to self._noisy_augment
        # (TODO: improvement with data augmentation instead of noise)
        lab_view1 = self._noisy_augment(x_labeled)
        lab_view2 = self._noisy_augment(x_labeled)
        unl_view1 = self._noisy_augment(x_unlabeled)
        unl_view2 = self._noisy_augment(x_unlabeled)

        # masks selecting the non-padded positions
        keep_lab = self.mask.compute_mask(x_labeled)
        keep_unl = self.mask.compute_mask(x_unlabeled)
        y_labeled = y_labeled[keep_lab]

        # the tape is persistent because two separate gradients are taken from it
        with tf.GradientTape(persistent=True) as tape:
            # labeled views (classification + consistency), padding dropped right away
            s1_lab1 = self.student1(lab_view1, training=True)[keep_lab]
            s1_lab2 = self.student1(lab_view2, training=True)[keep_lab]
            s2_lab1 = self.student2(lab_view1, training=True)[keep_lab]
            s2_lab2 = self.student2(lab_view2, training=True)[keep_lab]

            # unlabeled views (consistency + stabilization), padding dropped right away
            s1_unl1 = self.student1(unl_view1, training=True)[keep_unl]
            s1_unl2 = self.student1(unl_view2, training=True)[keep_unl]
            s2_unl1 = self.student2(unl_view1, training=True)[keep_unl]
            s2_unl2 = self.student2(unl_view2, training=True)[keep_unl]

            # classification: student 1 is scored on view 1, student 2 on view 2
            cls1 = self._loss_cls(y_labeled, s1_lab1)
            cls2 = self._loss_cls(y_labeled, s2_lab2)

            # consistency between the two views, labeled and unlabeled frames stacked together
            con1 = self._loss_con(tf.concat([s1_lab1, s1_unl1], axis=0),
                                  tf.concat([s1_lab2, s1_unl2], axis=0))
            con2 = self._loss_con(tf.concat([s2_lab1, s2_unl1], axis=0),
                                  tf.concat([s2_lab2, s2_unl2], axis=0))

            # a frame is "stable" for a student when both views give the same predicted class and
            # at least one of the two predictions has confidence above self.xi
            agree1 = tf.argmax(s1_unl1, axis=-1) == tf.argmax(s1_unl2, axis=-1)
            confident1 = tf.logical_or(
                tf.reduce_max(s1_unl1, axis=-1) > self.xi,
                tf.reduce_max(s1_unl2, axis=-1) > self.xi)
            stable1 = tf.logical_and(agree1, confident1)

            agree2 = tf.argmax(s2_unl1, axis=-1) == tf.argmax(s2_unl2, axis=-1)
            confident2 = tf.logical_or(
                tf.reduce_max(s2_unl1, axis=-1) > self.xi,
                tf.reduce_max(s2_unl2, axis=-1) > self.xi)
            stable2 = tf.logical_and(agree2, confident2)

            both_stable = tf.logical_and(stable1, stable2)
            only2_stable = tf.logical_and(tf.logical_not(stable1), stable2)
            only1_stable = tf.logical_and(stable1, tf.logical_not(stable2))

            # per-frame distance between the two views of each student, on frames stable for both
            eps1 = MSE(s1_unl1[both_stable], s1_unl2[both_stable])
            eps2 = MSE(s2_unl1[both_stable], s2_unl2[both_stable])
            s1_larger_eps = eps1 > eps2
            s2_larger_eps = eps1 < eps2

            # stabilization on frames stable for both students: the term goes to the loss of the
            # student whose epsilon is larger
            sta1 = self._loss_sta(s1_unl1[both_stable][s1_larger_eps],
                                  s2_unl1[both_stable][s1_larger_eps])
            sta2 = self._loss_sta(s1_unl2[both_stable][s2_larger_eps],
                                  s2_unl2[both_stable][s2_larger_eps])

            # stabilization on frames stable for exactly one student: the term goes to the loss of
            # the student for which the frame is not stable
            sta1 += self._loss_sta(s1_unl1[only2_stable], s2_unl1[only2_stable])
            sta2 += self._loss_sta(s1_unl2[only1_stable], s2_unl2[only1_stable])

            # complete losses
            total1 = cls1 + self._lambda1 * con1 + self._lambda2 * sta1
            total2 = cls2 + self._lambda1 * con2 + self._lambda2 * sta2

        # backward pass: both gradients are computed before any variable is modified
        grads1 = tape.gradient(total1, self.student1.trainable_variables)
        grads2 = tape.gradient(total2, self.student2.trainable_variables)
        self.optimizer.apply_gradients(
            zip(grads1, self.student1.trainable_variables))
        self.optimizer.apply_gradients(
            zip(grads2, self.student2.trainable_variables))
        del tape  # a persistent tape keeps its resources until it is dropped

        # update metrics
        loss_updates = (
            (self._loss1, total1),
            (self._loss2, total2),
            (self._loss1_cls, cls1),
            (self._loss2_cls, cls2),
            (self._loss1_con, con1),
            (self._loss2_con, con2),
            (self._loss1_sta, sta1),
            (self._loss2_sta, sta2),
        )
        for tracker, value in loss_updates:
            tracker.update_state(value)
        self._acc1.update_state(y_labeled, s1_lab1)
        self._acc2.update_state(y_labeled, s2_lab2)

    def test(self, x, y, batch_size=32, evaluation_mapping=None):
        """
        Evaluates both students on a dataset, batch by batch, and returns the accumulated test metrics.

        Every batch is post-padded (features with self.padding_value, labels with -1), converted to
        tensors and passed to _test_step(). The test metrics are reset before returning.

        :param x: numpy array of numpy arrays (n_frames, n_features), features corresponding to y.
            'n_frames' can vary, padding is added to make x a tensor.
        :param y: numpy array of numpy arrays of shape (n_frames,), labels corresponding to x.
            'n_frames' can vary, padding is added to make y a tensor.
        :param batch_size: integer, batch size
        :param evaluation_mapping: dictionary {training label -> test label}, the test phones should be a subset of the
            training phones
        :return: dictionary {metric_name -> value}
        """
        for batch_idx in trange(ceil(len(x) / batch_size), desc='test batches'):
            # select, pad and convert the features of this batch
            x_batch = tf.convert_to_tensor(
                pad_sequences(select_batch(x, batch_idx, batch_size),
                              padding='post',
                              value=self.padding_value,
                              dtype='float32'))

            # same for the labels (-1 marks padded positions)
            y_batch = tf.convert_to_tensor(
                pad_sequences(select_batch(y, batch_idx, batch_size),
                              padding='post',
                              value=-1))

            self._test_step(x_batch, y_batch, evaluation_mapping)

        # the order here fixes both the key order of the result and the reset order
        tracked = (
            self._test_loss1,
            self._test_loss2,
            self._test_acc1_train_phones,
            self._test_acc2_train_phones,
            self._test_acc1,
            self._test_acc2,
            self._test_per1,
            self._test_per2,
        )

        # read all results first, then clear the accumulators
        test_metrics = {metric.name: metric.result() for metric in tracked}
        for metric in tracked:
            metric.reset_states()

        return test_metrics

    # @tf.function      # see note in _train_step()
    def _test_step(self, x, y, evaluation_mapping):
        """
        Evaluates both students on one padded batch and updates the test metrics.

        Loss and "train phones" accuracy are computed against the original labels. The remaining accuracy
        and the phone error rate use labels and predictions passed through map_labels when
        evaluation_mapping is given.

        :param x: padded batch of features
        :param y: padded batch of labels
        :param evaluation_mapping: mapping handed to map_labels, or None to evaluate on the training labels
        """
        # positions that are real frames (not padding)
        valid = self.mask.compute_mask(x)

        # forward pass of both students
        prob1 = self.student1(x, training=False)
        prob2 = self.student2(x, training=False)
        pred1_train = tf.argmax(prob1, axis=-1)
        pred2_train = tf.argmax(prob2, axis=-1)
        y_train = tf.identity(y)

        # labels/predictions used for the evaluation-phone metrics
        if evaluation_mapping is None:
            y_eval = y_train
            pred1_eval = pred1_train
            pred2_eval = pred2_train
        else:
            y_eval = tf.numpy_function(map_labels,
                                       [y_train, evaluation_mapping],
                                       [tf.float32])
            pred1_eval = tf.numpy_function(
                map_labels, [pred1_train, evaluation_mapping],
                [tf.float32])
            pred2_eval = tf.numpy_function(
                map_labels, [pred2_train, evaluation_mapping],
                [tf.float32])

        # phone error rate gets the full (still padded) tensors together with the mask
        self._test_per1.update_state(y_eval, pred1_eval, valid)
        self._test_per2.update_state(y_eval, pred2_eval, valid)

        # everything below works on non-padded positions only
        pred1_eval = pred1_eval[valid]
        pred2_eval = pred2_eval[valid]
        prob1 = prob1[valid]
        prob2 = prob2[valid]
        y_train = y_train[valid]
        y_eval = y_eval[valid]

        # classification loss against the training labels
        self._test_loss1.update_state(self._loss_cls(y_train, prob1))
        self._test_loss2.update_state(self._loss_cls(y_train, prob2))

        # accuracy on the training phones
        self._test_acc1_train_phones.update_state(y_train, prob1)
        self._test_acc2_train_phones.update_state(y_train, prob2)

        # accuracy on the test phones
        self._test_acc1.update_state(y_eval, pred1_eval)
        self._test_acc2.update_state(y_eval, pred2_eval)
コード例 #14
0
def low_level_train(optimizer, yolo_loss, train_datasets, valid_datasets, train_steps, valid_steps):
    """
    Train the model with a hand-written loop, which makes it easier to watch the
    training process and monitor how the variables change.

    Per epoch: runs train_steps training batches and valid_steps validation batches,
    writes both mean losses to TensorBoard, saves the weights whenever the validation
    loss is the best seen so far, and stops early after `patience` consecutive epochs
    whose validation loss did not drop by more than `min_delta` relative to the
    previous epoch.

    :param optimizer: optimizer used to apply the gradients
    :param yolo_loss: custom loss functions, iterated together with the model outputs and labels
    :param train_datasets: training data wrapped with tf.data
    :param valid_datasets: validation data
    :param train_steps: number of batches that make up one training epoch
    :param valid_steps: same as train_steps, for validation
    :return: None
    """
    # build the model
    model = yolo_body()

    # running means of the losses
    train_loss = Mean(name='train_loss')
    valid_loss = Mean(name='valid_loss')

    # best-model / early-stopping bookkeeping
    best_test_loss = float('inf')
    patience = 10
    min_delta = 1e-3
    patience_cnt = 0
    history_loss = []

    # TensorBoard summary writer
    summary_writer = tf.summary.create_file_writer(logdir=cfg.log_dir)

    for epoch in range(1, cfg.epochs + 1):
        train_loss.reset_states()
        valid_loss.reset_states()
        print("Epoch {}/{}".format(epoch, cfg.epochs))

        # training set; `step` is 1-based so the progress line ends at train_steps/train_steps
        for step, (images, labels) in enumerate(train_datasets.take(train_steps), start=1):
            with tf.GradientTape() as tape:
                outputs = model(images, training=True)
                # note: model.losses is only populated when the Conv2D layers were created
                # with the kernel_regularizer argument
                regularization_loss = tf.reduce_sum(model.losses)
                # yolo_loss, labels and outputs each hold data for the 3 feature layers;
                # zipping them applies each loss function to its own feature layer
                pred_loss = [
                    loss_fn(label, output)
                    for output, label, loss_fn in zip(outputs, labels, yolo_loss)
                ]

                # total loss = yolo loss + regularization loss
                total_train_loss = tf.reduce_sum(pred_loss) + regularization_loss

            # back-propagate to every trainable variable and apply the update
            grads = tape.gradient(total_train_loss, model.trainable_variables)
            optimizer.apply_gradients(zip(grads, model.trainable_variables))

            train_loss.update_state(total_train_loss)

            # progress bar (70 characters wide)
            rate = step / train_steps
            a = "*" * int(rate * 70)
            b = "." * int((1 - rate) * 70)
            loss = train_loss.result().numpy()

            print("\r{}/{} {:^3.0f}%[{}->{}] - loss:{:.4f}".
                  format(step, train_steps, int(rate * 100), a, b, loss), end='')

        # validation set: inference mode is requested explicitly instead of relying on the default
        for images, labels in valid_datasets.take(valid_steps):
            outputs = model(images, training=False)
            regularization_loss = tf.reduce_sum(model.losses)
            pred_loss = [
                loss_fn(label, output)
                for output, label, loss_fn in zip(outputs, labels, yolo_loss)
            ]

            total_valid_loss = tf.reduce_sum(pred_loss) + regularization_loss
            valid_loss.update_state(total_valid_loss)

        print('\nLoss: {:.4f}, Test Loss: {:.4f}\n'.format(train_loss.result(), valid_loss.result()))
        # keep the validation loss history (the training loss could be used instead)
        history_loss.append(valid_loss.result().numpy())

        # log to TensorBoard
        with summary_writer.as_default():
            tf.summary.scalar('train_loss', train_loss.result(), step=optimizer.iterations)
            tf.summary.scalar('valid_loss', valid_loss.result(), step=optimizer.iterations)

        # only the best model is saved
        if valid_loss.result() < best_test_loss:
            best_test_loss = valid_loss.result()
            model.save_weights(cfg.model_path, save_format='tf')

        # early stopping: compare with the previous epoch. The first epoch has nothing to be
        # compared against, so it must not consume any patience.
        if epoch > 1:
            if history_loss[-2] - history_loss[-1] > min_delta:
                patience_cnt = 0
            else:
                patience_cnt += 1

        if patience_cnt >= patience:
            tf.print("No improvement for {} times, early stopping optimization.".format(patience))
            break
コード例 #15
0
class VFAE(keras.Model):
    """
    Keras model that wires together an encoder, a second-level latent encoder, a latent
    reconstructor, a decoder and a classifier, with a custom training step that tracks
    each loss term separately.
    """

    def __init__(self,
                 encoder,
                 encoder_z,
                 reconstructor_z,
                 decoder,
                 classifier,
                 feature_dim,
                 loss_type,
                 **kwargs):
        """
        :param encoder: model mapping X to (z_mean, z_log_sigma, z)
        :param encoder_z: model mapping concat(z, y) to (mean, log_sigma, z_1)
        :param reconstructor_z: model mapping concat(z_1, y) to (mean, log_sigma, sample)
        :param decoder: model mapping concat(z, sens) to the reconstruction of X
        :param classifier: model mapping z to the prediction for y
        :param feature_dim: accepted for interface compatibility; not used by this class
        :param loss_type: stored as self.loss_type; not read anywhere in this class
        :param kwargs: forwarded to keras.Model
        """
        super(VFAE, self).__init__(**kwargs)

        self.eps = tf.constant([10e-25])
        self.beta = 1.  # weight of the classifier loss in the total loss

        self.encoder = encoder
        self.encoder_z = encoder_z
        self.reconstructor_z = reconstructor_z
        self.decoder = decoder
        self.classifier = classifier

        self.loss_type = loss_type
        self.total_loss_tracker = Mean(name="total_loss")
        self.prediction_loss_tracker = Mean(name="pred_loss")
        self.kl_loss_tracker = Mean(name="kl_loss")
        self.mmd_loss_tracker = Mean(name="mmd_loss")
        self.reconst_loss_tracker = Mean(name="reconst_loss")
        self.reconst_z_loss_tracker = Mean(name="reconst_z_loss")

    @property
    def metrics(self):
        """Trackers exposed to Keras so that they are reset between epochs."""
        return [
            self.total_loss_tracker,
            self.prediction_loss_tracker,
            self.kl_loss_tracker,
            self.mmd_loss_tracker,
            self.reconst_loss_tracker,
            self.reconst_z_loss_tracker
        ]

    def call(self, inputs):
        """
        Forward pass through every sub-model.

        :param inputs: pair (X, y); y is reshaped to a single column
        :return: tuple (z_mean, z_log_sigma, z, reconst, q_z_1_mean, q_z_1_log_sigma, z_1,
            z_reconst_mean, z_reconst_log_sigma, preds)
        """
        X, y = inputs
        y = tf.reshape(y, (-1, 1))

        # presumably the sensitive attribute column(s) of X - confirm against split_sensitive_X
        sens, _ = split_sensitive_X(X, 0, 1)

        z_mean, z_log_sigma, z = self.encoder(X)
        q_z_1_mean, q_z_1_log_sigma, z_1 = self.encoder_z(tf.concat([z, y], axis=1))

        reconst = self.decoder(tf.concat([z, sens], axis=1))
        z_reconst_mean, z_reconst_log_sigma, _ = self.reconstructor_z(tf.concat([z_1, y], axis=1))

        preds = self.classifier(z)

        return (z_mean, z_log_sigma, z,
                reconst, q_z_1_mean, q_z_1_log_sigma, z_1,
                z_reconst_mean, z_reconst_log_sigma, preds)

    def train_step(self, data):
        """
        One optimization step on a batch.

        :param data: pair (X, y)
        :return: dictionary {metric name -> running mean} of the tracked losses
        """
        X, y = data
        with tf.GradientTape() as tape:
            (z_mean, z_log_sigma, z,
             reconst, q_z_1_mean, q_z_1_log_sigma, z_1,
             z_reconst_mean, z_reconst_log_sigma, preds) = self.call(data)

            reconst_loss = neg_log_bernoulli(X, reconst, rec=1)
            reconst_z_loss = negative_log_gaussian(z, z_reconst_mean, z_reconst_log_sigma)
            classifier_loss = neg_log_bernoulli(y, preds)
            kl_loss = KL(q_z_1_mean, q_z_1_log_sigma)
            # The result must not be bound to the name `mmd_loss`: assigning to that name
            # anywhere in this function makes it local, and the call to the module-level
            # mmd_loss() would then raise UnboundLocalError.
            mmd = mmd_loss(X, z)
            entropy_z = entropy_gaussian(z_mean, z_log_sigma)

            # NOTE(review): the MMD term is tracked below but is not part of the optimized
            # loss, and self.loss_type is never consulted - confirm this is intended.
            total_loss = reconst_loss + kl_loss + reconst_z_loss - entropy_z + self.beta * classifier_loss

        grads = tape.gradient(total_loss, self.trainable_weights)
        self.optimizer.apply_gradients(zip(grads, self.trainable_weights))
        self.total_loss_tracker.update_state(total_loss)
        self.prediction_loss_tracker.update_state(classifier_loss)
        self.kl_loss_tracker.update_state(kl_loss)
        self.mmd_loss_tracker.update_state(mmd)
        self.reconst_loss_tracker.update_state(reconst_loss)
        self.reconst_z_loss_tracker.update_state(reconst_z_loss)
        return {
            "loss": self.total_loss_tracker.result(),
            "classification_loss": self.prediction_loss_tracker.result(),
            "kl_loss": self.kl_loss_tracker.result(),
            "mmd_loss": self.mmd_loss_tracker.result(),
            "reconst_loss": self.reconst_loss_tracker.result(),
            "reconst_z_loss": self.reconst_z_loss_tracker.result()
        }
コード例 #16
0
class LFPNet(Model):
    """
    Model combining an encoder, a planner and an actor.

    The latent z comes either from the planner (first observation and first goal of the
    sequence) or from the encoder (full observation and action sequences); the actor then
    produces actions from the observations, z repeated along the time axis, and the goals.
    """

    def __init__(self, encoder, planner, actor, beta) -> None:
        super().__init__()
        self.encoder = encoder
        self.planner = planner
        self.actor = actor
        self.beta = beta  # weight of the KL term in the total loss
        self.total_loss_tracker = Mean(name="total_loss")
        self.action_loss_tracker = Mean(name="action_loss")
        self.reg_loss_tracker = Mean(name="reg_loss")

    @property
    def metrics(self):
        """Loss trackers, in the order total, action, regularization."""
        return [
            self.total_loss_tracker, self.action_loss_tracker,
            self.reg_loss_tracker
        ]

    def call(self, inputs, planner=True, training=False):
        """
        :param inputs: dict with keys 'obs', 'goals' and (when planner is False) 'acts'
        :param planner: take z from the planner if True, from the encoder otherwise
        :param training: accepted for the Keras call convention; not used directly here
        :return: pair (acts, z)
        """
        obs = inputs['obs']
        if not planner:
            z = self.encoder([obs, inputs['acts']])
        else:
            z = self.planner([obs[:, 0, :], inputs['goals'][:, 0, :]])

        # repeat z[0] along a new axis 1, once per entry of obs' axis 1
        repeats = (1, obs.shape[1], 1)
        z_tiled = tf.tile(tf.expand_dims(z[0], 1), repeats)
        acts = self.actor([obs, z_tiled, inputs['goals']])
        return acts, z

    def _compute_losses(self, inputs, training, use_planner_actions):
        """Runs encoder and planner passes; returns (act_loss, reg_loss, total loss)."""
        acts_enc, z_enc = self(inputs, planner=False, training=training)
        acts_plan, z_plan = self(inputs, planner=True, training=training)
        predicted = acts_plan if use_planner_actions else acts_enc
        act_loss = self.compiled_loss(inputs['acts'],
                                      predicted,
                                      regularization_losses=self.losses)
        reg_loss = tfd.kl_divergence(z_enc, z_plan)
        return act_loss, reg_loss, act_loss + self.beta * reg_loss

    def _track(self, loss, act_loss, reg_loss):
        """Feeds the three loss values to their trackers and returns all running results."""
        self.total_loss_tracker.update_state(loss)
        self.action_loss_tracker.update_state(act_loss)
        self.reg_loss_tracker.update_state(reg_loss)
        return {m.name: m.result() for m in self.metrics}

    def train_step(self, inputs):
        """Optimizes on the encoder-conditioned actions plus beta times the KL term."""
        with tf.GradientTape() as tape:
            act_loss, reg_loss, loss = self._compute_losses(
                inputs, training=True, use_planner_actions=False)

        variables = self.trainable_variables
        gradients = tape.gradient(loss, variables)
        self.optimizer.apply_gradients(zip(gradients, variables))

        result = self._track(loss, act_loss, reg_loss)
        result['beta'] = self.beta
        return result

    def test_step(self, inputs):
        """Scores the planner-conditioned actions plus beta times the KL term."""
        act_loss, reg_loss, loss = self._compute_losses(
            inputs, training=False, use_planner_actions=True)
        return self._track(loss, act_loss, reg_loss)
コード例 #17
0
class LatentStateAction(Model):
    """
    Joint model of a state autoencoder (variational) and an action autoencoder that share
    a latent dimension.

    The state encoder returns (mean, log-variance, sample); the action encoder is built
    with is_variational=False and returns a single latent tensor. Both autoencoders are
    trained in the same step with the model's optimizer.
    """

    def __init__(self, n_obs, n_act, latent_dim, warmup_steps, **kwargs):
        """
        :param n_obs: size of an observation vector
        :param n_act: size of an action vector
        :param latent_dim: size of the latent space passed to both encoders and decoders
        :param warmup_steps: number of training steps during which the state KL loss is masked to 0
        :param kwargs: forwarded to the Keras Model constructor
        """
        super(LatentStateAction, self).__init__(**kwargs)

        self.action_encoder = Encoder(n_act,
                                      latent_dim,
                                      is_variational=False,
                                      name_prefix='action_encoder')
        self.state_encoder = Encoder(n_obs,
                                     latent_dim,
                                     name_prefix='state_encoder')

        self.action_decoder = Decoder(n_act,
                                      latent_dim,
                                      name_prefix='action_decoder')
        self.state_decoder = Decoder(n_obs,
                                     latent_dim,
                                     name_prefix='state_decoder')

        # functional action autoencoder: input -> encoder -> decoder
        self.action_input = Input(shape=(n_act, ))
        self.action_output = self.action_decoder(
            self.action_encoder(self.action_input))
        self.action_ae_model = Model(self.action_input,
                                     self.action_output,
                                     name='action_ae')

        # functional state autoencoder; [-1] selects the last encoder output (the sample)
        self.state_input = Input(shape=(n_obs, ))
        self.state_output = self.state_decoder(
            self.state_encoder(self.state_input)[-1])
        self.state_ae_model = Model(self.state_input,
                                    self.state_output,
                                    name='state_vae')

        self.model = Model(inputs=[self.action_input, self.state_input],
                           outputs=[self.action_output, self.state_output])
        self.compile(optimizer=Adam(learning_rate=1e-2))

        # Hyper-parameters
        self.warmup_steps = tf.constant(warmup_steps, dtype=tf.int32)
        self.it = tf.Variable(0, dtype=tf.int32)  # number of training steps done so far

        # Logging
        self.action_loss_tracker = Mean(name='action_loss')
        self.state_recon_loss_tracker = Mean(name='state_recon_loss')
        self.state_kl_loss_tracker = Mean(name='state_kl_loss')

    def call(self, inputs, training=None, mask=None):
        """
        :param inputs: dict with keys 'acts' and 'obs1'
        :return: outputs of the combined model, [action reconstruction, state reconstruction]
        """
        return self.model([inputs['acts'], inputs['obs1']])

    @property
    def metrics(self):
        """Loss trackers exposed to Keras."""
        return [
            self.action_loss_tracker, self.state_recon_loss_tracker,
            self.state_kl_loss_tracker
        ]

    # @tf.function
    def train_step(self, data):
        """
        One optimization step of both autoencoders.

        :param data: sequence whose first element is a dict with keys 'obs1', 'obs2' and 'acts'
        :return: dictionary {metric name -> running mean}
        """
        data = data[0]

        # persistent: two gradients (state loss, action loss) are taken from the same tape
        with tf.GradientTape(persistent=True) as tape:
            # Get state encoder output
            zs_mean, zs_log_var, zs = self.state_encoder(data['obs1'])

            # How good are we at reconstructing state?
            state_reconstruction = self.state_decoder(zs)
            state_reconstruction_loss = tf.reduce_mean(
                tf.square(data['obs1'] - state_reconstruction))

            # How much regularized is our latent state space?
            state_kl_loss = -0.5 * (1 + zs_log_var - tf.square(zs_mean) -
                                    tf.exp(zs_log_var))
            state_kl_loss = tf.reduce_mean(tf.reduce_sum(state_kl_loss,
                                                         axis=1))

            # Mask state_kl_loss during warm-up phase
            state_kl_loss = tf.cond(pred=tf.math.greater_equal(
                self.it, self.warmup_steps),
                                    true_fn=lambda: state_kl_loss,
                                    false_fn=lambda: 0.0)

            # Find state VAE total loss
            total_state_loss = state_reconstruction_loss + 4 * state_kl_loss

            # Get action encoder output
            za = self.action_encoder(data['acts'])

            # How good are we at reconstructing action?
            action_reconstruction = self.action_decoder(za)
            action_reconstruction_loss = tf.reduce_mean(
                tf.square(data['acts'] - action_reconstruction))

            # Get encoded next state using current state encoder
            zs_tp1 = self.state_encoder(data['obs2'])[-1]

            # Predict next state assuming canonical representation: current latent state
            # plus latent action. Starting from zs_tp1 instead would cancel the state terms
            # in the loss below and leave mean(za ** 2).
            pred_zs_tp1 = tf.add(za, zs)

            # Get action AE total loss
            latent_matching_loss = tf.reduce_mean(
                tf.square(zs_tp1 - pred_zs_tp1))

            total_action_loss = latent_matching_loss + action_reconstruction_loss

        # Get partial derivative wrt to losses and network params. The action loss is only
        # differentiated w.r.t. the action autoencoder, so it does not update the state encoder.
        state_grads = tape.gradient(total_state_loss,
                                    self.state_ae_model.trainable_weights)
        action_grads = tape.gradient(total_action_loss,
                                     self.action_ae_model.trainable_weights)
        del tape  # a persistent tape holds its resources until it is dropped

        # Apply gradients
        self.optimizer.apply_gradients(
            zip(state_grads, self.state_ae_model.trainable_weights))
        self.optimizer.apply_gradients(
            zip(action_grads, self.action_ae_model.trainable_weights))

        # Count this step (used by the warm-up check on the next call)
        self.it.assign_add(1)

        # Logging
        self.action_loss_tracker.update_state(total_action_loss)
        self.state_recon_loss_tracker.update_state(state_reconstruction_loss)
        self.state_kl_loss_tracker.update_state(state_kl_loss)

        return {
            'action_loss': self.action_loss_tracker.result(),
            'state_recon_loss': self.state_recon_loss_tracker.result(),
            'state_kl_loss': self.state_kl_loss_tracker.result()
        }