Example #1
class VFIB(keras.Model):
    def __init__(self, encoder, predictor, feature_dim, loss_type, **kwargs):
        super(VFIB, self).__init__(**kwargs)
        self.encoder = encoder
        self.classifier = predictor
        self.loss_type = loss_type
        self.total_loss_tracker = Mean(name="total_loss")
        self.prediction_loss_tracker = Mean(name="prediction_loss")
        self.kl_loss_tracker = Mean(name="kl_loss")
        self.mmd_loss_tracker = Mean(name="mmd_loss")

    @property
    def metrics(self):
        return [
            self.total_loss_tracker,
            self.prediction_loss_tracker,
            self.kl_loss_tracker,
            self.mmd_loss_tracker
        ]
    
    def call(self, inputs):
        # 0 refers to first column with sensitive feature 'Age'
        sens, _ = split_sensitive_X(inputs, 0, 1)
        mu, sig, z = self.encoder(inputs)
        preds = self.classifier(tf.concat([z, sens], 1))
        return mu, sig, z, preds
        
        
    def train_step(self, data):
        X, y = data
        with tf.GradientTape() as tape:
            
            z_mean, z_log_sigma, z, preds = self.call(X)

            prediction_loss = neg_log_bernoulli(y, preds)
            kl_loss = KL(z_mean, z_log_sigma)
            mmd_loss_value = mmd_loss(X, z)  # distinct name so the mmd_loss function is not shadowed
            
            if self.loss_type == 'all':
                total_loss = prediction_loss + kl_loss + mmd_loss_value
            elif self.loss_type == 'kl':
                total_loss = prediction_loss + kl_loss
            else:
                total_loss = prediction_loss
                
        grads = tape.gradient(total_loss, self.trainable_weights)
        self.optimizer.apply_gradients(zip(grads, self.trainable_weights))
        self.total_loss_tracker.update_state(total_loss)
        self.prediction_loss_tracker.update_state(prediction_loss)
        self.kl_loss_tracker.update_state(kl_loss)
        self.mmd_loss_tracker.update_state(mmd_loss_value)
        return {
            "loss": self.total_loss_tracker.result(),
            "classification_loss": self.prediction_loss_tracker.result(),
            "kl_loss": self.kl_loss_tracker.result(),
            "mmd_loss": self.mmd_loss_tracker.result()
        }
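Note: Example #1 calls several helpers (split_sensitive_X, neg_log_bernoulli, KL, mmd_loss) defined elsewhere in that project. Below is a minimal sketch of plausible definitions for context only; these are assumptions, not the original implementations, and the MMD term is omitted because its exact form is not recoverable from the snippet.

import tensorflow as tf

def split_sensitive_X(X, start, end):
    # Plausible helper: returns (sensitive columns, remaining features).
    return X[:, start:end], tf.concat([X[:, :start], X[:, end:]], axis=1)

def neg_log_bernoulli(y_true, y_prob, eps=1e-7):
    # Negative Bernoulli log-likelihood of labels under predicted probabilities.
    y_true = tf.cast(y_true, tf.float32)
    return -tf.reduce_mean(y_true * tf.math.log(y_prob + eps)
                           + (1.0 - y_true) * tf.math.log(1.0 - y_prob + eps))

def KL(z_mean, z_log_sigma):
    # KL divergence between N(z_mean, exp(z_log_sigma)) and a standard normal prior.
    return -0.5 * tf.reduce_mean(
        tf.reduce_sum(1.0 + z_log_sigma - tf.square(z_mean) - tf.exp(z_log_sigma), axis=1))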
Example #2
File: hider.py Project: csetraynor/guanyar
def train_wgain(dataset, gain, n_epoch, n_critic, alpha):
    '''Train WGAIN.

    Args:
      - dataset: a TF2 Dataset yielding (x_batch, mask_batch) pairs.
      - gain: a GAIN model (generator + critic).
      - n_epoch: number of training epochs.
      - n_critic: number of critic updates per generator update.
      - alpha: reconstruction-loss hyper-parameter.

    Returns:
      - gain: trained model.
      - critic, generator and reconstruction losses per epoch for monitoring.
    '''

    generator, discriminator = gain.layers
    d_optimizer = keras.optimizers.RMSprop(lr=0.00005)
    g_optimizer = keras.optimizers.Adam()
    # Keep results for plotting
    train_d_loss_results = []
    train_g_loss_results = []
    train_rec_loss_results = []

    for epoch in range(n_epoch):
        epoch_d_loss_avg = Mean()
        epoch_g_loss_avg = Mean()
        epoch_rec_loss_avg = Mean()
        for x_batch, mask_batch in dataset:
            batch_size, dim = x_batch.shape
            # phase 1: train discriminator
            for _ in range(n_critic):
                hint = hint_generator(x_batch, mask_batch)
                generated_samples = generator(hint, training = True)
                discriminator.trainable = True
                d_loss, d_grads = discriminator_grad(discriminator, generated_samples, mask_batch[:,1:])
                d_optimizer.apply_gradients(zip(d_grads, discriminator.trainable_variables))
            # phase 2 - training the generator
            hint = hint_generator(x_batch, mask_batch)
            discriminator.trainable = False
            g_loss, g_grads = gain_grad(gain, hint)
            g_optimizer.apply_gradients(zip(g_grads, gain.trainable_variables))  # generator gradients use the generator optimizer
            hint = hint_generator(x_batch, mask_batch)
            rec_loss, rec_grads = rec_grad(generator, hint, mask_batch, alpha)
            g_optimizer.apply_gradients(zip(rec_grads, gain.trainable_variables))
            # Track progress: Add current batch loss
            epoch_d_loss_avg.update_state(d_loss)
            epoch_g_loss_avg.update_state(g_loss)
            epoch_rec_loss_avg.update_state(rec_loss)
        # End epoch
        train_d_loss_results.append(epoch_d_loss_avg.result())
        train_g_loss_results.append(epoch_g_loss_avg.result())
        train_rec_loss_results.append(epoch_rec_loss_avg.result())

    return gain, train_d_loss_results, train_g_loss_results, train_rec_loss_results
Example #3
def train_rgan(gan_model, dataset, n_epochs):
    
    generator_optimizer = keras.optimizers.Adam(lr=0.01, beta_1=0.9, beta_2=0.999, clipnorm=1.) 
    discriminator_optimizer = keras.optimizers.SGD(lr=0.1, momentum=0.9, nesterov=True, clipnorm=1.) 
    recurrent_generator, recurrent_discriminator = gan_model.layers
    
    # Keep results for plotting
    train_discriminator_loss_results = []
    train_generator_loss_results = []
    
    for epoch in range(n_epochs):
        epoch_discriminator_loss_avg = Mean()
        epoch_generator_loss_avg = Mean()

        for x_batch, mask_batch in dataset:
            no, seq_len , dim = x_batch.shape
            x_batch = cast(x_batch, float32)
            # phase 1 - training the discriminator
            noise = noise_generator(no, seq_len, dim)
            generated_samples = recurrent_generator(noise)
            x_fake_and_real = concat([generated_samples, x_batch], axis=1)
            y1 = cast(reshape(constant([[0.]] * seq_len + [[1.]] * seq_len), [seq_len*2, 1]), float32)
            y1 = tf.broadcast_to(y1, [no, seq_len*2, 1])
            mask1 = tf.ones([no, seq_len])
            mask_fake_and_real = concat([mask1, mask_batch], axis=1)
            recurrent_discriminator.trainable = True
            discriminator_loss_value, discriminator_grads = grad(recurrent_discriminator, x_fake_and_real, y1, mask_fake_and_real) 
            discriminator_optimizer.apply_gradients(zip(discriminator_grads, recurrent_discriminator.trainable_variables))
            # phase 2 - training the generator
            noise = noise_generator(no, seq_len, dim)
            y2 = cast(reshape( constant([[1.]] * seq_len), [seq_len, 1]), float32)
            y2 = tf.broadcast_to(y2, [no, seq_len, 1])
            recurrent_discriminator.trainable = False
            generator_loss_value, generator_grads = grad(gan_model, noise, y2, mask1) 
            generator_optimizer.apply_gradients(zip(generator_grads, gan_model.trainable_variables))
            # Track progress: Add current batch loss
            epoch_discriminator_loss_avg.update_state(discriminator_loss_value)
            epoch_generator_loss_avg.update_state(generator_loss_value)  
            
        # End epoch
        train_discriminator_loss_results.append(epoch_discriminator_loss_avg.result())
        train_generator_loss_results.append(epoch_generator_loss_avg.result())
        
        if epoch % 50 == 0:
            print("RGAN Epoch {:03d}: Discriminator Loss: {:.3f}".format(epoch, epoch_discriminator_loss_avg.result() ) , file=sys.stdout)
            print("RGAN Epoch {:03d}: Generator Loss: {:.3f}".format(epoch, epoch_generator_loss_avg.result() ) , file=sys.stdout)
            
    return gan_model, train_discriminator_loss_results, train_generator_loss_results
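Note: Example #3 relies on noise_generator and a generic grad helper that are not shown. A hedged sketch of what they might look like follows; these are assumptions, not the original code.

import tensorflow as tf

def noise_generator(no, seq_len, dim):
    # Random noise sequences fed to the recurrent generator.
    return tf.random.normal([no, seq_len, dim])

def grad(model, inputs, targets, mask):
    # Masked binary cross-entropy loss and its gradients w.r.t. the model's variables.
    with tf.GradientTape() as tape:
        preds = model(inputs, training=True)
        bce = tf.keras.losses.binary_crossentropy(targets, preds)   # shape [batch, time]
        loss = tf.reduce_mean(bce * tf.cast(mask, bce.dtype))
    return loss, tape.gradient(loss, model.trainable_variables)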
Example #4
    def train_gan(self, train_ds, epochs, print_every, save_every,
                  log_filename, model_save_name):
        pls_metric = Mean()
        dls_metric = Mean()

        log_file = open(os.path.join(LOG_DIR, '{}.txt'.format(log_filename)),
                        'w+')
        log_file.close()

        print('----- Start training -----')
        epoch = 0
        for lr, hr in train_ds.take(epochs):
            epoch += 1
            step_time = time.time()

            generator_loss, discriminator_loss = self.train_step(lr, hr)

            # Apply metrics
            pls_metric(generator_loss)
            dls_metric(discriminator_loss)

            # Update log on the first epoch and every print_every epochs
            if epoch == 1 or epoch % print_every == 0:
                print(
                    'Epoch {}/{}, time: {:.3f}s, generator loss = {:.4f}, discriminator loss = {:.4f}'
                    .format(epoch, epochs,
                            time.time() - step_time, pls_metric.result(),
                            dls_metric.result()))

                log_file = open(
                    os.path.join(LOG_DIR, '{}.txt'.format(log_filename)), 'a')
                log_file.write(
                    'Epoch {}/{}, time: {:.3f}s, generator loss = {:.4f}, discriminator loss = {:.4f}\n'
                    .format(epoch, epochs,
                            time.time() - step_time, pls_metric.result(),
                            dls_metric.result()))
                log_file.close()

                pls_metric.reset_states()
                dls_metric.reset_states()

            # Save model every save_every epochs
            if epoch % save_every == 0:
                generator.save(model_save_dir +
                               '/gen_{}_{}.h5'.format(model_save_name, epoch))
                discriminator.save(
                    model_save_dir +
                    '/dis_{}_{}.h5'.format(model_save_name, epoch))
Example #5
def train(model, train_dataset, test_dataset, epochs, optimizer):
    # statistics to store
    elbos = []
    ssims = []
    print('Starting training...')
    # iterate over all epochs
    for epoch in range(0, epochs + 1):
        # iterate over train_dataset containing training images
        for x_train in train_dataset:
            train_step(model, x_train, optimizer)
        # feed the network test samples to generate new images
        predictions = model.generate_images(model, test_dataset)

        # display the results
        try:
            display_result(predictions)
        except:
            pass

        loss = Mean()
        for test_x in test_dataset:
            loss(calculate_loss(model, test_x))
        elbo = -loss.result()
        # evaluate the model using Structural Similarity between generated images and test samples and ELBO
        ssim = calculate_ssim(predictions, test_dataset)
        print("> " + str(epoch) + ": SSIM=" + str(ssim) + ', ELBO=' + str(elbo))
        # add the evaluations to a list and plot the results later
        ssims.append(ssim)
        elbos.append(elbo)
    # return the trained model
    return model, elbos, ssims
Example #6
    def train(self, train_dataset, valid_dataset, steps, evaluate_every=1000, save_best_only=False):
        loss_mean = Mean()

        ckpt_mgr = self.checkpoint_manager
        ckpt = self.checkpoint

        self.now = time.perf_counter()

        for lr, hr in train_dataset.take(steps - ckpt.step.numpy()):
            ckpt.step.assign_add(1)
            step = ckpt.step.numpy()

            loss = self.train_step(lr, hr)
            loss_mean(loss)

            print("Currently in the train step ",step)

            if step % evaluate_every == 0:
                loss_value = loss_mean.result()
                loss_mean.reset_states()

                # Compute PSNR on validation dataset
                psnr_value = self.evaluate(valid_dataset)

                duration = time.perf_counter() - self.now
                print(f'{step}/{steps}: loss = {loss_value.numpy():.3f}, PSNR = {psnr_value.numpy():3f} ({duration:.2f}s)')

                if save_best_only and psnr_value <= ckpt.psnr:
                    self.now = time.perf_counter()
                    continue

                ckpt.psnr = psnr_value
                ckpt_mgr.save()

                self.now = time.perf_counter()
Example #7
def pre_train(generator, train_dataset, valid_dataset, steps, evaluate_every=1,lr_rate=1e-4):
    loss_mean = Mean()
    pre_train_loss = MeanSquaredError()
    pre_train_optimizer = Adam(lr_rate)

    now = time.perf_counter()

    step = 0
    for lr, hr in train_dataset.take(steps):
        step = step+1

        with tf.GradientTape() as tape:
            lr = tf.cast(lr, tf.float32)
            hr = tf.cast(hr, tf.float32)

            sr = generator(lr, training=True)
            loss_value = pre_train_loss(hr, sr)

        gradients = tape.gradient(loss_value, generator.trainable_variables)
        pre_train_optimizer.apply_gradients(zip(gradients, generator.trainable_variables))
        loss_mean(loss_value)

        if step % evaluate_every == 0:
            loss_value = loss_mean.result()
            loss_mean.reset_states()

            psnr_value = evaluate(generator, valid_dataset)

            duration = time.perf_counter() - now
            print(
                f'{step}/{steps}: loss = {loss_value.numpy():.3f}, PSNR = {psnr_value.numpy():3f} ({duration:.2f}s)')

            now = time.perf_counter()
Example #8
    def train(self,
              data_generator,
              epochs=10,
              checkpoint_dir='./training_checkpoints'):
        # create checkpoint to save the training progression
        checkpoint_prefix = os.path.join(checkpoint_dir, "ckpt")
        checkpoint = tf.train.Checkpoint(
            generator_optimizer=self.optimizer_g,
            discriminator_optimizer=self.optimizer_d,
            generator=self.generator,
            discriminator=self.discriminator)

        fixed_noise = get_noise(25)

        #print("Base noise:")
        #fake_images = generator(fixed_noise, training=False).numpy()
        #jupy_display(display(fake_images))

        # loop over epochs :
        for epoch in range(epochs):
            start = time.time()
            print("====== Epoch {:2d} ======".format(epoch))
            #initiate the mean loss over the epoch for the discriminator and generator
            epoch_loss_d = Mean()
            epoch_loss_g = Mean()

            #epoch_len = tf.data.experimental.cardinality(data_generator)
            for i, real_images in enumerate(data_generator):
                loss_d, loss_g = self.train_step(real_images)
                epoch_loss_d(loss_d)
                epoch_loss_g(loss_g)

            print("\nDiscriminator: {}, Generator: {}".format(
                epoch_loss_d.result(), epoch_loss_g.result()))
            print('Time for epoch {} is {} sec'.format(epoch + 1,
                                                       time.time() - start))

            if (epoch + 1) % 5 == 0:
                checkpoint.save(file_prefix=checkpoint_prefix)
                fake_images = self.generator(fixed_noise, training=False)
                self.plot_generated_images(fake_images, epoch, save=True)
Example #9
    def train(self, train_dataset, valid_dataset, save_best_only=False):
        loss_mean = Mean()

        ckpt_mgr = self.checkpoint_manager
        ckpt = self.checkpoint

        self.now = time.perf_counter()

        for lr, hr in train_dataset.take(self.args.num_iter -
                                         ckpt.step.numpy()):
            ckpt.step.assign_add(1)
            step = ckpt.step.numpy()

            loss = self.train_step(lr, hr)
            loss_mean(loss)

            loss_value = loss_mean.result()
            loss_mean.reset_states()

            lr_value = ckpt.optimizer._decayed_lr('float32').numpy()

            duration = time.perf_counter() - self.now
            self.now = time.perf_counter()

            if step % self.args.log_freq == 0:
                tf.summary.scalar('loss', loss_value, step=step)
                tf.summary.scalar('lr', lr_value, step=step)

            if step % self.args.print_freq == 0:
                print(
                    f'{step}/{self.args.num_iter}: loss = {loss_value.numpy():.3f} , lr = {lr_value:.6f} ({duration:.2f}s)'
                )

            if step % self.args.valid_freq == 0:
                psnr_value = self.evaluate(valid_dataset)
                ckpt.psnr = psnr_value
                tf.summary.scalar('psnr', psnr_value, step=step)

                print(
                    f'{step}/{self.args.num_iter}: loss = {loss_value.numpy():.3f}, lr = {lr_value:.6f}, PSNR = {psnr_value.numpy():3f}'
                )

            if step % self.args.save_freq == 0:
                # save weights only
                save_path = self.ckpt_path + '/weights-' + str(step) + '.h5'
                self.checkpoint.model.save_weights(filepath=save_path,
                                                   save_format='h5')

                # save ckpt (weights + other train status)
                ckpt_mgr.save(checkpoint_number=step)
Example #10
class StandardVarianceBasedMetric(Metric):
    def __init__(self, name, dtype):
        super().__init__(name, dtype=dtype)
        self._mean = Mean(dtype=dtype)
        self._square_mean = Mean(dtype=dtype)

    @abstractmethod
    def _objective_function(self, y_true, y_pred):
        pass

    def update_state(self, y_true, y_pred, sample_weight=None):
        values = self._objective_function(y_true, y_pred)
        self._mean.update_state(values=values, sample_weight=sample_weight)
        self._square_mean.update_state(values=tf.square(values),
                                       sample_weight=sample_weight)

    def result(self):
        return tf.sqrt(self._square_mean.result() -
                       tf.square(self._mean.result()))

    def reset_states(self):
        self._mean.reset_states()
        self._square_mean.reset_states()
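For context, a hypothetical concrete subclass of the abstract metric above: it tracks the standard deviation of the per-sample absolute error across batches. The name and objective function are illustrative, and the class above plus "import tensorflow as tf" are assumed.

class AbsoluteErrorStd(StandardVarianceBasedMetric):
    def __init__(self, name='abs_error_std', dtype=tf.float32):
        super().__init__(name, dtype=dtype)

    def _objective_function(self, y_true, y_pred):
        # Per-sample values; the parent class turns their running mean and
        # mean-of-squares into a standard deviation in result().
        return tf.abs(tf.cast(y_true, y_pred.dtype) - y_pred)

# usage sketch:
# metric = AbsoluteErrorStd()
# metric.update_state(y_true, y_pred)
# print(metric.result())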
Example #11
def gain_train_step(dataset, gain, n_epochs):
    generator, discriminator = gain.layers
    discriminator_optimizer = keras.optimizers.SGD(momentum=0.9, nesterov=True) 
    generator_optimizer = keras.optimizers.Adam()
    # Keep results for plotting
    train_discriminator_loss_results = []
    train_generator_loss_results = []
    
    for epoch in range(n_epochs):
        epoch_discriminator_loss_avg = Mean()
        epoch_generator_loss_avg = Mean()
        for x_batch, mask_batch in dataset:
            x_batch = cast(x_batch, float32)
            mask_batch = cast(mask_batch, float32)
            # phase 1: train discriminator
            hint = hint_generator(x_batch, mask_batch)
            generated_samples = generator(concat( [hint, mask_batch], axis = 1))
            discriminator.trainable = True
            discriminator_loss_value, discriminator_grads = gain_grad(discriminator, generated_samples, mask_batch) 
            discriminator_optimizer.apply_gradients(zip(discriminator_grads, discriminator.trainable_variables))
            # phase 2 - training the generator
            hint = hint_generator(x_batch, mask_batch)
            discriminator.trainable = False
            generator_loss_value, generator_grads = gain_grad(gain, concat( [hint, mask_batch], axis = 1), mask_batch) 
            generator_optimizer.apply_gradients(zip(generator_grads, gain.trainable_variables))
            # Track progress: Add current batch loss
            epoch_discriminator_loss_avg.update_state(discriminator_loss_value)
            epoch_generator_loss_avg.update_state(generator_loss_value)
            
        # End epoch
        train_discriminator_loss_results.append(epoch_discriminator_loss_avg.result())
        train_generator_loss_results.append(epoch_generator_loss_avg.result())
        if epoch % 50 == 0:
            print("GAIN Epoch {:03d}: Discriminator Loss: {:.3f}".format(epoch, epoch_discriminator_loss_avg.result() ) , file=sys.stdout)
            print("GAIN Epoch {:03d}: Generator Loss: {:.3f}".format(epoch, epoch_generator_loss_avg.result() ) , file=sys.stdout)
            
    return gain, train_discriminator_loss_results, train_generator_loss_results
Example #12
class Leaner:
    def __init__(self, config: MuZeroConfig, storage: SharedStorage,
                 replay_buffer: ReplayBuffer):
        self.config = config
        self.storage = storage
        self.replay_buffer = replay_buffer
        self.summary = create_summary(name="leaner")
        self.metrics_loss = Mean(f'leaner-loss', dtype=tf.float32)
        self.network = Network(self.config)
        self.lr_schedule = ExponentialDecay(
            initial_learning_rate=self.config.lr_init,
            decay_steps=self.config.lr_decay_steps,
            decay_rate=self.config.lr_decay_rate)
        self.optimizer = Adam(learning_rate=self.lr_schedule)

    def start(self):
        while self.network.training_steps() < self.config.training_steps:
            if ray.get(self.replay_buffer.size.remote()) > 0:

                self.train()

                if self.network.training_steps(
                ) % self.config.checkpoint_interval == 0:
                    weights = self.network.get_weights()
                    self.storage.update_network.remote(weights)

                if self.network.training_steps(
                ) % self.config.save_interval == 0:
                    self.network.save()

        print("Finished")

    def train(self):
        batch = ray.get(self.replay_buffer.sample_batch.remote())

        with tf.GradientTape() as tape:
            loss = self.network.loss_function(batch)

        grads = tape.gradient(loss, self.network.get_variables())
        self.optimizer.apply_gradients(zip(grads,
                                           self.network.get_variables()))

        self.metrics_loss(loss)
        with self.summary.as_default():
            tf.summary.scalar(f'loss', self.metrics_loss.result(),
                              self.network.training_steps())
        self.metrics_loss.reset_states()

        self.network.update_training_steps()
Example #13
    def train(self,
              train_dataset,
              valid_dataset,
              steps,
              evaluate_every=1000,
              save_best_only=False):
        loss_mean = Mean()

        ckpt_mgr = self.checkpoint_manager
        ckpt = self.checkpoint

        self.now = time.perf_counter()

        for lr, hr in train_dataset.take(steps - ckpt.step.numpy(
        )):  # for low_resolution+high_resolution image pair in dataset
            t_start = time.time()
            ckpt.step.assign_add(1)
            step = ckpt.step.numpy()
            loss = self.train_step(lr, hr)
            loss_mean(loss)
            t_end = time.time()
            print("epoch:%3d step:%2d loss:%.5f time:%.3f" %
                  (step / 50, step % 50, loss, t_end - t_start))

            # evaluate
            if step % evaluate_every == 0:
                loss_value = loss_mean.result()
                loss_mean.reset_states()

                # Compute PSNR on validation dataset
                psnr_value = self.evaluate(valid_dataset)

                duration = time.perf_counter() - self.now
                print(
                    f'{step}/{steps}: loss = {loss_value.numpy():.3f}, PSNR = {psnr_value.numpy():3f} ({duration:.2f}s)'
                )

                if save_best_only and psnr_value <= ckpt.psnr:  # if no PSNR improvement
                    self.now = time.perf_counter()
                    # skip saving checkpoint
                    continue

                ckpt.psnr = psnr_value
                ckpt_mgr.save()
                print("checkpoint saved!")

                self.now = time.perf_counter()
Example #14
    def train(self,
              train_dataset,
              valid_dataset,
              steps,
              evaluate_every=1000,
              save_best_only=False):
        loss_mean = Mean()

        ckpt_mgr = self.checkpoint_manager
        ckpt = self.checkpoint

        vis_list = []

        for lr, hr in train_dataset.take(steps - ckpt.step.numpy()):
            ckpt.step.assign_add(1)
            step = ckpt.step.numpy()

            loss = self.train_step(lr, hr)
            loss_mean(loss)

            if step % evaluate_every == 0:
                loss_value = loss_mean.result()
                loss_mean.reset_states()

                # Compute PSNR on validation dataset
                psnr_value = self.evaluate(valid_dataset)

                print(
                    f'{step}/{steps}: loss = {loss_value.numpy():.3f}, PSNR = {psnr_value.numpy():3f}'
                )

                vis_list.append((step, loss_value, psnr_value))

                if save_best_only and psnr_value <= ckpt.psnr:
                    # skip saving checkpoint, no PSNR improvement
                    continue

                ckpt.psnr = psnr_value
                ckpt_mgr.save()

        # saving progress data to make graphs
        csv = open('./visLoss.csv', 'w')
        csv.write('step, loss, psnr\n')
        for vals in vis_list:
            csv.write('{},{},{}\n'.format(vals[0], vals[1].numpy(), vals[2].numpy()))
        csv.close()
Example #15
    def train(self,
              train_dataset,
              valid_dataset,
              steps,
              evaluate_every=1000,
              save_best_only=False):
        loss_mean = Mean()

        ckpt_mgr = self.checkpoint_manager
        ckpt = self.checkpoint

        self.now = time.perf_counter()

        for lr, hr in train_dataset.take(steps - ckpt.step.numpy()):
            #print('check1..', steps, ckpt.step.numpy())
            ckpt.step.assign_add(1)
            step = ckpt.step.numpy()

            loss = self.train_step(lr, hr)
            loss_mean(loss)

            if step % evaluate_every == 0:
                loss_value = loss_mean.result()
                loss_mean.reset_states()

                # Compute PSNR on validation dataset
                psnr_value = self.evaluate(valid_dataset)

                duration = time.perf_counter() - self.now
                print(
                    f'{step}/{steps}: loss = {loss_value.numpy():.3f}, PSNR = {psnr_value.numpy():3f} ({duration:.2f}s)'
                )
                #########
                self.resolve_and_plot('demo/img_0', step)
                #########

                if save_best_only and psnr_value <= ckpt.psnr:
                    self.now = time.perf_counter()
                    # skip saving checkpoint, no PSNR improvement
                    continue

                ckpt.psnr = psnr_value
                ckpt_mgr.save()

                self.now = time.perf_counter()
Example #16
class MeanBasedMetric(Metric):
    def __init__(self, name, dtype):
        super().__init__(name, dtype=dtype)
        self._mean = Mean(dtype=dtype)

    @abstractmethod
    def _objective_function(self, y_true, y_pred):
        pass

    def update_state(self, y_true, y_pred, sample_weight=None):
        values = self._objective_function(y_true, y_pred)
        self._mean.update_state(values=values, sample_weight=sample_weight)

    def result(self):
        return self._mean.result()

    def reset_states(self):
        self._mean.reset_states()
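Similarly, a hypothetical concrete subclass of MeanBasedMetric, and how such a custom Metric could be passed to model.compile; both the subclass and the compile call are illustrative assumptions.

class MeanAbsoluteErrorMetric(MeanBasedMetric):
    def __init__(self):
        super().__init__(name='mean_abs_error', dtype=tf.float32)

    def _objective_function(self, y_true, y_pred):
        # Per-example absolute error; the wrapped Mean averages it over batches.
        return tf.reduce_mean(tf.abs(tf.cast(y_true, y_pred.dtype) - y_pred), axis=-1)

# usage sketch:
# model.compile(optimizer='adam', loss='mse', metrics=[MeanAbsoluteErrorMetric()])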
Example #17
    def train_generator(self,
                        train_dataset,
                        valid_dataset,
                        epochs=20000,
                        valid_lr=None,
                        valid_hr=None):
        evaluate_size = epochs / 10

        loss_mean = Mean()

        start_time = time.time()
        epoch = 0

        for lr, hr in train_dataset.take(epochs):
            epoch += 1
            step = tf.convert_to_tensor(epoch, dtype=tf.int64)
            generator_loss = self.train_generator_step(lr, hr)
            loss_mean(generator_loss)

            if epoch % 50 == 0:
                loss_value = loss_mean.result()
                loss_mean.reset_states()

                psnr_value = self.evaluate(valid_dataset.take(1))

                print(
                    f'Time for epoch {epoch}/{epochs} is {(time.time() - start_time):.4f} sec, '
                    f'gan loss = {loss_value:.4f}, psnr = {psnr_value:.4f}')
                start_time = time.time()

                if self.summary_writer is not None:
                    with self.summary_writer.as_default():
                        tf.summary.scalar('generator_loss',
                                          loss_value,
                                          step=epoch)
                        tf.summary.scalar('psnr', psnr_value, step=epoch)

            if epoch % evaluate_size == 0:
                self.util.save_checkpoint(self.checkpoint, epoch)

            if epoch % 5000 == 0:
                self.generate_and_save_images(step, valid_lr, valid_hr)
Example #18
    def train(self,
              train_dataset,
              valid_dataset,
              steps,
              evaluate_every=1000,
              save_best_only=False):
        loss_mean = Mean()

        ckpt_mgr = self.checkpoint_manager
        ckpt = self.checkpoint

        self.now = timeit.default_timer()

        for lr, hr in train_dataset.take(steps - ckpt.step.numpy()):
            ckpt.step.assign_add(1)
            step = ckpt.step.numpy()

            loss = self.train_step(lr, hr)
            loss_mean(loss)

            if step % evaluate_every == 0:
                loss_value = loss_mean.result()
                loss_mean.reset_states()

                # Compute PSNR on validation dataset
                psnr_value, ssim_value = self.evaluate(valid_dataset)

                duration = timeit.default_timer() - self.now
                print('%d/%d: loss = %.3f, PSNR = %3f (%.2fs)' %
                      (step, steps, loss_value.numpy(), psnr_value.numpy(),
                       duration))

                if save_best_only and psnr_value <= ckpt.psnr:
                    self.now = timeit.default_timer()
                    # skip saving checkpoint, no PSNR improvement
                    continue

                ckpt.psnr = psnr_value
                ckpt_mgr.save()

                self.now = timeit.default_timer()
Example #19
    def train(self, train_ds, valid_ds, steps, evaluate_every=1000, save_best_only=False):
        loss_mean = Mean()

        ckpt_mgr = self.checkpoint_manager
        ckpt = self.checkpoint

        self.now = time.perf_counter()

        for lr, hr in train_ds.take(steps - ckpt.step.numpy()):
            ckpt.step.assign_add(1)
            step = ckpt.step.numpy()

            loss = self.train_step(lr, hr)
            loss_mean(loss)

            if step % evaluate_every == 0:
                # Record loss value
                loss_value = loss_mean.result()
                loss_mean.reset_states()
                
                # Compute PSNR on validation set
                psnr_value = self.evaluate(valid_ds)
                
                # Calculate time consumed
                duration = time.perf_counter() - self.now
                print('{}/{}: loss = {:.3f}, PSNR = {:.3f} ({:.2f}s)'.format(step, steps, loss_value.numpy(), psnr_value.numpy(), duration))

                # Skip checkpoint if PSNR does not improve
                if save_best_only and psnr_value <= ckpt.psnr:
                    self.now = time.perf_counter()
                    continue
                
                # Save checkpoint
                ckpt.psnr = psnr_value
                ckpt_mgr.save()

                self.now = time.perf_counter()
Example #20
class ModelTrainer:
    """
    Note:
    Having this model keeps the trainStep and testStep instance new every time you call it.
    Implementing those functions outside a class will return an error
    ValueError: Creating variables on a non-first call to a function decorated with tf.function.
    """
    def __init__(self,
                 model,
                 loss,
                 metric,
                 optimizer,
                 ckptDir,
                 logDir,
                 multiGPU=True,
                 evalStep=1000):

        # Safety checks
        self.logDirTrain = os.path.join(logDir, 'Train')
        self.logDirTest = os.path.join(logDir, 'Test')

        if not os.path.exists(ckptDir):
            os.makedirs(ckptDir)
        if not os.path.exists(self.logDirTrain):
            os.makedirs(self.logDirTrain)
        if not os.path.exists(self.logDirTest):
            os.makedirs(self.logDirTest)

        self.trainWriter = tf.summary.create_file_writer(self.logDirTrain)
        self.testWriter = tf.summary.create_file_writer(self.logDirTest)

        self.ckpt = tf.train.Checkpoint(step=tf.Variable(0),
                                        psnr=tf.Variable(1.0),
                                        optimizer=optimizer,
                                        model=model)
        self.ckptMngr = tf.train.CheckpointManager(checkpoint=self.ckpt,
                                                   directory=ckptDir,
                                                   max_to_keep=5)

        self.loss = loss
        self.metric = metric

        self.accTestLoss = Mean(name='accTestLoss')
        self.accTestPSNR = Mean(name='accTestPSNR')
        self.accTrainLoss = Mean(name='accTrainLoss')
        self.accTrainPSNR = Mean(name='accTrainPSNR')
        self.evalStep = evalStep
        self.multiGPU = multiGPU
        self.strategy = None
        self.restore()

    @property
    def model(self):
        return self.ckpt.model

    def restore(self):
        if self.ckptMngr.latest_checkpoint:
            self.ckpt.restore(self.ckptMngr.latest_checkpoint)
            print(
                f'[ INFO ] Model restored from checkpoint at step {self.ckpt.step.numpy()}.'
            )

    def fitTrainData(self,
                     X: tf.Tensor,
                     y: tf.Tensor,
                     globalBatchSize: int,
                     epochs: int,
                     valData: List[np.ma.array],
                     bufferSize: int = 128,
                     valSteps: int = 64,
                     saveBestOnly: bool = True,
                     initEpoch: int = 0):

        logger.info('[ INFO ] Loading data set to buffer cache...')
        trainSet = loadTrainDataAsTFDataSet(X, y[0], y[1], epochs,
                                            globalBatchSize, bufferSize)
        valSet = loadValDataAsTFDataSet(valData[0], valData[1], valData[2],
                                        valSteps, globalBatchSize, bufferSize)
        logger.info('[ INFO ] Loading success...')

        dataSetLength = len(X)
        totalSteps = tf.cast(dataSetLength / globalBatchSize, tf.int64)
        globalStep = tf.cast(self.ckpt.step, tf.int64)
        step = globalStep % totalSteps
        epoch = initEpoch

        logger.info('[ INFO ] Begin training...')

        for x_batch_train, y_batch_train, y_mask_batch_train in trainSet:
            if (totalSteps - step) == 0:
                epoch += 1
                step = tf.cast(self.ckpt.step, tf.int64) % totalSteps
                logger.info(
                    f'[ ***************  NEW EPOCH  *************** ] Epoch number {epoch}'
                )
                # Reset metrics
                self.accTrainLoss.reset_states()
                self.accTrainPSNR.reset_states()
                self.accTestLoss.reset_states()
                self.accTestPSNR.reset_states()

            step += 1
            globalStep += 1
            self.trainStep(x_batch_train, y_batch_train, y_mask_batch_train)
            self.ckpt.step.assign_add(1)

            t = f"[ EPOCH {epoch}/{epochs} ] - [ STEP {step}/{int(totalSteps)} ] Loss: {self.accTrainLoss.result():.3f}, cPSNR: {self.accTrainPSNR.result():.3f}"
            logger.info(t)

            self.saveLog('Train', globalStep)

            if step != 0 and (step % self.evalStep) == 0:
                # Reset states for test
                self.accTestLoss.reset_states()
                self.accTestPSNR.reset_states()
                for x_batch_val, y_batch_val, y_mask_batch_val in valSet:
                    self.testStep(x_batch_val, y_batch_val, y_mask_batch_val)
                self.saveLog('Test', globalStep)
                t = f"[ *************** VAL INFO *************** ] Validation Loss: {self.accTestLoss.result():.3f}, Validation PSNR: {self.accTestPSNR.result():.3f}"
                logger.info(t)

                if saveBestOnly and (self.accTestPSNR.result() <=
                                     self.ckpt.psnr):
                    continue

                logger.info('[ SAVE ] Saving checkpoint...')
                self.ckpt.psnr = self.accTestPSNR.result()
                self.ckptMngr.save()

    @tf.function
    def trainStep(self, patchLR, patchHR, maskHR):
        with tf.GradientTape() as tape:
            predPatchHR = self.ckpt.model(patchLR, training=True)
            # Loss(patchHR: tf.Tensor, maskHR: tf.Tensor, predPatchHR: tf.Tensor)
            loss = self.loss(patchHR, maskHR, predPatchHR)

        gradients = tape.gradient(loss, self.ckpt.model.trainable_variables)
        self.ckpt.optimizer.apply_gradients(
            zip(gradients, self.ckpt.model.trainable_variables))
        metric = self.metric(patchHR, maskHR, predPatchHR)
        self.accTrainLoss(loss)
        self.accTrainPSNR(metric)

    @tf.function
    def testStep(self, patchLR, patchHR, maskHR):
        predPatchHR = self.ckpt.model(patchLR, training=False)
        loss = self.loss(patchHR, maskHR, predPatchHR)
        metric = self.metric(patchHR, maskHR, predPatchHR)
        self.accTestLoss(loss)
        self.accTestPSNR(metric)

    def saveLog(self, testOrTrain, globalStep):
        w = self.trainWriter if testOrTrain == 'Train' else self.testWriter
        with w.as_default():
            if testOrTrain == 'Train':
                tf.summary.scalar('PSNR',
                                  self.accTrainPSNR.result(),
                                  step=globalStep)
                tf.summary.scalar('Loss',
                                  self.accTrainLoss.result(),
                                  step=globalStep)
            else:
                tf.summary.scalar('PSNR',
                                  self.accTestPSNR.result(),
                                  step=globalStep)
                tf.summary.scalar('Loss',
                                  self.accTestLoss.result(),
                                  step=globalStep)
            w.flush()
Example #21
        loss_value, grads = training.grad(x_train_, y_train_)
        optimizer.apply_gradients(zip(grads, teacher_model.trainable_weights))
        loss_value_test = training.loss(x_val_, y_val_)
        probs = tf.nn.softmax(teacher_model(x_train_))
        probs_val = tf.nn.softmax(teacher_model(x_val_))

        loss_metric(loss_value)
        acc_metric(acc(y_train_, probs))
        loss_metric_val(loss_value_test)
        acc_metric_val(acc_val(y_val_, probs_val))

    # Display training progress
    print(
        'Epoch {}/{}: Loss: {:.3f}, Accuracy: {:.3%}, Validation Loss: {:.3f}, Validation Accuracy: {:.3%}'
        .format(epoch, EPOCHS_T,
                loss_metric.result().numpy(),
                acc_metric.result().numpy(),
                loss_metric_val.result().numpy(),
                acc_metric_val.result().numpy()))
    # Record loss and accuracy (to plot them later)
    history_teacher.losses.append(loss_metric.result().numpy())
    history_teacher.accuracy.append(acc_metric.result().numpy() * 100)
    history_teacher.losses_val.append(loss_metric_val.result().numpy())
    history_teacher.accuracy_val.append(acc_metric_val.result().numpy() * 100)

# Change the batch size
ds_train = tf.data.Dataset.from_tensor_slices(
    (x_train, y_train)).shuffle(x_train.shape[0]).batch(BATCH_SIZE_S)
ds_val = tf.data.Dataset.from_tensor_slices(
    (x_val, y_val)).shuffle(x_val.shape[0]).batch(BATCH_SIZE_S)
ds_test = tf.data.Dataset.from_tensor_slices(
Example #22
class ModelTrainer:
    """
    Note:
    Wrapping trainStep and testStep in this class gives each trainer instance its own
    tf.function-decorated steps. Implementing those functions outside a class raises
    ValueError: Creating variables on a non-first call to a function decorated with tf.function.
    """
    def __init__(self,
                 model,
                 loss,
                 metric,
                 optimizer,
                 ckptDir,
                 logDir,
                 strategy,
                 multiGPU=True,
                 evalStep=10):

        # Safety checks
        if not os.path.exists(ckptDir):
            os.makedirs(ckptDir)
        if not os.path.exists(logDir):
            os.makedirs(logDir)

        self.ckpt = tf.train.Checkpoint(step=tf.Variable(0),
                                        psnr=tf.Variable(1.0),
                                        optimizer=optimizer,
                                        model=model)
        self.ckptMngr = tf.train.CheckpointManager(checkpoint=self.ckpt,
                                                   directory=ckptDir,
                                                   max_to_keep=5)
        self.loss = loss
        self.metric = metric
        self.logDir = logDir
        self.trainLoss = Mean(name='trainLoss')
        self.trainPSNR = Mean(name='trainPSNR')
        self.testLoss = Mean(name='testLoss')
        self.testPSNR = Mean(name='testPSNR')
        self.evalStep = evalStep
        self.multiGPU = multiGPU
        self.strategy = strategy
        self.restore()

    @property
    def model(self):
        return self.ckpt.model

    def restore(self):
        if self.ckptMngr.latest_checkpoint:
            self.ckpt.restore(self.ckptMngr.latest_checkpoint)
            print(
                f'[ INFO ] Model restored from checkpoint at step {self.ckpt.step.numpy()}.'
            )

    def fitTrainData(self,
                     X: tf.Tensor,
                     y: tf.Tensor,
                     batchSize: int,
                     epochs: int,
                     valData: List[np.ma.array],
                     bufferSize: int = 256,
                     valSteps: int = 128,
                     saveBestOnly: bool = True,
                     initEpoch: int = 0):
        if self.multiGPU:
            logger.info('[ INFO ] Multi-GPU mode selected...')
            logger.info('[ INFO ] Instantiate strategy...')
            batchSizePerReplica = batchSize
            globalBatchSize = batchSizePerReplica * self.strategy.num_replicas_in_sync
        else:
            globalBatchSize = batchSize

        logger.info('[ INFO ] Loading data set to buffer cache...')
        trainSet = loadTrainDataAsTFDataSet(X, y[0], y[1], epochs,
                                            globalBatchSize, bufferSize)
        valSet = loadValDataAsTFDataSet(valData[0], valData[1], valData[2],
                                        valSteps, globalBatchSize, bufferSize)
        logger.info('[ INFO ] Loading success...')

        if self.multiGPU:
            logger.info('[ INFO ] Distributing train set...')
            trainSet = self.strategy.experimental_distribute_dataset(trainSet)
            logger.info('[ INFO ] Distributing test set...')
            valSet = self.strategy.experimental_distribute_dataset(valSet)

        w = tf.summary.create_file_writer(self.logDir)

        dataSetLength = len(X)
        totalSteps = tf.cast(dataSetLength / globalBatchSize, tf.int64)
        globalStep = tf.cast(self.ckpt.step, tf.int64)
        step = globalStep % totalSteps
        epoch = initEpoch

        logger.info('[ INFO ] Begin training...')
        with w.as_default():
            for x_batch_train, y_batch_train, y_mask_batch_train in trainSet:
                if (totalSteps - step) == 0:
                    epoch += 1
                    step = tf.cast(self.ckpt.step, tf.int64) % totalSteps
                    logger.info(f'[ NEW EPOCH ] Epoch number {epoch}')
                    # Reset metrics
                    self.trainLoss.reset_states()
                    self.trainPSNR.reset_states()
                    self.testLoss.reset_states()
                    self.testPSNR.reset_states()

                step += 1
                globalStep += 1
                self.trainDistStep(x_batch_train, y_batch_train,
                                   y_mask_batch_train)
                self.ckpt.step.assign_add(1)

                t = f"[ EPOCH {epoch}/{epochs} ] Step {step}/{int(totalSteps)}, Loss: {self.trainLoss.result():.3f}, cPSNR: {self.trainPSNR.result():.3f}"
                logger.info(t)

                tf.summary.scalar('Train PSNR',
                                  self.trainPSNR.result(),
                                  step=globalStep)
                tf.summary.scalar('Train loss',
                                  self.trainLoss.result(),
                                  step=globalStep)

                if step != 0 and (step % self.evalStep) == 0:
                    # Reset states for test
                    self.testLoss.reset_states()
                    self.testPSNR.reset_states()
                    for x_batch_val, y_batch_val, y_mask_batch_val in valSet:
                        self.testDistStep(x_batch_val, y_batch_val,
                                          y_mask_batch_val)
                    tf.summary.scalar('Test loss',
                                      self.testLoss.result(),
                                      step=globalStep)
                    tf.summary.scalar('Test PSNR',
                                      self.testPSNR.result(),
                                      step=globalStep)
                    t = f"[ VAL INFO ] Validation Loss: {self.testLoss.result():.3f}, Validation PSNR: {self.testPSNR.result():.3f}"
                    logger.info(t)
                    w.flush()

                    if saveBestOnly and (self.testPSNR.result() <=
                                         self.ckpt.psnr):
                        continue

                    logger.info('[ SAVE ] Saving checkpoint...')
                    self.ckpt.psnr = self.testPSNR.result()
                    self.ckptMngr.save()

    def computeLoss(self, patchHR, maskHR, predPatchHR):
        loss = tf.reduce_sum(self.loss(patchHR, maskHR,
                                       predPatchHR)) * (1.0 / self.batchSize)
        loss += (sum(self.ckpt.model.losses) * 1.0 /
                 self.strategy.num_replicas_in_sync)
        return loss

    def calcMetric(self, patchHR, maskHR, predPatchHR):
        return self.metric(patchHR, maskHR, predPatchHR)

    @tf.function
    def trainStep(self, patchLR, patchHR, maskHR):
        with tf.GradientTape() as tape:
            predPatchHR = self.ckpt.model(patchLR, training=True)
            # Loss(patchHR: tf.Tensor, maskHR: tf.Tensor, predPatchHR: tf.Tensor)
            loss = self.loss(patchHR, maskHR, predPatchHR)

        gradients = tape.gradient(loss, self.ckpt.model.trainable_variables)
        self.ckpt.optimizer.apply_gradients(
            zip(gradients, self.ckpt.model.trainable_variables))
        return loss

    @tf.function
    def testStep(self, patchLR, patchHR, maskHR):
        predPatchHR = self.ckpt.model(patchLR, training=False)
        loss = self.loss(patchHR, maskHR, predPatchHR)
        return loss

    @tf.function
    def trainDistStep(self, patchLR, patchHR, maskHR):
        perExampleLosses = self.strategy.experimental_run_v2(self.trainStep,
                                                             args=(patchLR,
                                                                   patchHR,
                                                                   maskHR))
        perExampleMetric = self.strategy.experimental_run_v2(self.calcMetric,
                                                             args=(patchLR,
                                                                   patchHR,
                                                                   maskHR))
        meanLoss = self.strategy.reduce(tf.distribute.ReduceOp.MEAN,
                                        perExampleLosses,
                                        axis=0)
        meanMetric = self.strategy.reduce(tf.distribute.ReduceOp.MEAN,
                                          perExampleMetric,
                                          axis=0)
        self.trainLoss(meanLoss)
        self.trainPSNR(meanMetric)

    @tf.function
    def testDistStep(self, patchLR, patchHR, maskHR):
        perExampleLosses = self.strategy.experimental_run_v2(self.testStep,
                                                             args=(patchLR,
                                                                   patchHR,
                                                                   maskHR))
        perExampleMetric = self.strategy.experimental_run_v2(self.calcMetric,
                                                             args=(patchLR,
                                                                   patchHR,
                                                                   maskHR))
        meanLoss = self.strategy.reduce(tf.distribute.ReduceOp.MEAN,
                                        perExampleLosses,
                                        axis=0)
        meanMetric = self.strategy.reduce(tf.distribute.ReduceOp.MEAN,
                                          perExampleMetric,
                                          axis=0)
        self.testLoss(meanLoss)
        self.testPSNR(meanMetric)
Example #23
    # Extra line of printout because the ProgBar would overwrite the logs in the terminal
    print("Epoch: {}/{}".format(epoch + 1, EPOCHS))
    print("Epoch: {}/{}".format(epoch + 1, EPOCHS))
    start = time.time()

    # Iterate over the batches of the dataset.
    for step, x_batch in enumerate(ds_train):
        with tf.GradientTape() as tape:
            loss = vae(image=x_batch, return_recon_loss=True)

        grads = tape.gradient(loss, vae.trainable_weights)
        optimizer.apply_gradients([
            (grad, var) for (grad, var) in zip(grads, vae.trainable_variables)
            if grad is not None
        ])

        loss_metric(loss)
        progress_bar.update(step)

    end = time.time()
    time_per_step = (end - start) * 1000 / steps
    print(" - {:.3f}ms/step - loss: {:.6f}".format(time_per_step,
                                                   loss_metric.result()))

    if (epoch + 1) % 5 == 0 and epoch != 0:
        vae.save_weights("./dalle_tensorflow/model_weights/vae/vae_weights" +
                         "_" + str(epoch + 1))

# Save the model weights (subclassed model cannot use save_model)
vae.save_weights("./dalle_tensorflow/model_weights/vae/vae_weights")
Example #24
    def train_gan(self,
                  train_dataset,
                  valid_dataset,
                  epochs=200000,
                  valid_lr=None,
                  valid_hr=None):
        evaluate_size = epochs / 10
        start = time.time()
        vgg_metric = Mean()
        dls_metric = Mean()
        g_metric = Mean()
        c_metric = Mean()
        epoch = 0

        for lr, hr in train_dataset.take(epochs):
            epoch += 1
            step = tf.convert_to_tensor(epoch, tf.int64)
            vgg_loss, discremenator_loss, generator_loss, content_loss = self.train_gan_step(
                lr, hr)
            vgg_metric(vgg_loss)
            dls_metric(discremenator_loss)
            g_metric(generator_loss)
            c_metric(content_loss)

            if epoch % 50 == 0:
                vgg = vgg_metric.result()
                discriminator_loss_metric = dls_metric.result()
                generator_loss_metric = g_metric.result()
                content_loss_metric = c_metric.result()

                vgg_metric.reset_states()
                dls_metric.reset_states()
                g_metric.reset_states()
                c_metric.reset_states()

                psnr_value = self.evaluate(valid_dataset.take(1))

                print(
                    f'Time for epoch {epoch}/{epochs} is {(time.time() - start):.4f} sec, '
                    f' perceptual loss = {vgg:.4f},'
                    f' generator loss = {generator_loss_metric:.4f},'
                    f' discriminator loss = {discriminator_loss_metric:.4f},'
                    f' content loss = {content_loss_metric:.4f},'
                    f' psnr = {psnr_value:.4f}')

                start = time.time()

                if self.summary_writer is not None:
                    with self.summary_writer.as_default():
                        tf.summary.scalar('generator_loss',
                                          generator_loss_metric,
                                          step=epoch)
                        tf.summary.scalar('content loss',
                                          content_loss_metric,
                                          step=epoch)
                        tf.summary.scalar(
                            'vgg loss = content loss + 0.0001 * gan loss',
                            vgg,
                            step=epoch)
                        tf.summary.scalar('discremenator_loss',
                                          discriminator_loss_metric,
                                          step=epoch)
                        tf.summary.scalar('psnr', psnr_value, step=epoch)

            if epoch % evaluate_size == 0:
                self.util.save_checkpoint(self.checkpoint, epoch)

            if epoch % 5000 == 0:
                self.generate_and_save_images(step, valid_lr, valid_hr)
Example #25
@tf.function
def testing(images, labels):
    predicts = model(images)
    t_loss = loss_(labels, predicts)

    test_loss(t_loss)
    test_accuracy(labels, predicts)


# TRAINING
for epoch in range(EPOCHS):
    for train_images, train_labels in train:
        training(train_images, train_labels)

    for test_images, test_labels in test:
        testing(test_images, test_labels)

    to_print = 'Epoch {}, Loss: {}, Accuracy: {}, Test Loss: {}, Test Accuracy: {}'
    print(
        to_print.format(epoch + 1, train_loss.result(),
                        train_accuracy.result() * 100, test_loss.result(),
                        test_accuracy.result() * 100))

    # Reset the metrics for the next epoch
    train_loss.reset_states()
    train_accuracy.reset_states()
    test_loss.reset_states()
    test_accuracy.reset_states()

    model.save_weights('model', save_format='tf')
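Example #25 depends on objects defined elsewhere (model, loss_, optimizer, the four metric instances, and the training step). A minimal sketch of what those companions usually look like in this TF2 pattern follows; the specific loss and accuracy choices are assumptions.

import tensorflow as tf
from tensorflow.keras.metrics import Mean, SparseCategoricalAccuracy

loss_ = tf.keras.losses.SparseCategoricalCrossentropy()
optimizer = tf.keras.optimizers.Adam()
train_loss = Mean(name='train_loss')
train_accuracy = SparseCategoricalAccuracy(name='train_accuracy')
test_loss = Mean(name='test_loss')
test_accuracy = SparseCategoricalAccuracy(name='test_accuracy')

@tf.function
def training(images, labels):
    # One optimization step; mirrors the structure of the testing() function above.
    with tf.GradientTape() as tape:
        predicts = model(images)  # `model` is assumed to be defined elsewhere
        loss = loss_(labels, predicts)
    grads = tape.gradient(loss, model.trainable_variables)
    optimizer.apply_gradients(zip(grads, model.trainable_variables))
    train_loss(loss)
    train_accuracy(labels, predicts)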
Example #26
class MNIST2MNIST_M_DANN(object):

    def __init__(self, config):
        """
        Initialize the MNIST-to-MNIST-M domain adaptation network.
        :param config: configuration object
        """
        # Keep a reference to the configuration
        self.cfg = config

        # Parameter of the gradient reversal layer (GRL)
        self.grl_lambd = 1.0

        # Build the deep domain adaptation network
        self.build_DANN()

        # Define training and validation losses and metrics
        self.loss = categorical_crossentropy
        self.acc = categorical_accuracy

        self.train_loss = Mean("train_loss", dtype=tf.float32)
        self.train_image_cls_loss = Mean("train_image_cls_loss", dtype=tf.float32)
        self.train_domain_cls_loss = Mean("train_domain_cls_loss", dtype=tf.float32)
        self.train_image_cls_acc = Mean("train_image_cls_acc", dtype=tf.float32)
        self.train_domain_cls_acc = Mean("train_domain_cls_acc", dtype=tf.float32)
        self.val_loss = Mean("val_loss", dtype=tf.float32)
        self.val_image_cls_loss = Mean("val_image_cls_loss", dtype=tf.float32)
        self.val_domain_cls_loss = Mean("val_domain_cls_loss", dtype=tf.float32)
        self.val_image_cls_acc = Mean("val_image_cls_acc", dtype=tf.float32)
        self.val_domain_cls_acc = Mean("val_domain_cls_acc", dtype=tf.float32)

        # Define the optimizer
        self.optimizer = tf.keras.optimizers.SGD(self.cfg.init_learning_rate,
                                                 momentum=self.cfg.momentum_rate)

        '''
        # Initialize the early-stopping policy
        self.early_stopping = EarlyStopping(min_delta=1e-5, patience=100, verbose=1)
        '''

    def build_DANN(self):
        """
        这是搭建域适配网络的函数
        :return:
        """
        # 定义源域、目标域的图像输入和DANN模型图像输入
        self.image_input = Input(shape=self.cfg.image_input_shape,name="image_input")

        # Feature extractor shared by the domain classifier and the image classifier
        self.feature_encoder = build_feature_extractor()
        # Obtain the image-classification and domain-classification output tensors
        self.image_cls_encoder = build_image_classify_extractor()
        self.domain_cls_encoder = build_domain_classify_extractor()

        self.grl = GradientReversalLayer()

        self.dann_model = Model(self.image_input,
                                [self.image_cls_encoder(self.feature_encoder(self.image_input)),
                                 self.domain_cls_encoder(self.grl(self.feature_encoder(self.image_input)))])
        self.dann_model.summary()

        # Load pretrained weights if provided
        if self.cfg.pre_model_path is not None:
            self.dann_model.load_weights(self.cfg.pre_model_path, by_name=True, skip_mismatch=True)

    def train(self, train_source_datagen, train_target_datagen,
              val_target_datagen, train_iter_num, val_iter_num):
        """
        Training loop of the DANN.
        :param train_source_datagen: source-domain training data generator
        :param train_target_datagen: target-domain training data generator
        :param val_target_datagen: target-domain validation data generator
        :param train_iter_num: number of training iterations per epoch
        :param val_iter_num: number of iterations per validation pass
        """
        # Initialize the checkpoint and log directory paths
        time = datetime.datetime.now().strftime("%Y%m%d%H%M%S")
        checkpoint_dir = os.path.join(self.cfg.checkpoints_dir,time)
        if not os.path.exists(checkpoint_dir):
            os.mkdir(checkpoint_dir)

        log_dir = os.path.join(self.cfg.logs_dir, time)
        if not os.path.exists(log_dir):
            os.mkdir(log_dir)

        self.cfg.save_config(time)

        self.writer_hyperparameter = tf.summary.create_file_writer(os.path.join(log_dir,"hyperparameter"))
        self.writer_train = tf.summary.create_file_writer(os.path.join(log_dir,"train"))
        self.writer_val = tf.summary.create_file_writer(os.path.join(log_dir,'validation'))

        print('\n----------- start to train -----------\n')
        with open(os.path.join(log_dir,'log.txt'),'w') as f:
            for ep in np.arange(1,self.cfg.epoch+1,1):
                # Initialize the progress bar
                self.progbar = Progbar(train_iter_num+1)
                print('Epoch {}/{}'.format(ep, self.cfg.epoch))

                # Train the model for one epoch
                train_loss,train_image_cls_acc = self.train_one_epoch\
                    (train_source_datagen,train_target_datagen,train_iter_num,ep)
                # Validate the model for one epoch
                val_loss,val_image_cls_acc = self.eval_one_epoch(val_target_datagen,val_iter_num,ep)
                # Update the progress bar
                self.progbar.update(train_iter_num+1, [('val_loss', val_loss),
                                                       ("val_image_acc", val_image_cls_acc)])
                # Reset all losses and metrics for the next epoch
                self.train_loss.reset_states()
                self.train_image_cls_loss.reset_states()
                self.train_domain_cls_loss.reset_states()
                self.train_image_cls_acc.reset_states()
                self.train_domain_cls_acc.reset_states()
                self.val_loss.reset_states()
                self.val_image_cls_loss.reset_states()
                self.val_domain_cls_loss.reset_states()
                self.val_image_cls_acc.reset_states()
                self.val_domain_cls_acc.reset_states()

                # Save the model for this epoch
                log_str = "Epoch{:03d}-train_loss-{:.3f}-val_loss-{:.3f}-train_image_cls_acc-{:.3f}-val_image_cls_acc-{:.3f}"\
                    .format(ep, train_loss, val_loss,train_image_cls_acc,val_image_cls_acc)
                print(log_str)
                f.write(log_str+"\n")           # Append to the log file
                self.dann_model.save(os.path.join(checkpoint_dir, log_str + ".h5"))

                '''
                # Decide whether to stop training early, using the target-domain image classification accuracy
                stop_training = self.early_stopping.on_epoch_end(ep, val_image_cls_acc)
                if stop_training:
                    break
                '''
        self.dann_model.save(os.path.join(checkpoint_dir, "trained_dann_mnist2mnist_m.h5"))
        print('\n----------- end to train -----------\n')

    def train_one_epoch(self, train_source_datagen, train_target_datagen, train_iter_num, ep):
        """
        Runs one epoch of model training.
        :param train_source_datagen: source-domain training data generator
        :param train_target_datagen: target-domain training data generator
        :param train_iter_num: number of iterations per training epoch
        :param ep: current training epoch
        :return:
        """
        for i in np.arange(1, train_iter_num + 1):
            # Fetch a mini-batch together with its image labels and domain labels
            batch_mnist_image_data, batch_mnist_labels = train_source_datagen.__next__()  # train_source_datagen.next_batch()
            batch_mnist_m_image_data, batch_mnist_m_labels = train_target_datagen.__next__()  # train_target_datagen.next_batch()
            batch_domain_labels = np.vstack([np.tile([1., 0.], [len(batch_mnist_labels), 1]),
                                             np.tile([0., 1.], [len(batch_mnist_m_labels), 1])]).astype(np.float32)
            batch_image_data = np.concatenate([batch_mnist_image_data, batch_mnist_m_image_data], axis=0)
            # Update the learning rate and GRL lambda according to training progress and log them
            # (a sketch of these schedule helpers follows this example)
            global_step = (ep - 1) * train_iter_num + i
            process = global_step * 1.0 / (self.cfg.epoch * train_iter_num)
            self.grl_lambd = grl_lambda_schedule(process)
            learning_rate = learning_rate_schedule(process, init_learning_rate=self.cfg.init_learning_rate)
            tf.keras.backend.set_value(self.optimizer.lr, learning_rate)
            with self.writer_hyperparameter.as_default():
                tf.summary.scalar("hyperparameter/learning_rate", tf.convert_to_tensor(learning_rate), global_step)
                tf.summary.scalar("hyperparameter/grl_lambda", tf.convert_to_tensor(self.grl_lambd), global_step)

            # Compute the gradients of the combined loss
            with tf.GradientTape() as tape:
                # Image-classification predictions, loss and accuracy (source domain only)
                image_cls_feature = self.feature_encoder(batch_mnist_image_data)
                image_cls_pred = self.image_cls_encoder(image_cls_feature,training=True)
                image_cls_loss = self.loss(batch_mnist_labels,image_cls_pred)
                image_cls_acc = self.acc(batch_mnist_labels, image_cls_pred)

                # Domain-classification predictions, loss and accuracy (source + target domains)
                domain_cls_feature = self.feature_encoder(batch_image_data)
                domain_cls_pred = self.domain_cls_encoder(self.grl(domain_cls_feature, self.grl_lambd),
                                                          training=True)
                domain_cls_loss = self.loss(batch_domain_labels, domain_cls_pred)
                domain_cls_acc = self.acc(batch_domain_labels, domain_cls_pred)

                # Total training loss = image-classification loss + domain-classification loss
                loss = tf.reduce_mean(image_cls_loss) + tf.reduce_mean(domain_cls_loss)
            # Custom optimization step
            trainable_vars = tape.watched_variables()
            grads = tape.gradient(loss, trainable_vars)
            self.optimizer.apply_gradients(zip(grads, trainable_vars))

            # Accumulate running losses and accuracies
            self.train_loss(loss)
            self.train_image_cls_loss(image_cls_loss)
            self.train_domain_cls_loss(domain_cls_loss)
            self.train_image_cls_acc(image_cls_acc)
            self.train_domain_cls_acc(domain_cls_acc)

            # Update the progress bar
            self.progbar.update(i, [('loss', loss),
                               ('image_cls_loss', image_cls_loss),
                               ('domain_cls_loss', domain_cls_loss),
                               ("image_acc", image_cls_acc),
                               ("domain_acc", domain_cls_acc)])
        # Log training losses and metrics to TensorBoard
        with self.writer_train.as_default():
            tf.summary.scalar("loss/loss", self.train_loss.result(), ep)
            tf.summary.scalar("loss/image_cls_loss", self.train_image_cls_loss.result(), ep)
            tf.summary.scalar("loss/domain_cls_loss", self.train_domain_cls_loss.result(), ep)
            tf.summary.scalar("acc/image_cls_acc", self.train_image_cls_acc.result(), ep)
            tf.summary.scalar("acc/domain_cls_acc", self.train_domain_cls_acc.result(), ep)

        return self.train_loss.result(),self.train_image_cls_acc.result()

    def eval_one_epoch(self, val_target_datagen, val_iter_num, ep):
        """
        Runs one epoch of model validation.
        :param val_target_datagen: target-domain validation data generator
        :param val_iter_num: number of iterations per validation epoch
        :param ep: current validation epoch
        :return:
        """
        for i in np.arange(1, val_iter_num + 1):
            # Fetch a target-domain mini-batch with its image labels and domain labels
            batch_mnist_m_image_data, batch_mnist_m_labels = val_target_datagen.__next__()
            batch_mnist_m_domain_labels = np.tile([0., 1.], [len(batch_mnist_m_labels), 1]).astype(np.float32)

            # Image-classification and domain-classification predictions for the target-domain batch
            target_image_feature = self.feature_encoder(batch_mnist_m_image_data)
            target_image_cls_pred = self.image_cls_encoder(target_image_feature, training=False)
            target_domain_cls_pred = self.domain_cls_encoder(target_image_feature, training=False)

            # Target-domain prediction losses
            target_image_cls_loss = self.loss(batch_mnist_m_labels,target_image_cls_pred)
            target_domain_cls_loss = self.loss(batch_mnist_m_domain_labels,target_domain_cls_pred)
            target_loss = tf.reduce_mean(target_image_cls_loss) + tf.reduce_mean(target_domain_cls_loss)
            # Target-domain image and domain classification accuracies
            image_cls_acc = self.acc(batch_mnist_m_labels, target_image_cls_pred)
            domain_cls_acc = self.acc(batch_mnist_m_domain_labels, target_domain_cls_pred)

            # Update validation losses and accuracies
            self.val_loss(target_loss)
            self.val_image_cls_loss(target_image_cls_loss)
            self.val_domain_cls_loss(target_domain_cls_loss)
            self.val_image_cls_acc(image_cls_acc)
            self.val_domain_cls_acc(domain_cls_acc)

        # Log validation losses and metrics to TensorBoard
        with self.writer_val.as_default():
            tf.summary.scalar("loss/loss", self.val_loss.result(), ep)
            tf.summary.scalar("loss/image_cls_loss", self.val_image_cls_loss.result(), ep)
            tf.summary.scalar("loss/domain_cls_loss", self.val_domain_cls_loss.result(), ep)
            tf.summary.scalar("acc/image_cls_acc", self.val_image_cls_acc.result(), ep)
            tf.summary.scalar("acc/domain_cls_acc", self.val_domain_cls_acc.result(), ep)
        return self.val_loss.result(), self.val_image_cls_acc.result()
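The class above uses GradientReversalLayer, grl_lambda_schedule and learning_rate_schedule without defining them. Below is a minimal sketch of what they might look like, following the standard DANN schedules; these bodies are assumptions, not the project's exact implementations.

import numpy as np
import tensorflow as tf


class GradientReversalLayer(tf.keras.layers.Layer):
    """Identity in the forward pass; flips and scales the gradient by lambda in the backward pass."""
    def call(self, inputs, lambd=1.0):
        lambd = tf.cast(lambd, inputs.dtype)

        @tf.custom_gradient
        def _reverse(x):
            def grad(dy):
                return -lambd * dy
            return tf.identity(x), grad

        return _reverse(inputs)


def grl_lambda_schedule(process, gamma=10.0):
    # Ramps lambda from 0 to 1 as the training progress 'process' goes from 0 to 1.
    return 2.0 / (1.0 + np.exp(-gamma * process)) - 1.0


def learning_rate_schedule(process, init_learning_rate=0.01, alpha=10.0, beta=0.75):
    # Anneals the learning rate as training progresses.
    return init_learning_rate / np.power(1.0 + alpha * process, beta)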
예제 #27
0
    ckpt.restore(manager.latest_checkpoint)
    if manager.latest_checkpoint:
        print("Restored from {}".format(manager.latest_checkpoint))
    else:
        print("Initializing from scratch.")

    for epoch in range(config['epochs']):
        for X, y in tqdm(train_ds):
            train_step(X, y)

        for X, y in tqdm(val_ds):
            val_step(X, y)

        template = 'Training epoch: {}, Loss: {}, Accuracy: {}, Test Loss: {}, Test Accuracy: {}'
        print (template.format(epoch+1,
                            train_loss.result(),
                            train_accuracy.result()*100,
                            val_loss.result(),
                            val_accuracy.result()*100))
        

        # save checkpoint
        save_path = manager.save()
        print(f'Saved checkpoint for epoch {int(ckpt.step)}: {save_path}\n')
        ckpt.step.assign_add(1)


    print('\nTraining is complete!\n')

    print('<Model summary>')
    model.summary()
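The loop above restores and saves through a ckpt / manager pair created earlier in the script. A minimal sketch of the tf.train.Checkpoint / CheckpointManager setup it implies; the tracked objects and the directory are assumptions.

# Hedged sketch of the checkpoint objects used above.
ckpt = tf.train.Checkpoint(step=tf.Variable(1), optimizer=optimizer, model=model)
manager = tf.train.CheckpointManager(ckpt, directory='./checkpoints', max_to_keep=3)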
예제 #28
0
def low_level_train(optimizer, yolo_loss, train_datasets, valid_datasets, train_steps, valid_steps):
    """
    以底层的方式训练,这种方式更好地观察训练过程,监视变量的变化
    :param optimizer: 优化器
    :param yolo_loss: 自定义的loss function
    :param train_datasets: 以tf.data封装好的训练集数据
    :param valid_datasets: 验证集数据
    :param train_steps: 迭代一个epoch的轮次
    :param valid_steps: 同上
    :return: None
    """
    # Build the model
    model = yolo_body()

    # Define the evaluation metrics
    train_loss = Mean(name='train_loss')
    valid_loss = Mean(name='valid_loss')

    # Track the best model and the early-stopping state
    best_test_loss = float('inf')
    patience = 10
    min_delta = 1e-3
    patience_cnt = 0
    history_loss = []

    # Create the summary writer
    summary_writer = tf.summary.create_file_writer(logdir=cfg.log_dir)

    # Compute the loss with a low-level training loop
    for epoch in range(1, cfg.epochs + 1):
        train_loss.reset_states()
        valid_loss.reset_states()
        step = 0
        print("Epoch {}/{}".format(epoch, cfg.epochs))

        # Iterate over the training set
        for batch, (images, labels) in enumerate(train_datasets.take(train_steps)):
            with tf.GradientTape() as tape:
                # Forward pass
                outputs = model(images, training=True)
                # Compute the losses (note: model.losses is only populated when the Conv2D layers
                # set kernel_regularizer; see the sketch after this example)
                regularization_loss = tf.reduce_sum(model.losses)
                pred_loss = []
                # yolo_loss, labels and outputs each cover the 3 feature levels; zip pairs each
                # loss_fn with the label and output of one level, which are evaluated one by one
                for output, label, loss_fn in zip(outputs, labels, yolo_loss):
                    pred_loss.append(loss_fn(label, output))

                # Total loss = YOLO loss + regularization loss
                total_train_loss = tf.reduce_sum(pred_loss) + regularization_loss

            # Backpropagation / gradient descent:
            # compute the gradient of the loss with respect to every trainable variable
            grads = tape.gradient(total_train_loss, model.trainable_variables)
            # Apply each gradient to its trainable variable;
            # zip packs gradients and trainable variables into pairs
            optimizer.apply_gradients(zip(grads, model.trainable_variables))

            # Update train_loss
            train_loss.update_state(total_train_loss)
            # Print training progress
            rate = (step + 1) / train_steps
            a = "*" * int(rate * 70)
            b = "." * int((1 - rate) * 70)
            loss = train_loss.result().numpy()

            print("\r{}/{} {:^3.0f}%[{}->{}] - loss:{:.4f}".
                  format(batch, train_steps, int(rate * 100), a, b, loss), end='')
            step += 1

        # Evaluate on the validation set
        for batch, (images, labels) in enumerate(valid_datasets.take(valid_steps)):
            # Forward pass without training mode
            outputs = model(images)
            regularization_loss = tf.reduce_sum(model.losses)
            pred_loss = []
            for output, label, loss_fn in zip(outputs, labels, yolo_loss):
                pred_loss.append(loss_fn(label, output))

            total_valid_loss = tf.reduce_sum(pred_loss) + regularization_loss

            # Update valid_loss
            valid_loss.update_state(total_valid_loss)

        print('\nLoss: {:.4f}, Test Loss: {:.4f}\n'.format(train_loss.result(), valid_loss.result()))
        # Record the loss (the training loss could be used instead)
        history_loss.append(valid_loss.result().numpy())

        # Write to TensorBoard
        with summary_writer.as_default():
            tf.summary.scalar('train_loss', train_loss.result(), step=optimizer.iterations)
            tf.summary.scalar('valid_loss', valid_loss.result(), step=optimizer.iterations)

        # Save only the best model
        if valid_loss.result() < best_test_loss:
            best_test_loss = valid_loss.result()
            model.save_weights(cfg.model_path, save_format='tf')

        # EarlyStopping
        if epoch > 1 and history_loss[epoch - 2] - history_loss[epoch - 1] > min_delta:
            patience_cnt = 0
        else:
            patience_cnt += 1

        if patience_cnt >= patience:
            tf.print("No improvement for {} times, early stopping optimization.".format(patience))
            break
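As the comment in the training loop notes, model.losses is only non-empty if the layers register a regularizer. A minimal sketch of how a convolution block inside yolo_body might do this; the block name and the l2 factor are assumptions.

from tensorflow.keras import layers, regularizers

def conv_block(x, filters, kernel_size, strides=1, l2_factor=5e-4):
    # kernel_regularizer is what populates model.losses, so the
    # regularization_loss term in the loop above has something to sum.
    x = layers.Conv2D(filters, kernel_size, strides=strides, padding='same',
                      use_bias=False,
                      kernel_regularizer=regularizers.l2(l2_factor))(x)
    x = layers.BatchNormalization()(x)
    x = layers.LeakyReLU(alpha=0.1)(x)
    return x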
예제 #29
0
파일: pix2pose.py 프로젝트: oarriaga/paz
class Pix2Pose(Model):
    def __init__(self, image_shape, discriminator, generator, latent_dim):
        super(Pix2Pose, self).__init__()
        self.image_shape = image_shape
        self.discriminator = discriminator
        self.generator = generator
        self.latent_dim = latent_dim

    @property
    def metrics(self):
        return [self.generator_loss, self.discriminator_loss]

    def compile(self, optimizers, losses, loss_weights):
        super(Pix2Pose, self).compile()
        self.optimizer_generator = optimizers['generator']
        self.optimizer_discriminator = optimizers['discriminator']
        self.compute_reconstruction_loss = losses['weighted_reconstruction']
        self.compute_error_prediction_loss = losses['error_prediction']
        self.compute_discriminator_loss = losses['discriminator']

        self.generator_loss = Mean(name='generator_loss')
        self.discriminator_loss = Mean(name='discriminator_loss')
        self.reconstruction_loss = Mean(name='weighted_reconstruction')
        self.error_prediction_loss = Mean(name='error_prediction')
        self.reconstruction_weight = loss_weights['weighted_reconstruction']
        self.error_prediction_weight = loss_weights['error_prediction']

    def _build_discriminator_labels(self, batch_size):
        return tf.concat([tf.ones((batch_size, 1)), tf.zeros((batch_size, 1))], 0)

    def _add_noise_to_labels(self, labels):
        noise = tf.random.uniform(tf.shape(labels))
        labels = labels + 0.05 * noise
        return labels

    def _get_batch_size(self, values):
        return tf.shape(values)[0]

    def _train_discriminator(self, RGB_inputs, RGBA_true):
        RGB_true = RGBA_true[:, :, :, 0:3]
        RGB_fake = self.generator(RGB_inputs)[:, :, :, 0:3]
        RGB_fake_true = tf.concat([RGB_fake, RGB_true], axis=0)

        batch_size = self._get_batch_size(RGB_inputs)
        y_true = self._build_discriminator_labels(batch_size)
        y_true = self._add_noise_to_labels(y_true)

        with tf.GradientTape() as tape:
            y_pred = self.discriminator(RGB_fake_true)
            discriminator_loss = self.compute_discriminator_loss(
                y_true, y_pred)
        gradients = tape.gradient(discriminator_loss,
                                  self.discriminator.trainable_weights)
        self.optimizer_discriminator.apply_gradients(
            zip(gradients, self.discriminator.trainable_weights))
        return discriminator_loss

    def _train_generator(self, RGB_inputs):
        batch_size = tf.shape(RGB_inputs)[0]
        y_misleading = tf.zeros((batch_size, 1))
        with tf.GradientTape() as tape:
            RGBE_preds = self.generator(RGB_inputs)
            y_pred = self.discriminator(RGBE_preds[..., 0:3])
            generator_loss = self.compute_discriminator_loss(
                y_misleading, y_pred)
        gradients = tape.gradient(generator_loss,
                                  self.generator.trainable_weights)
        self.optimizer_generator.apply_gradients(
            zip(gradients, self.generator.trainable_weights))
        return generator_loss

    def _train_reconstruction(self, RGB_inputs, RGBA_true):
        with tf.GradientTape() as tape:
            RGBE_pred = self.generator(RGB_inputs)
            reconstruction_loss = self.compute_reconstruction_loss(
                RGBA_true, RGBE_pred)
            reconstruction_loss = (self.reconstruction_weight *
                                   reconstruction_loss)
        gradients = tape.gradient(reconstruction_loss,
                                  self.generator.trainable_weights)
        self.optimizer_generator.apply_gradients(
            zip(gradients, self.generator.trainable_weights))
        return reconstruction_loss

    def _train_error_prediction(self, RGB_inputs, RGBA_true):
        with tf.GradientTape() as tape:
            RGBE_pred = self.generator(RGB_inputs)
            error_prediction_loss = self.compute_error_prediction_loss(
                RGBA_true, RGBE_pred)
            error_prediction_loss = (self.error_prediction_weight *
                                     error_prediction_loss)
        gradients = tape.gradient(error_prediction_loss,
                                  self.generator.trainable_weights)
        self.optimizer_generator.apply_gradients(
            zip(gradients, self.generator.trainable_weights))
        return error_prediction_loss

    def train_step(self, data):
        RGB_inputs, RGBA_true = data[0]['RGB_input'], data[1]['RGB_with_error']

        reconstruction_loss = self._train_reconstruction(RGB_inputs, RGBA_true)
        self.reconstruction_loss.update_state(reconstruction_loss)

        error_loss = self._train_error_prediction(RGB_inputs, RGBA_true)
        self.error_prediction_loss.update_state(error_loss)

        discriminator_loss = self._train_discriminator(RGB_inputs, RGBA_true)
        self.discriminator_loss.update_state(discriminator_loss)

        generator_loss = self._train_generator(RGB_inputs)
        self.generator_loss.update_state(generator_loss)

        return {
            'discriminator_loss': self.discriminator_loss.result(),
            'generator_loss': self.generator_loss.result(),
            'reconstruction_loss': self.reconstruction_loss.result(),
            'error_prediction_loss': self.error_prediction_loss.result()
        }
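For reference, compile() above expects dictionaries keyed by the loss names used in train_step. A hedged usage sketch; the optimizer settings, the loss callables and the weights shown here are assumptions, not values from the project.

# Hedged usage sketch: weighted_reconstruction_loss and error_prediction_loss
# stand in for whatever loss callables the project actually provides.
model = Pix2Pose(image_shape, discriminator, generator, latent_dim)
model.compile(
    optimizers={'generator': tf.keras.optimizers.Adam(1e-4),
                'discriminator': tf.keras.optimizers.Adam(1e-4)},
    losses={'weighted_reconstruction': weighted_reconstruction_loss,
            'error_prediction': error_prediction_loss,
            'discriminator': tf.keras.losses.BinaryCrossentropy()},
    loss_weights={'weighted_reconstruction': 100.0,
                  'error_prediction': 50.0})
model.fit(dataset, epochs=10)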
예제 #30
0
        optimizer.apply_gradients(zip(grads,
                                      teacher_model.trainable_variables))
        loss_value_test = training.loss([x_val_main_, x_val_aux_], y_val_)

        epoch_loss_avg(loss_value)
        epoch_accuracy(
            y_train_,
            tf.nn.softmax(teacher_model([x_train_main_, x_train_aux_])))
        epoch_loss_avg_val(loss_value_test)
        epoch_accuracy_val(
            y_val_, tf.nn.softmax(teacher_model([x_val_main_, x_val_aux_])))

    # Display training progress
    print(
        'Epoch {}/{}: Loss: {:.3f}, Accuracy: {:.3%}, Validation Loss: {:.3f}, Validation Accuracy: {:.3%}'
        .format(epoch, EPOCHS_T, epoch_loss_avg.result(),
                epoch_accuracy.result(), epoch_loss_avg_val.result(),
                epoch_accuracy_val.result()))

# Define the Student model
student = KDModel.Students(NUM_CLASSES, T)
student_model = student.createModel(inputs_main)

# Train the Student model
student_model.summary()
# plot_model(student_soft_model, show_shapes=True, to_file='student_model.png')
kd = KDModel.KnowledgeDistillation(teacher_model, student_model, T, ALPHA)
history_student = LossAccHistory()
for epoch in range(1, EPOCHS_S + 1):
    epoch_loss_avg = Mean()
    epoch_loss_avg_val = Mean()
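The loop relies on KDModel.KnowledgeDistillation(teacher_model, student_model, T, ALPHA) to provide the distillation loss. A minimal sketch of the usual soft-target formulation such a class typically implements; the function below is an assumption, not the module's actual code.

import tensorflow as tf

def distillation_loss(y_true, teacher_logits, student_logits, T=4.0, alpha=0.1):
    # Hard-label loss on the ground truth plus soft-label loss against the
    # teacher's temperature-softened distribution, weighted by alpha.
    hard_loss = tf.keras.losses.categorical_crossentropy(
        y_true, tf.nn.softmax(student_logits))
    soft_loss = tf.keras.losses.categorical_crossentropy(
        tf.nn.softmax(teacher_logits / T), tf.nn.softmax(student_logits / T))
    return alpha * hard_loss + (1.0 - alpha) * (T ** 2) * soft_loss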