# Assumes: numpy as np, keras.callbacks.{CSVLogger, EarlyStopping},
# keras.utils.Progbar, and the project's SaveLoss callback and `log` logger.
def train(self):
    log.info('Training Model')
    self.init_train_data()
    self.init_image_callback()

    # Callbacks are driven manually (no model.fit): each needs a model and an
    # explicit on_train_begin() before its per-epoch hooks fire.
    sl = SaveLoss(self.conf.folder)
    cl = CSVLogger(self.conf.folder + '/training.csv')
    cl.on_train_begin()
    es = EarlyStopping('val_loss_mod2_fused', min_delta=0.01, patience=60)
    es.model = self.model.Segmentor
    es.on_train_begin()

    loss_names = self.get_loss_names()
    total_loss = {n: [] for n in loss_names}

    progress_bar = Progbar(target=self.batches * self.conf.batch_size)
    for self.epoch in range(self.conf.epochs):
        log.info('Epoch %d/%d' % (self.epoch, self.conf.epochs))
        epoch_loss = {n: [] for n in loss_names}
        epoch_loss_list = []

        for self.batch in range(self.batches):
            self.train_batch(epoch_loss)
            progress_bar.update((self.batch + 1) * self.conf.batch_size)

        # Update the stochastic-weight-averaging (SWA) copies after each epoch.
        self.set_swa_model_weights()
        for swa_m in self.get_swa_models():
            swa_m.on_epoch_end(self.epoch)

        self.validate(epoch_loss)

        for n in loss_names:
            epoch_loss_list.append((n, np.mean(epoch_loss[n])))
            total_loss[n].append(np.mean(epoch_loss[n]))
        log.info(('Epoch %d/%d: ' + ', '.join([l + ' Loss = %.5f' for l in loss_names]))
                 % ((self.epoch, self.conf.epochs) + tuple(total_loss[l][-1] for l in loss_names)))
        logs = {l: total_loss[l][-1] for l in loss_names}

        # CSVLogger reads model.stop_training in on_epoch_end, so give it a model.
        cl.model = self.model.D_Mask
        cl.model.stop_training = False
        cl.on_epoch_end(self.epoch, logs)
        sl.on_epoch_end(self.epoch, logs)

        # print images
        self.img_callback.on_epoch_end(self.epoch)

        self.save_models()

        if self.stop_criterion(es, logs):
            log.info('Finished training from early stopping criterion')
            es.on_train_end(logs)
            cl.on_train_end(logs)
            for swa_m in self.get_swa_models():
                swa_m.on_train_end()

            # Set final model parameters based on SWA
            self.model.D_Mask = self.swa_D_Mask.model
            self.model.D_Image1 = self.swa_D_Image1.model
            self.model.D_Image2 = self.swa_D_Image2.model
            self.model.Encoders_Anatomy[0] = self.swa_Enc_Anatomy1.model
            self.model.Encoders_Anatomy[1] = self.swa_Enc_Anatomy2.model
            self.model.Enc_Modality = self.swa_Enc_Modality.model
            self.model.Anatomy_Fuser = self.swa_Anatomy_Fuser.model
            self.model.Segmentor = self.swa_Segmentor.model
            self.model.Decoder = self.swa_Decoder.model
            self.model.Balancer = self.swa_Balancer.model
            self.save_models()
            break
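# All three loops in this file drive Keras callbacks by hand rather than through
# model.fit(). Below is a minimal, self-contained sketch of that lifecycle
# (set_model -> on_train_begin -> on_epoch_end -> on_train_end), assuming the
# standalone `keras` package; the toy model and the fabricated `logs` dict are
# illustrative only, not taken from the code above.
def _callback_lifecycle_sketch():
    from keras.models import Sequential
    from keras.layers import Dense
    from keras.callbacks import CSVLogger, EarlyStopping

    model = Sequential([Dense(1, input_shape=(4,))])
    model.compile(optimizer='adam', loss='mse')
    model.stop_training = False

    callbacks = [CSVLogger('sketch_training.csv'),
                 EarlyStopping(monitor='val_loss', min_delta=0.01, patience=2)]
    for cb in callbacks:
        cb.set_model(model)      # hooks assume .model is set before they fire
        cb.on_train_begin()

    for epoch in range(10):
        logs = {'val_loss': 1.0 / (epoch + 1)}  # stand-in for real metrics
        for cb in callbacks:
            cb.on_epoch_end(epoch, logs)
        if model.stop_training:  # EarlyStopping raises this flag once patience runs out
            break

    for cb in callbacks:
        cb.on_train_end({})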
# Assumes: os, sys, numpy as np, keras.callbacks.EarlyStopping, and the
# project's DriveDataGenerator, ConvVAE and reset_graph.
def main(dirs,
         z_size=32,
         batch_size=100,
         learning_rate=0.0001,
         kl_tolerance=0.5,
         epochs=100,
         save_model=False,
         verbose=True,
         optimizer="Adam"):
    if save_model:
        model_save_path = "tf_vae"
        if not os.path.exists(model_save_path):
            os.makedirs(model_save_path)

    gen = DriveDataGenerator(dirs,
                             image_size=(64, 64),
                             batch_size=batch_size,
                             shuffle=True,
                             max_load=10000,
                             images_only=True)
    num_batches = len(gen)

    reset_graph()
    vae = ConvVAE(z_size=z_size,
                  batch_size=batch_size,
                  learning_rate=learning_rate,
                  kl_tolerance=kl_tolerance,
                  is_training=True,
                  reuse=False,
                  gpu_mode=True,
                  optimizer=optimizer)

    # EarlyStopping is a Keras callback, but it only needs set_model() and a
    # stop_training flag, so it can monitor this TensorFlow model as well.
    early = EarlyStopping(monitor='loss', min_delta=0.1, patience=5,
                          verbose=verbose, mode='auto')
    early.set_model(vae)
    early.on_train_begin()

    best_loss = sys.maxsize
    if verbose:
        print("epoch\tstep\tloss\trecon_loss\tkl_loss")
    for epoch in range(epochs):
        for idx in range(num_batches):
            batch = gen[idx]
            # np.float was removed from NumPy; scale uint8 pixels to [0, 1] floats.
            obs = batch.astype(np.float32) / 255.0
            feed = {vae.x: obs}
            (train_loss, r_loss, kl_loss, train_step, _) = vae.sess.run([
                vae.loss, vae.r_loss, vae.kl_loss, vae.global_step, vae.train_op
            ], feed)
            if train_loss < best_loss:
                best_loss = train_loss
            if save_model and (train_step + 1) % 5000 == 0:
                vae.save_json("tf_vae/vae.json")
        if verbose:
            print("{} of {}\t{}\t{:.2f}\t{:.2f}\t{:.2f}".format(
                epoch, epochs, (train_step + 1), train_loss, r_loss, kl_loss))
        gen.on_epoch_end()
        early.on_epoch_end(epoch, logs={"loss": train_loss})
        if vae.stop_training:
            break
    early.on_train_end()

    # finished, final model:
    if save_model:
        vae.save_json("tf_vae/vae.json")
    return best_loss
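# Example invocation (illustrative: the directory names are placeholders, and
# DriveDataGenerator / ConvVAE / reset_graph come from this project, so treat
# this as a sketch rather than a tested entry point):
#
#   if __name__ == "__main__":
#       best = main(["./data/session1", "./data/session2"],
#                   z_size=32, batch_size=100, epochs=100,
#                   save_model=True, verbose=True)
#       print("best training loss:", best)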
# Assumes the Keras 1 API (fit_generator with nb_epoch/nb_val_samples) and
# keras.callbacks.{History, ModelCheckpoint, EarlyStopping, CSVLogger,
# TensorBoard, Callback}, plus the project's full_path helper.
def train_model(model, data, config, include_tensorboard):
    model_history = History()
    model_history.on_train_begin()
    saver = ModelCheckpoint(full_path(config.model_file()), verbose=1,
                            save_best_only=True, period=1)
    saver.set_model(model)
    early_stopping = EarlyStopping(min_delta=config.min_delta,
                                   patience=config.patience, verbose=1)
    early_stopping.set_model(model)
    early_stopping.on_train_begin()
    csv_logger = CSVLogger(full_path(config.csv_log_file()))
    csv_logger.set_model(model)  # CSVLogger consults model.stop_training in on_epoch_end
    csv_logger.on_train_begin()
    if include_tensorboard:
        tensorboard = TensorBoard(histogram_freq=10, write_images=True)
        tensorboard.set_model(model)
    else:
        tensorboard = Callback()  # no-op stand-in so the calls below stay uniform

    epoch = 0
    stop = False
    while epoch <= config.max_epochs and not stop:
        epoch_history = History()
        epoch_history.on_train_begin()
        valid_sizes = []
        train_sizes = []
        print("Epoch:", epoch)
        for dataset in data.datasets:
            print("dataset:", dataset.name)
            model.reset_states()
            dataset.reset_generators()

            # First generator: train with validation.
            valid_sizes.append(dataset.valid_generators[0].size())
            train_sizes.append(dataset.train_generators[0].size())
            fit_history = model.fit_generator(
                dataset.train_generators[0],
                dataset.train_generators[0].size(),
                nb_epoch=1, verbose=0,
                validation_data=dataset.valid_generators[0],
                nb_val_samples=dataset.valid_generators[0].size())
            epoch_history.on_epoch_end(epoch, last_logs(fit_history))

            # Second generator: train only.
            train_sizes.append(dataset.train_generators[1].size())
            fit_history = model.fit_generator(
                dataset.train_generators[1],
                dataset.train_generators[1].size(),
                nb_epoch=1, verbose=0)
            epoch_history.on_epoch_end(epoch, last_logs(fit_history))

        epoch_logs = average_logs(epoch_history, train_sizes, valid_sizes)
        model_history.on_epoch_end(epoch, logs=epoch_logs)
        saver.on_epoch_end(epoch, logs=epoch_logs)
        early_stopping.on_epoch_end(epoch, epoch_logs)
        csv_logger.on_epoch_end(epoch, epoch_logs)
        tensorboard.on_epoch_end(epoch, epoch_logs)
        epoch += 1
        if early_stopping.stopped_epoch > 0:
            stop = True

    early_stopping.on_train_end()
    csv_logger.on_train_end()
    tensorboard.on_train_end({})
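# `last_logs` and `average_logs` are helpers assumed by train_model above; the
# sketches below show one plausible implementation consistent with how they are
# called (pull the final epoch's metrics out of a fit_generator History, then
# size-weight metrics across datasets). These are illustrative assumptions, not
# the project's actual code.
import numpy as np

def last_logs(fit_history):
    # History.history maps metric name -> list of per-epoch values; keep only
    # the most recent epoch's value for each metric.
    return {key: values[-1] for key, values in fit_history.history.items()}

def average_logs(epoch_history, train_sizes, valid_sizes):
    # Weight each recorded value by the size of the generator that produced it:
    # validation metrics ('val_*') by valid_sizes, training metrics by train_sizes.
    logs = {}
    for key, values in epoch_history.history.items():
        sizes = valid_sizes if key.startswith('val_') else train_sizes
        logs[key] = float(np.average(values, weights=sizes[:len(values)]))
    return logs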