# Per-batch training step: forward/backward pass, optimizer update, metric update,
# then the manually driven TensorBoard callback is notified.
with tf.GradientTape() as tape:
    loss = train_one_step(model, emb, seq, pos, neg)
grads = tape.gradient(loss, model.trainable_variables)
optimizer.apply_gradients(zip(grads, model.trainable_variables))
cos_loss(model(seq), emb(pos))  # update the CosineSimilarity metric
tbcb.on_train_batch_end(step)
loss_history.append(loss.numpy())
cos_loss_history.append(cos_loss.result())

# print('========== loss: {:03f} ============'.format(loss_history[-1]))
print('Epoch: {:03d}, Loss: {:.3f}, CosineSimilarity: {:.3f}'.format(
    epoch, loss_history[-1], cos_loss.result()))
logs = {'train_loss': loss, 'cosine_similarity': cos_loss.result()}
tbcb.on_epoch_end(epoch, logs=logs)

# Evaluate on the validation and test splits every 5 epochs.
if epoch % 5 == 0:
    t1 = time.time() - t0
    T += t1
    print('========== Evaluating ==========')
    t_test = evaluate(model, emb, dataset, max_len)
    t_valid = evaluate_valid(model, emb, dataset, max_len)
    print('Epoch: {:03d}, Time: {:f}, valid (NDCG@10: {:.4f}, HR@10: {:.4f}), '
          'test (NDCG@10: {:.4f}, HR@10: {:.4f})'.format(
              epoch, T, t_valid[0], t_valid[1], t_test[0], t_test[1]))
    f.write(str(t_valid) + ' ' + str(t_test) + '\n')
    f.flush()
    t0 = time.time()
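# A minimal, self-contained sketch of the pattern used above: driving a Keras
# TensorBoard callback by hand from a custom loop (set_model() once, then
# on_epoch_end() with a plain dict of scalars). The toy model, data, and log
# directory below are illustrative assumptions, not part of the original script.
import numpy as np
import tensorflow as tf

toy_model = tf.keras.Sequential([tf.keras.layers.Input(shape=(4,)),
                                 tf.keras.layers.Dense(1)])
toy_model.compile(optimizer='adam', loss='mse')

tb = tf.keras.callbacks.TensorBoard(log_dir='./logs_manual_demo')
tb.set_model(toy_model)

x = np.random.rand(256, 4).astype('float32')
y = np.random.rand(256, 1).astype('float32')

for epoch in range(3):
    batch_losses = []
    for start in range(0, len(x), 32):
        batch_losses.append(
            toy_model.train_on_batch(x[start:start + 32], y[start:start + 32]))
    # Any {name: scalar} dict becomes a TensorBoard scalar for that epoch.
    tb.on_epoch_end(epoch, {'train_loss': float(np.mean(batch_losses))})
tb.on_train_end(None)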
class SelectorNetwork: def __init__(self, mask_batch_size, tensorboard_logs_dir=""): self.batch_size = mask_batch_size self.mask_batch_size = mask_batch_size self.tr_loss_history = [] self.te_loss_history = [] self.y_pred_std_history = [] self.y_true_std_history = [] self.tf_logs = tensorboard_logs_dir self.epoch_counter = 0 self.data_masks = None self.data_targets = None self.best_performing_mask = None self.sample_weights = None def set_label_input_params(self, y_shape, y_input_layer): self.label_input_layer = y_input_layer self.label_shape = y_shape def create_dense_model(self, input_shape, dense_arch): input_mask_layer = Input(shape=input_shape) x = Flatten()(input_mask_layer) for i in range(len(dense_arch[:-1])): x = Dense(dense_arch[i], activation="sigmoid")(x) x = Dense(dense_arch[-1], activation="linear")(x) self.model = Model(inputs=[input_mask_layer], outputs=x) print("Subject Network model built:") #self.model.summary() def named_logs(self, model, logs): result = {} try: iterator = iter(logs) except TypeError: logs = [logs] for l in zip(model.metrics_names, logs): result[l[0]] = l[1] return result def compile_model(self): self.model.compile(loss='mae', optimizer='adam', metrics=[ self.get_y_std_metric(True), self.get_y_std_metric(False) ]) if self.tf_logs != "": log_path = './logs' self.tb_clbk = TensorBoard(self.tf_logs) self.tb_clbk.set_model(self.model) def train_one(self, epoch_number, apply_weights): # train on data in object memory if apply_weights == False: curr_loss = self.model.train_on_batch(x=self.data_masks, y=self.data_targets) else: curr_loss = self.model.train_on_batch( x=self.data_masks, y=self.data_targets, sample_weight=self.sample_weights) self.best_performing_mask = self.data_masks[np.argmin( self.data_targets, axis=0)] self.tr_loss_history.append(curr_loss) self.epoch_counter = epoch_number if self.tf_logs != "": self.tb_clbk.on_epoch_end(self.epoch_counter, self.named_logs(self.model, curr_loss)) self.data_masks = None self.data_targets = None def append_data(self, x, y): if self.data_masks is None: self.data_masks = x self.data_targets = y else: self.data_masks = np.concatenate([self.data_masks, x], axis=0) self.data_targets = tf.concat([self.data_targets, y], axis=0) def test_one(self, x, y): y_pred = self.model.predict(x=x) curr_loss = self.model.test_on_batch(x=x, y=y) self.te_loss_history.append(curr_loss) # print("SN test loss: "+str(curr_loss)) # print("SN prediction: "+str(np.squeeze(curr_loss))) # print("SN targets: "+str(np.squeeze(y_pred))) return curr_loss def predict(self, x): y_pred = self.model.predict(x=x) return y_pred def get_y_std_metric(self, ifpred=True): def y_pred_std_metric(y_true, y_pred): y_pred_std = K.std(y_pred) self.y_pred_std_history.append(y_pred_std) return y_pred_std def y_true_std_metric(y_true, y_pred): y_true_std = K.std(y_true) self.y_true_std_history.append(y_true_std) return y_true_std if (ifpred == True): return y_pred_std_metric else: return y_true_std_metric
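# A small usage sketch (assumed shapes and data) for the SelectorNetwork above:
# it buffers (mask, observed-loss) pairs with append_data() and fits the
# surrogate on that buffer with train_one(). In the full algorithm the targets
# would be the per-mask losses reported by the operator network; random values
# stand in for them here.
import numpy as np

selector = SelectorNetwork(mask_batch_size=32)
selector.create_dense_model(input_shape=(10,), dense_arch=[64, 32, 1])
selector.compile_model()

for epoch in range(5):
    masks = np.random.randint(0, 2, size=(32, 10)).astype('float32')
    mask_losses = np.random.rand(32, 1).astype('float32')
    selector.append_data(masks, mask_losses)
    selector.train_one(epoch_number=epoch, apply_weights=False)

# Score candidate masks, e.g. the ten single-feature masks.
predicted_quality = selector.predict(np.eye(10, dtype='float32'))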
class Trainer:
    """Class object to set up and carry out the training.

    Takes as input a generator that produces SR images.
    Conditionally, also a discriminator network and a feature extractor
    to build the components of the perceptual loss.
    Compiles the model(s) and trains in a GANs fashion if a discriminator is provided,
    otherwise carries out a regular ISR training.

    Args:
        generator: Keras model, the super-scaling, or generator, network.
        discriminator: Keras model, the discriminator network for the adversarial
            component of the perceptual loss.
        feature_extractor: Keras model, feature extractor network for the deep features
            component of the perceptual loss function.
        lr_train_dir: path to the directory containing the Low-Res images for training.
        hr_train_dir: path to the directory containing the High-Res images for training.
        lr_valid_dir: path to the directory containing the Low-Res images for validation.
        hr_valid_dir: path to the directory containing the High-Res images for validation.
        learning_rate: float.
        loss_weights: dictionary, used to weigh the components of the loss function.
            Contains 'generator' for the generator loss component, and can contain
            'discriminator' and 'feature_extractor' for the discriminator and deep
            features components respectively.
        logs_dir: path to the directory where the tensorboard logs are saved.
        weights_dir: path to the directory where the weights are saved.
        dataname: string, used to identify what dataset is used for the training session.
        weights_generator: path to the pre-trained generator's weights, for transfer learning.
        weights_discriminator: path to the pre-trained discriminator's weights, for transfer learning.
        n_validation: integer, number of validation samples used at training from the validation set.
        flatness: dictionary. Determines the 'flatness' threshold level for the training patches.
            See the TrainerHelper class for more details.
        lr_decay_frequency: integer, every how many epochs the learning rate is reduced.
        lr_decay_factor: 0 < float < 1, learning rate reduction multiplicative factor.

    Methods:
        train: combines the networks and triggers training with the specified settings.
""" def __init__( self, generator, discriminator, feature_extractor, lr_train_dir, hr_train_dir, lr_valid_dir, hr_valid_dir, loss_weights={ 'generator': 1.0, 'discriminator': 0.003, 'feature_extractor': 1 / 12 }, log_dirs={ 'logs': 'logs', 'weights': 'weights' }, fallback_save_every_n_epochs=2, dataname=None, weights_generator=None, weights_discriminator=None, n_validation=None, flatness={ 'min': 0.0, 'increase_frequency': None, 'increase': 0.0, 'max': 0.0 }, learning_rate={ 'initial_value': 0.0004, 'decay_frequency': 100, 'decay_factor': 0.5 }, adam_optimizer={ 'beta1': 0.9, 'beta2': 0.999, 'epsilon': None }, losses={ 'generator': 'mae', 'discriminator': 'binary_crossentropy', 'feature_extractor': 'mse', }, metrics={'generator': 'PSNR_Y'}, ): self.generator = generator self.discriminator = discriminator self.feature_extractor = feature_extractor self.scale = generator.scale self.lr_patch_size = generator.patch_size self.learning_rate = learning_rate self.loss_weights = loss_weights self.weights_generator = weights_generator self.weights_discriminator = weights_discriminator self.adam_optimizer = adam_optimizer self.dataname = dataname self.flatness = flatness self.n_validation = n_validation self.losses = losses self.log_dirs = log_dirs self.metrics = metrics if self.metrics['generator'] == 'PSNR_Y': self.metrics['generator'] = PSNR_Y elif self.metrics['generator'] == 'PSNR': self.metrics['generator'] = PSNR self._parameters_sanity_check() self.model = self._combine_networks() self.settings = {} self.settings['training_parameters'] = locals() self.settings['training_parameters'][ 'lr_patch_size'] = self.lr_patch_size self.settings = self.update_training_config(self.settings) self.logger = get_logger(__name__) self.helper = TrainerHelper( generator=self.generator, weights_dir=log_dirs['weights'], logs_dir=log_dirs['logs'], lr_train_dir=lr_train_dir, feature_extractor=self.feature_extractor, discriminator=self.discriminator, dataname=dataname, weights_generator=self.weights_generator, weights_discriminator=self.weights_discriminator, fallback_save_every_n_epochs=fallback_save_every_n_epochs, ) self.train_dh = DataHandler( lr_dir=lr_train_dir, hr_dir=hr_train_dir, patch_size=self.lr_patch_size, scale=self.scale, n_validation_samples=None, ) self.valid_dh = DataHandler( lr_dir=lr_valid_dir, hr_dir=hr_valid_dir, patch_size=self.lr_patch_size, scale=self.scale, n_validation_samples=n_validation, ) def _parameters_sanity_check(self): """ Parameteres sanity check. """ if self.discriminator: assert self.lr_patch_size * self.scale == self.discriminator.patch_size self.adam_optimizer if self.feature_extractor: assert self.lr_patch_size * self.scale == self.feature_extractor.patch_size check_parameter_keys( self.learning_rate, needed_keys=['initial_value'], optional_keys=['decay_factor', 'decay_frequency'], default_value=None, ) check_parameter_keys( self.flatness, needed_keys=[], optional_keys=['min', 'increase_frequency', 'increase', 'max'], default_value=0.0, ) check_parameter_keys( self.adam_optimizer, needed_keys=['beta1', 'beta2'], optional_keys=['epsilon'], default_value=None, ) check_parameter_keys(self.log_dirs, needed_keys=['logs', 'weights']) def _combine_networks(self): """ Constructs the combined model which contains the generator network, as well as discriminator and geature extractor, if any are defined. 
""" lr = Input(shape=(self.lr_patch_size, ) * 2 + (3, )) sr = self.generator.model(lr) outputs = [sr] losses = [self.losses['generator']] loss_weights = [self.loss_weights['generator']] if self.discriminator: self.discriminator.model.trainable = False validity = self.discriminator.model(sr) outputs.append(validity) losses.append(self.losses['discriminator']) loss_weights.append(self.loss_weights['discriminator']) if self.feature_extractor: self.feature_extractor.model.trainable = False sr_feats = self.feature_extractor.model(sr) outputs.extend([*sr_feats]) losses.extend([self.losses['feature_extractor']] * len(sr_feats)) loss_weights.extend( [self.loss_weights['feature_extractor'] / len(sr_feats)] * len(sr_feats)) combined = Model(inputs=lr, outputs=outputs) # https://stackoverflow.com/questions/42327543/adam-optimizer-goes-haywire-after-200k-batches-training-loss-grows optimizer = Adam( beta_1=self.adam_optimizer['beta1'], beta_2=self.adam_optimizer['beta2'], lr=self.learning_rate['initial_value'], epsilon=self.adam_optimizer['epsilon'], ) combined.compile(loss=losses, loss_weights=loss_weights, optimizer=optimizer, metrics=self.metrics) return combined def _lr_scheduler(self, epoch): """ Scheduler for the learning rate updates. """ n_decays = epoch // self.learning_rate['decay_frequency'] lr = self.learning_rate['initial_value'] * ( self.learning_rate['decay_factor']**n_decays) # no lr below minimum control 10e-7 return max(1e-7, lr) def _flatness_scheduler(self, epoch): if self.flatness['increase']: n_increases = epoch // self.flatness['increase_frequency'] else: return self.flatness['min'] f = self.flatness['min'] + n_increases * self.flatness['increase'] return min(self.flatness['max'], f) def _load_weights(self): """ Loads the pretrained weights from the given path, if any is provided. If a discriminator is defined, does the same. """ if self.weights_generator: self.model.get_layer('generator').load_weights( self.weights_generator) if self.discriminator: if self.weights_discriminator: self.model.get_layer('discriminator').load_weights( self.weights_discriminator) self.discriminator.model.load_weights( self.weights_discriminator) def _format_losses(self, prefix, losses, model_metrics): """ Creates a dictionary for tensorboard tracking. """ return dict(zip([prefix + m for m in model_metrics], losses)) def update_training_config(self, settings): """ Summarizes training setting. 
""" _ = settings['training_parameters'].pop('weights_generator') _ = settings['training_parameters'].pop('self') _ = settings['training_parameters'].pop('generator') _ = settings['training_parameters'].pop('discriminator') _ = settings['training_parameters'].pop('feature_extractor') settings['generator'] = {} settings['generator']['name'] = self.generator.name settings['generator']['parameters'] = self.generator.params settings['generator']['weights_generator'] = self.weights_generator _ = settings['training_parameters'].pop('weights_discriminator') if self.discriminator: settings['discriminator'] = {} settings['discriminator']['name'] = self.discriminator.name settings['discriminator'][ 'weights_discriminator'] = self.weights_discriminator else: settings['discriminator'] = None if self.discriminator: settings['feature_extractor'] = {} settings['feature_extractor']['name'] = self.feature_extractor.name settings['feature_extractor'][ 'layers'] = self.feature_extractor.layers_to_extract else: settings['feature_extractor'] = None return settings def train(self, epochs, steps_per_epoch, batch_size, monitored_metrics): """ Carries on the training for the given number of epochs. Sends the losses to Tensorboard. Args: epochs: how many epochs to train for. steps_per_epoch: how many batches epoch. batch_size: amount of images per batch. monitored_metrics: dictionary, the keys are the metrics that are monitored for the weights saving logic. The values are the mode that trigger the weights saving ('min' vs 'max'). """ self.settings['training_parameters'][ 'steps_per_epoch'] = steps_per_epoch self.settings['training_parameters']['batch_size'] = batch_size starting_epoch = self.helper.initialize_training( self) # load_weights, creates folders, creates basename self.tensorboard = TensorBoard( log_dir=str(self.helper.callback_paths['logs'])) self.tensorboard.set_model(self.model) # validation data validation_set = self.valid_dh.get_validation_set(batch_size) y_validation = [validation_set['hr']] if self.discriminator: discr_out_shape = list( self.discriminator.model.outputs[0].shape)[1:4] valid = np.ones([batch_size] + discr_out_shape) fake = np.zeros([batch_size] + discr_out_shape) validation_valid = np.ones([len(validation_set['hr'])] + discr_out_shape) y_validation.append(validation_valid) if self.feature_extractor: validation_feats = self.feature_extractor.model.predict( validation_set['hr']) y_validation.extend([*validation_feats]) for epoch in range(starting_epoch, epochs): self.logger.info('Epoch {e}/{tot_eps}'.format(e=epoch, tot_eps=epochs)) K.set_value(self.model.optimizer.lr, self._lr_scheduler(epoch=epoch)) self.logger.info('Current learning rate: {}'.format( K.eval(self.model.optimizer.lr))) flatness = self._flatness_scheduler(epoch) if flatness: self.logger.info( 'Current flatness treshold: {}'.format(flatness)) epoch_start = time() for step in tqdm(range(steps_per_epoch)): batch = self.train_dh.get_batch(batch_size, flatness=flatness) y_train = [batch['hr']] training_losses = {} ## Discriminator training if self.discriminator: sr = self.generator.model.predict(batch['lr']) d_loss_real = self.discriminator.model.train_on_batch( batch['hr'], valid) d_loss_fake = self.discriminator.model.train_on_batch( sr, fake) d_loss_fake = self._format_losses( 'train_d_fake_', d_loss_fake, self.discriminator.model.metrics_names) d_loss_real = self._format_losses( 'train_d_real_', d_loss_real, self.discriminator.model.metrics_names) training_losses.update(d_loss_real) training_losses.update(d_loss_fake) 
y_train.append(valid) ## Generator training if self.feature_extractor: hr_feats = self.feature_extractor.model.predict( batch['hr']) y_train.extend([*hr_feats]) model_losses = self.model.train_on_batch(batch['lr'], y_train) model_losses = self._format_losses('train_', model_losses, self.model.metrics_names) training_losses.update(model_losses) self.tensorboard.on_epoch_end(epoch * steps_per_epoch + step, training_losses) self.logger.debug('Losses at step {s}:\n {l}'.format( s=step, l=training_losses)) elapsed_time = time() - epoch_start self.logger.info('Epoch {} took {:10.1f}s'.format( epoch, elapsed_time)) validation_losses = self.model.evaluate(validation_set['lr'], y_validation, batch_size=batch_size) validation_losses = self._format_losses('val_', validation_losses, self.model.metrics_names) if epoch == starting_epoch: remove_metrics = [] for metric in monitored_metrics: if (metric not in training_losses) and ( metric not in validation_losses): msg = ' '.join([ metric, 'is NOT among the model metrics, removing it.' ]) self.logger.error(msg) remove_metrics.append(metric) for metric in remove_metrics: _ = monitored_metrics.pop(metric) # should average train metrics end_losses = {} end_losses.update(validation_losses) end_losses.update(training_losses) self.helper.on_epoch_end( epoch=epoch, losses=end_losses, generator=self.model.get_layer('generator'), discriminator=self.discriminator, metrics=monitored_metrics, ) self.tensorboard.on_epoch_end(epoch, validation_losses) self.tensorboard.on_train_end(None)
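# A hedged usage sketch for the Trainer above. The generator, discriminator and
# feature_extractor objects, the directory paths, and the monitored metric name
# are placeholders; only the constructor and train() signatures come from the
# code above.
trainer = Trainer(
    generator=generator,                  # wrapper exposing .model, .scale, .patch_size, .name, .params
    discriminator=discriminator,          # may be None for plain (non-adversarial) training
    feature_extractor=feature_extractor,  # may be None to drop the deep-features loss
    lr_train_dir='data/lr/train/',
    hr_train_dir='data/hr/train/',
    lr_valid_dir='data/lr/valid/',
    hr_valid_dir='data/hr/valid/',
    dataname='my_dataset',
    n_validation=40,
)
# The keys of monitored_metrics must match the logged names (built with the
# 'train_' / 'val_' prefixes in _format_losses), otherwise train() discards
# them on the first epoch.
trainer.train(
    epochs=100,
    steps_per_epoch=500,
    batch_size=16,
    monitored_metrics={'val_generator_PSNR_Y': 'max'},
)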
def train_srgan( self, epochs, batch_size, dataname, datapath_train, datapath_validation=None, steps_per_validation=1000, datapath_test=None, workers=4, max_queue_size=10, first_epoch=0, print_frequency=1, crops_per_image=2, log_weight_frequency=None, log_weight_path='./data/weights/', log_tensorboard_path='./data/logs/', log_tensorboard_name='SRGAN', log_tensorboard_update_freq=10000, log_test_frequency=500, log_test_path="./images/samples/", ): """Train the SRGAN network :param int epochs: how many epochs to train the network for :param str dataname: name to use for storing model weights etc. :param str datapath_train: path for the image files to use for training :param str datapath_test: path for the image files to use for testing / plotting :param int print_frequency: how often (in epochs) to print progress to terminal. Warning: will run validation inference! :param int log_weight_frequency: how often (in epochs) should network weights be saved. None for never :param int log_weight_path: where should network weights be saved :param int log_test_frequency: how often (in epochs) should testing & validation be performed :param str log_test_path: where should test results be saved :param str log_tensorboard_path: where should tensorflow logs be sent :param str log_tensorboard_name: what folder should tf logs be saved under """ # Create train data loader loader = DataLoader(datapath_train, batch_size, self.height_hr, self.width_hr, self.upscaling_factor, crops_per_image) # Validation data loader if datapath_validation is not None: validation_loader = DataLoader(datapath_validation, batch_size, self.height_hr, self.width_hr, self.upscaling_factor, crops_per_image) # Use several workers on CPU for preparing batches enqueuer = OrderedEnqueuer(loader, use_multiprocessing=True, shuffle=True) enqueuer.start(workers=workers, max_queue_size=max_queue_size) output_generator = enqueuer.get() # Callback: tensorboard if log_tensorboard_path: tensorboard = TensorBoard(log_dir=os.path.join( log_tensorboard_path, log_tensorboard_name), histogram_freq=0, batch_size=batch_size, write_graph=False, write_grads=False, update_freq=log_tensorboard_update_freq) tensorboard.set_model(self.srgan) else: print( ">> Not logging to tensorboard since no log_tensorboard_path is set" ) # Callback: format input value def named_logs(model, logs): """Transform train_on_batch return value to dict expected by on_batch_end callback""" result = {} for l in zip(model.metrics_names, logs): result[l[0]] = l[1] return result # Shape of output from discriminator disciminator_output_shape = list(self.discriminator.output_shape) disciminator_output_shape[0] = batch_size disciminator_output_shape = tuple(disciminator_output_shape) # VALID / FAKE targets for discriminator real = np.ones(disciminator_output_shape) fake = np.zeros(disciminator_output_shape) # Each epoch == "update iteration" as defined in the paper print_losses = {"G": [], "D": []} start_epoch = datetime.datetime.now() # Random images to go through idxs = np.random.randint(0, len(loader), epochs) # Loop through epochs / iterations for epoch in range(first_epoch, int(epochs) + first_epoch): # Start epoch time if epoch % (print_frequency + 1) == 0: start_epoch = datetime.datetime.now() # Train discriminator imgs_lr, imgs_hr = next(output_generator) generated_hr = self.generator.predict(imgs_lr) real_loss = self.discriminator.train_on_batch(imgs_hr, real) fake_loss = self.discriminator.train_on_batch(generated_hr, fake) discriminator_loss = 0.5 * np.add(real_loss, fake_loss) 
            # Train generator
            features_hr = self.vgg.predict(self.preprocess_vgg(imgs_hr))
            generator_loss = self.srgan.train_on_batch(imgs_lr, [real, features_hr])

            # Callbacks
            logs = named_logs(self.srgan, generator_loss)
            tensorboard.on_epoch_end(epoch, logs)

            # Save losses
            print_losses['G'].append(generator_loss)
            print_losses['D'].append(discriminator_loss)

            # Show the progress
            if epoch % print_frequency == 0:
                g_avg_loss = np.array(print_losses['G']).mean(axis=0)
                d_avg_loss = np.array(print_losses['D']).mean(axis=0)
                print(
                    "\nEpoch {}/{} | Time: {}s\n>> Generator/GAN: {}\n>> Discriminator: {}"
                    .format(
                        epoch, epochs + first_epoch,
                        (datetime.datetime.now() - start_epoch).seconds,
                        ", ".join([
                            "{}={:.4f}".format(k, v) for k, v in zip(
                                self.srgan.metrics_names, g_avg_loss)
                        ]),
                        ", ".join([
                            "{}={:.4f}".format(k, v) for k, v in zip(
                                self.discriminator.metrics_names, d_avg_loss)
                        ])))
                print_losses = {"G": [], "D": []}

                # Run validation inference if specified
                if datapath_validation:
                    validation_losses = self.generator.evaluate_generator(
                        validation_loader,
                        steps=steps_per_validation,
                        use_multiprocessing=workers > 1,
                        workers=workers)
                    print(">> Validation Losses: {}".format(", ".join([
                        "{}={:.4f}".format(k, v) for k, v in zip(
                            self.generator.metrics_names, validation_losses)
                    ])))

            # If test images are supplied, run model on them and save to log_test_path
            if datapath_test and epoch % log_test_frequency == 0:
                plot_test_images(self, loader, datapath_test, log_test_path, epoch)

            # Check if we should save the network weights
            if log_weight_frequency and epoch % log_weight_frequency == 0:
                # Save the network weights
                self.save_weights(os.path.join(log_weight_path, dataname))
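# train_srgan() pulls batches from a keras Sequence through OrderedEnqueuer.
# A minimal, self-contained sketch of that pattern with TF 2.x; the RandomPairs
# Sequence below is a stand-in for the project's DataLoader, not part of it.
import numpy as np
from tensorflow.keras.utils import OrderedEnqueuer, Sequence

class RandomPairs(Sequence):
    def __init__(self, n_batches=100, batch_size=16):
        self.n_batches = n_batches
        self.batch_size = batch_size

    def __len__(self):
        return self.n_batches

    def __getitem__(self, idx):
        lr = np.random.rand(self.batch_size, 32, 32, 3).astype('float32')
        hr = np.random.rand(self.batch_size, 128, 128, 3).astype('float32')
        return lr, hr

loader = RandomPairs()
enqueuer = OrderedEnqueuer(loader, use_multiprocessing=False, shuffle=True)
enqueuer.start(workers=2, max_queue_size=10)
output_generator = enqueuer.get()

imgs_lr, imgs_hr = next(output_generator)  # same call pattern as in the loop above
enqueuer.stop()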
class MLP: def __init__(self, x_batch_size, mask_batch_size, tensorboard_logs_dir="", add_mopt_perf_metric=True, use_early_stopping=True): self.batch_size = mask_batch_size * x_batch_size self.mask_batch_size = mask_batch_size self.x_batch_size = x_batch_size self.losses_per_sample = None self.tr_loss_history = [] self.te_loss_history = [] self.tf_logs = tensorboard_logs_dir self.epoch_counter = 0 self.add_mopt_perf_metric = add_mopt_perf_metric self.useEarlyStopping = use_early_stopping def create_dense_model(self, input_shape, dense_arch, last_activation="linear"): self.x_shape = input_shape self.y_shape = dense_arch[-1] input_data_layer = Input(shape=input_shape) x = Flatten()(input_data_layer) input_mask_layer = Input(shape=input_shape) mask = Flatten()(input_mask_layer) x = tf.keras.layers.Concatenate(axis=1)([x, mask]) for units in dense_arch[:-1]: x = Dense(units, activation="sigmoid")(x) x = Dense(dense_arch[-1], activation=last_activation)(x) self.model = Model(inputs=[input_data_layer, input_mask_layer], outputs=x) # self.model.summary() def create_batch(self, x, masks, y): """ x = [[1,2],[3,4]] -> [[1,2],[1,2],[1,2],[3,4],[3,4],[3,4]] masks = [[0,0],[1,0],[1,1]] -> [[0,0],[1,0],[1,1],[0,0],[1,0],[1,1]] y = [1,3] -> [1 ,1 ,1 ,3 ,3 ,3 ] """ x_prim = np.repeat(x, len(masks), axis=0) y_prim = np.repeat(y, len(masks), axis=0) masks_prim = np.tile(masks, (len(x), 1)) x_prim *= masks_prim return x_prim, masks_prim, y_prim def named_logs(self, model, logs, mode="train"): result = {} try: iterator = iter(logs) except TypeError: logs = [logs] metricNames = (mode + "_" + i for i in model.metrics_names) for l in zip(metricNames, logs): result[l[0]] = l[1] return result def compile_model(self, loss_per_sample, combine_losses, combine_mask_losses, metrics=None): self.mask_loss_combine_function = combine_mask_losses if self.add_mopt_perf_metric is True: if metrics is None: metrics = [self.get_mopt_perf_metric()] else: metrics.append(self.get_mopt_perf_metric()) def logging_loss_function(y_true, y_pred): losses = loss_per_sample(y_true, y_pred)[:, 0] # print(losses) self.losses_per_sample = losses return combine_losses(losses) self.model.compile(loss=logging_loss_function, optimizer='nadam', metrics=metrics, run_eagerly=True) if self.tf_logs != "": log_path = './logs' self.tb_clbk = TensorBoard(self.tf_logs) self.tb_clbk.set_model(self.model) def get_per_mask_loss(self, used_target_shape=None): if used_target_shape is None: used_target_shape = (self.x_batch_size, self.mask_batch_size) losses = tf.reshape(self.losses_per_sample, used_target_shape) losses = self.mask_loss_combine_function(losses) return losses def get_per_mask_loss_with_custom_batch(self, losses, new_x_batch_size, new_mask_batch_size): losses = np.reshape(losses, newshape=(new_x_batch_size, new_mask_batch_size)) losses = np.apply_along_axis(self.mask_loss_combine_function, 0, losses) return losses def train_one(self, x, masks, y): x_prim, masks_prim, y_prim = self.create_batch(x, masks, y) curr_loss = self.model.train_on_batch(x=[x_prim, masks_prim], y=y_prim) self.tr_loss_history.append(curr_loss) self.epoch_counter += 1 if self.tf_logs != "": self.tb_clbk.on_epoch_end(self.epoch_counter, self.named_logs(self.model, curr_loss)) return x_prim, masks_prim, y_prim def test_one(self, x, masks, y): x_prim, masks_prim, y_prim = self.create_batch(x, masks, y) feature = self.model.predict(x=[x_prim, masks_prim]) return feature def get_mopt_perf_metric(self): def m_opt_loss(y_pred, y_true): if self.losses_per_sample.shape[0] % 
self.mask_batch_size != 0: return 0.0 else: losses = tf.reshape(self.losses_per_sample, (-1, self.mask_batch_size)) self.last_m_opt_perf = np.mean( losses[:, int(0.5 * self.mask_batch_size)]) return self.last_m_opt_perf return m_opt_loss def set_early_stopping_params(self, starting_epoch, patience_batches=800, minimize=True): self.ES_patience = patience_batches self.ES_minimize = minimize if minimize is True: self.ES_best_perf = 1000000.0 else: self.ES_best_perf = -1000000.0 self.ES_best_epoch = starting_epoch self.ES_stop_training = False self.ES_start_epoch = starting_epoch self.ES_best_weights = None return
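# The create_batch() docstring above describes a cartesian pairing of samples
# with masks. A quick standalone check of that np.repeat / np.tile construction:
import numpy as np

x = np.array([[1., 2.], [3., 4.]])
masks = np.array([[0., 0.], [1., 0.], [1., 1.]])
y = np.array([1., 3.])

x_prim = np.repeat(x, len(masks), axis=0)   # each sample repeated once per mask
masks_prim = np.tile(masks, (len(x), 1))    # the mask set repeated once per sample
y_prim = np.repeat(y, len(masks), axis=0)
x_prim *= masks_prim                        # apply the masks

print(x_prim.tolist())  # [[0.0, 0.0], [1.0, 0.0], [1.0, 2.0], [0.0, 0.0], [3.0, 0.0], [3.0, 4.0]]
print(y_prim.tolist())  # [1.0, 1.0, 1.0, 3.0, 3.0, 3.0]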
def train_vsrganplus(self, epochs=None, batch_size=16, modelname=None, datapath_train=None, datapath_validation=None, steps_per_validation=1000, datapath_test=None, workers=4, max_queue_size=10, first_epoch=0, print_frequency=1, crops_per_image=2, log_weight_frequency=None, log_weight_path='./model/', log_tensorboard_path='./data/logs/', log_tensorboard_update_freq=10, log_test_frequency=500, log_test_path="./images/samples/", media_type='i'): """Train the VSRGANplus network :param int epochs: how many epochs to train the network for :param str modelname: name to use for storing model weights etc. :param str datapath_train: path for the image files to use for training :param str datapath_test: path for the image files to use for testing / plotting :param int print_frequency: how often (in epochs) to print progress to terminal. Warning: will run validation inference! :param int log_weight_frequency: how often (in epochs) should network weights be saved. None for never :param int log_weight_path: where should network weights be saved :param int log_test_frequency: how often (in epochs) should testing & validation be performed :param str log_test_path: where should test results be saved :param str log_tensorboard_path: where should tensorflow logs be sent """ # Create data loaders train_loader = DataLoader(datapath_train, batch_size, self.height_hr, self.width_hr, self.upscaling_factor, crops_per_image, media_type, self.channels, self.colorspace) # Validation data loader validation_loader = None if datapath_validation is not None: validation_loader = DataLoader(datapath_validation, batch_size, self.height_hr, self.width_hr, self.upscaling_factor, crops_per_image, 'i', self.channels, self.colorspace) test_loader = None if datapath_test is not None: test_loader = DataLoader(datapath_test, 1, self.height_hr, self.width_hr, self.upscaling_factor, 1, 'i', self.channels, self.colorspace) # Use several workers on CPU for preparing batches enqueuer = OrderedEnqueuer(train_loader, use_multiprocessing=True, shuffle=True) enqueuer.start(workers=workers, max_queue_size=max_queue_size) output_generator = enqueuer.get() # Callback: tensorboard if log_tensorboard_path: tensorboard = TensorBoard(log_dir=os.path.join( log_tensorboard_path, modelname), histogram_freq=0, write_graph=True, update_freq=log_tensorboard_update_freq) tensorboard.set_model(self.vsrganplus) else: print( ">> Not logging to tensorboard since no log_tensorboard_path is set" ) # Learning rate scheduler def lr_scheduler(epoch, lr): factor = 0.1 decay_step = [50000, 100000, 200000, 300000] if epoch in decay_step and epoch: return lr * factor return lr lr_scheduler_gan = LearningRateScheduler(lr_scheduler, verbose=1) lr_scheduler_gan.set_model(self.vsrganplus) lr_scheduler_gen = LearningRateScheduler(lr_scheduler, verbose=0) lr_scheduler_gen.set_model(self.generator) lr_scheduler_dis = LearningRateScheduler(lr_scheduler, verbose=0) lr_scheduler_dis.set_model(self.discriminator) lr_scheduler_ra = LearningRateScheduler(lr_scheduler, verbose=0) lr_scheduler_ra.set_model(self.ra_discriminator) # Callback: format input value def named_logs(model, logs): """Transform train_on_batch return value to dict expected by on_batch_end callback""" result = {} for l in zip(model.metrics_names, logs): result[l[0]] = l[1] return result # Shape of output from discriminator disciminator_output_shape = list(self.ra_discriminator.output_shape) disciminator_output_shape[0] = batch_size disciminator_output_shape = tuple(disciminator_output_shape) # VALID / FAKE 
targets for discriminator real = np.ones(disciminator_output_shape) fake = np.zeros(disciminator_output_shape) # Each epoch == "update iteration" as defined in the paper print_losses = {"GAN": [], "D": []} start_epoch = datetime.datetime.now() # Random images to go through #idxs = np.random.randint(0, len(train_loader), epochs) # Loop through epochs / iterations for epoch in range(first_epoch, int(epochs) + first_epoch): lr_scheduler_gan.on_epoch_begin(epoch) lr_scheduler_ra.on_epoch_begin(epoch) lr_scheduler_dis.on_epoch_begin(epoch) lr_scheduler_gen.on_epoch_begin(epoch) print("\nEpoch {}/{}:".format(epoch + 1, epochs + first_epoch)) # Start epoch time if epoch % print_frequency == 0: start_epoch = datetime.datetime.now() # Train discriminator self.discriminator.trainable = True self.ra_discriminator.trainable = True imgs_lr, imgs_hr = next(output_generator) generated_hr = self.generator.predict(imgs_lr) real_loss = self.ra_discriminator.train_on_batch( [imgs_hr, generated_hr], real) #print("Real: ",real_loss) fake_loss = self.ra_discriminator.train_on_batch( [generated_hr, imgs_hr], fake) #print("Fake: ",fake_loss) discriminator_loss = 0.5 * np.add(real_loss, fake_loss) # Train generator self.discriminator.trainable = False self.ra_discriminator.trainable = False #for _ in tqdm(range(10),ncols=60,desc=">> Training generator"): imgs_lr, imgs_hr = next(output_generator) gan_loss = self.vsrganplus.train_on_batch([imgs_lr, imgs_hr], [imgs_hr, real, imgs_hr]) # Callbacks logs = named_logs(self.vsrganplus, gan_loss) tensorboard.on_epoch_end(epoch, logs) # Save losses print_losses['GAN'].append(gan_loss) print_losses['D'].append(discriminator_loss) # Show the progress if epoch % print_frequency == 0: g_avg_loss = np.array(print_losses['GAN']).mean(axis=0) d_avg_loss = np.array(print_losses['D']).mean(axis=0) print(">> Time: {}s\n>> GAN: {}\n>> Discriminator: {}".format( (datetime.datetime.now() - start_epoch).seconds, ", ".join([ "{}={:.4f}".format(k, v) for k, v in zip( self.vsrganplus.metrics_names, g_avg_loss) ]), ", ".join([ "{}={:.4f}".format(k, v) for k, v in zip( self.discriminator.metrics_names, d_avg_loss) ]))) print_losses = {"GAN": [], "D": []} # Run validation inference if specified if datapath_validation: validation_losses = self.generator.evaluate_generator( validation_loader, steps=steps_per_validation, use_multiprocessing=workers > 1, workers=workers) print(">> Validation Losses: {}".format(", ".join([ "{}={:.4f}".format(k, v) for k, v in zip( self.generator.metrics_names, validation_losses) ]))) # If test images are supplied, run model on them and save to log_test_path if datapath_test and epoch % log_test_frequency == 0: plot_test_images(self.generator, test_loader, datapath_test, log_test_path, epoch, modelname, channels=self.channels, colorspace=self.colorspace) # Check if we should save the network weights if log_weight_frequency and epoch % log_weight_frequency == 0: # Save the network weights self.save_weights(os.path.join(log_weight_path, modelname))
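# The loop above drives Keras LearningRateScheduler callbacks manually:
# set_model() once, then on_epoch_begin() at the top of every iteration.
# A minimal sketch of that pattern with a toy compiled model; the step-decay
# schedule mirrors the lr_scheduler defined in train_vsrganplus.
import tensorflow as tf
from tensorflow.keras.callbacks import LearningRateScheduler

def lr_scheduler(epoch, lr):
    factor = 0.1
    decay_step = [50000, 100000, 200000, 300000]
    if epoch in decay_step and epoch:
        return lr * factor
    return lr

toy = tf.keras.Sequential([tf.keras.layers.Input(shape=(8,)), tf.keras.layers.Dense(1)])
toy.compile(optimizer=tf.keras.optimizers.Adam(1e-4), loss='mse')

scheduler = LearningRateScheduler(lr_scheduler, verbose=1)
scheduler.set_model(toy)

for iteration in range(3):
    scheduler.on_epoch_begin(iteration)  # reads and, at decay steps, rescales the optimizer lr
    # ... train_on_batch calls for this iteration would go here ...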
def _run(game, network_params, memory_params, ops):
    """Sets up and runs the gaming simulation.

    Initializes TensorFlow, the training agent, and the game environment.
    The agent plays the game from the starting state for a number of
    episodes set by the user.

    Args:
        game: name of the Gym environment to create.
        network_params: network settings used by _create_agent to build the agent.
        memory_params: replay-memory settings used by _create_agent.
        ops: the command-line arguments parsed by _parse_arguments.
    """
    # Setup TensorBoard Writer.
    trial_id = json.loads(os.environ.get('TF_CONFIG', '{}')).get('task', {}).get('trial', '')
    # Append the trial id so each hyperparameter-tuning trial writes to its own directory.
    output_path = ops.job_dir if not trial_id else ops.job_dir + '/' + trial_id
    tensorboard = TensorBoard(log_dir=output_path)
    hpt = hypertune.HyperTune()

    graph = tf.Graph()
    with graph.as_default():
        env = gym.make(game)
        agent = _create_agent(env, network_params, memory_params)
        rewards = []
        tensorboard.set_model(agent.policy)

        def _train_or_evaluate(print_score, training=False):
            """Runs a gaming simulation and writes results for tensorboard.

            Args:
                print_score (bool): True to print a score to the console.
                training (bool): True if the agent is training, False to eval.

            Returns:
                loss if training, else reward for evaluating.
            """
            reward = _play(agent, env, training)
            if print_score:
                print(
                    'Train - ',
                    'Episode: {}'.format(episode),
                    'Total reward: {}'.format(reward),
                )
            return reward

        for episode in range(1, ops.episodes + 1):
            print_score = ops.print_rate and episode % ops.print_rate == 0
            get_summary = ops.eval_rate and episode % ops.eval_rate == 0
            rewards.append(_train_or_evaluate(print_score, training=True))
            if get_summary:
                avg_reward = sum(rewards) / len(rewards)
                summary = {'eval_reward': avg_reward}
                tensorboard.on_epoch_end(episode, summary)
                hpt.report_hyperparameter_tuning_metric(
                    hyperparameter_metric_tag='avg_reward',
                    metric_value=avg_reward,
                    global_step=episode)
                print(
                    'Eval - ',
                    'Episode: {}'.format(episode),
                    'Average Reward: {}'.format(avg_reward),
                )
                rewards = []

        tensorboard.on_train_end(None)
        _record_video(env, agent, output_path)
        agent.policy.save(output_path, save_format='tf')
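# For reference, the trial id lookup in _run() parses the TF_CONFIG environment
# variable that AI Platform / Vertex AI sets for hyperparameter-tuning jobs.
# A standalone illustration with a fabricated TF_CONFIG value and job dir:
import json
import os

os.environ['TF_CONFIG'] = json.dumps({'task': {'trial': '7'}})
job_dir = 'gs://my-bucket/job'  # placeholder
trial_id = json.loads(os.environ.get('TF_CONFIG', '{}')).get('task', {}).get('trial', '')
output_path = job_dir if not trial_id else job_dir + '/' + trial_id
print(output_path)  # gs://my-bucket/job/7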
def train(train_lbld_trios, val_lbls_trios, network, weights, model_path, n_epochs, init_lr, optmzr_name, imagenet=False, freeze_until=None): """Training function: train a model of type 'network' over the data. Args: network (str): String identifying the network architecture to use. weights (str): Path string to a .cpkt weights file. model_path (str): Path string to a directory to save models in. n_epochs (int): Integer representing the number of epochs to run training. """ # Create a folder for saving trained models if os.path.isdir(model_path) is False: logging.info("Creating a folder to save models at: " + str(model_path)) os.mkdir(model_path) starting_epoch = 0 if network == 'SiameseNetTriplet': siamese_net = SiameseNetTriplet((128, 128, 3), arch='resnet18', sliding=True, imagenet=imagenet, freeze_until=freeze_until) optimizer = Adam(lr=0.0006) model = siamese_net.build_model() loss_model = siamese_net.loss_model single_model = siamese_net.single_model if weights: print("Loading model at: " + str(weights)) starting_epoch = int(weights.split('-')[1]) + 1 model.load_weights(weights) model.compile(loss=triplet_loss, optimizer=optimizer, metrics=[cos_sim_pos, cos_sim_neg]) # Load data and create data generator: train_ds = tf.data.Dataset.from_generator( image_trio_generator, args=[train_lbld_trios, True, False, False, imagenet], output_types=((tf.float32, tf.float32, tf.float32), tf.float32, tf.string), output_shapes=(((TARGET_WIDTH, TARGET_HEIGHT, 3), (TARGET_WIDTH, TARGET_HEIGHT, 3), (TARGET_WIDTH, TARGET_HEIGHT, 3)), (1, None), (3))) batched_train_ds = train_ds.batch(BATCH_SIZE) # shuffle(10000).batch valid_ds = tf.data.Dataset.from_generator( image_trio_generator, args=[val_lbld_trios, False, False, False, imagenet], output_types=((tf.float32, tf.float32, tf.float32), tf.float32, tf.string), # output_shapes=(((TARGET_WIDTH, TARGET_HEIGHT, 3), (TARGET_WIDTH, TARGET_HEIGHT, 3), (TARGET_WIDTH, TARGET_HEIGHT, 3)), (1, None))) output_shapes=(((TARGET_WIDTH, TARGET_HEIGHT, 3), (TARGET_WIDTH, TARGET_HEIGHT, 3), (TARGET_WIDTH, TARGET_HEIGHT, 3)), (1, None), (3))) batched_valid_ds = valid_ds.batch(BATCH_SIZE) logdir = "logs/scalars/" + datetime.now().strftime("%Y%m%d-%H%M%S") tensorboard_callback = TensorBoard(log_dir=logdir, histogram_freq=0, batch_size=BATCH_SIZE, write_graph=True, write_grads=True) tensorboard_callback.set_model(model) def named_logs(metrics_names, logs): result = {} for l in zip(metrics_names, logs): result[l[0]] = l[1] return result # Train model: steps_per_epoch = len(train_lbld_trios) // BATCH_SIZE val_steps_per_epoch = len( val_lbld_trios) // BATCH_SIZE # steps_per_epoch//3 best_val_loss = 1000 best_train_loss = 1000 batched_train_iter = iter(batched_train_ds) batched_val_iter = iter(batched_valid_ds) # batched_train_iter = batched_train_ds.make_one_shot_iterator() # batched_val_iter = batched_valid_ds.make_one_shot_iterator() for epoch in range(starting_epoch, n_epochs): cumm_csim_pos_tr = 0 cumm_csim_neg_tr = 0 cumm_tr_loss = 0 cumm_csim_pos_val = 0 cumm_csim_neg_val = 0 cumm_val_loss = 0 print('Epoch #' + str(epoch) + ':') for step in tqdm(range(steps_per_epoch)): train_inputs, train_y, train_seq_ids = next(batched_train_iter) #train_inputs, train_y = batched_train_iter.get_next() # tinrain_x1 = train_inputs['input_1'] # train_x2 = train_inputs['input_2'] train_x1 = train_inputs[0] train_x2 = train_inputs[1] train_x3 = train_inputs[2] # train_y = train_y['output'] X_dict = {} seq_ids = [] for idx, row in enumerate(train_seq_ids): for idx2, class_id in enumerate(row): 
class_id = class_id.numpy().decode('utf8') if class_id not in seq_ids: seq_ids.append(class_id) if class_id in X_dict: X_dict[class_id].append(train_inputs[idx2][idx]) else: X_dict[class_id] = [] X_dict[class_id].append(train_inputs[idx2][idx]) triplets = get_batch_hard(model, train_inputs, seq_ids, BATCH_SIZE) loss, csim_pos_tr, csim_neg_tr = model.train_on_batch( [triplets[0], triplets[1], triplets[2]], train_y[:BATCH_SIZE // 2]) cumm_tr_loss += loss cumm_csim_pos_tr += csim_pos_tr cumm_csim_neg_tr += csim_neg_tr cumm_csim_pos_tr = cumm_csim_pos_tr / steps_per_epoch cumm_csim_neg_tr = cumm_csim_neg_tr / steps_per_epoch cumm_tr_loss = cumm_tr_loss / steps_per_epoch # evaluate for step in tqdm(range(val_steps_per_epoch)): valid_inputs, val_y, val_seq_ids = next(batched_val_iter) # valid_inputs, valid_y = batched_val_iter.get_next() valid_x1 = valid_inputs[0] valid_x2 = valid_inputs[1] valid_x3 = valid_inputs[2] val_loss, csim_pos_val, csim_neg_val = model.test_on_batch( [valid_x1, valid_x2, valid_x3], val_y) cumm_val_loss += val_loss cumm_csim_pos_val += csim_pos_val cumm_csim_neg_val += csim_neg_val cumm_csim_pos_val = cumm_csim_pos_val / val_steps_per_epoch cumm_csim_neg_val = cumm_csim_neg_val / val_steps_per_epoch cumm_val_loss = cumm_val_loss / val_steps_per_epoch print('Training loss: ' + str(cumm_tr_loss)) print('Validation loss: ' + str(cumm_val_loss)) print('* Cosine sim positive (train) for this epoch: %0.2f' % (cumm_csim_pos_tr)) print('* Cosine sim negative (train) for this epoch: %0.2f' % (cumm_csim_neg_tr)) print('* Cosine sim positive (valid) for this epoch: %0.2f' % (cumm_csim_pos_val)) print('* Cosine sim negative (valid) for this epoch: %0.2f' % (cumm_csim_neg_val)) metrics_names = [ 'tr_loss', 'tr_csim_pos', 'tr_csim_neg', 'val_loss', 'val_csim_pos', 'val_csim_neg' ] tensorboard_callback.on_epoch_end( epoch, named_logs(metrics_names, [ cumm_tr_loss, cumm_csim_pos_tr, cumm_csim_neg_tr, cumm_val_loss, cumm_csim_pos_val, cumm_csim_neg_val ])) model_filepath = os.path.join( model_path, "model-{epoch:03d}-{val_loss:.4f}.hdf5".format( epoch=epoch, val_loss=cumm_val_loss)) if cumm_val_loss < best_val_loss * 1.5: if cumm_val_loss < best_val_loss: best_val_loss = cumm_val_loss model.save(model_filepath) # OR model.save_weights() print("Best model w/ val loss {} saved to {}".format( cumm_val_loss, model_filepath)) tensorboard_callback.on_train_end(None) return
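# triplet_loss, cos_sim_pos and cos_sim_neg are defined elsewhere in this
# project. The sketch below shows one common formulation they may correspond
# to, assuming the loss model outputs anchor/positive/negative embeddings
# concatenated along the last axis; treat the shapes and margin as assumptions,
# not the project's actual definitions.
import tensorflow as tf
import tensorflow.keras.backend as K

EMB_DIM = 128   # assumed embedding size
MARGIN = 0.2    # assumed margin

def _split(y_pred):
    return (y_pred[:, :EMB_DIM],
            y_pred[:, EMB_DIM:2 * EMB_DIM],
            y_pred[:, 2 * EMB_DIM:])

def triplet_loss(y_true, y_pred):
    anchor, positive, negative = _split(y_pred)
    pos_dist = K.sum(K.square(anchor - positive), axis=-1)
    neg_dist = K.sum(K.square(anchor - negative), axis=-1)
    return K.maximum(pos_dist - neg_dist + MARGIN, 0.0)

def _cos(a, b):
    return K.sum(tf.math.l2_normalize(a, axis=-1) * tf.math.l2_normalize(b, axis=-1), axis=-1)

def cos_sim_pos(y_true, y_pred):
    anchor, positive, _ = _split(y_pred)
    return _cos(anchor, positive)

def cos_sim_neg(y_true, y_pred):
    anchor, _, negative = _split(y_pred)
    return _cos(anchor, negative)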
class DQNAgent: def __init__(self, state_size, action_space, train=True): self.t = 0 self.max_Q = 0 self.train = train # Get size of state and action self.state_size = state_size self.action_space = action_space self.action_size = action_space # These are hyper parameters for the DQN self.discount_factor = 0.99 self.learning_rate = 1e-4 if self.train: self.epsilon = 0.5 self.initial_epsilon = 0.5 else: self.epsilon = 1e-6 self.initial_epsilon = 1e-6 self.epsilon_min = 0.02 self.batch_size = 64 self.train_start = 100 self.explore = 10000 # Create replay memory using deque self.memory = deque(maxlen=10000) # Create main model and target model if MODEL_TYPE == 1: self.model = self.build_transfer_model_has_batchnorm() self.target_model = self.build_transfer_model_has_batchnorm() elif MODEL_TYPE == 2: self.model = self.build_transfer_model_has_no_batchnorm() self.target_model = self.build_transfer_model_has_no_batchnorm() elif MODEL_TYPE == 3: self.model = self.build_transfer_model_has_batchnorm_with_canny() self.target_model = self.build_transfer_model_has_batchnorm_with_canny( ) elif MODEL_TYPE == 4: self.model = self.build_dc_model_without_transfer() self.target_model = self.build_dc_model_without_transfer() else: self.model = self.build_og_dc_model_from_repo() self.target_model = self.build_og_dc_model_from_repo() # Copy the model to target model # --> initialize the target model so that the parameters of model & target model to be same self.update_target_model() self.tensorboard = TensorBoard(log_dir='./logs/', histogram_freq=0, write_graph=True, write_grads=True) self.tensorboard.set_model(self.model) def build_transfer_model_has_batchnorm(self): source_model = load_model('./files/my_model_new.h5') model = Sequential() for layer in source_model.layers[:-1]: if 'batch' not in layer.name: model.add(layer) for layer in model.layers: layer.trainable = True # model.add(Dense(512, activation="relu", name='dense')) # model.add(Dense(512, activation="relu", name='dense_1')) model.add(Dense(15, activation="linear", name='dense_2')) adam = Adam(lr=self.learning_rate) model.compile(loss='mse', optimizer=adam) return model def build_transfer_model_has_no_batchnorm(self): source_model = load_model('./files/my_model_gs.h5') model = Sequential() for layer in source_model.layers[:-3]: model.add(layer) for layer in model.layers: layer.trainable = True model.add(Dense(512, activation="relu", name='dense')) model.add(Dense(512, activation="relu", name='dense_1')) model.add(Dense(15, activation="linear", name='dense_2')) # print(model.summary()) adam = Adam(lr=self.learning_rate) model.compile(loss='mse', optimizer=adam) return model def build_transfer_model_has_batchnorm_with_canny(self): # source_model = load_model('./files/my_model_canny.h5') # model = Sequential() # for layer in source_model.layers[:-3]: # model.add(layer) # for layer in model.layers: # layer.trainable = True # model.add(Dense(512, activation="relu", name='dense')) # model.add(Dense(512, activation="relu", name='dense_1')) # model.add(Dense(15, activation="linear", name='dense_2')) # # print(model.summary()) # adam = Adam(lr=self.learning_rate) # model.compile(loss='mse', optimizer=adam) # return model # # def build_og_dc_model_from_repo(self): # model = Sequential() # model.add(Conv2D(24, (5, 5), strides=(2, 2), padding="same", # input_shape=(IMG_ROWS, IMG_COLS, IMG_CHANNELS))) # 80*80*4 # model.add(Activation('relu')) # model.add(Conv2D(32, (5, 5), strides=(2, 2), padding="same")) # model.add(Activation('relu')) # model.add(Conv2D(64, 
(5, 5), strides=(2, 2), padding="same")) # model.add(Activation('relu')) # model.add(Conv2D(64, (3, 3), strides=(2, 2), padding="same")) # model.add(Activation('relu')) # model.add(Conv2D(64, (3, 3), strides=(1, 1), padding="same")) # model.add(Activation('relu')) # model.add(Flatten()) # model.add(Dense(512)) # model.add(Activation('relu')) # # # 15 categorical bins for Steering angles # model.add(Dense(15, activation="linear")) # # adam = Adam(lr=self.learning_rate) # model.compile(loss='mse', optimizer=adam) # # return model return def build_dc_model_without_transfer(self): Input_1 = Input(shape=(64, 64, 3), name='Input_1') Convolution2D_1 = Conv2D(4, kernel_size=3, padding='same', activation='relu')(Input_1) Convolution2D_2 = Conv2D(4, kernel_size=3, padding='same', activation='relu')(Convolution2D_1) # Convolution2D_2 = BatchNormalization()(Convolution2D_2) MaxPooling2D_1 = MaxPooling2D()(Convolution2D_2) Convolution2D_5 = Conv2D(8, kernel_size=3, padding='same', activation='relu')(MaxPooling2D_1) Convolution2D_6 = Conv2D(8, kernel_size=3, padding='same', activation='relu')(Convolution2D_5) # Convolution2D_6 = BatchNormalization()(Convolution2D_6) MaxPooling2D_2 = MaxPooling2D()(Convolution2D_6) Convolution2D_7 = Conv2D(16, kernel_size=3, padding='same', activation='relu')(MaxPooling2D_2) Convolution2D_8 = Conv2D(16, kernel_size=3, padding='same', activation='relu')(Convolution2D_7) Convolution2D_11 = Conv2D(16, kernel_size=3, padding='same', activation='relu')(Convolution2D_8) # Convolution2D_11 = BatchNormalization()(Convolution2D_11) MaxPooling2D_3 = MaxPooling2D()(Convolution2D_11) Convolution2D_9 = Conv2D(32, kernel_size=3, padding='same', activation='relu')(MaxPooling2D_3) Convolution2D_10 = Conv2D(32, kernel_size=3, padding='same', activation='relu')(Convolution2D_9) Convolution2D_12 = Conv2D(16, kernel_size=3, padding='same', activation='relu')(Convolution2D_10) # Convolution2D_12 = BatchNormalization()(Convolution2D_12) MaxPooling2D_4 = MaxPooling2D(name='MaxPooling2D_4')(Convolution2D_12) Convolution2D_13 = Conv2D(32, kernel_size=3, padding='same', activation='relu')(MaxPooling2D_4) Convolution2D_14 = Conv2D(32, kernel_size=3, padding='same', activation='relu')(Convolution2D_13) Convolution2D_16 = Conv2D(16, kernel_size=3, padding='same', activation='relu')(Convolution2D_14) # Convolution2D_16 = BatchNormalization()(Convolution2D_16) MaxPooling2D_5 = MaxPooling2D(name='MaxPooling2D_5')(Convolution2D_16) Flatten_1 = Flatten()(MaxPooling2D_5) Dense_1 = Dense(512, activation='relu')(Flatten_1) # Dropout_1 = Dropout(0.2)(Dense_1) Dense_2 = Dense(512, activation='relu')(Dense_1) # Dropout_2 = Dropout(0.2)(Dense_2) Dense_3 = Dense(15, activation='linear')(Dense_2) model = Model([Input_1], [Dense_3]) model.compile(optimizer='adam', loss='mse') return model def rgb2gray(self, rgb): ''' take a numpy rgb image return a new single channel image converted to greyscale ''' return np.dot(rgb[..., :3], [0.299, 0.587, 0.114]) def process_image(self, obs): # obs = self.rgb2gray(obs) obs = cv2.resize(obs, (IMG_ROWS, IMG_COLS)) return obs def process_image_for_canny(self, obs): # obs = self.rgb2gray(obs) obs = cv2.resize(obs, (IMG_ROWS, IMG_COLS)) obs1 = cv2.Canny(obs, 100, 200) return obs1 def update_target_model(self): self.target_model.set_weights(self.model.get_weights()) # Get action from model using epsilon-greedy policy def get_action(self, s_t): if np.random.rand() <= self.epsilon: return self.action_space.sample()[0] else: print('Max Q') # print("Return Max Q Prediction") q_value = 
self.model.predict(s_t) # Convert q array to steering value return linear_unbin(q_value[0]) def replay_memory(self, state, action, reward, next_state, done): self.memory.append((state, action, reward, next_state, done)) def update_epsilon(self): if self.epsilon > self.epsilon_min: self.epsilon -= (self.initial_epsilon - self.epsilon_min) / self.explore def named_logs(self, model, logs): result = {} for l in zip(model.metrics_names, logs): result[l[0]] = l[1] return result def train_replay(self, ep_num): if len(self.memory) < self.train_start: return batch_size = min(self.batch_size, len(self.memory)) minibatch = random.sample(self.memory, batch_size) state_t, action_t, reward_t, state_t1, terminal = zip(*minibatch) state_t = np.concatenate(state_t) state_t1 = np.concatenate(state_t1) targets = self.model.predict(state_t) self.max_Q = np.max(targets[0]) target_val = self.model.predict(state_t1) target_val_ = self.target_model.predict(state_t1) for i in range(batch_size): if terminal[i]: targets[i][action_t[i]] = reward_t[i] else: a = np.argmax(target_val[i]) targets[i][action_t[i]] = reward_t[i] + \ self.discount_factor * (target_val_[i][a]) logs = self.model.train_on_batch(state_t, targets) return logs # self.tensorboard.on_epoch_end(ep_num, self.named_logs(self.model, [logs])) def update_tensorboard(self, e, log, rew): self.tensorboard.on_epoch_end(e, self.named_logs(self.model, [rew])) def load_model(self, name): self.model.load_weights(name) # Save the model which is under training def save_model(self, name): self.model.save_weights(name)
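# get_action() above maps the 15-bin Q-value vector to a steering command with
# linear_unbin(), which is defined elsewhere (donkeycar-style binning). A sketch
# of what such helpers typically look like, assuming steering in [-1, 1]
# quantised into 15 bins:
import numpy as np

def linear_unbin(arr, n_bins=15):
    """Map the index of the largest bin back to a steering value in [-1, 1]."""
    b = np.argmax(arr)
    return b * (2.0 / (n_bins - 1)) - 1.0

def linear_bin(a, n_bins=15):
    """Inverse helper: one-hot encode a steering value in [-1, 1]."""
    b = int(round((a + 1.0) / (2.0 / (n_bins - 1))))
    arr = np.zeros(n_bins)
    arr[b] = 1.0
    return arr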
class OperatorNetwork: def __init__(self, x_batch_size, mask_batch_size, tensorboard_logs_dir="", add_mopt_perf_metric=True, use_early_stopping=True): self.batch_size = mask_batch_size * x_batch_size self.mask_batch_size = mask_batch_size self.x_batch_size = x_batch_size self.losses_per_sample = None # self.losses_per_sample = [] self.tr_loss_history = [] self.te_loss_history = [] self.tf_logs = tensorboard_logs_dir self.epoch_counter = 0 self.add_mopt_perf_metric = add_mopt_perf_metric self.useEarlyStopping = use_early_stopping def create_dense_model(self, input_shape, dense_arch, last_activation="linear"): self.x_shape = input_shape self.y_shape = dense_arch[-1] input_data_layer = Input(shape=input_shape) x = Flatten()(input_data_layer) input_mask_layer = Input(shape=input_shape) mask = Flatten()(input_mask_layer) # x = K.concatenate([x,mask]) x = tf.keras.layers.Concatenate(axis=1)([x, mask]) for units in dense_arch[:-1]: x = Dense(units, activation="sigmoid")(x) x = Dense(dense_arch[-1], activation=last_activation)(x) self.model = Model(inputs=[input_data_layer, input_mask_layer], outputs=x) print("Object network model built:") #self.model.summary() def create_1ch_conv_model(self, input_shape, image_shape, filter_sizes, kernel_sizes, dense_arch, padding, last_activation="softmax"): # only for grayscale self.x_shape = input_shape self.y_shape = dense_arch[-1] input_data_layer = Input(shape=input_shape) in1 = Reshape(target_shape=(1,) + image_shape)(input_data_layer) input_mask_layer = Input(shape=input_shape) in2 = Reshape(target_shape=(1,) + image_shape)(input_mask_layer) x = tf.keras.layers.Concatenate(axis=1)([in1, in2]) for i in range(len(filter_sizes)): x = Conv2D(filters=filter_sizes[i], kernel_size=kernel_sizes[i], data_format="channels_first", activation="relu", padding=padding)(x) x = MaxPool2D(pool_size=(2, 2), padding=padding, data_format="channels_first")(x) x = Flatten()(x) for units in dense_arch[:-1]: x = Dense(units, activation="relu")(x) x = Dense(dense_arch[-1], activation=last_activation)(x) self.model = Model(inputs=[input_data_layer, input_mask_layer], outputs=x) print("Object network model built:") #self.model.summary() def create_2ch_conv_model(self, input_shape, image_shape, filter_sizes, kernel_sizes, dense_arch, padding, last_activation="softmax"): # only for grayscale self.x_shape = input_shape self.y_shape = dense_arch[-1] input_data_layer = Input(shape=input_shape) ch_data = Reshape(target_shape=(1,) + image_shape)(input_data_layer) input_mask_layer = Input(shape=input_shape) ch_mask = Reshape(target_shape=(1,) + image_shape)(input_mask_layer) for i in range(len(filter_sizes)): ch_data = Conv2D(filters=filter_sizes[i], kernel_size=kernel_sizes[i], data_format="channels_last", activation="relu", padding=padding)(ch_data) ch_data = MaxPool2D(pool_size=(2, 2), padding=padding, data_format="channels_last")(ch_data) ch_mask = Conv2D(filters=filter_sizes[i], kernel_size=kernel_sizes[i], data_format="channels_last", activation="relu", padding=padding)(ch_mask) ch_mask = MaxPool2D(pool_size=(2, 2), padding=padding, data_format="channels_last")(ch_mask) ch_mask = Flatten()(ch_mask) ch_data = Flatten()(ch_data) x = tf.keras.layers.Concatenate(axis=1)([ch_mask, ch_data]) for units in dense_arch[:-1]: x = Dense(units, activation="relu")(x) x = Dense(dense_arch[-1], activation=last_activation)(x) self.model = Model(inputs=[input_data_layer, input_mask_layer], outputs=x) print("Object network model built:") #self.model.summary() def create_batch(self, x, masks, y): """ x 
= [[1,2],[3,4]] -> [[1,2],[1,2],[1,2],[3,4],[3,4],[3,4]] masks = [[0,0],[1,0],[1,1]] -> [[0,0],[1,0],[1,1],[0,0],[1,0],[1,1]] y = [1,3] -> [1 ,1 ,1 ,3 ,3 ,3 ] """ # assert len(masks) == self.mask_size x_prim = np.repeat(x, len(masks), axis=0) y_prim = np.repeat(y, len(masks), axis=0) masks_prim = np.tile(masks, (len(x), 1)) x_prim *= masks_prim # MASKING # assert len(x_prim) == self.batch_size return x_prim, masks_prim, y_prim def named_logs(self, model, logs, mode="train"): result = {} try: iterator = iter(logs) except TypeError: logs = [logs] metricNames = (mode + "_" + i for i in model.metrics_names) for l in zip(metricNames, logs): result[l[0]] = l[1] return result def compile_model(self, loss_per_sample, combine_losses, combine_mask_losses, metrics=None): self.mask_loss_combine_function = combine_mask_losses if self.add_mopt_perf_metric is True: if metrics is None: metrics = [self.get_mopt_perf_metric()] else: metrics.append(self.get_mopt_perf_metric()) def logging_loss_function(y_true, y_pred): losses = loss_per_sample(y_true, y_pred) self.losses_per_sample = losses return combine_losses(losses) self.model.compile(loss=logging_loss_function, optimizer='nadam', metrics=metrics, run_eagerly=True) if self.tf_logs != "": log_path = './logs' self.tb_clbk = TensorBoard(self.tf_logs) self.tb_clbk.set_model(self.model) def get_per_mask_loss(self, used_target_shape=None): if used_target_shape is None: used_target_shape = (self.x_batch_size, self.mask_batch_size) losses = tf.reshape(self.losses_per_sample, used_target_shape) # losses = np.apply_along_axis(self.mask_loss_combine_function,0,losses) losses = self.mask_loss_combine_function(losses) return losses def get_per_mask_loss_with_custom_batch(self, losses, new_x_batch_size, new_mask_batch_size): losses = np.reshape(losses, newshape=(new_x_batch_size, new_mask_batch_size)) losses = np.apply_along_axis(self.mask_loss_combine_function, 0, losses) return losses def train_one(self, x, masks, y): x_prim, masks_prim, y_prim = self.create_batch(x, masks, y) curr_loss = self.model.train_on_batch(x=[x_prim, masks_prim], y=y_prim) self.tr_loss_history.append(curr_loss) self.epoch_counter += 1 if self.tf_logs != "": self.tb_clbk.on_epoch_end(self.epoch_counter, self.named_logs(self.model, curr_loss)) return x_prim, masks_prim, y_prim def validate_one(self, x, masks, y): x_prim, masks_prim, y_prim = self.create_batch(x, masks, y) # print("ON: x: "+str(x_prim)) # print("ON: m: "+str(masks_prim)) # print("ON: y_true: "+str(y_prim)) curr_loss = self.model.test_on_batch(x=[x_prim, masks_prim], y=y_prim) self.te_loss_history.append(curr_loss) if self.tf_logs != "": self.tb_clbk.on_epoch_end(self.epoch_counter, self.named_logs(self.model, curr_loss, "val")) if self.useEarlyStopping is True: self.check_ES() # print("ON y_pred:" +str(np.squeeze(y_pred))) # print("ON loss per sample:" +str(np.squeeze(self.losses_per_sample.numpy()))) return x_prim, masks_prim, y_prim, self.losses_per_sample.numpy() def test_one(self, x, masks, y): x_prim, masks_prim, y_prim = self.create_batch(x, masks, y) curr_loss = self.model.test_on_batch(x=[x_prim, masks_prim], y=y_prim) self.te_loss_history.append(curr_loss) return curr_loss def get_mopt_perf_metric(self): # used_target_shape = (self.x_batch_size,self.mask_batch_size) def m_opt_loss(y_pred, y_true): if (self.losses_per_sample.shape[0] % self.mask_batch_size != 0): # when testing happens, not used anymore return 0.0 else: # for training and validation batches losses = tf.reshape(self.losses_per_sample, (-1, 
self.mask_batch_size)) self.last_m_opt_perf = np.mean(losses[:, int(0.5 * self.mask_batch_size)]) return self.last_m_opt_perf return m_opt_loss def set_early_stopping_params(self, starting_epoch, patience_batches=800, minimize=True): self.ES_patience = patience_batches self.ES_minimize = minimize if (minimize is True): self.ES_best_perf = 1000000.0 else: self.ES_best_perf = -1000000.0 self.ES_best_epoch = starting_epoch self.ES_stop_training = False self.ES_start_epoch = starting_epoch self.ES_best_weights = None return def check_ES(self, ): if self.epoch_counter >= self.ES_start_epoch: if self.ES_minimize is True: if self.last_m_opt_perf < self.ES_best_perf: self.ES_best_perf = self.last_m_opt_perf self.ES_best_epoch = self.epoch_counter self.ES_best_weights = self.model.get_weights() else: if self.last_m_opt_perf > self.ES_best_perf: self.ES_best_perf = self.last_m_opt_perf self.ES_best_epoch = self.epoch_counter self.ES_best_weights = self.model.get_weights() # print("ES patience left: "+str(self.epoch_counter-self.ES_best_epoch)) if (self.epoch_counter - self.ES_best_epoch > self.ES_patience): self.ES_stop_training = True
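# A short sketch of how the early-stopping bookkeeping above is meant to be
# consumed by an outer loop: configure it once, let validate_one() update it
# through check_ES(), then stop and restore the best weights. The operator
# object and the batch-producing helpers are assumptions for illustration.
operator.set_early_stopping_params(starting_epoch=500, patience_batches=800, minimize=True)

while not operator.ES_stop_training:
    x, masks, y = next_training_batch()              # hypothetical data source
    operator.train_one(x, masks, y)
    x_val, masks_val, y_val = next_validation_batch()
    operator.validate_one(x_val, masks_val, y_val)   # updates ES_* via check_ES()

# Roll back to the best-performing weights seen within the patience window.
if operator.ES_best_weights is not None:
    operator.model.set_weights(operator.ES_best_weights)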
    test_loss, test_accuracy = model.test_on_batch(x_batch_test, y_batch_test)
    testing_acc.append(test_accuracy)
    testing_loss.append(test_loss)

    train_logs_dict = get_logs(train_logs_dict, epoch, model, x_train, y_train)
    test_logs_dict = get_logs(test_logs_dict, epoch, model, x_test, y_test)

    logs = {
        'acc': np.mean(training_acc),
        'loss': np.mean(training_loss),
        'val_loss': np.mean(testing_loss),
        'val_acc': np.mean(testing_acc)
    }
    modelcheckpoint.on_epoch_end(epoch, logs)
    earlystop.on_epoch_end(epoch, logs)
    reduce_lr.on_epoch_end(epoch, logs)
    tensorboard.on_epoch_end(epoch, logs)

    print("accuracy: {}, loss: {}, validation accuracy: {}, validation loss: {}".format(
        np.mean(training_acc), np.mean(training_loss),
        np.mean(testing_acc), np.mean(testing_loss)))

    if model.stop_training:
        break

earlystop.on_train_end()
modelcheckpoint.on_train_end()
reduce_lr.on_train_end()
tensorboard.on_train_end()

# confusion matrix for the training set
y_train_pred = model.predict(x_train).argmax(axis=1)
conf_mat = confusion_matrix(y_train, y_train_pred)
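# The loop above presupposes that the standard Keras callbacks were bound to the
# model before training. A minimal sketch of that setup (placeholder filenames
# and hyperparameters); set_model() plus on_train_begin() is what makes the
# manual on_epoch_end() calls work outside fit(), and EarlyStopping is what sets
# the model.stop_training flag checked above.
from tensorflow.keras.callbacks import (EarlyStopping, ModelCheckpoint,
                                        ReduceLROnPlateau, TensorBoard)

modelcheckpoint = ModelCheckpoint('best_model.h5', monitor='val_loss', save_best_only=True)
earlystop = EarlyStopping(monitor='val_loss', patience=5)
reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=3)
tensorboard = TensorBoard(log_dir='./logs')

for cb in (modelcheckpoint, earlystop, reduce_lr, tensorboard):
    cb.set_model(model)
earlystop.on_train_begin()   # initialises the wait/best counters used by on_epoch_end()
reduce_lr.on_train_begin()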