def _make_fit_args(self, *args, **kwargs):
    assert 'initial_epoch' not in kwargs
    logger = logging.getLogger()
    # add callbacks for periodic checkpointing
    # (HistoryLogger and EpochCounter are project-local callbacks)
    if 'callbacks' not in kwargs:
        kwargs['callbacks'] = []
    hist_logger = HistoryLogger(period=self.period,
                                history_path=self.history_file,
                                recovered_history=self.history)
    epoch_counter = EpochCounter(counter_path=self.epoch_num_file)
    model_checkpoint = ModelCheckpoint(self.to_path,
                                       save_best_only=self.save_best_only,
                                       verbose=True, period=self.period,
                                       monitor=self.monitor)
    if 'validation_data' in kwargs:
        # get batch size
        if 'validation_batch_size' in kwargs:
            batch_size = kwargs['validation_batch_size']
        elif 'batch_size' in kwargs:
            batch_size = kwargs['batch_size']
        else:
            batch_size = None
        # evaluate the recovered model so ModelCheckpoint starts from the
        # recovered best score instead of +/-inf
        score = self._monitor_score(*kwargs['validation_data'], batch_size=batch_size)
        if score is not None:
            model_checkpoint.best = score
            logger.info(f'checkpoint score: {model_checkpoint.best}')
    kwargs['callbacks'] += [hist_logger, epoch_counter, model_checkpoint]
    # Warn the user if training is already complete.
    if 'epochs' in kwargs and self.initial_epoch >= kwargs['epochs']:
        epochs = kwargs['epochs']
        logger.warning(
            f'You want to train for {epochs} epochs but {self.initial_epoch} '
            f'epochs have already been completed; nothing to do.'
        )
    return args, kwargs
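# A minimal usage sketch for _make_fit_args above, assuming it lives on a
# resumable-training wrapper around a Keras model. The ResumableModel name
# and its self.model / self.initial_epoch attributes are illustrative
# assumptions, not part of the snippet itself.
class ResumableModel:
    def fit(self, *args, **kwargs):
        # inject the checkpointing callbacks, then hand off to Keras,
        # resuming from the epoch recovered on disk
        args, kwargs = self._make_fit_args(*args, **kwargs)
        return self.model.fit(*args, initial_epoch=self.initial_epoch, **kwargs)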
if iii < 1000:
    callbacks_list = [checkpoint, tensorboard_callback]
else:
    callbacks_list = [checkpoint]
# fit network
if os.path.exists(checkpoint_path):
    print("checkpoint_path exists, loading checkpoint")
    model = load_model(checkpoint_path)
    score = model.evaluate(train_x_all, train_y_all, batch_size=batch_size)
    # re-seed best-so-far from the restored model's loss so save_best_only
    # does not overwrite the checkpoint with a worse post-restart epoch
    checkpoint = ModelCheckpoint(checkpoint_path, monitor='mean_squared_error',
                                 verbose=1, save_best_only=True, mode='min')
    checkpoint.best = score[0]
    if iii < 1000:
        callbacks_list = [checkpoint, tensorboard_callback]
    else:
        callbacks_list = [checkpoint]
print("This is iteration {0}".format(iii))
print("Start Time: {0}".format(start / 1e6))
initial_epoch = final_epoch
final_epoch = initial_epoch + training_epochs
model_fit = model.fit(train_x_all, train_y_all,
                      initial_epoch=initial_epoch, epochs=final_epoch,
                      batch_size=batch_size, verbose=verbose,
                      callbacks=callbacks_list)
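# Self-contained sketch of the resume pattern above, assuming tf.keras and a
# model compiled with loss='mean_squared_error'. make_resume_checkpoint is a
# hypothetical helper name, not part of the original loop.
import os
from tensorflow.keras.models import load_model
from tensorflow.keras.callbacks import ModelCheckpoint

def make_resume_checkpoint(checkpoint_path, x, y, batch_size=None):
    cb = ModelCheckpoint(checkpoint_path, monitor='mean_squared_error',
                         verbose=1, save_best_only=True, mode='min')
    if os.path.exists(checkpoint_path):
        model = load_model(checkpoint_path)
        # evaluate() returns [loss, *metrics] when metrics are compiled;
        # seeding cb.best keeps save_best_only from regressing after a restart
        cb.best = model.evaluate(x, y, batch_size=batch_size)[0]
    return cb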
# Assumed imports for this TF 1.x Colab TPU snippet (tf.contrib was removed in TF 2.x);
# RandomLayer and VoiceGeneratorTargetTpu are project-local classes.
import gc
import os
import tensorflow as tf
from tensorflow.contrib.tpu.python.tpu import keras_support
from tensorflow.keras import backend as K
from tensorflow.keras.callbacks import EarlyStopping, LearningRateScheduler, ModelCheckpoint
from tensorflow.keras.models import Sequential, load_model
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.utils import CustomObjectScope


def target_train_tpu_main(gen_targets_dir, model_file_path,
                          early_stopping_patience=None, length=None,
                          batch_size=1, period=1, retrain_file=None,
                          retrain_do_compile=False,
                          base_model_file_path='target_common.h5',
                          optimizer=Adam(), optimizer_lr=0.001, epochs=100000):
    gc.collect()
    with CustomObjectScope({'RandomLayer': RandomLayer}):
        if retrain_file is None:
            gen = VoiceGeneratorTargetTpu(gen_targets_dir, 0.1, batch_size,
                                          length, train=True)
            shape0 = gen[0][0].shape[1]
            val_gen = VoiceGeneratorTargetTpu(gen_targets_dir, 0.1,
                                              train=False, max_size=shape0)
            # rebuild the base model with input and reshape sizes matched
            # to the generator's sequence length
            model = load_model(base_model_file_path)
            config = model.get_config()
            config['layers'][0]['config']['batch_input_shape'] = (None, shape0, 139)
            config['layers'][3]['config']['rate'] = 0.1
            config['layers'][6]['config']['target_shape'] = (shape0 * 2, 64)
            config['layers'][8]['config']['rate'] = 0.1
            config['layers'][11]['config']['target_shape'] = (shape0 * 4, 32)
            config['layers'][13]['config']['rate'] = 0.1
            config['layers'][16]['config']['target_shape'] = (shape0 * 8, 16)
            config['layers'][18]['config']['rate'] = 0.1
            model = Sequential.from_config(config)
            model.load_weights(base_model_file_path, by_name=True)
            model.summary()
            model.compile(loss='mse', optimizer=optimizer)
            baseline = None
        else:
            model = load_model(retrain_file)
            if retrain_do_compile:
                model.compile(loss='mse', optimizer=optimizer)
            config = model.get_config()
            shape0 = config['layers'][0]['config']['batch_input_shape'][1]
            gen = VoiceGeneratorTargetTpu(gen_targets_dir, 0.1, batch_size,
                                          length, train=True, max_size=shape0)
            val_gen = VoiceGeneratorTargetTpu(gen_targets_dir, 0.1,
                                              train=False, max_size=shape0)
            # seed the checkpoint and early-stopping baseline from the
            # restored model's validation score
            baseline = model.test_on_batch(val_gen[0][0], val_gen[0][1])

        tpu_grpc_url = 'grpc://' + os.environ['COLAB_TPU_ADDR']
        tpu_cluster_resolver = tf.contrib.cluster_resolver.TPUClusterResolver(
            tpu_grpc_url)
        strategy = keras_support.TPUDistributionStrategy(tpu_cluster_resolver)
        model = tf.contrib.tpu.keras_to_tpu_model(model, strategy=strategy)

        cp = ModelCheckpoint(filepath=model_file_path, monitor='val_loss',
                             save_best_only=True, period=period)
        if baseline is not None:
            cp.best = baseline

        def lr_scheduler(epoch):
            return optimizer_lr

        scheduler = LearningRateScheduler(lr_scheduler)
        if early_stopping_patience is not None:
            es = EarlyStopping(monitor='val_loss',
                               patience=early_stopping_patience,
                               verbose=0, mode='auto', baseline=baseline)
            callbacks = [es, cp, scheduler]
        else:
            callbacks = [cp, scheduler]

        model.fit_generator(gen, shuffle=True, epochs=epochs, verbose=1,
                            callbacks=callbacks, validation_data=val_gen)

    K.clear_session()
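# Hedged usage sketch for target_train_tpu_main above. It assumes a Colab TPU
# runtime (COLAB_TPU_ADDR set) and the project's VoiceGeneratorTargetTpu data
# layout; the paths and hyperparameters below are placeholders.
if __name__ == '__main__':
    # fresh run: adapt the common base model, then train with early stopping
    target_train_tpu_main('gen_targets/', 'target_model.h5',
                          early_stopping_patience=10, batch_size=8, period=5)
    # resumed run: reload the saved model and recompile with a fresh optimizer;
    # the baseline seeding keeps the checkpoint from regressing
    target_train_tpu_main('gen_targets/', 'target_model.h5',
                          retrain_file='target_model.h5',
                          retrain_do_compile=True)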