def __init__(self, resolution: str = "lr"): super().__init__() if resolution == "lr": self.DATASET_PATH = self.DATASET_PATH_LR self.image_shape = parseConfigsFile( ["preprocess"])["image_shape_low_resolution"] else: self.DATASET_PATH = self.DATASET_PATH_HR self.image_shape = parseConfigsFile( ["preprocess"])["image_shape_high_resolution"] self.image_shape = self.image_shape[:2] self._number_of_classes = 5750 self._logger = super().get_logger()
def __init__(
    self,
    remove_overlaps: bool = True,
    sample_ids: bool = False,
):
    super().__init__()
    self._remove_overlaps = remove_overlaps
    self._sample_ids = sample_ids
    if self._sample_ids:
        self._dataset_shape = "iics"
    else:
        self._dataset_shape = "iic"
    if self._remove_overlaps:
        self._number_of_train_classes = 8069
        self._number_of_test_classes = 460
    else:
        self._number_of_train_classes = 8631
        self._number_of_test_classes = 500
    self._serialized_features = {
        "class_id": tf.io.FixedLenFeature([], tf.string),
        "sample_id": tf.io.FixedLenFeature([], tf.string),
        "image_low_resolution": tf.io.FixedLenFeature([], tf.string),
        "image_high_resolution": tf.io.FixedLenFeature([], tf.string),
    }
    self._dataset_settings = parseConfigsFile(["dataset"])["vggface2_lr"]
    self._dataset_paths = {
        "train": self._dataset_settings["train_path"],
        "test": self._dataset_settings["test_path"],
        "both": [
            self._dataset_settings["train_path"],
            self._dataset_settings["test_path"],
        ],
    }
    self._logger = super().get_logger()
    self._class_pairs = super()._get_class_pairs("VGGFace2_LR", "concatenated")
    super().set_class_pairs(self._class_pairs)
    self._dataset = None
    self._get_concatenated_dataset()
    self._dataset_size = self.get_dataset_size(self._dataset)
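# Illustrative parsing sketch (not taken from the repository): shows how a
# record serialized with the `_serialized_features` spec above could be
# deserialized. The helper name `_parse_example_sketch` is hypothetical.
import tensorflow as tf


def _parse_example_sketch(serialized_example, serialized_features):
    # Decode one tf.train.Example with the FixedLenFeature spec, then decode
    # the PNG-encoded low- and high-resolution images.
    parsed = tf.io.parse_single_example(serialized_example, serialized_features)
    low_resolution = tf.image.decode_png(parsed["image_low_resolution"], channels=3)
    high_resolution = tf.image.decode_png(parsed["image_high_resolution"], channels=3)
    return low_resolution, high_resolution, parsed["class_id"]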
def main(): """Main valiation function.""" timing = TimingLogger() timing.start() network_settings, train_settings, preprocess_settings = parseConfigsFile( ["network", "train", "preprocess"]) strategy = tf.distribute.MirroredStrategy() BATCH_SIZE = train_settings["batch_size"] * strategy.num_replicas_in_sync LOGGER.info(" -------- Importing Datasets --------") vgg_dataset = VggFace2(mode="concatenated") synthetic_num_classes = vgg_dataset.get_number_of_classes() validation_dataset = _instantiate_dataset(strategy, BATCH_SIZE) # synthetic_num_classes = 8529 LOGGER.info(" -------- Creating Models and Optimizers --------") srfr_model = _instantiate_models(strategy, network_settings, preprocess_settings, synthetic_num_classes) checkpoint, manager = _create_checkpoint_and_manager(srfr_model) test_summary_writer = _create_summary_writer() LOGGER.info(" -------- Starting Validation --------") with strategy.scope(): validate_model_use_case = ValidateModelUseCase(strategy, test_summary_writer, TimingLogger(), LOGGER) for model_checkpoint in manager.checkpoints: try: checkpoint.restore(model_checkpoint) except: continue LOGGER.info(f" Restored from {model_checkpoint}") validate_model_use_case.execute(srfr_model, validation_dataset, BATCH_SIZE, checkpoint)
import logging
from pathlib import Path

import cv2
import tensorflow as tf

from utils.input_data import InputData, parseConfigsFile
from utils.timing import TimingLogger

logging.basicConfig(filename="vgg_to_tfrecords.txt", level=logging.INFO)
LOGGER = logging.getLogger(__name__)

timing = TimingLogger()
timing.start()
LOGGER.info("--- Setting Functions ---")

SHAPE = tuple(
    parseConfigsFile(["preprocess"])["image_shape_low_resolution"][:2])

BASE_DATA_DIR = Path("/datasets/VGGFace2_LR/Images")
BASE_OUTPUT_PATH = Path("/workspace/datasets/VGGFace2")


def _reduce_resolution(high_resolution_image):
    # Downscale with bicubic interpolation and convert both images from BGR
    # (OpenCV default) to RGB before PNG-encoding them.
    low_resolution_image = cv2.cvtColor(
        cv2.resize(high_resolution_image, SHAPE, interpolation=cv2.INTER_CUBIC),
        cv2.COLOR_BGR2RGB,
    )
    high_resolution_image = cv2.cvtColor(high_resolution_image,
                                         cv2.COLOR_BGR2RGB)
    return (
        tf.image.encode_png(low_resolution_image),
        tf.image.encode_png(high_resolution_image),
    )
def _get_training_settings():
    return parseConfigsFile(["train"])
def main(): """Main training function.""" timing = TimingLogger() timing.start() network_settings, train_settings, preprocess_settings = parseConfigsFile( ['network', 'train', 'preprocess']) strategy = tf.distribute.MirroredStrategy() BATCH_SIZE = train_settings['batch_size'] * strategy.num_replicas_in_sync temp_folder = Path.cwd().joinpath('temp', 'synthetic_ds') LOGGER.info(' -------- Importing Datasets --------') vgg_dataset = VggFace2(mode='concatenated') synthetic_dataset = vgg_dataset.get_dataset() synthetic_dataset = vgg_dataset.augment_dataset() synthetic_dataset = vgg_dataset.normalize_dataset() synthetic_dataset = synthetic_dataset.cache(str(temp_folder)) #synthetic_dataset_len = vgg_dataset.get_dataset_size() synthetic_dataset_len = 100_000 synthetic_num_classes = vgg_dataset.get_number_of_classes() synthetic_dataset = synthetic_dataset.shuffle( buffer_size=2_048).repeat().batch(BATCH_SIZE).prefetch(1) lfw_path = Path.cwd().joinpath('temp', 'lfw') lfw_dataset = LFW() (left_pairs, left_aug_pairs, right_pairs, right_aug_pairs, is_same_list) = lfw_dataset.get_dataset() left_pairs = left_pairs.batch(BATCH_SIZE).cache( str(lfw_path.joinpath('left'))).prefetch(AUTOTUNE) left_aug_pairs = left_aug_pairs.batch(BATCH_SIZE).cache( str(lfw_path.joinpath('left_aug'))).prefetch(AUTOTUNE) right_pairs = right_pairs.batch(BATCH_SIZE).cache( str(lfw_path.joinpath('right'))).prefetch(AUTOTUNE) right_aug_pairs = right_aug_pairs.batch(BATCH_SIZE).cache( str(lfw_path.joinpath('right_aug'))).prefetch(AUTOTUNE) # Using `distribute_dataset` to distribute the batches across the GPUs synthetic_dataset = strategy.experimental_distribute_dataset( synthetic_dataset) left_pairs = strategy.experimental_distribute_dataset(left_pairs) left_aug_pairs = strategy.experimental_distribute_dataset(left_aug_pairs) right_pairs = strategy.experimental_distribute_dataset(right_pairs) right_aug_pairs = strategy.experimental_distribute_dataset(right_aug_pairs) LOGGER.info(' -------- Creating Models and Optimizers --------') EPOCHS = generate_num_epochs( train_settings['iterations'], synthetic_dataset_len, BATCH_SIZE, ) with strategy.scope(): srfr_model = SRFR( num_filters=network_settings['num_filters'], depth=50, categories=network_settings['embedding_size'], num_gc=network_settings['gc'], num_blocks=network_settings['num_blocks'], residual_scailing=network_settings['residual_scailing'], training=True, input_shape=preprocess_settings['image_shape_low_resolution'], num_classes_syn=synthetic_num_classes, ) sr_discriminator_model = DiscriminatorNetwork() srfr_optimizer = NovoGrad( learning_rate=train_settings['learning_rate'], beta_1=train_settings['momentum'], beta_2=train_settings['beta_2'], weight_decay=train_settings['weight_decay'], name='novograd_srfr', ) srfr_optimizer = mixed_precision.LossScaleOptimizer( srfr_optimizer, loss_scale='dynamic', ) discriminator_optimizer = NovoGrad( learning_rate=train_settings['learning_rate'], beta_1=train_settings['momentum'], beta_2=train_settings['beta_2'], weight_decay=train_settings['weight_decay'], name='novograd_discriminator', ) discriminator_optimizer = mixed_precision.LossScaleOptimizer( discriminator_optimizer, loss_scale='dynamic') train_loss = partial( strategy.reduce, reduce_op=tf.distribute.ReduceOp.MEAN, axis=0, ) checkpoint = tf.train.Checkpoint( epoch=tf.Variable(1), step=tf.Variable(1), srfr_model=srfr_model, sr_discriminator_model=sr_discriminator_model, srfr_optimizer=srfr_optimizer, discriminator_optimizer=discriminator_optimizer, ) manager = 
tf.train.CheckpointManager(checkpoint, directory='./training_checkpoints', max_to_keep=5) current_time = datetime.datetime.now().strftime("%Y%m%d-%H%M%S") train_summary_writer = tf.summary.create_file_writer( str(Path.cwd().joinpath('logs', 'gradient_tape', current_time, 'train')), ) test_summary_writer = tf.summary.create_file_writer( str(Path.cwd().joinpath('logs', 'gradient_tape', current_time, 'test')), ) LOGGER.info(' -------- Starting Training --------') with strategy.scope(): checkpoint.restore(manager.latest_checkpoint) if manager.latest_checkpoint: LOGGER.info(f' Restored from {manager.latest_checkpoint}') else: LOGGER.info(' Initializing from scratch.') for epoch in range(int(checkpoint.epoch), EPOCHS + 1): timing.start(Train.__name__) LOGGER.info(f' Start of epoch {epoch}') train = Train(strategy, srfr_model, srfr_optimizer, sr_discriminator_model, discriminator_optimizer, train_summary_writer, test_summary_writer, checkpoint, manager) srfr_loss, discriminator_loss = train.train_srfr_model( BATCH_SIZE, train_loss, synthetic_dataset, synthetic_num_classes, left_pairs, left_aug_pairs, right_pairs, right_aug_pairs, is_same_list, sr_weight=train_settings['super_resolution_weight'], scale=train_settings['scale'], margin=train_settings['angular_margin'], # natural_ds, # num_classes_natural, ) elapsed_time = timing.end(Train.__name__, True) with train_summary_writer.as_default(): tf.summary.scalar('srfr_loss_per_epoch', srfr_loss, step=epoch) tf.summary.scalar( 'discriminator_loss_per_epoch', discriminator_loss, step=epoch, ) tf.summary.scalar('training_time_per_epoch', elapsed_time, step=epoch) LOGGER.info((f' Epoch {epoch}, SRFR Loss: {srfr_loss:.3f},' f' Discriminator Loss: {discriminator_loss:.3f}')) train.save_model() checkpoint.epoch.assign_add(1)
def train(self):
    """Main training function."""
    self.timing.start()
    dimensions = self._create_dimensions()
    hyperparameters = self._create_hyprparameters_domain()
    with tf.summary.create_file_writer(
            str(Path.cwd().joinpath("output", "logs",
                                    "hparam_tuning"))).as_default():
        hp.hparams_config(
            hparams=hyperparameters,
            metrics=[hp.Metric("accuracy", display_name="Accuracy")],
        )

    (
        network_settings,
        train_settings,
        preprocess_settings,
    ) = parseConfigsFile(["network", "train", "preprocess"])

    BATCH_SIZE = train_settings["batch_size"] * self.strategy.num_replicas_in_sync

    (
        synthetic_train,
        synthetic_test,
        synthetic_dataset_len,
        synthetic_num_classes,
    ) = self._get_datasets(BATCH_SIZE)

    srfr_model, discriminator_model = self._instantiate_models(
        synthetic_num_classes, network_settings, preprocess_settings)

    train_model_sr_only_use_case = TrainModelSrOnlyUseCase(
        self.strategy,
        TimingLogger(),
        self.logger,
        BATCH_SIZE,
        synthetic_dataset_len,
    )

    _training = partial(
        self._fitness_function,
        train_model_use_case=train_model_sr_only_use_case,
        srfr_model=srfr_model,
        discriminator_model=discriminator_model,
        batch_size=BATCH_SIZE,
        synthetic_train=synthetic_train,
        synthetic_test=synthetic_test,
        num_classes=synthetic_num_classes,
        train_settings=train_settings,
        hparams=hyperparameters,
    )
    _train = use_named_args(dimensions=dimensions)(_training)

    initial_parameters = [0.0002, 0.9, 1.0, 0.005, 0.01]

    search_result = gp_minimize(
        func=_train,
        dimensions=dimensions,
        acq_func="EI",
        n_calls=20,
        x0=initial_parameters,
    )
    self.logger.info(f"Best hyperparameters: {search_result.x}")
def main(): """Main training function.""" timing = TimingLogger() timing.start() strategy = tf.distribute.MirroredStrategy() # strategy = tf.distribute.OneDeviceStrategy(device="/gpu:0") dimensions = _create_dimensions() hyperparameters = _create_hyprparameters_domain() with tf.summary.create_file_writer( str(Path.cwd().joinpath("output", "logs", "hparam_tuning"))).as_default(): hp.hparams_config( hparams=hyperparameters, metrics=[hp.Metric("accuracy", display_name="Accuracy")], ) ( network_settings, train_settings, preprocess_settings, ) = parseConfigsFile(["network", "train", "preprocess"]) BATCH_SIZE = train_settings["batch_size"] * strategy.num_replicas_in_sync ( synthetic_train, synthetic_test, synthetic_dataset_len, synthetic_num_classes, ) = _get_datasets(BATCH_SIZE, strategy) srfr_model, discriminator_model = _instantiate_models( strategy, synthetic_num_classes, network_settings, preprocess_settings) train_model_use_case = TrainModelJointLearnUseCase( strategy, TimingLogger(), LOGGER, BATCH_SIZE, synthetic_dataset_len, ) _training = partial( _instantiate_training, strategy=strategy, train_model_use_case=train_model_use_case, srfr_model=srfr_model, discriminator_model=discriminator_model, batch_size=BATCH_SIZE, synthetic_train=synthetic_train, synthetic_test=synthetic_test, num_classes=synthetic_num_classes, train_settings=train_settings, hparams=hyperparameters, ) _train = use_named_args(dimensions=dimensions)(_training) search_result = gp_minimize(func=_train, dimensions=dimensions, acq_func="EI", n_calls=20) LOGGER.info(f"Best hyperparameters: {search_result.x}")
from functools import wraps

import tensorflow as tf
from tensorflow import keras
from tensorflow.python.keras.utils.losses_utils import reduce_weighted_loss
from tensorflow.keras.losses import (
    BinaryCrossentropy,
    CategoricalCrossentropy,
    MAE,
    MeanAbsoluteError,
    MSE,
    MeanSquaredError,
)

from utils.input_data import parseConfigsFile

_BATCH_SIZE_PER_SAMPLE = parseConfigsFile(['train'])['batch_size']


def distributed_sum_over_batch_size(batch_size: int = _BATCH_SIZE_PER_SAMPLE):
    def _sum_over_batch_size(function):
        @wraps(function)
        def wrapper(*args, **kwargs):
            output_tensor = function(*args, **kwargs)
            return tf.nn.compute_average_loss(output_tensor,
                                              global_batch_size=batch_size)

        return wrapper

    return _sum_over_batch_size
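# Illustrative usage sketch (not taken from the repository): the decorator
# above wraps an element-wise loss so that per-sample values are reduced with
# `tf.nn.compute_average_loss`, which divides by the *global* batch size as
# required under a tf.distribute strategy. `example_l1_loss` and the batch
# size of 64 are assumptions for the example only.
import tensorflow as tf


@distributed_sum_over_batch_size(batch_size=64)
def example_l1_loss(y_true, y_pred):
    # One L1 value per sample; the decorator then sums them and divides by
    # the global batch size.
    return tf.reduce_mean(tf.abs(y_true - y_pred), axis=[1, 2, 3])


high_resolution_batch = tf.random.uniform((4, 8, 8, 3))
super_resolved_batch = tf.random.uniform((4, 8, 8, 3))
loss_value = example_l1_loss(high_resolution_batch, super_resolved_batch)  # scalar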
import logging

import cv2
import tensorflow as tf

from utils.timing import TimingLogger
from utils.input_data import InputData, parseConfigsFile

logging.basicConfig(filename='vgg_to_tfrecords.txt', level=logging.INFO)
LOGGER = logging.getLogger(__name__)

timing = TimingLogger()
timing.start()
LOGGER.info('--- Setting Functions ---')

shape = tuple(
    parseConfigsFile(['preprocess'])['image_shape_low_resolution'][:2])


def _reduce_resolution(high_resolution_image):
    # Downscale with bicubic interpolation and convert both images from BGR
    # (OpenCV default) to RGB before PNG-encoding them.
    low_resolution_image = cv2.cvtColor(
        cv2.resize(high_resolution_image, shape, interpolation=cv2.INTER_CUBIC),
        cv2.COLOR_BGR2RGB)
    high_resolution_image = cv2.cvtColor(high_resolution_image,
                                         cv2.COLOR_BGR2RGB)
    return tf.image.encode_png(low_resolution_image), tf.image.encode_png(
        high_resolution_image)


def _bytes_feature(value):
    # Standard TFRecord helper: wrap raw bytes (or an eager string tensor)
    # into a tf.train.Feature.
    if isinstance(value, type(tf.constant(0))):
        value = value.numpy()
    return tf.train.Feature(bytes_list=tf.train.BytesList(value=[value]))
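# Illustrative usage sketch (not taken from the repository): how the helpers
# above could serialize one image pair into a TFRecord. The class id, sample
# id, output file name, and the blank input image are placeholders.
import numpy as np
import tensorflow as tf

placeholder_image = np.zeros((112, 112, 3), dtype=np.uint8)
low_res_png, high_res_png = _reduce_resolution(placeholder_image)

example = tf.train.Example(features=tf.train.Features(feature={
    "class_id": _bytes_feature(b"n000001"),
    "sample_id": _bytes_feature(b"0001_01"),
    "image_low_resolution": _bytes_feature(low_res_png),
    "image_high_resolution": _bytes_feature(high_res_png),
}))

with tf.io.TFRecordWriter("example.tfrecord") as writer:
    writer.write(example.SerializeToString())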
def __init__(self):
    self._logger = logging.getLogger(__name__)
    self._preprocess_settigs = parseConfigsFile(["preprocess"])