Code example #1
    def build_model(self, weights=None, is_training=None):
        outputs = self.model_outputs(self._input_layer, is_training)
        keras_model = tf.keras.models.Model(inputs=self._input_layer,
                                            outputs=outputs,
                                            name='yolo_v4')
        if weights:
            logger.info('Loaded pretrained weights from {}'.format(weights))
            keras_model.load_weights(weights, by_name=True)
        return keras_model
Code example #2
def evaluate(test_step, metric, test_dist_dataset, num_batches, print_freq):
    """Runs evaluation steps and aggregate metrics"""
    timer = Timer()
    timer.tic()

    logger.info('Testing...')
    for batch_idx, x in enumerate(test_dist_dataset):
        labels, outputs = test_step(x)
        metric.update_state(labels, outputs)

        if batch_idx % print_freq == 0:
            time = timer.toc(average=False)
            logger.info('Predict for batch: {}/{} Time: {:.3f} sec'.format(
                batch_idx, num_batches, time))
            timer.tic()

    logger.info('Total time: {:.3f} sec'.format(timer.total_time))

    timer.reset()

    logger.info('Evaluating predictions...')
    timer.tic()
    result = metric.result()
    timer.toc(average=False)
    logger.info('Total time: {:.3f} sec'.format(timer.total_time))

    return result
Code example #3
def train(train_step, train_dist_dataset, initial_epoch, initial_step, epochs,
          steps_per_epoch, checkpoint_manager, compression_ctrl, log_dir,
          optimizer, print_freq):

    train_summary_writer = SummaryWriter(log_dir, 'train')
    compression_summary_writer = SummaryWriter(log_dir, 'compression')

    timer = Timer()
    timer.tic()

    logger.info('Training...')
    for epoch in range(initial_epoch, epochs):
        logger.info('Epoch: {}/{}'.format(epoch, epochs))
        compression_ctrl.scheduler.epoch_step(epoch)

        for step, x in enumerate(train_dist_dataset):
            if epoch == initial_epoch and step < initial_step % steps_per_epoch:
                continue

            checkpoint_manager.checkpoint.step.assign_add(1)

            if step == steps_per_epoch:
                save_path = checkpoint_manager.save()
                logger.info('Saved checkpoint for epoch={}: {}'.format(
                    epoch, save_path))
                break

            compression_ctrl.scheduler.step()
            train_loss = train_step(x)
            train_metric_result = tf.nest.map_structure(
                lambda s: s.numpy().astype(float), train_loss)

            if np.isnan(train_metric_result['total_loss']):
                raise ValueError('total loss is NaN')

            train_metric_result.update(
                {'learning_rate': optimizer.lr(optimizer.iterations).numpy()})

            train_summary_writer(metrics=train_metric_result,
                                 step=optimizer.iterations.numpy())

            if step % print_freq == 0:
                time = timer.toc(average=False)
                logger.info('Step: {}/{} Time: {:.3f} sec'.format(
                    step, steps_per_epoch, time))
                logger.info('Training metric = {}'.format(train_metric_result))
                timer.tic()

        statistics = compression_ctrl.statistics()
        print_statistics(statistics)
        statistics = {
            'compression/statistics/' + key: value
            for key, value in statistics.items()
            if isinstance(value, (int, float))
        }
        compression_summary_writer(metrics=statistics,
                                   step=optimizer.iterations.numpy())

    train_summary_writer.close()
    compression_summary_writer.close()
Code example #4
File: main.py  Project: yiweichen04/nncf
def export(config):
    model, model_params = get_model(
        config.model,
        input_shape=config.get('input_info', {}).get('sample_size', None),
        num_classes=config.get('num_classes', 1000),
        pretrained=config.get('pretrained', False),
        weights=config.get('weights', None))
    model = model(**model_params)
    compression_ctrl, compress_model = create_compressed_model(
        model, config.nncf_config)

    metrics = [
        tf.keras.metrics.CategoricalAccuracy(name='acc@1'),
        tf.keras.metrics.TopKCategoricalAccuracy(k=5, name='acc@5')
    ]
    loss_obj = tf.keras.losses.CategoricalCrossentropy(label_smoothing=0.1)

    compress_model.compile(loss=loss_obj, metrics=metrics)
    compress_model.summary()

    if config.ckpt_path is not None:
        load_checkpoint(model=compress_model, ckpt_path=config.ckpt_path)

    save_path, save_format = get_saving_parameters(config)
    compression_ctrl.export_model(save_path, save_format)
    logger.info('Saved to {}'.format(save_path))
Code example #5
File: losses.py  Project: yiweichen04/nncf
    def __init__(self, params):
        logger.info('FastrcnnBoxLoss huber_loss_delta {}'.format(params.huber_loss_delta))
        # The delta is typically around the mean value of the regression targets.
        # For instance, the regression targets of a 512x512 input with 6 anchors
        # on the P2-P6 pyramid are about [0.1, 0.1, 0.2, 0.2].
        self._huber_loss = tf.keras.losses.Huber(
            delta=params.huber_loss_delta, reduction=tf.keras.losses.Reduction.SUM)
Code example #6
def resume_from_checkpoint(checkpoint_manager, compression_ctrl, ckpt_path, steps_per_epoch):
    if load_checkpoint(checkpoint_manager.checkpoint, ckpt_path) == 0:
        return 0, 0
    optimizer = checkpoint_manager.checkpoint.optimizer
    initial_step = optimizer.iterations.numpy()
    initial_epoch = initial_step // steps_per_epoch
    compression_ctrl.scheduler.load_state(initial_step, steps_per_epoch)
    logger.info('Resuming from epoch %d (global step %d)', initial_epoch, initial_step)
    return initial_epoch, initial_step
Code example #7
def build_scheduler(config, epoch_size, batch_size, steps):
    optimizer_config = config.get('optimizer', {})
    schedule_type = optimizer_config.get('schedule_type',
                                         'exponential').lower()
    schedule_params = optimizer_config.get("schedule_params", {})

    if schedule_type == 'exponential':
        decay_rate = schedule_params.get('decay_rate', None)
        if decay_rate is None:
            raise ValueError('decay_rate parameter must be specified '
                             'for the exponential scheduler')

        initial_lr = schedule_params.get('initial_lr', None)
        if initial_lr is None:
            raise ValueError('initial_lr parameter must be specified '
                             'for the exponential scheduler')

        decay_epochs = schedule_params.get('decay_epochs', None)
        decay_steps = decay_epochs * steps if decay_epochs is not None else 0

        logger.info(
            'Using exponential learning rate with: '
            'initial_learning_rate: {initial_lr}, decay_steps: {decay_steps}, '
            'decay_rate: {decay_rate}'.format(initial_lr=initial_lr,
                                              decay_steps=decay_steps,
                                              decay_rate=decay_rate))
        lr = tf.keras.optimizers.schedules.ExponentialDecay(
            initial_learning_rate=initial_lr,
            decay_steps=decay_steps,
            decay_rate=decay_rate)
    elif schedule_type == 'piecewise_constant':
        boundaries = schedule_params.get('boundaries', None)
        if boundaries is None:
            raise ValueError('boundaries parameter must be specified '
                             'for the piecewise_constant scheduler')

        values = schedule_params.get('values', None)
        if values is None:
            raise ValueError('values parameter must be specified '
                             'for the piecewise_constant')

        logger.info(
            'Using Piecewise constant decay with warmup. '
            'Parameters: batch_size: {batch_size}, epoch_size: {epoch_size}, '
            'boundaries: {boundaries}, values: {values}'.format(
                batch_size=batch_size,
                epoch_size=epoch_size,
                boundaries=boundaries,
                values=values))
        steps_per_epoch = epoch_size // batch_size
        boundaries = [steps_per_epoch * x for x in boundaries]
        lr = tf.keras.optimizers.schedules.PiecewiseConstantDecay(
            boundaries, values)
    elif schedule_type == 'step':
        lr = StepLearningRateWithLinearWarmup(steps, schedule_params)
    else:
        raise KeyError('Unknown learning rate scheduler type: {}'.format(schedule_type))

    return lr
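
A minimal, hypothetical usage sketch for the build_scheduler above. The config contents (learning-rate values, decay settings, dataset and batch sizes) are illustrative only and assume that config behaves like a plain dict with .get(); the real samples pass a richer config object.

example_config = {
    'optimizer': {
        'schedule_type': 'exponential',
        'schedule_params': {
            'initial_lr': 1e-3,
            'decay_rate': 0.96,
            'decay_epochs': 2,
        },
    },
}
# With ~50000 training images and a batch size of 128 there are ~390 steps per epoch.
lr_schedule = build_scheduler(example_config, epoch_size=50000, batch_size=128, steps=390)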
Code example #8
File: main.py  Project: yiweichen04/nncf
def resume_from_checkpoint(model, compression_ctrl, ckpt_path,
                           steps_per_epoch):
    if load_checkpoint(model, ckpt_path) == 0:
        return 0
    initial_step = model.optimizer.iterations.numpy()
    initial_epoch = initial_step // steps_per_epoch
    compression_ctrl.scheduler.load_state(initial_step, steps_per_epoch)
    logger.info('Resuming from epoch %d', initial_epoch)
    return initial_epoch
Code example #9
    def _load_tfrecords(self):
        logger.info('Using TFRecords to load {} data.'.format(self._split))

        dataset_key = self._dataset_name.replace('/', '')
        if dataset_key in self._tfrecord_datasets:
            self._dataset_loader = self._tfrecord_datasets[dataset_key](
                config=self._config, is_train=self._is_train)
        else:
            raise ValueError('Unknown dataset name: {}'.format(
                self._dataset_name))

        dataset = self._dataset_loader.as_dataset()

        return dataset
Code example #10
def export(config):
    model_builder = get_model_builder(config)
    model = model_builder.build_model(weights=config.get('weights', None))

    compression_ctrl, compress_model = create_compressed_model(
        model, config.nncf_config)

    if config.ckpt_path:
        checkpoint = tf.train.Checkpoint(model=compress_model)
        load_checkpoint(checkpoint, config.ckpt_path)

    save_path, save_format = get_saving_parameters(config)
    compression_ctrl.export_model(save_path, save_format)
    logger.info("Saved to {}".format(save_path))
Code example #11
def export(config):
    model_builder = get_model_builder(config)

    with TFOriginalModelManager(model_builder.build_model,
                                weights=config.get('weights', None),
                                is_training=False) as model:
        compression_ctrl, compress_model = create_compressed_model(
            model, config.nncf_config)

    if config.ckpt_path:
        variables = get_variables(compress_model)
        checkpoint = tf.train.Checkpoint(variables=variables)
        load_checkpoint(checkpoint, config.ckpt_path)

    save_path, save_format = get_saving_parameters(config)
    compression_ctrl.export_model(save_path, save_format)
    logger.info("Saved to {}".format(save_path))
Code example #12
    def build_model(self, weights=None, is_training=None):
        with keras_utils.maybe_enter_backend_graph():
            outputs = self.model_outputs(self._input_layer, is_training)
            keras_model = tf.keras.models.Model(inputs=self._input_layer,
                                                outputs=outputs,
                                                name='retinanet')

        if self._checkpoint_path:
            logger.info('Init backbone')
            init_checkpoint_fn = self.make_restore_checkpoint_fn()
            init_checkpoint_fn(keras_model)

        if weights:
            logger.info('Loaded pretrained weights from {}'.format(weights))
            keras_model.load_weights(weights)

        return keras_model
Code example #13
def checkpoint_saver(config):
    """
    Load a checkpoint and re-save it without the optimizer state (reducing the memory footprint).
    """
    model_builder = get_model_builder(config)
    model = model_builder.build_model()

    _, compress_model = create_compressed_model(model, config.nncf_config)

    checkpoint = tf.train.Checkpoint(model=compress_model)
    load_checkpoint(checkpoint, config.ckpt_path)

    checkpoint_manager = tf.train.CheckpointManager(checkpoint,
                                                    config.checkpoint_save_dir,
                                                    max_to_keep=None)
    save_path = checkpoint_manager.save()
    logger.info('Saved checkpoint: {}'.format(save_path))
Code example #14
    def build_model(self, weights=None, is_training=None):
        input_layers = self.build_input_layers(self._params, is_training)
        with keras_utils.maybe_enter_backend_graph():
            outputs = self.model_outputs(input_layers, is_training)
            keras_model = tf.keras.models.Model(inputs=input_layers,
                                                outputs=outputs,
                                                name='maskrcnn')

        if self._checkpoint_path:
            logger.info('Init backbone')
            init_checkpoint_fn = self.make_restore_checkpoint_fn()
            init_checkpoint_fn(keras_model)

        if weights:
            logger.info('Loaded pretrained weights from {}'.format(weights))
            _restore_baseline_weights(keras_model, weights)

        return keras_model
Code example #15
def run_train(config):
    strategy = get_distribution_strategy(config)

    # Create dataset
    builders = get_dataset_builders(config, strategy)
    datasets = [builder.build() for builder in builders]
    train_builder, _ = builders
    train_dataset, calibration_dataset = datasets
    train_dist_dataset = strategy.experimental_distribute_dataset(train_dataset)

    # Training parameters
    epochs = config.epochs
    steps_per_epoch = train_builder.steps_per_epoch

    # We use `model_batch_size` to create input layer for model
    config.model_batch_size = train_builder.batch_size

    # Create model builder
    model_builder = get_model_builder(config)

    with TFOriginalModelManager(model_builder.build_model,
                                weights=config.get('weights', None),
                                is_training=True) as model:
        with strategy.scope():
            compression_ctrl, compress_model = create_compressed_model(model, config.nncf_config)

            scheduler = build_scheduler(
                config=config,
                epoch_size=train_builder.num_examples,
                batch_size=train_builder.global_batch_size,
                steps=steps_per_epoch)

            optimizer = build_optimizer(
                config=config,
                scheduler=scheduler)

            loss_fn = model_builder.build_loss_fn()

            variables = get_variables(compress_model)
            checkpoint = tf.train.Checkpoint(variables=variables, optimizer=optimizer, step=tf.Variable(0))
            checkpoint_manager = tf.train.CheckpointManager(checkpoint, config.checkpoint_save_dir, max_to_keep=None)

            initial_epoch = initial_step = 0
            if config.ckpt_path:
                initial_epoch, initial_step = resume_from_checkpoint(checkpoint_manager,
                                                                     compression_ctrl,
                                                                     config.ckpt_path,
                                                                     steps_per_epoch)
            else:
                logger.info('Initialization...')
                compression_ctrl.initialize(dataset=calibration_dataset)

    train_step = create_train_step_fn(strategy, compress_model, loss_fn, optimizer)

    logger.info('Training...')
    train(train_step, train_dist_dataset, initial_epoch, initial_step,
          epochs, steps_per_epoch, checkpoint_manager, compression_ctrl,
          config.log_dir, optimizer, config.print_freq)

    logger.info('Compression statistics')
    print_statistics(compression_ctrl.statistics())
Code example #16
File: checkpoint_utils.py  Project: yiweichen04/nncf
    def _restore_checkpoint_fn(keras_model):
        """Loads pretrained model through scaffold function."""
        if not checkpoint_path:
            logger.info('checkpoint_path is empty')
            return

        var_prefix = prefix

        if prefix and not prefix.endswith('/'):
            var_prefix += '/'

        var_to_shape_map = _get_checkpoint_map(checkpoint_path)
        assert var_to_shape_map, 'var_to_shape_map should not be empty'

        vars_to_load = _build_assignment_map(keras_model,
                                             prefix=var_prefix,
                                             skip_variables_regex=skip_regex,
                                             var_to_shape_map=var_to_shape_map)

        if not vars_to_load:
            raise ValueError('Variables to load is empty.')

        tf.compat.v1.train.init_from_checkpoint(checkpoint_path, vars_to_load)
Code example #17
    def _load_tfds(self):
        logger.info('Using TFDS to load {} data.'.format(self._split))

        set_hard_limit_num_open_files()

        self._dataset_loader = tfds.builder(self._dataset_name,
                                            data_dir=self._dataset_dir)

        self._dataset_loader.download_and_prepare()

        decoders = {'image': tfds.decode.SkipDecoding()} \
            if self._skip_decoding else None

        read_config = tfds.ReadConfig(interleave_cycle_length=64,
                                      interleave_block_length=1)

        dataset = self._dataset_loader.as_dataset(
            split=self._split,
            as_supervised=self._as_supervised,
            shuffle_files=True,
            decoders=decoders,
            read_config=read_config)

        return dataset
Code example #18
File: main.py  Project: yiweichen04/nncf
def run(config):
    strategy = get_distribution_strategy(config)

    model_fn, model_params = get_model(
        config.model,
        input_shape=config.get('input_info', {}).get('sample_size', None),
        num_classes=config.get('num_classes', 1000),
        pretrained=config.get('pretrained', False),
        weights=config.get('weights', None))

    builders = get_dataset_builders(config, strategy)
    datasets = [builder.build() for builder in builders]

    train_builder, validation_builder = builders
    train_dataset, validation_dataset = datasets

    train_epochs = config.epochs
    train_steps = train_builder.steps_per_epoch
    validation_steps = validation_builder.steps_per_epoch

    with TFOriginalModelManager(model_fn, **model_params) as model:
        with strategy.scope():
            compression_ctrl, compress_model = create_compressed_model(
                model, config.nncf_config)
            compression_callbacks = create_compression_callbacks(
                compression_ctrl, log_dir=config.log_dir)

            scheduler = build_scheduler(
                config=config,
                epoch_size=train_builder.num_examples,
                batch_size=train_builder.global_batch_size,
                steps=train_steps)
            optimizer = build_optimizer(config=config, scheduler=scheduler)

            metrics = [
                tf.keras.metrics.CategoricalAccuracy(name='acc@1'),
                tf.keras.metrics.TopKCategoricalAccuracy(k=5, name='acc@5')
            ]
            loss_obj = tf.keras.losses.CategoricalCrossentropy(
                label_smoothing=0.1)

            compress_model.compile(optimizer=optimizer,
                                   loss=loss_obj,
                                   metrics=metrics,
                                   run_eagerly=config.get('eager_mode', False))

            compress_model.summary()

            initial_epoch = 0
            if config.ckpt_path is not None:
                initial_epoch = resume_from_checkpoint(
                    model=compress_model,
                    compression_ctrl=compression_ctrl,
                    ckpt_path=config.ckpt_path,
                    steps_per_epoch=train_steps)
            else:
                logger.info('initialization...')
                compression_ctrl.initialize(dataset=train_dataset)

    callbacks = get_callbacks(model_checkpoint=True,
                              include_tensorboard=True,
                              track_lr=True,
                              write_model_weights=False,
                              initial_step=initial_epoch * train_steps,
                              model_dir=config.log_dir,
                              ckpt_dir=config.checkpoint_save_dir)

    callbacks.extend(compression_callbacks)

    validation_kwargs = {
        'validation_data': validation_dataset,
        'validation_steps': validation_steps,
        'validation_freq': 1,
    }

    if 'train' in config.mode:
        logger.info('training...')
        compress_model.fit(train_dataset,
                           epochs=train_epochs,
                           steps_per_epoch=train_steps,
                           initial_epoch=initial_epoch,
                           callbacks=callbacks,
                           **validation_kwargs)

    logger.info('evaluation...')
    print_statistics(compression_ctrl.statistics())
    compress_model.evaluate(validation_dataset,
                            steps=validation_steps,
                            verbose=1)

    if 'export' in config.mode:
        save_path, save_format = get_saving_parameters(config)
        compression_ctrl.export_model(save_path, save_format)
        logger.info('Saved to {}'.format(save_path))
Code example #19
def run(config):
    strategy = get_distribution_strategy(config)
    if config.metrics_dump is not None:
        write_metrics(0, config.metrics_dump)

    # Create dataset
    builders = get_dataset_builders(config, strategy.num_replicas_in_sync)
    datasets = [builder.build() for builder in builders]
    train_builder, test_builder = builders
    train_dataset, test_dataset = datasets
    train_dist_dataset = strategy.experimental_distribute_dataset(
        train_dataset)
    test_dist_dataset = strategy.experimental_distribute_dataset(test_dataset)

    # Training parameters
    epochs = config.epochs
    steps_per_epoch = train_builder.steps_per_epoch
    num_test_batches = test_builder.steps_per_epoch

    # Create model builder
    model_builder = get_model_builder(config)

    with TFOriginalModelManager(model_builder.build_model,
                                weights=config.get('weights', None)) as model:
        with strategy.scope():
            compression_ctrl, compress_model = create_compressed_model(
                model, config.nncf_config)

            scheduler = build_scheduler(config=config,
                                        steps_per_epoch=steps_per_epoch)

            optimizer = build_optimizer(config=config, scheduler=scheduler)

            eval_metric = model_builder.eval_metrics()
            loss_fn = model_builder.build_loss_fn(compress_model,
                                                  compression_ctrl.loss)
            predict_post_process_fn = model_builder.post_processing

            checkpoint = tf.train.Checkpoint(model=compress_model,
                                             optimizer=optimizer)
            checkpoint_manager = tf.train.CheckpointManager(
                checkpoint, config.checkpoint_save_dir, max_to_keep=None)

            initial_epoch = initial_step = 0
            if config.ckpt_path:
                initial_epoch, initial_step = resume_from_checkpoint(
                    checkpoint_manager, compression_ctrl, config.ckpt_path,
                    steps_per_epoch, config)
            else:
                logger.info('Initialization...')
                compression_ctrl.initialize(dataset=train_dataset)

    train_step = create_train_step_fn(strategy, compress_model, loss_fn,
                                      optimizer)
    test_step = create_test_step_fn(strategy, compress_model,
                                    predict_post_process_fn)

    if 'train' in config.mode:
        train(train_step, test_step, eval_metric, train_dist_dataset,
              test_dist_dataset, initial_epoch, initial_step, epochs,
              steps_per_epoch, checkpoint_manager, compression_ctrl,
              config.log_dir, optimizer, num_test_batches, config.print_freq)

    print_statistics(compression_ctrl.statistics())
    metric_result = evaluate(test_step, eval_metric, test_dist_dataset,
                             num_test_batches, config.print_freq)
    logger.info('Validation metric = {}'.format(metric_result))

    if config.metrics_dump is not None:
        write_metrics(metric_result['AP'], config.metrics_dump)

    if 'export' in config.mode:
        save_path, save_format = get_saving_parameters(config)
        compression_ctrl.export_model(save_path, save_format)
        logger.info("Saved to {}".format(save_path))
Code example #20
def build_optimizer(config, scheduler):
    optimizer_config = config.get('optimizer', {})

    optimizer_type = optimizer_config.get('type', 'adam').lower()
    optimizer_params = optimizer_config.get("optimizer_params", {})

    logger.info('Building %s optimizer with params %s', optimizer_type,
                optimizer_params)

    if optimizer_type == 'sgd':
        logger.info('Using SGD optimizer')
        nesterov = optimizer_params.get('nesterov', False)
        optimizer = tf.keras.optimizers.SGD(learning_rate=scheduler,
                                            nesterov=nesterov)
    elif optimizer_type == 'momentum':
        logger.info('Using momentum optimizer')
        nesterov = optimizer_params.get('nesterov', False)
        momentum = optimizer_params.get('momentum', 0.9)
        optimizer = tf.keras.optimizers.SGD(learning_rate=scheduler,
                                            momentum=momentum,
                                            nesterov=nesterov)
    elif optimizer_type == 'rmsprop':
        logger.info('Using RMSProp')
        rho = optimizer_params.get('rho', 0.9)
        momentum = optimizer_params.get('momentum', 0.9)
        epsilon = optimizer_params.get('epsilon', 1e-07)
        optimizer = tf.keras.optimizers.RMSprop(learning_rate=scheduler,
                                                rho=rho,
                                                momentum=momentum,
                                                epsilon=epsilon)
    elif optimizer_type == 'adam':
        logger.info('Using Adam')
        beta_1 = optimizer_params.get('beta_1', 0.9)
        beta_2 = optimizer_params.get('beta_2', 0.999)
        epsilon = optimizer_params.get('epsilon', 1e-07)
        optimizer = tf.keras.optimizers.Adam(learning_rate=scheduler,
                                             beta_1=beta_1,
                                             beta_2=beta_2,
                                             epsilon=epsilon)
    elif optimizer_type == 'adamw':
        logger.info('Using AdamW')
        weight_decay = optimizer_params.get('weight_decay', 0.01)
        beta_1 = optimizer_params.get('beta_1', 0.9)
        beta_2 = optimizer_params.get('beta_2', 0.999)
        epsilon = optimizer_params.get('epsilon', 1e-07)
        optimizer = tfa.optimizers.AdamW(weight_decay=weight_decay,
                                         learning_rate=scheduler,
                                         beta_1=beta_1,
                                         beta_2=beta_2,
                                         epsilon=epsilon)
    else:
        raise ValueError('Unknown optimizer %s' % optimizer_type)

    moving_average_decay = optimizer_params.get('moving_average_decay', 0.)
    if moving_average_decay > 0.:
        logger.info('Including moving average decay.')
        optimizer = tfa.optimizers.MovingAverage(
            optimizer, average_decay=moving_average_decay, num_updates=None)
    if optimizer_params.get('lookahead', None):
        logger.info('Using lookahead optimizer.')
        optimizer = tfa.optimizers.Lookahead(optimizer)

    return optimizer
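
A hypothetical config for the build_optimizer above, paired with a constant learning rate instead of a schedule object (tf.keras optimizers accept either). The parameter values are illustrative and not taken from the original project.

example_config = {
    'optimizer': {
        'type': 'momentum',
        'optimizer_params': {
            'momentum': 0.9,
            'nesterov': True,
        },
    },
}
optimizer = build_optimizer(example_config, scheduler=0.1)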
Code example #21
def build_scheduler(config, steps_per_epoch):
    optimizer_config = config.get('optimizer', {})
    schedule_type = optimizer_config.get('schedule_type', 'step').lower()
    schedule_params = optimizer_config.get('schedule_params', {})
    gamma = schedule_params.get('gamma', optimizer_config.get('gamma', 0.1))
    base_lr = schedule_params.get('base_lr',
                                  optimizer_config.get('base_lr', None))

    schedule_base_lr_check(schedule_type, base_lr)

    if schedule_type == 'exponential':
        step = schedule_params.get('step', optimizer_config.get('step', 1))
        decay_steps = step * steps_per_epoch

        logger.info(
            'Using exponential learning rate with: '
            'initial lr: %f, decay steps: %d, '
            'decay rate: %f', base_lr, decay_steps, gamma)
        lr = tf.keras.optimizers.schedules.ExponentialDecay(
            initial_learning_rate=base_lr,
            decay_steps=decay_steps,
            decay_rate=gamma)

    elif schedule_type == 'piecewise_constant':
        boundaries = schedule_params.get(
            'boundaries', optimizer_config.get('boundaries', None))
        if boundaries is None:
            raise ValueError('`boundaries` parameter must be specified '
                             'for the `piecewise_constant` scheduler')

        values = schedule_params.get('values',
                                     optimizer_config.get('values', None))
        if values is None:
            raise ValueError('`values` parameter must be specified '
                             'for the `piecewise_constant` scheduler')

        logger.info(
            'Using Piecewise constant decay with warmup. '
            'Parameters: boundaries: %s, values: %s', boundaries, values)
        boundaries = [steps_per_epoch * x for x in boundaries]
        lr = tf.keras.optimizers.schedules.PiecewiseConstantDecay(
            boundaries, values)

    elif schedule_type == 'multistep':
        logger.info('Using MultiStep learning rate.')
        steps = schedule_params.get('steps',
                                    optimizer_config.get('steps', None))
        if steps is None:
            raise ValueError('`steps` parameter must be specified '
                             'for the `multistep` scheduler')
        steps = [steps_per_epoch * x for x in steps]
        lr = MultiStepLearningRate(base_lr, steps, gamma=gamma)

    elif schedule_type == 'step':
        step = schedule_params.get('step', optimizer_config.get('step', 1))
        decay_steps = step * steps_per_epoch

        logger.info(
            'Using Step learning rate with: '
            'base_lr: %f, decay steps: %d, '
            'gamma: %f', base_lr, decay_steps, gamma)
        lr = tf.keras.optimizers.schedules.ExponentialDecay(
            initial_learning_rate=base_lr,
            decay_steps=decay_steps,
            decay_rate=gamma,
            staircase=True)

    elif schedule_type == 'step_warmup':
        lr = StepLearningRateWithLinearWarmup(schedule_params)

    elif schedule_type == 'cosine':
        decay_steps = steps_per_epoch * config.epochs
        logger.info(
            'Using Cosine learning rate with: '
            'base_lr: %f, decay steps: %d, ', base_lr, decay_steps)
        lr = tf.keras.experimental.CosineDecay(initial_learning_rate=base_lr,
                                               decay_steps=decay_steps)

    else:
        raise KeyError(
            f'Unknown learning rate scheduler type: {schedule_type}')

    return lr
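
A hypothetical config exercising the 'step' branch of this build_scheduler variant (a staircase exponential decay). The values are illustrative, and schedule_base_lr_check is assumed to be available from the same module and to accept a config with base_lr set.

example_config = {
    'optimizer': {
        'schedule_type': 'step',
        'schedule_params': {
            'base_lr': 0.1,
            'step': 30,    # decay the learning rate every 30 epochs
            'gamma': 0.1,  # multiply the learning rate by 0.1 at each decay
        },
    },
}
lr_schedule = build_scheduler(example_config, steps_per_epoch=1000)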
Code example #22
File: main.py  Project: yiweichen04/nncf
def load_checkpoint(model, ckpt_path):
    logger.info('Load from checkpoint is enabled.')
    if tf.io.gfile.isdir(ckpt_path):
        checkpoint = tf.train.latest_checkpoint(ckpt_path)
        logger.info('Latest checkpoint: {}'.format(checkpoint))
    else:
        checkpoint = ckpt_path if tf.io.gfile.exists(ckpt_path +
                                                     '.index') else None
        logger.info('Provided checkpoint: {}'.format(checkpoint))

    if not checkpoint:
        logger.info('No checkpoint detected.')
        return 0

    logger.info(
        'Checkpoint file {} found and restoring from checkpoint'.format(
            checkpoint))
    model.load_weights(checkpoint).expect_partial()
    logger.info('Completed loading from checkpoint.')
    return None
Code example #23
File: checkpoint_utils.py  Project: yiweichen04/nncf
def _build_assignment_map(keras_model,
                          prefix='',
                          skip_variables_regex=None,
                          var_to_shape_map=None):
    """Compute an assignment mapping for loading older checkpoints into a Keras

    model. Variable names are remapped from the original TPUEstimator model to
    the new Keras name.

    Args:
      keras_model: tf.keras.Model object to provide variables to assign.
      prefix: prefix in the variable name to be remove for alignment with names in
        the checkpoint.
      skip_variables_regex: regular expression to math the names of variables that
        do not need to be assign.
      var_to_shape_map: variable name to shape mapping from the checkpoint.

    Returns:
      The variable assignment map.
    """
    assignment_map = {}

    checkpoint_names = None
    if var_to_shape_map:
        predicate = lambda x: not x.endswith('Momentum') and not x.endswith(
            'global_step')
        checkpoint_names = list(filter(predicate, var_to_shape_map.keys()))

    for var in keras_model.variables:
        var_name = var.name

        if skip_variables_regex and re.match(skip_variables_regex, var_name):
            continue

        # Trim the index of the variable.
        if ':' in var_name:
            var_name = var_name[:var_name.rindex(':')]
        if var_name.startswith(prefix):
            var_name = var_name[len(prefix):]

        if not var_to_shape_map:
            assignment_map[var_name] = var
            continue

        # Match name with variables in the checkpoint.
        match_names = []
        for x in checkpoint_names:
            if x.endswith(var_name):
                match_names.append(x)

        try:
            if match_names:
                assert len(match_names) == 1, \
                    'more than one match for {}: {}'.format(var_name, match_names)
                checkpoint_names.remove(match_names[0])
                assignment_map[match_names[0]] = var
            else:
                logger.info('Variable name not found in checkpoint: %s', var_name)
        except Exception as ex:
            logger.info('Error removing the match_name: %s', match_names)
            logger.info('Exception: %s', ex)
            raise

    logger.info('Found %d variables in the checkpoint', len(assignment_map))

    return assignment_map
Code example #24
def run_evaluation(config, eval_timeout=None):
    """Runs evaluation on checkpoint save directory"""
    strategy = get_distribution_strategy(config)
    if config.metrics_dump is not None:
        write_metrics(0, config.metrics_dump)

    dataset_builder = get_dataset_builders(config,
                                           strategy.num_replicas_in_sync)
    dataset = dataset_builder.build()
    num_batches = dataset_builder.steps_per_epoch
    test_dist_dataset = strategy.experimental_distribute_dataset(dataset)

    # We use `model_batch_size` to create input layer for model
    config.model_batch_size = dataset_builder.batch_size

    model_builder = get_model_builder(config)

    with TFOriginalModelManager(model_builder.build_model,
                                weights=config.get('weights', None),
                                is_training=False) as model:
        with strategy.scope():
            compression_ctrl, compress_model = create_compressed_model(
                model, config.nncf_config)
            variables = get_variables(compress_model)
            checkpoint = tf.train.Checkpoint(variables=variables,
                                             step=tf.Variable(0))
            eval_metric = model_builder.eval_metrics()
            predict_post_process_fn = model_builder.post_processing

    test_step = create_test_step_fn(strategy, compress_model,
                                    predict_post_process_fn)

    if 'test' in config.mode:
        if config.ckpt_path:
            load_checkpoint(checkpoint, config.ckpt_path)

        statistics = compression_ctrl.statistics()
        print_statistics(statistics)
        metric_result = evaluate(test_step, eval_metric, test_dist_dataset,
                                 num_batches, config.print_freq)
        eval_metric.reset_states()
        logger.info('Test metric = {}'.format(metric_result))

        if 'export' in config.mode:
            save_path, save_format = get_saving_parameters(config)
            compression_ctrl.export_model(save_path, save_format)
            logger.info("Saved to {}".format(save_path))

    elif 'train' in config.mode:
        validation_summary_writer = SummaryWriter(config.log_dir, 'validation')
        checkpoint_dir = config.checkpoint_save_dir
        eval_timeout = config.eval_timeout

        for checkpoint_path in tf.train.checkpoints_iterator(
                checkpoint_dir, timeout=eval_timeout):
            status = checkpoint.restore(checkpoint_path)
            status.expect_partial()
            logger.info(
                'Checkpoint file {} found and restoring from checkpoint'.
                format(checkpoint_path))
            logger.info('Checkpoint step: {}'.format(checkpoint.step.numpy()))
            metric_result = evaluate(test_step, eval_metric, test_dist_dataset,
                                     num_batches, config.print_freq)

            current_step = checkpoint.step.numpy()
            validation_summary_writer(metrics=metric_result, step=current_step)

            eval_metric.reset_states()
            logger.info('Validation metric = {}'.format(metric_result))

        validation_summary_writer.close()

    if config.metrics_dump is not None:
        write_metrics(metric_result['AP'], config.metrics_dump)
Code example #25
def build_optimizer(config, scheduler):
    optimizer_config = config.get('optimizer', {})

    optimizer_type = optimizer_config.get('type', 'adam').lower()
    optimizer_params = optimizer_config.get('optimizer_params', {})

    logger.info('Building %s optimizer with params %s', optimizer_type,
                optimizer_params)

    if optimizer_type in ['sgd', 'momentum']:
        printable_names = {'sgd': 'SGD', 'momentum': 'momentum'}
        logger.info('Using %s optimizer', printable_names[optimizer_type])

        default_momentum_value = 0.9 if optimizer_type == 'momentum' else 0.0
        momentum = optimizer_params.get('momentum', default_momentum_value)
        nesterov = optimizer_params.get('nesterov', False)
        weight_decay = optimizer_config.get('weight_decay', None)
        common_params = {
            'learning_rate': scheduler,
            'nesterov': nesterov,
            'momentum': momentum
        }
        if weight_decay:
            optimizer = tfa.optimizers.SGDW(**common_params,
                                            weight_decay=weight_decay)
        else:
            optimizer = tf.keras.optimizers.SGD(**common_params)
    elif optimizer_type == 'rmsprop':
        logger.info('Using RMSProp optimizer')
        rho = optimizer_params.get('rho', 0.9)
        momentum = optimizer_params.get('momentum', 0.9)
        epsilon = optimizer_params.get('epsilon', 1e-07)
        optimizer = tf.keras.optimizers.RMSprop(learning_rate=scheduler,
                                                rho=rho,
                                                momentum=momentum,
                                                epsilon=epsilon)
    elif optimizer_type in ['adam', 'adamw']:
        printable_names = {'adam': 'Adam', 'adamw': 'AdamW'}
        logger.info('Using %s optimizer', printable_names[optimizer_type])

        beta_1, beta_2 = optimizer_params.get('betas', [0.9, 0.999])
        epsilon = optimizer_params.get('eps', 1e-07)
        amsgrad = optimizer_params.get('amsgrad', False)
        w_decay_default_value = 0.01 if optimizer_type == 'adamw' else None
        weight_decay = optimizer_config.get('weight_decay',
                                            w_decay_default_value)
        common_params = {
            'learning_rate': scheduler,
            'beta_1': beta_1,
            'beta_2': beta_2,
            'epsilon': epsilon,
            'amsgrad': amsgrad
        }
        if weight_decay:
            optimizer = tfa.optimizers.AdamW(**common_params,
                                             weight_decay=weight_decay)
        else:
            optimizer = tf.keras.optimizers.Adam(**common_params)
    else:
        raise ValueError('Unknown optimizer %s' % optimizer_type)

    moving_average_decay = optimizer_params.get('moving_average_decay', 0.)
    if moving_average_decay > 0.:
        logger.info('Including moving average decay.')
        optimizer = tfa.optimizers.MovingAverage(
            optimizer, average_decay=moving_average_decay, num_updates=None)
    if optimizer_params.get('lookahead', None):
        logger.info('Using lookahead optimizer.')
        optimizer = tfa.optimizers.Lookahead(optimizer)

    return optimizer
Code example #26
def load_checkpoint(checkpoint, ckpt_path):
    logger.info('Load from checkpoint is enabled')
    if tf.io.gfile.isdir(ckpt_path):
        path_to_checkpoint = tf.train.latest_checkpoint(ckpt_path)
        logger.info('Latest checkpoint: {}'.format(path_to_checkpoint))
    else:
        path_to_checkpoint = ckpt_path if tf.io.gfile.exists(
            ckpt_path + '.index') else None
        logger.info('Provided checkpoint: {}'.format(path_to_checkpoint))

    if not path_to_checkpoint:
        logger.info('No checkpoint detected')
        return 0

    logger.info(
        'Checkpoint file {} found and restoring from checkpoint'.format(
            path_to_checkpoint))
    status = checkpoint.restore(path_to_checkpoint)
    status.expect_partial()
    logger.info('Completed loading from checkpoint')

    return None
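
A small, hypothetical usage of the load_checkpoint above: wrap the objects to restore in a tf.train.Checkpoint and point it at either a checkpoint directory or a specific checkpoint prefix. The model and the path below are placeholders, not taken from the original code.

model = tf.keras.Sequential([tf.keras.layers.Dense(10)])
checkpoint = tf.train.Checkpoint(model=model, step=tf.Variable(0))
load_checkpoint(checkpoint, '/path/to/checkpoint_dir')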
Code example #27
File: main.py  Project: yiweichen04/nncf
def train(train_step, test_step, eval_metric, train_dist_dataset,
          test_dist_dataset, initial_epoch, initial_step, epochs,
          steps_per_epoch, checkpoint_manager, compression_ctrl, log_dir,
          optimizer):

    train_summary_writer = SummaryWriter(log_dir, 'train')
    validation_summary_writer = SummaryWriter(log_dir, 'validation')
    compression_summary_writer = SummaryWriter(log_dir, 'compression')

    logger.info('Training started')
    for epoch in range(initial_epoch, epochs):
        logger.info('Epoch {}/{}'.format(epoch, epochs))
        compression_ctrl.scheduler.epoch_step(epoch)

        for step, x in enumerate(train_dist_dataset):
            if epoch == initial_epoch and step < initial_step % steps_per_epoch:
                continue
            if step == steps_per_epoch:
                save_path = checkpoint_manager.save()
                logger.info('Saved checkpoint for epoch={}: {}'.format(
                    epoch, save_path))
                break

            compression_ctrl.scheduler.step()
            train_loss = train_step(x)
            train_metric_result = tf.nest.map_structure(
                lambda s: s.numpy().astype(float), train_loss)

            if np.isnan(train_metric_result['total_loss']):
                raise ValueError('total loss is NaN')

            train_metric_result.update(
                {'learning_rate': optimizer.lr(optimizer.iterations).numpy()})

            train_summary_writer(metrics=train_metric_result,
                                 step=optimizer.iterations.numpy())

            if step % 100 == 0:
                logger.info('Step {}/{}'.format(step, steps_per_epoch))
                logger.info('Training metric = {}'.format(train_metric_result))

        logger.info('Evaluation...')
        test_metric_result = evaluate(test_step, eval_metric,
                                      test_dist_dataset)
        validation_summary_writer(metrics=test_metric_result,
                                  step=optimizer.iterations.numpy())
        eval_metric.reset_states()
        logger.info('Validation metric = {}'.format(test_metric_result))

        statistics = compression_ctrl.statistics()
        print_statistics(statistics)
        statistics = {
            'compression/statistics/' + key: value
            for key, value in statistics.items()
            if isinstance(value, (int, float))
        }
        compression_summary_writer(metrics=statistics,
                                   step=optimizer.iterations.numpy())

    train_summary_writer.close()
    validation_summary_writer.close()
    compression_summary_writer.close()