Example #1
0
def run_train(config):
    """Run the custom training loop for a compressed model.

    Builds the distribution strategy, training/calibration datasets, the
    model wrapped with NNCF compression, the optimizer/scheduler and
    checkpointing, then executes `train` and prints compression statistics.

    :param config: sample configuration object (attribute access plus a
        dict-like `get`); mutated here to carry `model_batch_size`.
    """
    strategy = get_distribution_strategy(config)

    # Create dataset
    builders = get_dataset_builders(config, strategy)
    datasets = [builder.build() for builder in builders]
    train_builder, _ = builders
    train_dataset, calibration_dataset = datasets
    train_dist_dataset = strategy.experimental_distribute_dataset(train_dataset)

    # Training parameters
    epochs = config.epochs
    steps_per_epoch = train_builder.steps_per_epoch

    # We use `model_batch_size` to create input layer for model
    config.model_batch_size = train_builder.batch_size

    # Create model builder
    model_builder = get_model_builder(config)

    # Build the original model, then wrap it with compression inside the
    # strategy scope so compression variables are created per-replica.
    with TFOriginalModelManager(model_builder.build_model,
                                weights=config.get('weights', None),
                                is_training=True) as model:
        with strategy.scope():
            compression_ctrl, compress_model = create_compressed_model(model, config.nncf_config)

            scheduler = build_scheduler(
                config=config,
                epoch_size=train_builder.num_examples,
                batch_size=train_builder.global_batch_size,
                steps=steps_per_epoch)

            optimizer = build_optimizer(
                config=config,
                scheduler=scheduler)

            loss_fn = model_builder.build_loss_fn()

            # Checkpoint tracks model variables, optimizer state, and a step
            # counter; max_to_keep=None keeps every saved checkpoint.
            variables = get_variables(compress_model)
            checkpoint = tf.train.Checkpoint(variables=variables, optimizer=optimizer, step=tf.Variable(0))
            checkpoint_manager = tf.train.CheckpointManager(checkpoint, config.checkpoint_save_dir, max_to_keep=None)

            # Either resume from a checkpoint or run compression
            # initialization (e.g. statistics collection) on the
            # calibration dataset — never both.
            initial_epoch = initial_step = 0
            if config.ckpt_path:
                initial_epoch, initial_step = resume_from_checkpoint(checkpoint_manager,
                                                                     compression_ctrl,
                                                                     config.ckpt_path,
                                                                     steps_per_epoch)
            else:
                logger.info('Initialization...')
                compression_ctrl.initialize(dataset=calibration_dataset)

    train_step = create_train_step_fn(strategy, compress_model, loss_fn, optimizer)

    logger.info('Training...')
    train(train_step, train_dist_dataset, initial_epoch, initial_step,
          epochs, steps_per_epoch, checkpoint_manager, compression_ctrl, config.log_dir, optimizer)

    logger.info('Compression statistics')
    print_statistics(compression_ctrl.statistics())
Example #2
0
File: main.py  Project: yiweichen04/nncf
def export(config):
    """Build and compile a compressed classification model, optionally
    restore weights from a checkpoint, and export it to disk."""
    model_fn, model_kwargs = get_model(
        config.model,
        input_shape=config.get('input_info', {}).get('sample_size', None),
        num_classes=config.get('num_classes', 1000),
        pretrained=config.get('pretrained', False),
        weights=config.get('weights', None))
    keras_model = model_fn(**model_kwargs)

    compression_ctrl, compress_model = create_compressed_model(
        keras_model, config.nncf_config)

    # Compile so summary/metrics are available; training is not run here.
    compress_model.compile(
        loss=tf.keras.losses.CategoricalCrossentropy(label_smoothing=0.1),
        metrics=[
            tf.keras.metrics.CategoricalAccuracy(name='acc@1'),
            tf.keras.metrics.TopKCategoricalAccuracy(k=5, name='acc@5'),
        ])
    compress_model.summary()

    if config.ckpt_path is not None:
        load_checkpoint(model=compress_model, ckpt_path=config.ckpt_path)

    save_path, save_format = get_saving_parameters(config)
    compression_ctrl.export_model(save_path, save_format)
    logger.info('Saved to {}'.format(save_path))
Example #3
0
def export(config):
    """Export a compressed model, restoring weights first when a
    checkpoint path is configured."""
    builder = get_model_builder(config)
    original_model = builder.build_model(weights=config.get('weights', None))

    compression_ctrl, compress_model = create_compressed_model(
        original_model, config.nncf_config)

    ckpt_path = config.ckpt_path
    if ckpt_path:
        ckpt = tf.train.Checkpoint(model=compress_model)
        load_checkpoint(ckpt, ckpt_path)

    save_path, save_format = get_saving_parameters(config)
    compression_ctrl.export_model(save_path, save_format)
    logger.info("Saved to {}".format(save_path))
Example #4
0
def export(config):
    """Build a compressed model in inference mode, optionally restore its
    variables from a checkpoint, and export it."""
    builder = get_model_builder(config)

    with TFOriginalModelManager(builder.build_model,
                                weights=config.get('weights', None),
                                is_training=False) as original_model:
        compression_ctrl, compress_model = create_compressed_model(
            original_model, config.nncf_config)

    if config.ckpt_path:
        # Restore only the model variables (no optimizer state is tracked).
        ckpt = tf.train.Checkpoint(variables=get_variables(compress_model))
        load_checkpoint(ckpt, config.ckpt_path)

    save_path, save_format = get_saving_parameters(config)
    compression_ctrl.export_model(save_path, save_format)
    logger.info("Saved to {}".format(save_path))
Example #5
0
def checkpoint_saver(config):
    """Re-save a checkpoint without optimizer state so its on-disk
    footprint is reduced."""
    builder = get_model_builder(config)
    _, compress_model = create_compressed_model(builder.build_model(),
                                                config.nncf_config)

    # The checkpoint tracks only the model, so optimizer slots present in
    # the source checkpoint are dropped on re-save.
    ckpt = tf.train.Checkpoint(model=compress_model)
    load_checkpoint(ckpt, config.ckpt_path)

    manager = tf.train.CheckpointManager(ckpt,
                                         config.checkpoint_save_dir,
                                         max_to_keep=None)
    saved_path = manager.save()
    logger.info('Saved checkpoint: {}'.format(saved_path))
Example #6
0
def run_evaluation(config, eval_timeout=None):
    """Runs evaluation on checkpoint save directory.

    In 'test' mode a single checkpoint (``config.ckpt_path``) is evaluated
    and the model is optionally exported.  In 'train' mode the checkpoint
    directory is watched and every new checkpoint is evaluated as it
    appears.

    :param config: sample configuration object (attribute access plus a
        dict-like `get`); mutated here to carry `model_batch_size`.
    :param eval_timeout: seconds to wait for new checkpoints in 'train'
        mode; falls back to ``config.eval_timeout`` when not given.
    """
    strategy = get_distribution_strategy(config)
    if config.metrics_dump is not None:
        # Write a placeholder metric up-front so the dump file always exists.
        write_metrics(0, config.metrics_dump)

    dataset_builder = get_dataset_builders(config,
                                           strategy.num_replicas_in_sync)
    dataset = dataset_builder.build()
    num_batches = dataset_builder.steps_per_epoch
    test_dist_dataset = strategy.experimental_distribute_dataset(dataset)

    # We use `model_batch_size` to create input layer for model
    config.model_batch_size = dataset_builder.batch_size

    model_builder = get_model_builder(config)

    with TFOriginalModelManager(model_builder.build_model,
                                weights=config.get('weights', None),
                                is_training=False) as model:
        with strategy.scope():
            compression_ctrl, compress_model = create_compressed_model(
                model, config.nncf_config)
            variables = get_variables(compress_model)
            checkpoint = tf.train.Checkpoint(variables=variables,
                                             step=tf.Variable(0))
            eval_metric = model_builder.eval_metrics()
            predict_post_process_fn = model_builder.post_processing

    test_step = create_test_step_fn(strategy, compress_model,
                                    predict_post_process_fn)

    # Stays None if neither evaluation branch below runs.
    metric_result = None

    if 'test' in config.mode:
        if config.ckpt_path:
            load_checkpoint(checkpoint, config.ckpt_path)

        statistics = compression_ctrl.statistics()
        print_statistics(statistics)
        metric_result = evaluate(test_step, eval_metric, test_dist_dataset,
                                 num_batches, config.print_freq)
        eval_metric.reset_states()
        logger.info('Test metric = {}'.format(metric_result))

        if 'export' in config.mode:
            save_path, save_format = get_saving_parameters(config)
            compression_ctrl.export_model(save_path, save_format)
            logger.info("Saved to {}".format(save_path))

    elif 'train' in config.mode:
        validation_summary_writer = SummaryWriter(config.log_dir, 'validation')
        checkpoint_dir = config.checkpoint_save_dir
        # BUGFIX: honor an explicitly passed `eval_timeout` instead of
        # unconditionally overwriting it with the config value.
        if eval_timeout is None:
            eval_timeout = config.eval_timeout

        for checkpoint_path in tf.train.checkpoints_iterator(
                checkpoint_dir, timeout=eval_timeout):
            status = checkpoint.restore(checkpoint_path)
            # Optimizer slots in the checkpoint are intentionally unmatched.
            status.expect_partial()
            logger.info(
                'Checkpoint file {} found and restoring from checkpoint'.
                format(checkpoint_path))
            logger.info('Checkpoint step: {}'.format(checkpoint.step.numpy()))
            metric_result = evaluate(test_step, eval_metric, test_dist_dataset,
                                     num_batches, config.print_freq)

            current_step = checkpoint.step.numpy()
            validation_summary_writer(metrics=metric_result, step=current_step)

            eval_metric.reset_states()
            logger.info('Validation metric = {}'.format(metric_result))

        validation_summary_writer.close()

    # BUGFIX: guard against `metric_result` never being produced (mode
    # contained neither 'test' nor 'train') instead of raising NameError.
    if config.metrics_dump is not None and metric_result is not None:
        write_metrics(metric_result['AP'], config.metrics_dump)
Example #7
0
File: main.py  Project: yiweichen04/nncf
def run(config):
    """Train, evaluate, and optionally export a compressed classification
    model using the Keras `fit`/`evaluate` API.

    Honors `config.mode`: 'train' runs `fit`, evaluation always runs, and
    'export' additionally exports the compressed model.

    :param config: sample configuration object (attribute access plus a
        dict-like `get`).
    """
    strategy = get_distribution_strategy(config)

    model_fn, model_params = get_model(
        config.model,
        input_shape=config.get('input_info', {}).get('sample_size', None),
        num_classes=config.get('num_classes', 1000),
        pretrained=config.get('pretrained', False),
        weights=config.get('weights', None))

    builders = get_dataset_builders(config, strategy)
    datasets = [builder.build() for builder in builders]

    train_builder, validation_builder = builders
    train_dataset, validation_dataset = datasets

    train_epochs = config.epochs
    train_steps = train_builder.steps_per_epoch
    validation_steps = validation_builder.steps_per_epoch

    # Build the original model, then compress and compile it inside the
    # strategy scope so variables are created per-replica.
    with TFOriginalModelManager(model_fn, **model_params) as model:
        with strategy.scope():
            compression_ctrl, compress_model = create_compressed_model(
                model, config.nncf_config)
            compression_callbacks = create_compression_callbacks(
                compression_ctrl, log_dir=config.log_dir)

            scheduler = build_scheduler(
                config=config,
                epoch_size=train_builder.num_examples,
                batch_size=train_builder.global_batch_size,
                steps=train_steps)
            optimizer = build_optimizer(config=config, scheduler=scheduler)

            metrics = [
                tf.keras.metrics.CategoricalAccuracy(name='acc@1'),
                tf.keras.metrics.TopKCategoricalAccuracy(k=5, name='acc@5')
            ]
            loss_obj = tf.keras.losses.CategoricalCrossentropy(
                label_smoothing=0.1)

            compress_model.compile(optimizer=optimizer,
                                   loss=loss_obj,
                                   metrics=metrics,
                                   run_eagerly=config.get('eager_mode', False))

            compress_model.summary()

            # Either resume from a checkpoint or run compression
            # initialization on the training dataset — never both.
            initial_epoch = 0
            if config.ckpt_path is not None:
                initial_epoch = resume_from_checkpoint(
                    model=compress_model,
                    compression_ctrl=compression_ctrl,
                    ckpt_path=config.ckpt_path,
                    steps_per_epoch=train_steps)
            else:
                logger.info('initialization...')
                compression_ctrl.initialize(dataset=train_dataset)

    callbacks = get_callbacks(model_checkpoint=True,
                              include_tensorboard=True,
                              track_lr=True,
                              write_model_weights=False,
                              initial_step=initial_epoch * train_steps,
                              model_dir=config.log_dir,
                              ckpt_dir=config.checkpoint_save_dir)

    callbacks.extend(compression_callbacks)

    validation_kwargs = {
        'validation_data': validation_dataset,
        'validation_steps': validation_steps,
        'validation_freq': 1,
    }

    if 'train' in config.mode:
        logger.info('training...')
        compress_model.fit(train_dataset,
                           epochs=train_epochs,
                           steps_per_epoch=train_steps,
                           initial_epoch=initial_epoch,
                           callbacks=callbacks,
                           **validation_kwargs)

    # Final evaluation runs regardless of mode.
    logger.info('evaluation...')
    print_statistics(compression_ctrl.statistics())
    compress_model.evaluate(validation_dataset,
                            steps=validation_steps,
                            verbose=1)

    if 'export' in config.mode:
        save_path, save_format = get_saving_parameters(config)
        compression_ctrl.export_model(save_path, save_format)
        logger.info('Saved to {}'.format(save_path))
Example #8
0
def create_compressed_model_and_algo_for_test(model, config):
    """Wrap *model* with NNCF compression for use in tests.

    Returns the compressed model and the compression controller — note the
    order is swapped relative to ``create_compressed_model``.
    """
    assert isinstance(config, NNCFConfig)
    # Start from a clean graph so state from previous tests does not leak.
    tf.keras.backend.clear_session()
    ctrl, compressed = create_compressed_model(model, config)
    return compressed, ctrl
Example #9
0
def run(config):
    """Train and evaluate a compressed detection model with a custom loop.

    Honors `config.mode`: 'train' runs the custom training loop, a final
    evaluation always runs, and 'export' additionally exports the model.

    :param config: sample configuration object (attribute access plus a
        dict-like `get`).
    """
    strategy = get_distribution_strategy(config)
    if config.metrics_dump is not None:
        # Write a placeholder metric up-front so the dump file always exists.
        write_metrics(0, config.metrics_dump)

    # Create dataset
    builders = get_dataset_builders(config, strategy.num_replicas_in_sync)
    datasets = [builder.build() for builder in builders]
    train_builder, test_builder = builders
    train_dataset, test_dataset = datasets
    train_dist_dataset = strategy.experimental_distribute_dataset(
        train_dataset)
    test_dist_dataset = strategy.experimental_distribute_dataset(test_dataset)

    # Training parameters
    epochs = config.epochs
    steps_per_epoch = train_builder.steps_per_epoch
    num_test_batches = test_builder.steps_per_epoch

    # Create model builder
    model_builder = get_model_builder(config)

    # Build the original model, then wrap it with compression inside the
    # strategy scope so compression variables are created per-replica.
    with TFOriginalModelManager(model_builder.build_model,
                                weights=config.get('weights', None)) as model:
        with strategy.scope():
            compression_ctrl, compress_model = create_compressed_model(
                model, config.nncf_config)

            scheduler = build_scheduler(config=config,
                                        steps_per_epoch=steps_per_epoch)

            optimizer = build_optimizer(config=config, scheduler=scheduler)

            eval_metric = model_builder.eval_metrics()
            # The loss combines the task loss with the compression loss.
            loss_fn = model_builder.build_loss_fn(compress_model,
                                                  compression_ctrl.loss)
            predict_post_process_fn = model_builder.post_processing

            checkpoint = tf.train.Checkpoint(model=compress_model,
                                             optimizer=optimizer)
            checkpoint_manager = tf.train.CheckpointManager(
                checkpoint, config.checkpoint_save_dir, max_to_keep=None)

            # Either resume from a checkpoint or run compression
            # initialization on the training dataset — never both.
            initial_epoch = initial_step = 0
            if config.ckpt_path:
                initial_epoch, initial_step = resume_from_checkpoint(
                    checkpoint_manager, compression_ctrl, config.ckpt_path,
                    steps_per_epoch, config)
            else:
                logger.info('Initialization...')
                compression_ctrl.initialize(dataset=train_dataset)

    train_step = create_train_step_fn(strategy, compress_model, loss_fn,
                                      optimizer)
    test_step = create_test_step_fn(strategy, compress_model,
                                    predict_post_process_fn)

    if 'train' in config.mode:
        train(train_step, test_step, eval_metric, train_dist_dataset,
              test_dist_dataset, initial_epoch, initial_step, epochs,
              steps_per_epoch, checkpoint_manager, compression_ctrl,
              config.log_dir, optimizer, num_test_batches, config.print_freq)

    # Final evaluation runs regardless of mode.
    print_statistics(compression_ctrl.statistics())
    metric_result = evaluate(test_step, eval_metric, test_dist_dataset,
                             num_test_batches, config.print_freq)
    logger.info('Validation metric = {}'.format(metric_result))

    if config.metrics_dump is not None:
        write_metrics(metric_result['AP'], config.metrics_dump)

    if 'export' in config.mode:
        save_path, save_format = get_saving_parameters(config)
        compression_ctrl.export_model(save_path, save_format)
        logger.info("Saved to {}".format(save_path))