Code example #1
def test_checkpoint_callback_make_checkpoints(save_freq):
    compression_ctrl, model = get_simple_compressed_model()
    compression_callbacks = create_compression_callbacks(compression_ctrl, log_tensorboard=False)
    dataset_len = 8

    dummy_x = tf.random.normal((dataset_len,) + model.input_shape[1:])
    dummy_y = tf.random.normal((dataset_len,) + model.output_shape[1:])

    model.compile(loss=tf.losses.CategoricalCrossentropy())

    ckpt_path = tempfile.mkdtemp()
    ckpt = tf.train.Checkpoint(model=model,
                               compression_state=TFCompressionState(compression_ctrl))
    model.fit(dummy_x, dummy_y,
              epochs=5,
              batch_size=2,
              callbacks=[CheckpointManagerCallback(ckpt, ckpt_path, save_freq),
                         *compression_callbacks])

    assert sorted(os.listdir(ckpt_path)) == REF_CKPT_DIR[save_freq]

    new_compression_ctrl, new_model = get_simple_compressed_model()
    new_ckpt = tf.train.Checkpoint(model=new_model,
                                   compression_state=TFCompressionState(new_compression_ctrl))
    new_ckpt.restore(tf.train.latest_checkpoint(ckpt_path))
    assert new_compression_ctrl.get_state() == compression_ctrl.get_state()
    assert tf.reduce_all([tf.reduce_all(w_new == w) for w_new, w in zip(new_model.weights, model.weights)])

    shutil.rmtree(ckpt_path)
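The test above depends on two helpers that are not shown: `REF_CKPT_DIR`, a mapping from `save_freq` to the expected checkpoint directory listing, and `CheckpointManagerCallback`. A minimal sketch of such a callback, assuming it simply wraps `tf.train.CheckpointManager` and saves once every `save_freq` epochs (the actual NNCF callback may count in steps and differ in detail):

import tensorflow as tf

class CheckpointManagerCallback(tf.keras.callbacks.Callback):
    """Save `checkpoint` into `directory` every `save_freq` epochs (sketch)."""

    def __init__(self, checkpoint, directory, save_freq='epoch'):
        super().__init__()
        self._manager = tf.train.CheckpointManager(checkpoint, directory, max_to_keep=None)
        # 'epoch' means "save after every epoch".
        self._save_freq = 1 if save_freq == 'epoch' else int(save_freq)

    def on_epoch_end(self, epoch, logs=None):
        if (epoch + 1) % self._save_freq == 0:
            self._manager.save(checkpoint_number=epoch + 1)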
Code example #2
def test_checkpoint_callback_make_checkpoints(mocker, tmp_path):
    save_freq = 2
    config = get_basic_quantization_config()
    gen_setup_spy = mocker.spy(QuantizationBuilder, '_get_quantizer_setup')

    model, compression_ctrl = create_compressed_model_and_algo_for_test(
        get_basic_conv_test_model(), config, force_no_init=True)
    assert isinstance(compression_ctrl, QuantizationController)

    quantizer_setup = gen_setup_spy.spy_return
    compression_callbacks = create_compression_callbacks(compression_ctrl,
                                                         log_tensorboard=False)
    dataset_len = 8

    dummy_x = tf.random.normal((dataset_len,) + model.input_shape[1:])
    dummy_y = tf.random.normal((dataset_len,) + model.output_shape[1:])

    model.compile(loss=tf.losses.CategoricalCrossentropy())

    ckpt_path = tmp_path / 'checkpoint'
    ckpt = tf.train.Checkpoint(
        model=model, compression_state=TFCompressionState(compression_ctrl))
    model.fit(dummy_x,
              dummy_y,
              epochs=5,
              batch_size=2,
              callbacks=[
                  CheckpointManagerCallback(ckpt, str(ckpt_path), save_freq),
                  *compression_callbacks
              ])

    assert sorted(os.listdir(ckpt_path)) == REF_CKPT_DIR[save_freq]

    new_compression_state = load_compression_state(ckpt_path)

    new_model, new_compression_ctrl = create_compressed_model_and_algo_for_test(
        get_basic_conv_test_model(), config, new_compression_state)
    new_model.compile(loss=tf.losses.CategoricalCrossentropy())
    new_ckpt = tf.train.Checkpoint(
        model=new_model,
        compression_state=TFCompressionState(new_compression_ctrl))
    load_checkpoint(new_ckpt, ckpt_path)

    builder = QuantizationBuilder(config)
    builder.load_state(new_compression_state['builder_state'])
    # pylint:disable=protected-access
    new_quantizer_setup = builder._quantizer_setup

    assert _quantization_setup_cmp(quantizer_setup, new_quantizer_setup)
    assert new_compression_ctrl.get_state() == compression_ctrl.get_state()
    assert tf.reduce_all([
        tf.reduce_all(w_new == w)
        for w_new, w in zip(new_model.weights, model.weights)
    ])
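`load_compression_state` restores the compression state from a checkpoint directory without first building a model. A minimal sketch, assuming NNCF's `TFCompressionStateLoader` helper (treat the exact import path as an assumption):

import tensorflow as tf
from nncf.tensorflow.utils.state import TFCompressionStateLoader

def load_compression_state(ckpt_path):
    # Only `compression_state` is attached, so every other slot in the
    # checkpoint (model weights, optimizer) is deliberately left unrestored.
    checkpoint = tf.train.Checkpoint(compression_state=TFCompressionStateLoader())
    checkpoint.restore(tf.train.latest_checkpoint(str(ckpt_path))).expect_partial()
    return checkpoint.compression_state.state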
Code example #3
def _save_and_load_compression_state(compression_ctrl, tmp_path):
    checkpoint_path = tmp_path / 'compression_state'
    checkpoint_to_save = tf.train.Checkpoint(
        compression_state=TFCompressionState(compression_ctrl))
    checkpoint_to_save.save(checkpoint_path)

    compression_state = load_compression_state(str(checkpoint_path.parent))

    return compression_state
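A typical use of this helper is a round-trip check: the state loaded from disk should equal the controller's current state. For example, reusing `get_simple_compressed_model` from example #1:

def test_compression_state_round_trip(tmp_path):
    compression_ctrl, _ = get_simple_compressed_model()
    compression_state = _save_and_load_compression_state(compression_ctrl, tmp_path)
    assert compression_state == compression_ctrl.get_state()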
Code example #4
def od_checkpoint_saver(config):
    """
    Load object detection checkpoint and re-save it without optimizer (memory footprint is reduced).
    """
    model_builder = get_model_od_builder(config)
    model = model_builder.build_model()

    compression_state = load_compression_state(config.ckpt_path)
    compression_ctrl, compress_model = create_compressed_model(
        model, config.nncf_config, compression_state)

    checkpoint = tf.train.Checkpoint(
        model=compress_model,
        compression_state=TFCompressionState(compression_ctrl))
    load_and_save_checkpoint(checkpoint, config)
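`load_and_save_checkpoint` is not shown. Given the docstring, a plausible sketch restores the original checkpoint into this optimizer-free `tf.train.Checkpoint` and saves it again; `config.checkpoint_save_dir` is borrowed from the training examples below and is an assumption here:

def load_and_save_checkpoint(checkpoint, config):
    # Restoring into a Checkpoint that does not track the optimizer drops
    # the optimizer slot variables, which is what shrinks the saved file.
    status = checkpoint.restore(tf.train.latest_checkpoint(config.ckpt_path))
    status.expect_partial()
    tf.train.CheckpointManager(checkpoint, config.checkpoint_save_dir, max_to_keep=1).save()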
Code example #5
def seg_checkpoint_saver(config):
    """
    Load segmentation checkpoint and re-save it without optimizer (memory footprint is reduced).
    """
    model_builder = get_model_seg_builder(config)
    model = model_builder.build_model()

    compression_state = load_compression_state(config.ckpt_path)
    compression_ctrl, compress_model = create_compressed_model(
        model, config.nncf_config, compression_state)

    variables = get_variables(compress_model)
    checkpoint = tf.train.Checkpoint(
        variables=variables,
        compression_state=TFCompressionState(compression_ctrl),
        step=tf.Variable(0))
    load_and_save_checkpoint(checkpoint, config)
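Unlike the object-detection saver, the segmentation checkpoint tracks a flat `variables` collection plus a `step` counter instead of the Keras model object. A hypothetical `get_variables`, assuming it only exposes the model's variables under checkpoint-safe names:

def get_variables(model):
    # Hypothetical: key each variable by a sanitized name so the checkpoint
    # layout does not depend on the Keras object graph.
    return {variable.name.replace(':', '_'): variable for variable in model.variables}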
Code example #6
def export(config):
    model_builder = get_model_builder(config)
    model = model_builder.build_model(weights=config.get('weights', None))

    compression_state = None
    if config.ckpt_path:
        compression_state = load_compression_state(config.ckpt_path)

    compression_ctrl, compress_model = create_compressed_model(
        model, config.nncf_config, compression_state)

    if config.ckpt_path:
        checkpoint = tf.train.Checkpoint(
            model=compress_model,
            compression_state=TFCompressionState(compression_ctrl))
        load_checkpoint(checkpoint, config.ckpt_path)

    save_path, save_format = get_saving_parameters(config)
    compression_ctrl.export_model(save_path, save_format)
    logger.info("Saved to {}".format(save_path))
Code example #7
def restore_compressed_model(config, strategy, model_builder, ckpt_path=None):
    compression_state = None
    if ckpt_path:
        compression_state = load_compression_state(ckpt_path)

    with TFOriginalModelManager(model_builder.build_model,
                                weights=config.get('weights', None),
                                is_training=False) as model:
        with strategy.scope():
            compression_ctrl, compress_model = create_compressed_model(model,
                                                                       config.nncf_config,
                                                                       compression_state)

            variables = get_variables(compress_model)
            checkpoint = tf.train.Checkpoint(variables=variables,
                                             compression_state=TFCompressionState(compression_ctrl),
                                             step=tf.Variable(0))
            if ckpt_path:
                load_checkpoint(checkpoint, ckpt_path)

    return compression_ctrl, compress_model, checkpoint
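A typical call site, assuming an evaluation script that has already built a distribution strategy and a model builder (helper names follow the other examples):

strategy = get_distribution_strategy(config)
model_builder = get_model_builder(config)
compression_ctrl, compress_model, checkpoint = restore_compressed_model(
    config, strategy, model_builder, config.ckpt_path)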
Code example #8
File: train.py  Project: openvinotoolkit/nncf
def run_train(config):
    strategy = get_distribution_strategy(config)

    # Create dataset
    builders = get_dataset_builders(config, strategy.num_replicas_in_sync)

    datasets = [builder.build() for builder in builders]
    train_builder, _ = builders
    train_dataset, calibration_dataset = datasets
    train_dist_dataset = strategy.experimental_distribute_dataset(
        train_dataset)

    # Training parameters
    epochs = config.epochs
    steps_per_epoch = train_builder.steps_per_epoch

    # We use `model_batch_size` to create the input layer of the model
    config.model_batch_size = train_builder.batch_size

    # Create model builder
    model_builder = get_model_builder(config)

    # Register additional parameters in the NNCFConfig for initializing
    # the compressed model during building
    nncf_config = config.nncf_config
    nncf_config = register_default_init_args(
        nncf_config=nncf_config,
        data_loader=calibration_dataset,
        batch_size=train_builder.global_batch_size)

    resume_training = config.ckpt_path is not None

    compression_state = None
    if resume_training:
        compression_state = load_compression_state(config.ckpt_path)

    with TFOriginalModelManager(model_builder.build_model,
                                weights=config.get('weights', None),
                                is_training=True) as model:
        with strategy.scope():
            compression_ctrl, compress_model = create_compressed_model(
                model, nncf_config, compression_state)

            scheduler = build_scheduler(config=config,
                                        steps_per_epoch=steps_per_epoch)

            optimizer = build_optimizer(config=config, scheduler=scheduler)

            loss_fn = model_builder.build_loss_fn(compress_model,
                                                  compression_ctrl.loss)

            variables = get_variables(compress_model)
            checkpoint = tf.train.Checkpoint(
                variables=variables,
                optimizer=optimizer,
                compression_state=TFCompressionState(compression_ctrl),
                step=tf.Variable(0))
            checkpoint_manager = tf.train.CheckpointManager(
                checkpoint, config.checkpoint_save_dir, max_to_keep=None)

            initial_epoch = initial_step = 0
            if resume_training:
                initial_epoch, initial_step = resume_from_checkpoint(
                    checkpoint_manager, config.ckpt_path, steps_per_epoch)

    statistics = compression_ctrl.statistics()
    logger.info(statistics.to_str())

    train_step = create_train_step_fn(strategy, compress_model, loss_fn,
                                      optimizer)

    train(train_step, train_dist_dataset, initial_epoch, initial_step, epochs,
          steps_per_epoch, checkpoint_manager, compression_ctrl,
          config.log_dir, optimizer, config.print_freq)

    logger.info('Compression statistics')
    statistics = compression_ctrl.statistics()
    logger.info(statistics.to_str())
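`resume_from_checkpoint` presumably restores the latest checkpoint managed by `checkpoint_manager` and recovers the epoch/step counters from the tracked `step` variable. A minimal sketch under that assumption:

def resume_from_checkpoint(checkpoint_manager, ckpt_path, steps_per_epoch):
    # Restore the newest checkpoint from `ckpt_path`, then derive the
    # position to resume from the persisted global step.
    checkpoint_manager.checkpoint.restore(
        tf.train.latest_checkpoint(ckpt_path)).expect_partial()
    initial_step = int(checkpoint_manager.checkpoint.step)
    initial_epoch = initial_step // steps_per_epoch
    return initial_epoch, initial_step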
Code example #9
def run(config):
    strategy = get_distribution_strategy(config)
    if config.metrics_dump is not None:
        write_metrics(0, config.metrics_dump)

    # Create dataset
    train_builder, test_builder = get_dataset_builders(
        config, strategy.num_replicas_in_sync)
    train_dataset = train_builder.build()
    test_dataset = test_builder.build()
    train_dist_dataset = strategy.experimental_distribute_dataset(
        train_dataset)
    test_dist_dataset = strategy.experimental_distribute_dataset(test_dataset)

    # Training parameters
    epochs = config.epochs
    steps_per_epoch = train_builder.steps_per_epoch
    num_test_batches = test_builder.steps_per_epoch

    # Create model builder
    model_builder = get_model_builder(config)

    def model_eval_fn(model):
        test_step = create_test_step_fn(strategy, model,
                                        model_builder.post_processing)
        metric_result = evaluate(test_step, model_builder.eval_metrics(),
                                 test_dist_dataset, num_test_batches,
                                 config.print_freq)
        return metric_result['AP']

    # Register additional parameters in the NNCFConfig for initializing
    # the compressed model during building
    nncf_config = config.nncf_config
    nncf_config = register_default_init_args(
        nncf_config=nncf_config,
        data_loader=train_dataset,
        batch_size=train_builder.global_batch_size)

    resume_training = config.ckpt_path is not None

    compression_state = None
    if resume_training:
        compression_state = load_compression_state(config.ckpt_path)

    with TFOriginalModelManager(model_builder.build_model,
                                weights=config.get('weights', None)) as model:
        with strategy.scope():
            config.nncf_config.register_extra_structs(
                [ModelEvaluationArgs(eval_fn=model_eval_fn)])
            compression_ctrl, compress_model = create_compressed_model(
                model, nncf_config, compression_state)
            scheduler = build_scheduler(config=config,
                                        steps_per_epoch=steps_per_epoch)

            optimizer = build_optimizer(config=config, scheduler=scheduler)

            eval_metric = model_builder.eval_metrics()
            loss_fn = model_builder.build_loss_fn(compress_model,
                                                  compression_ctrl.loss)
            predict_post_process_fn = model_builder.post_processing

            checkpoint = tf.train.Checkpoint(
                model=compress_model,
                optimizer=optimizer,
                compression_state=TFCompressionState(compression_ctrl))
            checkpoint_manager = tf.train.CheckpointManager(
                checkpoint, config.checkpoint_save_dir, max_to_keep=None)

            initial_epoch = initial_step = 0
            if resume_training:
                initial_epoch, initial_step = resume_from_checkpoint(
                    checkpoint_manager, config.ckpt_path, steps_per_epoch)

    train_step = create_train_step_fn(strategy, compress_model, loss_fn,
                                      optimizer)
    test_step = create_test_step_fn(strategy, compress_model,
                                    predict_post_process_fn)

    if 'train' in config.mode:
        if is_accuracy_aware_training(config):
            train_summary_writer = SummaryWriter(config.log_dir, 'train')
            timer = Timer()
            timer.tic()

            def train_epoch_fn(compression_ctrl, model, epoch, **kwargs):
                train_step = create_train_step_fn(strategy, model, loss_fn,
                                                  optimizer)
                train_epoch(train_step, compression_ctrl, epoch, initial_epoch,
                            steps_per_epoch, optimizer, checkpoint_manager,
                            train_dist_dataset, train_summary_writer,
                            initial_step, config.print_freq, timer)

            def validate_fn(model, **kwargs):
                test_step = create_test_step_fn(strategy, model,
                                                predict_post_process_fn)
                metric_result = evaluate(test_step, eval_metric,
                                         test_dist_dataset, num_test_batches,
                                         config.print_freq)
                return metric_result['AP']

            acc_aware_training_loop = create_accuracy_aware_training_loop(
                nncf_config, compression_ctrl)
            compress_model = acc_aware_training_loop.run(
                compress_model,
                train_epoch_fn=train_epoch_fn,
                validate_fn=validate_fn,
                tensorboard_writer=SummaryWriter(config.log_dir,
                                                 'accuracy_aware_training'),
                log_dir=config.log_dir)
        else:
            train(train_step, test_step, eval_metric, train_dist_dataset,
                  test_dist_dataset, initial_epoch, initial_step, epochs,
                  steps_per_epoch, checkpoint_manager, compression_ctrl,
                  config.log_dir, optimizer, num_test_batches,
                  config.print_freq)

    statistics = compression_ctrl.statistics()
    logger.info(statistics.to_str())
    metric_result = evaluate(test_step, eval_metric, test_dist_dataset,
                             num_test_batches, config.print_freq)
    logger.info('Validation metric = {}'.format(metric_result))

    if config.metrics_dump is not None:
        write_metrics(metric_result['AP'], config.metrics_dump)

    if 'export' in config.mode:
        save_path, save_format = get_saving_parameters(config)
        compression_ctrl.export_model(save_path, save_format)
        logger.info("Saved to {}".format(save_path))