def run_train(config):
    strategy = get_distribution_strategy(config)

    # Create dataset
    builders = get_dataset_builders(config, strategy)
    datasets = [builder.build() for builder in builders]
    train_builder, _ = builders
    train_dataset, calibration_dataset = datasets
    train_dist_dataset = strategy.experimental_distribute_dataset(train_dataset)

    # Training parameters
    epochs = config.epochs
    steps_per_epoch = train_builder.steps_per_epoch

    # We use `model_batch_size` to create the input layer for the model
    config.model_batch_size = train_builder.batch_size

    # Create model builder
    model_builder = get_model_builder(config)

    with TFOriginalModelManager(model_builder.build_model,
                                weights=config.get('weights', None),
                                is_training=True) as model:
        with strategy.scope():
            compression_ctrl, compress_model = create_compressed_model(model, config.nncf_config)

            scheduler = build_scheduler(
                config=config,
                epoch_size=train_builder.num_examples,
                batch_size=train_builder.global_batch_size,
                steps=steps_per_epoch)
            optimizer = build_optimizer(
                config=config,
                scheduler=scheduler)

            loss_fn = model_builder.build_loss_fn()

            variables = get_variables(compress_model)
            checkpoint = tf.train.Checkpoint(variables=variables,
                                             optimizer=optimizer,
                                             step=tf.Variable(0))
            checkpoint_manager = tf.train.CheckpointManager(checkpoint,
                                                            config.checkpoint_save_dir,
                                                            max_to_keep=None)

            initial_epoch = initial_step = 0
            if config.ckpt_path:
                initial_epoch, initial_step = resume_from_checkpoint(checkpoint_manager,
                                                                     compression_ctrl,
                                                                     config.ckpt_path,
                                                                     steps_per_epoch)
            else:
                logger.info('Initialization...')
                compression_ctrl.initialize(dataset=calibration_dataset)

    train_step = create_train_step_fn(strategy, compress_model, loss_fn, optimizer)

    logger.info('Training...')
    train(train_step, train_dist_dataset, initial_epoch, initial_step,
          epochs, steps_per_epoch, checkpoint_manager, compression_ctrl,
          config.log_dir, optimizer)

    logger.info('Compression statistics')
    print_statistics(compression_ctrl.statistics())
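# Hedged sketch of what `create_train_step_fn` above might produce: a per-replica
# gradient step wrapped with `strategy.run`, with the per-replica losses reduced
# to one scalar. This is NOT the sample's actual implementation; the batch layout
# (features, labels) and the loss_fn call convention are assumptions.
import tensorflow as tf

def create_train_step_fn(strategy, model, loss_fn, optimizer):
    def train_step_on_replica(inputs):
        features, labels = inputs  # assumed batch layout
        with tf.GradientTape() as tape:
            outputs = model(features, training=True)
            loss = loss_fn(labels, outputs)
        grads = tape.gradient(loss, model.trainable_variables)
        optimizer.apply_gradients(zip(grads, model.trainable_variables))
        return loss

    @tf.function
    def train_step(dist_inputs):
        per_replica_losses = strategy.run(train_step_on_replica, args=(dist_inputs,))
        # Sum the per-replica losses into a single scalar for logging.
        return strategy.reduce(tf.distribute.ReduceOp.SUM, per_replica_losses, axis=None)

    return train_step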
def export(config):
    model_fn, model_params = get_model(
        config.model,
        input_shape=config.get('input_info', {}).get('sample_size', None),
        num_classes=config.get('num_classes', 1000),
        pretrained=config.get('pretrained', False),
        weights=config.get('weights', None))
    model = model_fn(**model_params)

    compression_ctrl, compress_model = create_compressed_model(model, config.nncf_config)

    metrics = [
        tf.keras.metrics.CategoricalAccuracy(name='acc@1'),
        tf.keras.metrics.TopKCategoricalAccuracy(k=5, name='acc@5')
    ]
    loss_obj = tf.keras.losses.CategoricalCrossentropy(label_smoothing=0.1)

    compress_model.compile(loss=loss_obj, metrics=metrics)
    compress_model.summary()

    if config.ckpt_path is not None:
        load_checkpoint(model=compress_model, ckpt_path=config.ckpt_path)

    save_path, save_format = get_saving_parameters(config)
    compression_ctrl.export_model(save_path, save_format)
    logger.info('Saved to {}'.format(save_path))
def export(config):
    model_builder = get_model_builder(config)
    model = model_builder.build_model(weights=config.get('weights', None))

    compression_ctrl, compress_model = create_compressed_model(model, config.nncf_config)

    if config.ckpt_path:
        checkpoint = tf.train.Checkpoint(model=compress_model)
        load_checkpoint(checkpoint, config.ckpt_path)

    save_path, save_format = get_saving_parameters(config)
    compression_ctrl.export_model(save_path, save_format)
    logger.info('Saved to {}'.format(save_path))
def export(config):
    model_builder = get_model_builder(config)

    with TFOriginalModelManager(model_builder.build_model,
                                weights=config.get('weights', None),
                                is_training=False) as model:
        compression_ctrl, compress_model = create_compressed_model(model, config.nncf_config)

    if config.ckpt_path:
        variables = get_variables(compress_model)
        checkpoint = tf.train.Checkpoint(variables=variables)
        load_checkpoint(checkpoint, config.ckpt_path)

    save_path, save_format = get_saving_parameters(config)
    compression_ctrl.export_model(save_path, save_format)
    logger.info('Saved to {}'.format(save_path))
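# Hedged sketch of `get_saving_parameters`, used by the export paths above.
# Assumptions: the config carries `to_frozen_graph` / `to_saved_model` / `to_h5`
# destinations, and the format strings match what `compression_ctrl.export_model`
# accepts; both the attribute names and the fallback path are illustrative.
import os

def get_saving_parameters(config):
    if config.to_frozen_graph is not None:
        return config.to_frozen_graph, 'frozen_graph'
    if config.to_saved_model is not None:
        return config.to_saved_model, 'tf'
    if config.to_h5 is not None:
        return config.to_h5, 'h5'
    # Default: dump a frozen graph next to the logs.
    save_path = os.path.join(config.log_dir, 'model.pb')
    return save_path, 'frozen_graph'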
def checkpoint_saver(config):
    """
    Load a checkpoint and re-save it without the optimizer state
    (this reduces the checkpoint's memory footprint).
    """
    model_builder = get_model_builder(config)
    model = model_builder.build_model()
    _, compress_model = create_compressed_model(model, config.nncf_config)

    checkpoint = tf.train.Checkpoint(model=compress_model)
    load_checkpoint(checkpoint, config.ckpt_path)

    checkpoint_manager = tf.train.CheckpointManager(checkpoint,
                                                    config.checkpoint_save_dir,
                                                    max_to_keep=None)
    save_path = checkpoint_manager.save()
    logger.info('Saved checkpoint: {}'.format(save_path))
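# Illustrative follow-up (not part of the sample): tf.train.list_variables can be
# used to confirm that the optimizer slot variables are gone from the re-saved
# checkpoint; the helper name below is hypothetical.
import tensorflow as tf

def print_checkpoint_variables(ckpt_path):
    # Prints every variable name and shape stored in the checkpoint file.
    for name, shape in tf.train.list_variables(ckpt_path):
        print(name, shape)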
def run_evaluation(config, eval_timeout=None):
    """Runs evaluation on the checkpoint save directory"""
    strategy = get_distribution_strategy(config)
    if config.metrics_dump is not None:
        write_metrics(0, config.metrics_dump)

    dataset_builder = get_dataset_builders(config, strategy.num_replicas_in_sync)
    dataset = dataset_builder.build()
    num_batches = dataset_builder.steps_per_epoch
    test_dist_dataset = strategy.experimental_distribute_dataset(dataset)

    # We use `model_batch_size` to create the input layer for the model
    config.model_batch_size = dataset_builder.batch_size

    model_builder = get_model_builder(config)

    with TFOriginalModelManager(model_builder.build_model,
                                weights=config.get('weights', None),
                                is_training=False) as model:
        with strategy.scope():
            compression_ctrl, compress_model = create_compressed_model(model, config.nncf_config)
            variables = get_variables(compress_model)
            checkpoint = tf.train.Checkpoint(variables=variables, step=tf.Variable(0))
            eval_metric = model_builder.eval_metrics()
            predict_post_process_fn = model_builder.post_processing

    test_step = create_test_step_fn(strategy, compress_model, predict_post_process_fn)

    if 'test' in config.mode:
        if config.ckpt_path:
            load_checkpoint(checkpoint, config.ckpt_path)

        statistics = compression_ctrl.statistics()
        print_statistics(statistics)
        metric_result = evaluate(test_step, eval_metric, test_dist_dataset,
                                 num_batches, config.print_freq)
        eval_metric.reset_states()
        logger.info('Test metric = {}'.format(metric_result))

        if 'export' in config.mode:
            save_path, save_format = get_saving_parameters(config)
            compression_ctrl.export_model(save_path, save_format)
            logger.info('Saved to {}'.format(save_path))
    elif 'train' in config.mode:
        validation_summary_writer = SummaryWriter(config.log_dir, 'validation')
        checkpoint_dir = config.checkpoint_save_dir
        # Fall back to the config value only when no timeout was passed in.
        if eval_timeout is None:
            eval_timeout = config.eval_timeout

        for checkpoint_path in tf.train.checkpoints_iterator(checkpoint_dir,
                                                             timeout=eval_timeout):
            status = checkpoint.restore(checkpoint_path)
            status.expect_partial()
            logger.info('Checkpoint file {} found and restoring from checkpoint'.format(checkpoint_path))
            logger.info('Checkpoint step: {}'.format(checkpoint.step.numpy()))

            metric_result = evaluate(test_step, eval_metric, test_dist_dataset,
                                     num_batches, config.print_freq)
            current_step = checkpoint.step.numpy()
            validation_summary_writer(metrics=metric_result, step=current_step)

            eval_metric.reset_states()
            logger.info('Validation metric = {}'.format(metric_result))

        validation_summary_writer.close()

    if config.metrics_dump is not None:
        write_metrics(metric_result['AP'], config.metrics_dump)
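# Hedged sketch of `create_test_step_fn` (illustrative, not the sample's code):
# it is assumed each batch is a (features, labels) pair and that the evaluate
# loop gathers the per-replica values returned by `strategy.run`.
import tensorflow as tf

def create_test_step_fn(strategy, model, predict_post_process_fn):
    def test_step_on_replica(inputs):
        features, labels = inputs  # assumed batch layout
        outputs = model(features, training=False)
        return labels, predict_post_process_fn(outputs)

    @tf.function
    def test_step(dist_inputs):
        # Returns per-replica (labels, predictions); aggregation happens upstream.
        return strategy.run(test_step_on_replica, args=(dist_inputs,))

    return test_step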
def run(config):
    strategy = get_distribution_strategy(config)

    model_fn, model_params = get_model(
        config.model,
        input_shape=config.get('input_info', {}).get('sample_size', None),
        num_classes=config.get('num_classes', 1000),
        pretrained=config.get('pretrained', False),
        weights=config.get('weights', None))

    builders = get_dataset_builders(config, strategy)
    datasets = [builder.build() for builder in builders]
    train_builder, validation_builder = builders
    train_dataset, validation_dataset = datasets

    train_epochs = config.epochs
    train_steps = train_builder.steps_per_epoch
    validation_steps = validation_builder.steps_per_epoch

    with TFOriginalModelManager(model_fn, **model_params) as model:
        with strategy.scope():
            compression_ctrl, compress_model = create_compressed_model(model, config.nncf_config)
            compression_callbacks = create_compression_callbacks(compression_ctrl,
                                                                 log_dir=config.log_dir)

            scheduler = build_scheduler(
                config=config,
                epoch_size=train_builder.num_examples,
                batch_size=train_builder.global_batch_size,
                steps=train_steps)
            optimizer = build_optimizer(config=config, scheduler=scheduler)

            metrics = [
                tf.keras.metrics.CategoricalAccuracy(name='acc@1'),
                tf.keras.metrics.TopKCategoricalAccuracy(k=5, name='acc@5')
            ]
            loss_obj = tf.keras.losses.CategoricalCrossentropy(label_smoothing=0.1)

            compress_model.compile(optimizer=optimizer,
                                   loss=loss_obj,
                                   metrics=metrics,
                                   run_eagerly=config.get('eager_mode', False))
            compress_model.summary()

            initial_epoch = 0
            if config.ckpt_path is not None:
                initial_epoch = resume_from_checkpoint(
                    model=compress_model,
                    compression_ctrl=compression_ctrl,
                    ckpt_path=config.ckpt_path,
                    steps_per_epoch=train_steps)
            else:
                logger.info('Initialization...')
                compression_ctrl.initialize(dataset=train_dataset)

    callbacks = get_callbacks(model_checkpoint=True,
                              include_tensorboard=True,
                              track_lr=True,
                              write_model_weights=False,
                              initial_step=initial_epoch * train_steps,
                              model_dir=config.log_dir,
                              ckpt_dir=config.checkpoint_save_dir)
    callbacks.extend(compression_callbacks)

    validation_kwargs = {
        'validation_data': validation_dataset,
        'validation_steps': validation_steps,
        'validation_freq': 1,
    }

    if 'train' in config.mode:
        logger.info('Training...')
        compress_model.fit(train_dataset,
                           epochs=train_epochs,
                           steps_per_epoch=train_steps,
                           initial_epoch=initial_epoch,
                           callbacks=callbacks,
                           **validation_kwargs)

    logger.info('Evaluation...')
    print_statistics(compression_ctrl.statistics())
    compress_model.evaluate(validation_dataset,
                            steps=validation_steps,
                            verbose=1)

    if 'export' in config.mode:
        save_path, save_format = get_saving_parameters(config)
        compression_ctrl.export_model(save_path, save_format)
        logger.info('Saved to {}'.format(save_path))
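# Hedged sketch of what `build_scheduler` / `build_optimizer` might return for a
# simple step-decay setup. The real samples derive everything from the config;
# every constant below (decay epochs, learning rates, momentum) is an assumption,
# and `epoch_size` / `batch_size` are unused in this simplified version.
import tensorflow as tf

def build_scheduler(config, epoch_size, batch_size, steps):
    # Decay the learning rate after epochs 30 and 60, expressed in steps.
    boundaries = [30 * steps, 60 * steps]
    values = [0.1, 0.01, 0.001]
    return tf.keras.optimizers.schedules.PiecewiseConstantDecay(boundaries, values)

def build_optimizer(config, scheduler):
    return tf.keras.optimizers.SGD(learning_rate=scheduler, momentum=0.9)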
def create_compressed_model_and_algo_for_test(model, config):
    assert isinstance(config, NNCFConfig)
    # Reset the global Keras state so graphs left over from previous tests
    # do not leak into this one.
    tf.keras.backend.clear_session()
    algo, model = create_compressed_model(model, config)
    return model, algo
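# Example usage in a test. The tiny Keras model and the config dict are
# illustrative; the NNCFConfig schema shown is the standard quantization one.
import tensorflow as tf
from nncf import NNCFConfig

model = tf.keras.Sequential([tf.keras.layers.Dense(10, input_shape=(4,))])
config = NNCFConfig.from_dict({
    'input_info': {'sample_size': [1, 4]},
    'compression': {'algorithm': 'quantization'}
})
compressed_model, algo = create_compressed_model_and_algo_for_test(model, config)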
def run(config):
    strategy = get_distribution_strategy(config)
    if config.metrics_dump is not None:
        write_metrics(0, config.metrics_dump)

    # Create dataset
    builders = get_dataset_builders(config, strategy.num_replicas_in_sync)
    datasets = [builder.build() for builder in builders]
    train_builder, test_builder = builders
    train_dataset, test_dataset = datasets
    train_dist_dataset = strategy.experimental_distribute_dataset(train_dataset)
    test_dist_dataset = strategy.experimental_distribute_dataset(test_dataset)

    # Training parameters
    epochs = config.epochs
    steps_per_epoch = train_builder.steps_per_epoch
    num_test_batches = test_builder.steps_per_epoch

    # Create model builder
    model_builder = get_model_builder(config)

    with TFOriginalModelManager(model_builder.build_model,
                                weights=config.get('weights', None)) as model:
        with strategy.scope():
            compression_ctrl, compress_model = create_compressed_model(model, config.nncf_config)

            scheduler = build_scheduler(config=config, steps_per_epoch=steps_per_epoch)
            optimizer = build_optimizer(config=config, scheduler=scheduler)

            eval_metric = model_builder.eval_metrics()
            loss_fn = model_builder.build_loss_fn(compress_model, compression_ctrl.loss)
            predict_post_process_fn = model_builder.post_processing

            checkpoint = tf.train.Checkpoint(model=compress_model, optimizer=optimizer)
            checkpoint_manager = tf.train.CheckpointManager(checkpoint,
                                                            config.checkpoint_save_dir,
                                                            max_to_keep=None)

            initial_epoch = initial_step = 0
            if config.ckpt_path:
                initial_epoch, initial_step = resume_from_checkpoint(
                    checkpoint_manager, compression_ctrl, config.ckpt_path,
                    steps_per_epoch, config)
            else:
                logger.info('Initialization...')
                compression_ctrl.initialize(dataset=train_dataset)

    train_step = create_train_step_fn(strategy, compress_model, loss_fn, optimizer)
    test_step = create_test_step_fn(strategy, compress_model, predict_post_process_fn)

    if 'train' in config.mode:
        train(train_step, test_step, eval_metric, train_dist_dataset,
              test_dist_dataset, initial_epoch, initial_step, epochs,
              steps_per_epoch, checkpoint_manager, compression_ctrl,
              config.log_dir, optimizer, num_test_batches, config.print_freq)

    print_statistics(compression_ctrl.statistics())

    metric_result = evaluate(test_step, eval_metric, test_dist_dataset,
                             num_test_batches, config.print_freq)
    logger.info('Validation metric = {}'.format(metric_result))

    if config.metrics_dump is not None:
        write_metrics(metric_result['AP'], config.metrics_dump)

    if 'export' in config.mode:
        save_path, save_format = get_saving_parameters(config)
        compression_ctrl.export_model(save_path, save_format)
        logger.info('Saved to {}'.format(save_path))