Example #1
def test_memory_cost_metric(memory_cost_metric_test_struct):
    config = get_basic_quantization_config()
    config['compression']['initializer'].update(memory_cost_metric_test_struct.initializers)
    config['compression']["weights"] = memory_cost_metric_test_struct.weights
    config['compression']["ignored_scopes"] = memory_cost_metric_test_struct.ignored_scopes
    config['target_device'] = memory_cost_metric_test_struct.target_device
    ctrl, compressed_model = create_compressed_model(test_models.AlexNet(), config)
    qmetric = MemoryCostMetric(compressed_model, ctrl.weight_quantizers, ctrl.non_weight_quantizers)
    qmetric.collect()

    assert qmetric.stat == approx(memory_cost_metric_test_struct.table, rel=1e-2)
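
These metric tests compare whole statistics dictionaries with pytest's approx. As a quick aside, a minimal standalone illustration of the rel=1e-2 tolerance used throughout this page (the numbers are made up; pytest is the only dependency):

from pytest import approx

# rel=1e-2 accepts up to 1% relative deviation per compared value,
# and approx also works element-wise on dictionaries of numbers.
assert 100.0 == approx(100.9, rel=1e-2)
assert {'accuracy': 50.0} == approx({'accuracy': 50.4}, rel=1e-2)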
Example #2
def test_share_edges_quantized_data_path(share_edges_quantized_data_path_test_struct):
    config = get_basic_quantization_config()
    config['compression']["ignored_scopes"] = share_edges_quantized_data_path_test_struct.ignored_scopes
    config['input_info']['sample_size'] = [2, 3, 299, 299]
    config['quantizer_setup_type'] = share_edges_quantized_data_path_test_struct.quantizer_setup_type

    _, compressed_model = create_compressed_model(test_models.Inception3(aux_logits=True, transform_input=True), config)
    qmetric = ShareEdgesQuantizedDataPath(compressed_model)
    qmetric.collect()
    # pylint: disable=protected-access
    qmetric_stat = qmetric._get_copy_statistics()
    assert qmetric_stat == approx(share_edges_quantized_data_path_test_struct.table, rel=1e-2)
Example #3
def create_model(config):
    input_info_list = create_input_infos(config)
    image_size = input_info_list[0].shape[-1]
    ssd_net = build_ssd(config.model, config.ssd_params, image_size,
                        config.num_classes, config)
    compression_ctrl, ssd_net = create_compressed_model(ssd_net, config)
    weights = config.get('weights')
    if weights:
        sd = torch.load(weights, map_location='cpu')
        load_state(ssd_net, sd)
    ssd_net.train()
    model, _ = prepare_model_for_execution(ssd_net, config)
    return compression_ctrl, model
Example #4
def export(config):
    model_builder = retinanet_model.RetinanetModel(config)
    model = model_builder.build_model(
        pretrained=config.get('pretrained', True),
        weights=config.get('weights', None),
        mode=ModeKeys.PREDICT_WITH_GT)

    compression_ctrl, compress_model = create_compressed_model(model, config)

    if config.ckpt_path:
        checkpoint = tf.train.Checkpoint(model=compress_model)
        load_checkpoint(checkpoint, config.ckpt_path)

    save_path, save_format = get_saving_parameters(config)
    compression_ctrl.export_model(save_path, save_format)
    logger.info("Saved to {}".format(save_path))
Example #5
File: main.py, Project: zbrnwpu/nncf
def create_model(config: SampleConfig, resuming_model_sd: dict = None):
    input_info_list = create_input_infos(config.nncf_config)
    image_size = input_info_list[0].shape[-1]
    ssd_net = build_ssd(config.model, config.ssd_params, image_size, config.num_classes, config)
    weights = config.get('weights')
    if weights:
        sd = torch.load(weights, map_location='cpu')
        load_state(ssd_net, sd)

    ssd_net.to(config.device)

    compression_ctrl, compressed_model = create_compressed_model(ssd_net, config.nncf_config, resuming_model_sd)
    compressed_model, _ = prepare_model_for_execution(compressed_model, config)

    compressed_model.train()
    return compression_ctrl, compressed_model
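
Note the ordering relative to Example #3: here the pretrained weights are loaded into the plain ssd_net before create_compressed_model wraps it, so the checkpoint keys match the unwrapped module directly. A condensed sketch of that pattern, reusing the names from the example above (nothing new beyond the illustrative comments):

weights = config.get('weights')
if weights:
    # load into the *unwrapped* model first, while the state-dict keys still match
    load_state(ssd_net, torch.load(weights, map_location='cpu'))
# then wrap; the compressed model starts from the already-initialized weights
compression_ctrl, compressed_model = create_compressed_model(ssd_net, config.nncf_config)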
Example #6
def test_network_quantization_share_metric(network_quantization_share_metric_test_struct):
    config = get_basic_quantization_config()
    config['compression']['initializer'].update(network_quantization_share_metric_test_struct.initializers)
    config['compression']["activations"] = network_quantization_share_metric_test_struct.activations
    config['compression']["weights"] = network_quantization_share_metric_test_struct.weights
    config['compression']["ignored_scopes"] = network_quantization_share_metric_test_struct.ignored_scopes
    config['quantizer_setup_type'] = network_quantization_share_metric_test_struct.quantizer_setup_type
    config['target_device'] = network_quantization_share_metric_test_struct.target_device
    cntrl, compressed_model = create_compressed_model(test_models.AlexNet(), config)
    quantizer_setup_type = (QuantizerSetupType.PATTERN_BASED
                            if config['quantizer_setup_type'] == 'pattern_based'
                            else QuantizerSetupType.PROPAGATION_BASED)
    qmetric = NQSM(compressed_model, cntrl.weight_quantizers,
                   cntrl.non_weight_quantizers, quantizer_setup_type)
    qmetric.collect()
    # pylint: disable=protected-access
    qmetric_stat = qmetric._get_copy_statistics()
    for key, value in network_quantization_share_metric_test_struct.table.items():
        assert qmetric_stat[key] == approx(value, rel=1e-2)
Example #7
def test_network_quantization_share_metric(network_quantization_share_metric_test_struct):
    config = get_basic_quantization_config()
    config['compression']['initializer'].update(network_quantization_share_metric_test_struct.initializers)
    config['compression']['activations'] = network_quantization_share_metric_test_struct.activations
    config['compression']['weights'] = network_quantization_share_metric_test_struct.weights
    config['compression']['ignored_scopes'] = network_quantization_share_metric_test_struct.ignored_scopes
    config['quantizer_setup_type'] = network_quantization_share_metric_test_struct.quantizer_setup_type
    config['target_device'] = network_quantization_share_metric_test_struct.target_device
    ctrl, _ = create_compressed_model(test_models.AlexNet(), config)
    qmetric = ctrl.non_stable_metric_collectors[0]
    qmetric.collect()
    # pylint: disable=protected-access
    qmetric_stat = qmetric._get_copy_statistics()
    for key, value in network_quantization_share_metric_test_struct.table.items():
        assert qmetric_stat[key] == approx(value, rel=1e-2)
Example #8
def export(config):
    raise NotImplementedError('Experimental code: please use the combined train + export mode, '
                              'not export-only mode')
    # NOTE: the code below is unreachable while the guard above is in place.
    model = tf.keras.Sequential(
        hub.KerasLayer("https://tfhub.dev/google/imagenet/mobilenet_v2_100_224/classification/4",
                       trainable=True))
    model.build([None, 224, 224, 3])

    compression_ctrl, compress_model = create_compressed_model(model, config)

    metrics = get_metrics()
    loss_obj = tf.keras.losses.CategoricalCrossentropy(label_smoothing=0.1)

    compress_model.compile(loss=loss_obj,
                           metrics=metrics)
    compress_model.summary()

    if config.ckpt_path is not None:
        load_checkpoint(model=compress_model,
                        ckpt_path=config.ckpt_path)

    save_path, save_format = get_saving_parameters(config)
    compression_ctrl.export_model(save_path, save_format)
    logger.info("Saved to {}".format(save_path))
Example #9
def train_test_export(config):
    strategy = get_distribution_strategy(config)
    strategy_scope = get_strategy_scope(strategy)

    # Training parameters
    NUM_EXAMPLES_TRAIN = 118287
    NUM_EXAMPLES_EVAL = 5000
    epochs = config.epochs
    batch_size = config.batch_size  # per replica batch size
    num_devices = strategy.num_replicas_in_sync if strategy else 1
    global_batch_size = batch_size * num_devices
    steps_per_epoch = NUM_EXAMPLES_TRAIN // global_batch_size

    # Create Dataset
    train_input_fn = input_reader.InputFn(
        file_pattern=config.train_file_pattern,
        params=config,
        mode=input_reader.ModeKeys.TRAIN,
        batch_size=global_batch_size)

    eval_input_fn = input_reader.InputFn(
        file_pattern=config.eval_file_pattern,
        params=config,
        mode=input_reader.ModeKeys.PREDICT_WITH_GT,
        batch_size=global_batch_size,
        num_examples=NUM_EXAMPLES_EVAL)

    train_dist_dataset = strategy.experimental_distribute_dataset(
        train_input_fn())
    test_dist_dataset = strategy.experimental_distribute_dataset(
        eval_input_fn())

    # Create model builder
    mode = ModeKeys.TRAIN if 'train' in config.mode else ModeKeys.PREDICT_WITH_GT
    model_builder = retinanet_model.RetinanetModel(config)

    with strategy_scope:
        model = model_builder.build_model(
            pretrained=config.get('pretrained', True),
            weights=config.get('weights', None),
            mode=mode)

        compression_ctrl, compress_model = create_compressed_model(model, config)
        # compression_callbacks = create_compression_callbacks(compression_ctrl, config.log_dir)

        scheduler = build_scheduler(config=config,
                                    epoch_size=NUM_EXAMPLES_TRAIN,
                                    batch_size=global_batch_size,
                                    steps=steps_per_epoch)

        optimizer = build_optimizer(config=config, scheduler=scheduler)

        eval_metric = model_builder.eval_metrics()
        loss_fn = model_builder.build_loss_fn()
        predict_post_process_fn = model_builder.post_processing

        checkpoint = tf.train.Checkpoint(model=compress_model,
                                         optimizer=optimizer)
        checkpoint_manager = tf.train.CheckpointManager(
            checkpoint, config.checkpoint_save_dir, max_to_keep=None)

        logger.info('initialization...')
        compression_ctrl.initialize(dataset=train_input_fn())

        initial_epoch = 0
        if config.ckpt_path:
            initial_epoch = resume_from_checkpoint(checkpoint_manager,
                                                   config.ckpt_path,
                                                   steps_per_epoch)

    train_step = create_train_step_fn(strategy, compress_model, loss_fn,
                                      optimizer)
    test_step = create_test_step_fn(strategy, compress_model,
                                    predict_post_process_fn)

    if 'train' in config.mode:
        logger.info('Training...')
        train(train_step, test_step, eval_metric, train_dist_dataset,
              test_dist_dataset, initial_epoch, epochs, steps_per_epoch,
              checkpoint_manager, compression_ctrl, config.log_dir, optimizer)

    logger.info('Evaluation...')
    metric_result = evaluate(test_step, eval_metric, test_dist_dataset)
    logger.info('Validation metric = {}'.format(metric_result))

    if 'export' in config.mode:
        save_path, save_format = get_saving_parameters(config)
        compression_ctrl.export_model(save_path, save_format)
        logger.info("Saved to {}".format(save_path))
Example #10
def main_worker(current_gpu, config):
    config.current_gpu = current_gpu
    config.distributed = config.execution_mode in (
        ExecutionMode.DISTRIBUTED, ExecutionMode.MULTIPROCESSING_DISTRIBUTED)
    if config.distributed:
        configure_distributed(config)

    if is_main_process():
        configure_logging(logger, config)
        print_args(config)

    logger.info(config)

    config.device = get_device(config)
    dataset = get_dataset(config.dataset)
    color_encoding = dataset.color_encoding
    num_classes = len(color_encoding)

    if config.metrics_dump is not None:
        write_metrics(0, config.metrics_dump)

    weights = config.get('weights')
    model = load_model(config.model,
                       pretrained=config.get('pretrained', True) if weights is None else False,
                       num_classes=num_classes,
                       model_params=config.get('model_params', {}))
    compression_ctrl, model = create_compressed_model(model, config)
    if weights:
        sd = torch.load(weights, map_location='cpu')
        load_state(model, sd)

    model, model_without_dp = prepare_model_for_execution(model, config)

    if config.distributed:
        compression_ctrl.distributed()

    resuming_checkpoint = config.resuming_checkpoint

    if resuming_checkpoint is not None:
        if not config.pretrained:
            # Load the previously saved model state
            model, _, _, _, _ = \
                load_checkpoint(model, resuming_checkpoint, config.device,
                                compression_scheduler=compression_ctrl.scheduler)

    if config.to_onnx is not None:
        compression_ctrl.export_model(config.to_onnx)
        logger.info("Saved to {}".format(config.to_onnx))
        return

    if config.mode.lower() == 'test':
        logger.info(model)
        model_parameters = filter(lambda p: p.requires_grad,
                                  model.parameters())
        params = sum([np.prod(p.size()) for p in model_parameters])
        logger.info("Trainable argument count:{params}".format(params=params))

        model = model.to(config.device)
        loaders, w_class = load_dataset(dataset, config)
        _, val_loader = loaders
        test(model, val_loader, w_class, color_encoding, config)
        print_statistics(compression_ctrl.statistics())
    elif config.mode.lower() == 'train':
        loaders, w_class = load_dataset(dataset, config)
        train_loader, val_loader = loaders
        if not resuming_checkpoint:
            compression_ctrl.initialize(train_loader)
        train(model, model_without_dp, compression_ctrl, train_loader,
              val_loader, w_class, color_encoding, config)
    else:
        # Should never happen...but just in case it does
        raise RuntimeError(
            "\"{0}\" is not a valid choice for execution mode.".format(
                config.mode))
Example #11
def process(rank, args, port):
    # init multiprocessing
    if rank < 0:
        args.device = torch.device("cpu" if args.n_gpu < 1 else "cuda")
    else:
        # create default process group
        os.environ['MASTER_ADDR'] = 'localhost'
        os.environ['MASTER_PORT'] = str(port)
        torch.distributed.init_process_group("nccl",
                                             rank=rank,
                                             world_size=args.n_gpu)
        args.device = torch.device("cuda:{}".format(rank))
        torch.cuda.set_device(rank)

    if rank > 0:
        # wait while process 0 loads the models
        torch.distributed.barrier()

    printlog("rank", rank, "load tokenizer", args.model_student)
    tokenizer = BertTokenizer.from_pretrained(args.model_student)

    printlog("rank", rank, "load model", args.model_student)
    config = AutoConfig.from_pretrained(args.model_student)
    if config.architectures and 'BertBasedClassPacked' in config.architectures:
        model = BertPacked(BertModelEMB).from_pretrained(
            args.model_student).to(args.device)
    else:
        model = BertModelEMB.from_pretrained(args.model_student).to(
            args.device)

    if args.supervision_weight > 0:
        model_t = BertModelEMB.from_pretrained(args.model_teacher).to(
            args.device)
    else:
        model_t = None

    if rank == 0:
        # wait while the other processes load the models
        torch.distributed.barrier()

    #create train and evaluate datasets
    train_dataset_qc = create_squad_qcemb_dataset(rank, args.device,
                                                  args.squad_train_data,
                                                  tokenizer,
                                                  args.max_seq_length_q,
                                                  args.max_seq_length_c)

    test_dataset_qc = create_squad_qcemb_dataset(rank, args.device,
                                                 args.squad_dev_data,
                                                 tokenizer,
                                                 args.max_seq_length_q,
                                                 args.max_seq_length_c)

    if rank >= 0:
        # sync once the data is loaded
        torch.distributed.barrier()

    model_controller = None
    if QUANTIZATION:

        if hasattr(model, 'merge_'):
            # if the model is packed, merge some linear transformations before quantization
            model.merge_()

        if rank in [0, -1]:
            # evaluate before quantization
            model.eval()
            result = evaluate(args, model, test_dataset_qc)
            for n, v in result.items():
                logger.info("original {} - {}".format(n, v))
        if rank >= 0:
            torch.distributed.barrier()

        nncf_config = nncf.NNCFConfig.from_json(args.nncf_config)

        class SquadInitializingDataloader(
                nncf.initialization.InitializingDataLoader):
            def get_inputs(self, batch):
                return [], get_inputs(batch, args.device)

        train_dataloader = DataLoader(train_dataset_qc.c_dataset,
                                      sampler=RandomSampler(
                                          train_dataset_qc.c_dataset),
                                      batch_size=args.per_gpu_train_batch_size)

        initializing_data_loader = SquadInitializingDataloader(
            train_dataloader)
        init_range = nncf.initialization.QuantizationRangeInitArgs(
            initializing_data_loader)
        nncf_config.register_extra_structs([init_range])
        model_controller, model = nncf.create_compressed_model(
            model, nncf_config, dump_graphs=True)
        if rank > -1:
            model_controller.distributed()
            utils.sync_models(rank, model)

        if rank in [-1, 0]:
            # evaluate the freshly initialized int8 model
            model.eval()
            result = evaluate(args, model, test_dataset_qc)
            for n, v in result.items():
                logger.info("int8 {} - {}".format(n, v))

        if rank > -1:
            # sync after quantization
            torch.distributed.barrier()

        # tune the FakeQuantize (FQ) parameters only
        train(rank,
              args,
              model,
              model_t,
              train_dataset_qc,
              test_dataset_qc,
              fq_tune_only=True,
              model_controller=model_controller)

    # tune the whole quantized model
    train(rank,
          args,
          model,
          model_t,
          train_dataset_qc,
          test_dataset_qc,
          fq_tune_only=False,
          model_controller=model_controller)

    if rank in [-1, 0]:
        # save and evaluate the result
        os.makedirs(args.output_dir, exist_ok=True)
        model.save_pretrained(args.output_dir)
        tokenizer.save_pretrained(args.output_dir)

        model.eval()

        # build a sample input to pass for ONNX generation
        with torch.no_grad():
            torch.onnx.export(model,
                              tuple(
                                  torch.zeros((1, args.max_seq_length_c),
                                              dtype=torch.long,
                                              device=args.device)
                                  for _ in range(4)),
                              os.path.join(args.output_dir, "model.onnx"),
                              verbose=False,
                              enable_onnx_checker=False,
                              opset_version=10,
                              input_names=[
                                  'input_ids', 'attention_mask',
                                  'token_type_ids', 'position_ids'
                              ],
                              output_names=['embedding'])

        # Evaluate final model
        result = evaluate(args, model, test_dataset_qc)
        for n, v in result.items():
            logger.info("{} - {}".format(n, v))
        logger.info("checkpoint final result {}".format(result))
Example #12
def create_compressed_model_and_algo_for_test(model, config):
    assert isinstance(config, Config)
    tf.keras.backend.clear_session()
    algo, model = create_compressed_model(model, config)
    return model, algo
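
A hypothetical call site for this helper, with a toy Keras model (the layer choices below are arbitrary, and config must be the repo's own Config object, populated elsewhere):

import tensorflow as tf

toy_model = tf.keras.Sequential([
    tf.keras.layers.Input(shape=(32, 32, 3)),
    tf.keras.layers.Conv2D(8, 3, activation='relu'),
    tf.keras.layers.GlobalAveragePooling2D(),
    tf.keras.layers.Dense(10),
])
# Note the swapped return order: NNCF itself returns (ctrl, model),
# while the helper hands back (model, algo).
model, algo = create_compressed_model_and_algo_for_test(toy_model, config)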
Example #13
def train_test_export(config):
    strategy = get_distribution_strategy(config)
    strategy_scope = get_strategy_scope(strategy)

    builders = get_dataset_builders(config, strategy)
    datasets = [builder.build() for builder in builders]

    train_builder, validation_builder = builders
    train_dataset, validation_dataset = datasets

    train_epochs = config.epochs
    train_steps = train_builder.num_steps
    validation_steps = validation_builder.num_steps

    if config.model_type == ModelType.KerasLayer:
        args = get_KerasLayer_model()
    else:
        args = None

    with strategy_scope:
        from op_insertion import NNCFWrapperCustom
        if not args:
            args = get_model(config.model_type)

        model = tf.keras.Sequential([
            tf.keras.layers.Input(shape=(224, 224, 3)),
            NNCFWrapperCustom(*args)
        ])
        if SAVE_MODEL_WORKAROUND:
            path = '/tmp/model.pb'
            model.save(path, save_format='tf')
            model = tf.keras.models.load_model(path)

        compression_ctrl, compress_model = create_compressed_model(model, config)
        compression_callbacks = create_compression_callbacks(compression_ctrl, config.log_dir)

        scheduler = build_scheduler(
            config=config,
            epoch_size=train_builder.num_examples,
            batch_size=train_builder.global_batch_size,
            steps=train_steps)
        config['optimizer'] = {'type': 'sgd'}
        optimizer = build_optimizer(
            config=config,
            scheduler=scheduler)

        metrics = get_metrics()
        loss_obj = get_loss()

        compress_model.compile(optimizer=optimizer,
                               loss=loss_obj,
                               metrics=metrics,
                               run_eagerly=config.get('eager_mode', False))

        compress_model.summary()

        logger.info('initialization...')
        compression_ctrl.initialize(dataset=train_dataset)

        initial_epoch = 0
        if config.ckpt_path is not None:
            initial_epoch = resume_from_checkpoint(model=compress_model,
                                                   ckpt_path=config.ckpt_path,
                                                   train_steps=train_steps)

    callbacks = get_callbacks(
        model_checkpoint=True,
        include_tensorboard=True,
        time_history=True,
        track_lr=True,
        write_model_weights=False,
        initial_step=initial_epoch * train_steps,
        batch_size=train_builder.global_batch_size,
        log_steps=100,
        model_dir=config.log_dir)

    callbacks.extend(compression_callbacks)

    validation_kwargs = {
        'validation_data': validation_dataset,
        'validation_steps': validation_steps,
        'validation_freq': 1,
    }

    if 'train' in config.mode:
        logger.info('training...')
        compress_model.fit(
            train_dataset,
            epochs=train_epochs,
            steps_per_epoch=train_steps,
            initial_epoch=initial_epoch,
            callbacks=callbacks,
            **validation_kwargs)

    logger.info('evaluation...')
    compress_model.evaluate(
        validation_dataset,
        steps=validation_steps,
        verbose=1)

    if 'export' in config.mode:
        save_path, save_format = get_saving_parameters(config)
        compression_ctrl.export_model(save_path, save_format)
        logger.info("Saved to {}".format(save_path))
Example #14
def main_worker(current_gpu, config):
    configure_device(current_gpu, config)
    config.mlflow = SafeMLFLow(config)
    if is_main_process():
        configure_logging(logger, config)
        print_args(config)

    logger.info(config)

    dataset = get_dataset(config.dataset)
    color_encoding = dataset.color_encoding
    num_classes = len(color_encoding)

    if config.metrics_dump is not None:
        write_metrics(0, config.metrics_dump)

    train_loader = val_loader = criterion = None
    resuming_checkpoint_path = config.resuming_checkpoint_path

    nncf_config = config.nncf_config

    pretrained = is_pretrained_model_requested(config)

    def criterion_fn(model_outputs, target, criterion_):
        labels, loss_outputs, _ = \
            loss_funcs.do_model_specific_postprocessing(config.model, target, model_outputs)
        return criterion_(loss_outputs, labels)

    if config.to_onnx is not None:
        assert pretrained or (resuming_checkpoint_path is not None)
    else:
        loaders, w_class = load_dataset(dataset, config)
        train_loader, val_loader, init_loader = loaders
        criterion = get_criterion(w_class, config)

        def autoq_test_fn(model, eval_loader):
            return test(model, eval_loader, criterion, color_encoding, config)

        nncf_config = register_default_init_args(nncf_config, init_loader,
                                                 criterion, criterion_fn,
                                                 autoq_test_fn, val_loader,
                                                 config.device)

    model = load_model(config.model,
                       pretrained=pretrained,
                       num_classes=num_classes,
                       model_params=config.get('model_params', {}),
                       weights_path=config.get('weights'))

    model.to(config.device)

    resuming_model_sd = None
    resuming_checkpoint = None
    if resuming_checkpoint_path is not None:
        resuming_model_sd, resuming_checkpoint = load_resuming_model_state_dict_and_checkpoint_from_path(
            resuming_checkpoint_path)
    compression_ctrl, model = create_compressed_model(
        model, nncf_config, resuming_state_dict=resuming_model_sd)
    model, model_without_dp = prepare_model_for_execution(model, config)

    if config.distributed:
        compression_ctrl.distributed()

    log_common_mlflow_params(config)

    if config.to_onnx:
        compression_ctrl.export_model(config.to_onnx)
        logger.info("Saved to {}".format(config.to_onnx))
        return
    if is_main_process():
        print_statistics(compression_ctrl.statistics())

    if config.mode.lower() == 'test':
        logger.info(model)
        model_parameters = filter(lambda p: p.requires_grad,
                                  model.parameters())
        params = sum([np.prod(p.size()) for p in model_parameters])
        logger.info("Trainable argument count:{params}".format(params=params))
        model = model.to(config.device)
        test(model, val_loader, criterion, color_encoding, config)
    elif config.mode.lower() == 'train':
        train(model, model_without_dp, compression_ctrl, train_loader,
              val_loader, criterion, color_encoding, config,
              resuming_checkpoint)
    else:
        # Should never happen...but just in case it does
        raise RuntimeError(
            "\"{0}\" is not a valid choice for execution mode.".format(
                config.mode))
Example #15
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--epochs", type=int, default=50, help="number of epochs to train (default: 50)"
    )
    parser.add_argument(
        "--lr", type=float, default=0.05, help="learning rate (default: 0.05)"
    )
    parser.add_argument(
        "--enable_nncf_compression",
        action="store_true",
        default=False,
        help="nncf compression flag (default: False)",
    )
    parser.add_argument("--seed", type=int, default=1, help="random seed (default: 1)")
    parser.add_argument(
        "--ckpt_filename",
        type=str,
        default="resnet18_cifar10.pth",
        help="file name for model checkpoint (default: resnet18_cifar10.pth)",
    )
    parser.add_argument(
        "--starting_checkpoint",
        type=str,
        default=None,
        help="checkpoint file name to start training from (default: None)",
    )
    args = parser.parse_args()
    print(args)

    torch.manual_seed(args.seed)

    input_size, num_classes, train_dataset, test_dataset = get_CIFAR10()

    kwargs = {"num_workers": 8, "pin_memory": True}

    train_loader = torch.utils.data.DataLoader(
        train_dataset, batch_size=512, shuffle=True, **kwargs
    )
    test_loader = torch.utils.data.DataLoader(
        test_dataset, batch_size=5000, shuffle=False, **kwargs
    )

    model = Model()
    model = model.cuda()
    if args.starting_checkpoint is not None:
        model.load_state_dict(torch.load(args.starting_checkpoint))

    compression_ctrl = None

    if args.enable_nncf_compression:
        nncf_config_dict = {
            "compression": {
                "algorithm": "quantization",
                "initializer": {"range": {"num_init_steps": 5}},
            }
        }
        nncf_config = NNCFConfig(nncf_config_dict)
        nncf_config = register_default_init_args(nncf_config, None, train_loader)
        compression_ctrl, model = create_compressed_model(model, nncf_config)

    if args.enable_nncf_compression:
        milestones = [5, 10]
    else:
        milestones = [25, 40]
    optimizer = torch.optim.SGD(
        model.parameters(), lr=args.lr, momentum=0.9, weight_decay=5e-4
    )
    scheduler = torch.optim.lr_scheduler.MultiStepLR(
        optimizer, milestones=milestones, gamma=0.1
    )

    for epoch in range(1, args.epochs + 1):
        train(model, train_loader, optimizer, epoch, compression_ctrl)
        test(model, test_loader)
        scheduler.step()
        if compression_ctrl is not None:
            compression_ctrl.scheduler.epoch_step()

    torch.save(model.state_dict(), args.ckpt_filename)
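
If export is also needed, the compression controller from this script can emit an ONNX model after training, using the same export_model API seen in the other examples on this page; a short continuation (the output filename is illustrative):

if compression_ctrl is not None:
    # export the now-quantized model; the path is just an example value
    compression_ctrl.export_model('resnet18_cifar10_int8.onnx')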
Example #16
def main_worker(current_gpu, config):
    config.current_gpu = current_gpu
    config.distributed = config.execution_mode in (
        ExecutionMode.DISTRIBUTED, ExecutionMode.MULTIPROCESSING_DISTRIBUTED)
    if config.distributed:
        configure_distributed(config)

    if is_main_process():
        configure_logging(logger, config)
        print_args(config)

    logger.info(config)

    config.device = get_device(config)
    dataset = get_dataset(config.dataset)
    color_encoding = dataset.color_encoding
    num_classes = len(color_encoding)

    if config.metrics_dump is not None:
        write_metrics(0, config.metrics_dump)

    train_loader = val_loader = criterion = None
    resuming_checkpoint_path = config.resuming_checkpoint_path

    nncf_config = config.nncf_config

    pretrained = is_pretrained_model_requested(config)

    if config.to_onnx is not None:
        assert pretrained or (resuming_checkpoint_path is not None)
    else:
        loaders, w_class = load_dataset(dataset, config)
        train_loader, val_loader = loaders
        criterion = get_criterion(w_class, config)

    if not resuming_checkpoint_path:
        nncf_config = register_default_init_args(nncf_config, criterion,
                                                 train_loader)

    model = load_model(config.model,
                       pretrained=pretrained,
                       num_classes=num_classes,
                       model_params=config.get('model_params', {}),
                       weights_path=config.get('weights'))

    model.to(config.device)
    compression_ctrl, model = create_compressed_model(model, nncf_config)
    model, model_without_dp = prepare_model_for_execution(model, config)

    if config.distributed:
        compression_ctrl.distributed()

    if resuming_checkpoint_path:
        if not config.pretrained:
            # Load the previously saved model state
            model, _, _, _, _ = \
                load_checkpoint(model, resuming_checkpoint_path, config.device,
                                compression_scheduler=compression_ctrl.scheduler)

    if config.to_onnx:
        compression_ctrl.export_model(config.to_onnx)
        logger.info("Saved to {}".format(config.to_onnx))
        return

    if config.mode.lower() == 'test':
        logger.info(model)
        model_parameters = filter(lambda p: p.requires_grad,
                                  model.parameters())
        params = sum([np.prod(p.size()) for p in model_parameters])
        logger.info("Trainable argument count:{params}".format(params=params))
        model = model.to(config.device)
        test(model, val_loader, criterion, color_encoding, config)
        print_statistics(compression_ctrl.statistics())
    elif config.mode.lower() == 'train':
        train(model, model_without_dp, compression_ctrl, train_loader,
              val_loader, criterion, color_encoding, config)
    else:
        # Should never happen...but just in case it does
        raise RuntimeError(
            "\"{0}\" is not a valid choice for execution mode.".format(
                config.mode))
Example #17
def main_worker(current_gpu, config):
    config.current_gpu = current_gpu
    config.distributed = config.execution_mode in (ExecutionMode.DISTRIBUTED, ExecutionMode.MULTIPROCESSING_DISTRIBUTED)
    if config.distributed:
        configure_distributed(config)

    config.device = get_device(config)

    if is_main_process():
        configure_logging(logger, config)
        print_args(config)

    if config.seed is not None:
        manual_seed(config.seed)
        cudnn.deterministic = True
        cudnn.benchmark = False

    # create model
    model_name = config['model']
    weights = config.get('weights')
    model = load_model(model_name,
                       pretrained=config.get('pretrained', True) if weights is None else False,
                       num_classes=config.get('num_classes', 1000),
                       model_params=config.get('model_params'))
    compression_ctrl, model = create_compressed_model(model, config)
    if weights:
        load_state(model, torch.load(weights, map_location='cpu'))
    model, _ = prepare_model_for_execution(model, config)
    if config.distributed:
        compression_ctrl.distributed()

    is_inception = 'inception' in model_name

    # define loss function (criterion) and optimizer
    criterion = nn.CrossEntropyLoss()
    criterion = criterion.to(config.device)

    params_to_optimize = get_parameter_groups(model, config)
    optimizer, lr_scheduler = make_optimizer(params_to_optimize, config)

    resuming_checkpoint = config.resuming_checkpoint
    best_acc1 = 0
    # optionally resume from a checkpoint
    if resuming_checkpoint is not None:
        model, config, optimizer, compression_ctrl, best_acc1 = \
            resume_from_checkpoint(resuming_checkpoint, model,
                                   config, optimizer, compression_ctrl)

    if config.to_onnx is not None:
        compression_ctrl.export_model(config.to_onnx)
        logger.info("Saved to {}".format(config.to_onnx))
        return

    if config.execution_mode != ExecutionMode.CPU_ONLY:
        cudnn.benchmark = True

    # Data loading code
    train_dataset, val_dataset = create_datasets(config)
    train_loader, train_sampler, val_loader = create_data_loaders(config, train_dataset, val_dataset)

    if config.mode.lower() == 'test':
        print_statistics(compression_ctrl.statistics())
        validate(val_loader, model, criterion, config)

    if config.mode.lower() == 'train':
        if not resuming_checkpoint:
            compression_ctrl.initialize(data_loader=train_loader, criterion=criterion)
        train(config, compression_ctrl, model, criterion, is_inception, lr_scheduler, model_name, optimizer,
              train_loader, train_sampler, val_loader, best_acc1)
Example #18
def wrap_nncf_model(model,
                    cfg,
                    data_loader_for_init=None,
                    get_fake_input_func=None):
    """
    The function wraps mmdet model by NNCF
    Note that the parameter `get_fake_input_func` should be the function `get_fake_input`
    -- cannot import this function here explicitly
    """
    check_nncf_is_enabled()
    pathlib.Path(cfg.work_dir).mkdir(parents=True, exist_ok=True)
    nncf_config = NNCFConfig(cfg.nncf_config)
    logger = get_root_logger(cfg.log_level)

    if data_loader_for_init:
        wrapped_loader = MMInitializeDataLoader(data_loader_for_init)
        nncf_config = register_default_init_args(nncf_config, None,
                                                 wrapped_loader)

    if cfg.get('resume_from'):
        checkpoint_path = cfg.get('resume_from')
        assert is_checkpoint_nncf(checkpoint_path), (
            'It is possible to resume training with NNCF compression from NNCF checkpoints only. '
            'Use "load_from" with non-compressed model for further compression by NNCF.'
        )
    elif cfg.get('load_from'):
        checkpoint_path = cfg.get('load_from')
        if not is_checkpoint_nncf(checkpoint_path):
            checkpoint_path = None
            logger.info('Received non-NNCF checkpoint to start training '
                        '-- initialization of NNCF fields will be done')
    else:
        checkpoint_path = None

    if not data_loader_for_init and not checkpoint_path:
        raise RuntimeError('Either data_loader_for_init or NNCF pre-trained '
                           'model checkpoint should be set')

    if checkpoint_path:
        logger.info(f'Loading NNCF checkpoint from {checkpoint_path}')
        logger.info(
            'Please, note that this first loading is made before addition of '
            'NNCF FakeQuantize nodes to the model, so there may be some '
            'warnings on unexpected keys')
        resuming_state_dict = load_checkpoint(model, checkpoint_path)
        logger.info(f'Loaded NNCF checkpoint from {checkpoint_path}')
    else:
        resuming_state_dict = None

    if "nncf_compress_postprocessing" in cfg:
        # NB: This parameter is used to choose if we should try to make NNCF compression
        #     for a whole model graph including postprocessing (`nncf_compress_postprocessing=True`),
        #     or make NNCF compression of the part of the model without postprocessing
        #     (`nncf_compress_postprocessing=False`).
        #     Our primary goal is to make NNCF compression of such big part of the model as
        #     possible, so `nncf_compress_postprocessing=True` is our primary choice, whereas
        #     `nncf_compress_postprocessing=False` is our fallback decision.
        #     When we manage to enable NNCF compression for sufficiently many models,
        #     we should keep one choice only.
        nncf_compress_postprocessing = cfg.get('nncf_compress_postprocessing')
        logger.debug(f'set should_compress_postprocessing={nncf_compress_postprocessing}')
    else:
        nncf_compress_postprocessing = True

    def _get_fake_data_for_forward(cfg, nncf_config, get_fake_input_func):
        input_size = nncf_config.get("input_info").get('sample_size')
        assert get_fake_input_func is not None
        assert len(input_size) == 4 and input_size[0] == 1
        H, W, C = input_size[2], input_size[3], input_size[1]
        device = next(model.parameters()).device
        return get_fake_input_func(cfg,
                                   orig_img_shape=tuple([H, W, C]),
                                   device=device)

    def dummy_forward(model):
        fake_data = _get_fake_data_for_forward(cfg, nncf_config,
                                               get_fake_input_func)
        img, img_metas = fake_data["img"], fake_data["img_metas"]
        img = nncf_model_input(img)
        if nncf_compress_postprocessing:
            ctx = model.forward_export_context(img_metas)
            logger.debug('NNCF will compress the postprocessing part of the model')
        else:
            ctx = model.forward_dummy_context(img_metas)
            logger.debug('NNCF will NOT compress the postprocessing part of the model')
        with ctx:
            model(img)

    model.dummy_forward_fn = dummy_forward

    compression_ctrl, model = create_compressed_model(
        model,
        nncf_config,
        dummy_forward_fn=dummy_forward,
        resuming_state_dict=resuming_state_dict)
    model = change_export_func_first_conv(model)

    return compression_ctrl, model
Example #19
def main_worker(current_gpu, config: SampleConfig):
    config.current_gpu = current_gpu
    config.distributed = config.execution_mode in (
        ExecutionMode.DISTRIBUTED, ExecutionMode.MULTIPROCESSING_DISTRIBUTED)
    if config.distributed:
        configure_distributed(config)

    config.device = get_device(config)

    if is_main_process():
        configure_logging(logger, config)
        print_args(config)

    if config.seed is not None:
        manual_seed(config.seed)
        cudnn.deterministic = True
        cudnn.benchmark = False

    # define loss function (criterion)
    criterion = nn.CrossEntropyLoss()
    criterion = criterion.to(config.device)

    train_loader = train_sampler = val_loader = None
    resuming_checkpoint_path = config.resuming_checkpoint_path
    nncf_config = config.nncf_config

    pretrained = is_pretrained_model_requested(config)

    if config.to_onnx is not None:
        assert pretrained or (resuming_checkpoint_path is not None)
    else:
        # Data loading code
        train_dataset, val_dataset = create_datasets(config)
        train_loader, train_sampler, val_loader = create_data_loaders(
            config, train_dataset, val_dataset)
        nncf_config = register_default_init_args(nncf_config, criterion,
                                                 train_loader)

    # create model
    model_name = config['model']
    model = load_model(model_name,
                       pretrained=pretrained,
                       num_classes=config.get('num_classes', 1000),
                       model_params=config.get('model_params'),
                       weights_path=config.get('weights'))

    model.to(config.device)

    resuming_model_sd = None
    resuming_checkpoint = None
    if resuming_checkpoint_path is not None:
        resuming_checkpoint = load_resuming_checkpoint(
            resuming_checkpoint_path)
        resuming_model_sd = resuming_checkpoint['state_dict']

    compression_ctrl, model = create_compressed_model(
        model, nncf_config, resuming_state_dict=resuming_model_sd)

    if config.to_onnx:
        compression_ctrl.export_model(config.to_onnx)
        logger.info("Saved to {}".format(config.to_onnx))
        return

    model, _ = prepare_model_for_execution(model, config)
    if config.distributed:
        compression_ctrl.distributed()

    # define optimizer
    params_to_optimize = get_parameter_groups(model, config)
    optimizer, lr_scheduler = make_optimizer(params_to_optimize, config)

    best_acc1 = 0
    # optionally resume from a checkpoint
    if resuming_checkpoint_path is not None:
        if config.mode.lower() == 'train' and config.to_onnx is None:
            config.start_epoch = resuming_checkpoint['epoch']
            best_acc1 = resuming_checkpoint['best_acc1']
            compression_ctrl.scheduler.load_state_dict(
                resuming_checkpoint['scheduler'])
            optimizer.load_state_dict(resuming_checkpoint['optimizer'])
            logger.info("=> loaded checkpoint '{}' (epoch: {}, best_acc1: {:.3f})".format(
                resuming_checkpoint_path, resuming_checkpoint['epoch'], best_acc1))
        else:
            logger.info(
                "=> loaded checkpoint '{}'".format(resuming_checkpoint_path))

    if config.execution_mode != ExecutionMode.CPU_ONLY:
        cudnn.benchmark = True

    if config.mode.lower() == 'test':
        print_statistics(compression_ctrl.statistics())
        validate(val_loader, model, criterion, config)

    if config.mode.lower() == 'train':
        is_inception = 'inception' in model_name
        train(config, compression_ctrl, model, criterion, is_inception,
              lr_scheduler, model_name, optimizer, train_loader, train_sampler,
              val_loader, best_acc1)
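
Across the PyTorch examples above, resuming follows one pattern: read the checkpoint's state dict first, then pass it to create_compressed_model so the wrapper is built around the restored weights. A distilled sketch under the same NNCF vintage as these examples (model and nncf_config are assumed to exist already; the checkpoint path and key name follow Example #19 and are otherwise illustrative):

import torch
from nncf import create_compressed_model

resuming_checkpoint = torch.load('checkpoint.pth', map_location='cpu')  # illustrative path
resuming_model_sd = resuming_checkpoint['state_dict']
compression_ctrl, model = create_compressed_model(model, nncf_config,
                                                  resuming_state_dict=resuming_model_sd)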