Example #1
import torch
import torch.nn as nn

# Project-local modules, inferred from the aliases used below; the exact
# module names are assumptions.
import aux_funcs as af
import network_architectures as arcs
import model_funcs
import data


def main():
    device = af.get_pytorch_device()
    # Build an iteratively prunable ResNet. The tuple configures pruning:
    # (enabled, per-step keep ratios, prune batch size).
    model, param = arcs.create_resnet_iterative(
        "networks/", "dense", "0", (True, [0.8, 0.75, 0.66, 0.6], 128), False)

    dataset = data.CIFAR10()
    optimizer, scheduler = af.get_full_optimizer(
        model,
        (param['learning_rate'], param['weight_decay'], param['momentum'], -1),
        ([4], [0.1]))
    train_params = dict(
        epochs=10,
        epoch_growth=[2, 4, 6],          # epochs at which the network grows
        epoch_prune=[1, 3, 5, 7, 8],     # epochs at which pruning runs
        prune_batch_size=128,
        prune_type="0",                  # see Example #2: '0' skip layer, '1' normal full, '2' iterative
        reinit=False,
        min_ratio=[0.5, 0.4, 0.3, 0.2],  # minimum keep ratios for iterative pruning
    )
    params, best_model = model_funcs.iter_training_0(model, dataset,
                                                     train_params, optimizer,
                                                     scheduler, device)
    params['epoch_prune'] = train_params['epoch_prune']
    # af.print_sparsity(best_model)
    print("number of flops: {}".format(
        af.calculate_flops(best_model, (3, 32, 32))))
    # Reset every pruning mask to all-ones so the second FLOP count
    # reflects the unpruned network.
    for layer in best_model.modules():
        if isinstance(layer, (nn.Conv2d, nn.Linear)):
            layer.weight_mask = nn.Parameter(torch.ones_like(
                layer.weight_mask))
    print("number of flops no pruning: {}".format(
        af.calculate_flops(best_model, (3, 32, 32))))
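
The mask-reset loop above mirrors how torch.nn.utils.prune stores pruning state: pruned modules carry a weight_mask that zeroes out removed weights. Below is a minimal, self-contained sketch of the same idiom using the stock torch.nn.utils.prune API; counting surviving mask entries stands in for the project-specific af.calculate_flops.

import torch
import torch.nn as nn
import torch.nn.utils.prune as prune

net = nn.Sequential(nn.Conv2d(3, 16, 3, padding=1), nn.Flatten(),
                    nn.Linear(16 * 32 * 32, 10))

# Prune 50% of the smallest-magnitude weights in every Conv2d/Linear layer;
# this registers a 'weight_mask' buffer on each pruned module.
for m in net.modules():
    if isinstance(m, (nn.Conv2d, nn.Linear)):
        prune.l1_unstructured(m, name="weight", amount=0.5)

def surviving_weights(model):
    # Proxy for the pruned cost: number of mask entries still equal to 1.
    return sum(int(m.weight_mask.sum().item())
               for m in model.modules() if hasattr(m, "weight_mask"))

print("pruned:", surviving_weights(net))

# Same trick as the example: overwrite every mask with ones to measure
# the dense (unpruned) network again.
for m in net.modules():
    if hasattr(m, "weight_mask"):
        m.weight_mask.fill_(1.0)

print("dense:", surviving_weights(net))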
Example #2
# Assumes the imports from Example #1.
def train_model(models_path, cr_params, device, num=0):
    net_type, mode, pruning, ics = cr_params
    model, params = arcs.create_resnet_iterative(models_path, net_type, mode,
                                                 pruning, ics, False)
    dataset = af.get_dataset('cifar10')
    params['name'] = params['base_model'] + '_{}_{}'.format(net_type, mode)
    if model.prune:
        params['name'] += "_prune_{}".format(
            [x * 100 for x in model.keep_ratio])
        print("prune: {}".format(model.keep_ratio))
    if mode == "0":
        params['epochs'] = 250
        params['milestones'] = [120, 160, 180]
        params['gammas'] = [0.1, 0.01, 0.01]

    if mode == "1":
        params['epochs'] = 300
        params['milestones'] = [100, 150, 200]
        params['gammas'] = [0.1, 0.1, 0.1]

    if "full" in type:
        params['learning_rate'] = 0.1
    print("lr: {}".format(params['learning_rate']))

    opti_param = (params['learning_rate'], params['weight_decay'],
                  params['momentum'], -1)
    lr_schedule_params = (params['milestones'], params['gammas'])

    model.to(device)
    train_params = dict(
        epochs=params['epochs'],
        epoch_growth=[25, 50, 75],
        epoch_prune=[10, 35, 60, 85, 110, 135, 160],
        prune_batch_size=pruning[2],
        prune_type='2',  # '0': skip layer, '1': normal full, '2': iterative
        reinit=False,
        min_ratio=[0.3, 0.1, 0.05, 0.05],  # iterative-pruning floor; not needed with skip layers
    )

    params['epoch_growth'] = train_params['epoch_growth']
    params['epoch_prune'] = train_params['epoch_prune']
    optimizer, scheduler = af.get_full_optimizer(model, opti_param,
                                                 lr_schedule_params)
    metrics, best_model = model.train_func(model, dataset, train_params,
                                           optimizer, scheduler, device)
    _link_metrics(params, metrics)

    af.print_sparsity(best_model)

    arcs.save_model(best_model, params, models_path, params['name'], epoch=-1)
    print("test acc: {}, last val: {}".format(params['test_top1_acc'],
                                              params['valid_top1_acc'][-1]))
    return best_model, params
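
A hypothetical invocation of train_model, with cr_params assembled to match the unpacking above; the concrete values (and the ics flag in particular) are assumptions modeled on Example #1:

device = af.get_pytorch_device()
# (type, mode, pruning=(enabled, keep ratios, prune batch size), ics)
cr_params = ("dense", "0", (True, [0.8, 0.75, 0.66, 0.6], 128), True)
best_model, params = train_model("networks/", cr_params, device)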
Example #3
# Assumes the imports from Example #1.
def train_model(models_path, device):
    # Only the returned name is used below; the first return value is discarded.
    _, sdn = arcs.create_resnet56(models_path, 'cifar10', save_type='d')
    print('sdn name: {}'.format(sdn))
    # train_sdn(models_path, sdn, device)
    print("Training model...")
    trained_model, model_params = arcs.load_model(models_path, sdn, 0)
    dataset = af.get_dataset(model_params['task'])
    lr = model_params['learning_rate']
    momentum = model_params['momentum']
    weight_decay = model_params['weight_decay']
    milestones = model_params['milestones']
    gammas = model_params['gammas']
    num_epochs = model_params['epochs']

    model_params['optimizer'] = 'SGD'

    opti_param = (lr, weight_decay, momentum, -1)
    lr_schedule_params = (milestones, gammas)

    optimizer, scheduler = af.get_full_optimizer(trained_model, opti_param,
                                                 lr_schedule_params)
    trained_model_name = sdn + '_training'

    print('Training: {}...'.format(trained_model_name))
    trained_model.to(device)
    metrics = trained_model.train_func(trained_model,
                                       dataset,
                                       num_epochs,
                                       optimizer,
                                       scheduler,
                                       device=device)
    model_params['train_top1_acc'] = metrics['train_top1_acc']
    model_params['test_top1_acc'] = metrics['test_top1_acc']
    model_params['train_top3_acc'] = metrics['train_top3_acc']
    model_params['test_top3_acc'] = metrics['test_top3_acc']
    model_params['epoch_times'] = metrics['epoch_times']
    model_params['lrs'] = metrics['lrs']
    total_training_time = sum(model_params['epoch_times'])
    model_params['total_time'] = total_training_time
    print('Training took {} seconds...'.format(total_training_time))
    arcs.save_model(trained_model,
                    model_params,
                    models_path,
                    trained_model_name,
                    epoch=-1)
    return trained_model, dataset
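
Examples #1-#3 all hand af.get_full_optimizer a (lr, weight_decay, momentum, -1) tuple plus (milestones, gammas). A plausible reconstruction, assuming SGD and a schedule that multiplies the learning rate by gammas[i] once epoch milestones[i] is reached; this body is an assumption, not the project's actual implementation (Example #4 passes a 3-tuple without the trailing -1, which suggests a slightly different helper version):

import torch

def get_full_optimizer(model, opti_param, lr_schedule_params):
    lr, weight_decay, momentum, _ = opti_param  # trailing -1 appears unused
    milestones, gammas = lr_schedule_params
    optimizer = torch.optim.SGD(model.parameters(), lr=lr,
                                momentum=momentum, weight_decay=weight_decay)

    def factor(epoch):
        # Multiply together every gamma whose milestone has already passed.
        f = 1.0
        for m, g in zip(milestones, gammas):
            if epoch >= m:
                f *= g
        return f

    scheduler = torch.optim.lr_scheduler.LambdaLR(optimizer, lr_lambda=factor)
    return optimizer, scheduler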
Example #4
# Assumes the imports from Example #1, plus:
import numpy as np


def train(models_path,
          untrained_models,
          sdn=False,
          ic_only_sdn=False,
          device='cpu',
          ds=False):
    print('Training models...')

    for base_model in untrained_models:
        trained_model, model_params = arcs.load_model(models_path, base_model,
                                                      0)
        dataset = af.get_dataset(model_params['task'])

        learning_rate = model_params['learning_rate']
        momentum = model_params['momentum']
        weight_decay = model_params['weight_decay']
        milestones = model_params['milestones']
        gammas = model_params['gammas']
        num_epochs = model_params['epochs']

        model_params['optimizer'] = 'SGD'

        if ic_only_sdn:  # IC-only training, freeze the original weights
            learning_rate = model_params['ic_only']['learning_rate']
            num_epochs = model_params['ic_only']['epochs']
            milestones = model_params['ic_only']['milestones']
            gammas = model_params['ic_only']['gammas']

            model_params['optimizer'] = 'Adam'

            trained_model.ic_only = True
        else:
            trained_model.ic_only = False

        trained_model.ds = ds

        optimization_params = (learning_rate, weight_decay, momentum)
        lr_schedule_params = (milestones, gammas)

        if sdn:
            if ic_only_sdn:
                optimizer, scheduler = af.get_sdn_ic_only_optimizer(
                    trained_model, optimization_params, lr_schedule_params)
                trained_model_name = base_model + '_ic_only_ic{}'.format(
                    np.sum(model_params['add_ic']))

            else:
                optimizer, scheduler = af.get_full_optimizer(
                    trained_model, optimization_params, lr_schedule_params)
                trained_model_name = base_model + '_sdn_training_ic{}'.format(
                    np.sum(model_params['add_ic']))

        else:
            optimizer, scheduler = af.get_full_optimizer(
                trained_model, optimization_params, lr_schedule_params)
            trained_model_name = base_model

        if ds:
            trained_model_name = trained_model_name + '_ds'
        print('Training: {}...'.format(trained_model_name))
        # trained_model = nn.DataParallel(trained_model)
        trained_model.to(device)
        metrics = trained_model.train_func(trained_model,
                                           dataset,
                                           num_epochs,
                                           optimizer,
                                           scheduler,
                                           device=device)
        model_params['train_top1_acc'] = metrics['train_top1_acc']
        model_params['test_top1_acc'] = metrics['test_top1_acc']
        model_params['train_top5_acc'] = metrics['train_top5_acc']
        model_params['test_top5_acc'] = metrics['test_top5_acc']
        model_params['epoch_times'] = metrics['epoch_times']
        model_params['lrs'] = metrics['lrs']
        total_training_time = sum(model_params['epoch_times'])
        model_params['total_time'] = total_training_time
        print('Training took {} seconds...'.format(total_training_time))
        arcs.save_model(trained_model,
                        model_params,
                        models_path,
                        trained_model_name,
                        epoch=-1)
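
For the ic_only_sdn branch, af.get_sdn_ic_only_optimizer presumably freezes the original weights (as the branch's comment says) and trains only the internal-classifier parameters with Adam, matching the switch of model_params['optimizer'] to 'Adam'. A hedged sketch; the 'output' name filter and the single-gamma scheduler are assumptions:

import torch

def get_sdn_ic_only_optimizer(model, optimization_params, lr_schedule_params):
    lr, weight_decay, momentum = optimization_params  # momentum unused by Adam
    milestones, gammas = lr_schedule_params
    ic_params = []
    for name, p in model.named_parameters():
        if 'output' in name:           # assumed naming convention for IC heads
            ic_params.append(p)
        else:
            p.requires_grad = False    # freeze the original network weights
    optimizer = torch.optim.Adam(ic_params, lr=lr, weight_decay=weight_decay)
    scheduler = torch.optim.lr_scheduler.MultiStepLR(
        optimizer, milestones=milestones, gamma=gammas[0])
    return optimizer, scheduler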