Example #1
def naive_ensembling(args, networks, test_loader):
    # simply average the weights in networks
    if args.width_ratio != 1:
        print(
            "Unfortunately naive ensembling can't work if models are not of same shape!"
        )
        return -1, None
    weights = [(1 - args.ensemble_step), args.ensemble_step]
    avg_pars = get_avg_parameters(networks, weights)
    ensemble_network = get_model_from_name(args)
    # put on GPU
    if args.gpu_id != -1:
        ensemble_network = ensemble_network.cuda(args.gpu_id)

    # check the test performance of the method before
    log_dict = {}
    log_dict['test_losses'] = []
    # log_dict['test_counter'] = [i * len(train_loader.dataset) for i in range(args.n_epochs + 1)]
    routines.test(args, ensemble_network, test_loader, log_dict)

    # set the weights of the ensembled network
    for idx, (name, param) in enumerate(ensemble_network.state_dict().items()):
        # copy_ writes into the storage shared with the model's parameters
        param.copy_(avg_pars[idx].data)

    # check the test performance of the method after ensembling
    log_dict = {}
    log_dict['test_losses'] = []
    # log_dict['test_counter'] = [i * len(train_loader.dataset) for i in range(args.n_epochs + 1)]
    return routines.test(args, ensemble_network, test_loader,
                         log_dict), ensemble_network
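The averaging step above is just a convex combination of matching parameters from models with identical architectures. A minimal self-contained sketch of that operation, where step stands in for args.ensemble_step and the tiny linear layers are placeholders, not part of the original code:

import torch.nn as nn

def average_state_dicts(sd_a, sd_b, step=0.5):
    # convex combination (1 - step) * a + step * b over every matching key
    return {k: (1 - step) * sd_a[k] + step * sd_b[k] for k in sd_a}

net_a, net_b = nn.Linear(4, 2), nn.Linear(4, 2)
ensemble = nn.Linear(4, 2)
ensemble.load_state_dict(average_state_dicts(net_a.state_dict(), net_b.state_dict()))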
Example #2
def update_model(args, model, new_params, test=False, test_loader=None, reversed=False, idx=-1):

    updated_model = get_model_from_name(args, idx=idx)
    if args.gpu_id != -1:
        updated_model = updated_model.cuda(args.gpu_id)

    layer_idx = 0
    model_state_dict = model.state_dict()

    print("len of model_state_dict is ", len(model_state_dict.items()))
    print("len of new_params is ", len(new_params))

    for key, value in model_state_dict.items():
        print("updated parameters for layer ", key)
        model_state_dict[key] = new_params[layer_idx]
        layer_idx += 1
        if layer_idx == len(new_params):
            break

    updated_model.load_state_dict(model_state_dict)

    if test:
        log_dict = {}
        log_dict['test_losses'] = []
        final_acc = routines.test(args, updated_model, test_loader, log_dict)
        print("accuracy after update is ", final_acc)
    else:
        final_acc = None

    return updated_model, final_acc
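update_model relies on the fact that a state dict preserves key order, so a flat list of tensors can be copied in positionally and then loaded back. A self-contained sketch of that pattern, using a toy two-layer network as a stand-in for the model produced by get_model_from_name:

import torch
import torch.nn as nn

model = nn.Sequential(nn.Linear(4, 3), nn.Linear(3, 2))
new_params = [torch.randn_like(v) for v in model.state_dict().values()]

state_dict = model.state_dict()
for layer_idx, key in enumerate(state_dict.keys()):
    # overwrite each entry, in order, with the corresponding new tensor
    state_dict[key] = new_params[layer_idx]
model.load_state_dict(state_dict)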
Example #3
def train_data_separated_models(args, local_train_loaders, local_test_loaders, test_loader, choices):
    networks = []
    local_accuracies = []
    accuracies = []
    base_nets = []
    base_net = get_model_from_name(args, idx=0)
    base_nets.append(base_net)
    if args.diff_init or args.width_ratio != 1:
        base_nets.append(get_model_from_name(args, idx=1))
    else:
        base_nets.append(base_net)

    for i in range(args.num_models):
        print("\nTraining model {} on its separate data \n ".format(str(i)))
        network, acc, local_acc = get_trained_data_separated_model(
            args, i, local_train_loaders[i], local_test_loaders[i], test_loader, base_nets[i])
        networks.append(network)
        accuracies.append(acc)
        local_accuracies.append(local_acc)
        if args.dump_final_models:
            save_final_data_separated_model(args, i, network, local_acc, acc, choices[i])
    return networks, accuracies, local_accuracies
Example #4
def get_pretrained_model(args, path, data_separated=False, idx=-1):
    model = get_model_from_name(args, idx=idx)

    if args.gpu_id != -1:
        state = torch.load(
            path,
            map_location=(
                lambda s, _: torch.serialization.default_restore_location(s, 'cuda:' + str(args.gpu_id))
            ),
        )
    else:
        state = torch.load(
            path,
            map_location=(
                lambda s, _: torch.serialization.default_restore_location(s, 'cpu')
            ),
        )

    model_state_dict = state['model_state_dict']

    if 'test_accuracy' not in state:
        state['test_accuracy'] = -1

    if 'epoch' not in state:
        state['epoch'] = -1

    if not data_separated:
        print("Loading model at path {} which had accuracy {} at epoch {}".format(
            path, state['test_accuracy'], state['epoch']))
    else:
        print("Loading model at path {} which had local accuracy {} and overall accuracy {} "
              "for choice {} at epoch {}".format(
                  path, state['local_test_accuracy'], state['test_accuracy'], state['choice'], state['epoch']))

    model.load_state_dict(model_state_dict)

    if args.gpu_id != -1:
        model = model.cuda(args.gpu_id)

    if not data_separated:
        return model, state['test_accuracy']
    else:
        return model, state['test_accuracy'], state['local_test_accuracy']
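get_pretrained_model expects a checkpoint saved as a dictionary with at least a 'model_state_dict' entry; 'test_accuracy' and 'epoch' are filled in with -1 if missing. A minimal sketch of writing and reloading such a file (the toy model and file name are illustrative, not taken from the original code):

import torch
import torch.nn as nn

model = nn.Linear(4, 2)
torch.save({'model_state_dict': model.state_dict(),
            'test_accuracy': -1,
            'epoch': -1}, 'final_model.pth')

state = torch.load('final_model.pth', map_location=torch.device('cpu'))
model.load_state_dict(state['model_state_dict'])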
Example #5
def get_trained_model(args, id, random_seed, train_loader, test_loader):
    torch.backends.cudnn.enabled = False
    torch.manual_seed(random_seed)
    network = get_model_from_name(args, idx=id)

    optimizer = optim.SGD(network.parameters(), lr=args.learning_rate,
                          momentum=args.momentum)
    if args.gpu_id != -1:
        network = network.cuda(args.gpu_id)
    log_dict = {}
    log_dict['train_losses'] = []
    log_dict['train_counter'] = []
    log_dict['test_losses'] = []
    # log_dict['test_counter'] = [i * len(test_loader.dataset) for i in range(args.n_epochs + 1)]
    # print(list(network.parameters()))
    acc = test(args, network, test_loader, log_dict)
    for epoch in range(1, args.n_epochs + 1):
        train(args, network, optimizer, train_loader, log_dict, epoch, model_id=str(id))
        acc = test(args, network, test_loader, log_dict)
    return network, acc
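get_trained_model delegates the per-epoch work to routines.train and routines.test. A self-contained toy version of that cycle (the linear model, random data, and hyperparameters are placeholders chosen only for illustration):

import torch
import torch.nn as nn
import torch.optim as optim

torch.manual_seed(0)
net = nn.Linear(4, 1)
optimizer = optim.SGD(net.parameters(), lr=0.01, momentum=0.5)
x, y = torch.randn(64, 4), torch.randn(64, 1)

for epoch in range(1, 4):
    optimizer.zero_grad()
    loss = nn.functional.mse_loss(net(x), y)
    loss.backward()
    optimizer.step()
    print("epoch", epoch, "train loss", loss.item())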
Example #6
def get_network_from_param_list(args, param_list, test_loader):

    print("using independent method")
    new_network = get_model_from_name(args, idx=1)
    if args.gpu_id != -1:
        new_network = new_network.cuda(args.gpu_id)

    # check the test performance of the network before
    log_dict = {}
    log_dict['test_losses'] = []
    routines.test(args, new_network, test_loader, log_dict)

    # set the weights of the new network
    # print("before", new_network.state_dict())
    print("len of model parameters and avg aligned layers is ", len(list(new_network.parameters())),
          len(param_list))
    assert len(list(new_network.parameters())) == len(param_list)

    layer_idx = 0
    model_state_dict = new_network.state_dict()

    print("len of model_state_dict is ", len(model_state_dict.items()))
    print("len of param_list is ", len(param_list))

    for key, value in model_state_dict.items():
        model_state_dict[key] = param_list[layer_idx]
        layer_idx += 1

    new_network.load_state_dict(model_state_dict)

    # check the test performance of the network after
    log_dict = {}
    log_dict['test_losses'] = []
    acc = routines.test(args, new_network, test_loader, log_dict)

    return acc, new_network
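Note that the assert above compares the number of trainable parameters against param_list, while the copy loop walks the full state dict; the two lengths only agree for models without buffers. The sketch below (a BatchNorm layer is used purely for illustration) shows where they diverge:

import torch.nn as nn

net = nn.Sequential(nn.Linear(4, 4), nn.BatchNorm1d(4))
print(len(list(net.parameters())))  # 4: two weight tensors and two biases
print(len(net.state_dict()))        # 7: adds running_mean, running_var, num_batches_tracked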
Example #7
def get_trained_data_separated_model(args, id, local_train_loader, local_test_loader, test_loader, base_net=None):
    torch.backends.cudnn.enabled = False
    if base_net is not None:
        network = copy.deepcopy(base_net)
    else:
        network = get_model_from_name(args, idx=id)
    optimizer = optim.SGD(network.parameters(), lr=args.learning_rate,
                          momentum=args.momentum)
    if args.gpu_id != -1:
        network = network.cuda(args.gpu_id)
    log_dict = {}
    log_dict['train_losses'] = []
    log_dict['train_counter'] = []
    log_dict['local_test_losses'] = []
    log_dict['test_losses'] = []
    # log_dict['test_counter'] = [i * len(test_loader.dataset) for i in range(args.n_epochs + 1)]
    # print(list(network.parameters()))
    acc = test(args, network, test_loader, log_dict)
    local_acc = test(args, network, local_test_loader, log_dict, is_local=True)
    for epoch in range(1, args.n_epochs + 1):
        train(args, network, optimizer, local_train_loader, log_dict, epoch, model_id=str(id))
        acc = test(args, network, test_loader, log_dict)
        local_acc = test(args, network, local_test_loader, log_dict, is_local=True)
    return network, acc, local_acc
Example #8
    if args.gpu_id == -1:
        device = torch.device('cpu')
    else:
        device = torch.device('cuda:{}'.format(args.gpu_id))

    print("------- Prediction based ensembling -------")
    prediction_acc = baseline.prediction_ensembling(args, models, test_loader)

    print("------- Geometric Ensembling -------")
    activations = utils.get_model_activations(args, models, config=config)
    geometric_acc, geometric_model = wasserstein_ensemble.geometric_ensembling_modularized(
        args, models, train_loader, test_loader, activations)
    utils.get_model_size(geometric_model)

    print("------- Distillation!! -------")
    distilled_model = get_model_from_name(args, idx=1)
    distilled_model = distilled_model.to(device)
    utils.get_model_size(distilled_model)

    distill_scratch_init_acc = test_model(args, distilled_model, test_loader)

    distillation_results = {}

    print("------- Distilling Big to scratch -------")
    _, acc = distillation(args, [models[0]], copy.deepcopy(distilled_model),
                          train_loader, test_loader, device)
    distillation_results['scratch_distill_from_big'] = acc

    print("------- Distilling Big to OT Avg. -------")
    _, acc = distillation(args, [models[0]], copy.deepcopy(geometric_model),
                          train_loader, test_loader, device)
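The distillation calls above transfer knowledge from a teacher model into the freshly initialized or OT-averaged student. A common form of the soft-target loss used for this, sketched here with an assumed temperature of 4.0 (the helper name and the value are illustrative, not taken from the original code):

import torch.nn.functional as F

def distillation_loss(student_logits, teacher_logits, temperature=4.0):
    # temperature-scaled KL divergence between softened teacher and student distributions
    soft_teacher = F.softmax(teacher_logits / temperature, dim=1)
    log_student = F.log_softmax(student_logits / temperature, dim=1)
    return F.kl_div(log_student, soft_teacher, reduction='batchmean') * temperature ** 2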