Example #1
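All of the examples on this page use the PyHessian library (https://github.com/amirgholami/PyHessian); a minimal import header they assume is sketched below. Names such as utils, MLP, get_params_vec, and get_grad_params_vec are project-local helpers, not part of PyHessian.

import time

import numpy as np
import torch
import torch.nn as nn
from pyhessian import hessian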
def prune_by_percentile(percent,
                        resample=False,
                        reinit=False,
                        total_params=1,
                        hessian_aware=False,
                        criterion=None,
                        dataloader=None,
                        cuda=True,
                        **kwargs):
    global step
    global mask
    global model

    traces = {}
    print("Start Hessian Computing")
    if hessian_aware:
        model.eval()
        hessian_comp = hessian(model, criterion, data=dataloader, cuda=cuda)
        for name, param in model.named_parameters():
            param_frac = param.numel() / total_params
            if 'weight' in name and param_frac > 0.01:
                # NOTE: assumes a trace() variant that accepts a layer name; stock PyHessian's trace() takes no name argument
                traces[name] = hessian_comp.trace(name)
                print("Trace for layer:{} is {}".format(name, traces[name]))
        # layers whose mean trace is above average are treated as sensitive and pruned less aggressively
        traces_thres = np.mean(np.array(list(traces.values())))
        for name, trace in traces.items():
            traces[name] = np.mean(trace) >= traces_thres

    # Calculate percentile value
    step = 0
    for name, param in model.named_parameters():

        # We do not prune bias terms
        if 'weight' in name:  # each layer
            param_frac = param.numel() / total_params
            if param_frac > 0.01:
                tensor = param.data.cpu().numpy()
                alive = tensor[np.nonzero(
                    tensor)]  # flattened array of nonzero values
                if hessian_aware:
                    if traces[name]:
                        hess_percent = percent / 2
                    else:
                        hess_percent = min(percent * 2, 30)
                    print("Hessian-aware pruning percent for layer:{} is {}".
                          format(name, hess_percent))
                    percentile_value = np.percentile(abs(alive), hess_percent)
                else:
                    percentile_value = np.percentile(abs(alive), percent)

                # Compute the new mask on the numpy copy of the tensor
                weight_dev = param.device
                new_mask = np.where(
                    abs(tensor) < percentile_value, 0, mask[step])

                # Apply new weight and mask
                param.data = torch.from_numpy(tensor * new_mask).to(
                    weight_dev)  # zero out
                mask[step] = new_mask
                step += 1
    step = 0
Example #2
def get_hessian(model, dataset_name, batch_size):
    """
    This function creates a Pyhessian object given which can be used to compute top_n 
    eigenvalues, density or trace of the hessian of the model
    Args:
        model: nn.Module
        dataset_name: name of the dataset used to train the model
        batch_size: size of the batch used to create the Pyhessian object
        
    Returns:
        model_hessian: Pyhessian object
        inputs: inputs used to create model_hesssian
        targets: targets used to create model_hesssian
        criterion: loss criterion
    
    """
    # Work with only one (big) batch
    train_loader, _ = utils.load_data("/home/app/datasets",
                                      batch_size=batch_size,
                                      dataset=dataset_name)
    inputs, targets = next(iter(train_loader))
    criterion = torch.nn.CrossEntropyLoss()
    model_hessian = hessian(model,
                            criterion,
                            data=(inputs, targets),
                            cuda=False)

    return model_hessian, inputs, targets, criterion
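A hypothetical call, assuming a trained model and a dataset name that utils.load_data understands:

model_hessian, inputs, targets, criterion = get_hessian(model, "cifar10", batch_size=512)
top_eigenvalues, _ = model_hessian.eigenvalues(top_n=2)  # power iteration on the Hessian
trace_estimates = model_hessian.trace()                  # list of Hutchinson samples
print("top eigenvalues:", top_eigenvalues, "trace:", np.mean(trace_estimates))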
Example #3
def get_models_trace(models,
                     data_loader,
                     criterion,
                     full_dataset=False,
                     verbose=False,
                     device=None):
    trace_dict = {}

    hessian_dataloader = []
    for i, (inputs, labels) in enumerate(data_loader):
        hessian_dataloader.append((inputs, labels))
        if not full_dataset:
            break

    # get trace
    for k, m in models.items():
        if verbose:
            print(k)

        if device is not None:
            m = m.to(device)
            is_gpu = True
        else:
            is_gpu = False

        if full_dataset:
            trace = hessian(m,
                            criterion,
                            dataloader=hessian_dataloader,
                            cuda=is_gpu).trace()
        else:
            trace = hessian(m,
                            criterion,
                            data=hessian_dataloader[0],
                            cuda=is_gpu).trace()

        trace_dict[k] = trace

    return trace_dict
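For instance, the function can compare the curvature of two checkpoints on a single batch; baseline_net, pruned_net, and test_loader below are hypothetical:

models = {"baseline": baseline_net, "pruned": pruned_net}
trace_dict = get_models_trace(models, test_loader, nn.CrossEntropyLoss(),
                              full_dataset=False, verbose=True, device="cuda")
for name, trace in trace_dict.items():
    print(name, np.mean(trace))  # trace() returns a list of Hutchinson estimates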
Example #4
def GetHessianEig(path, hessian_dataloader):
    
    criterion = nn.CrossEntropyLoss()  # label loss
    
    # get model
    model = MLP()

    if args.cuda:
        model = model.cuda()
    model = torch.nn.DataParallel(model)
    check_point = torch.load(path)
    model.load_state_dict(check_point.state_dict())
    
    ######################################################
    # Begin the computation
    ######################################################

    # turn model to eval mode
    model.eval()
    if batch_num == 1:  # batch_num is assumed to be defined at module scope
        hessian_comp = hessian(model,
                               criterion,
                               data=hessian_dataloader,
                               cuda=args.cuda)
    else:
        hessian_comp = hessian(model,
                               criterion,
                               dataloader=hessian_dataloader,
                               cuda=args.cuda)

    print(
        '********** finish data loading and begin Hessian computation **********')

    top_eigenvalues, _ = hessian_comp.eigenvalues(top_n=3)

    return top_eigenvalues
Example #5
# Get model checkpoint, get saving folder
###################
if args.resume == '':
    raise Exception("please choose the trained model")
model.load_state_dict(torch.load(args.resume))

######################################################
# Begin the computation
######################################################

# turn model to eval mode
model.eval()
if batch_num == 1:
    hessian_comp = hessian(model,
                           criterion,
                           data=hessian_dataloader,
                           cuda=args.cuda,
                           record_data=True)  # record_data is not in stock PyHessian; assumes a modified hessian class
else:
    hessian_comp = hessian(model,
                           criterion,
                           dataloader=hessian_dataloader,
                           cuda=args.cuda,
                           record_data=True)

print(
    '********** finish data loading and begin Hessian computation **********')

print("Computing eigenvalues...")
top_eigenvalues, _ = hessian_comp.eigenvalues(top_n=20, debug=True)
print("Computing trace...")
Example #6
    # for k, v in state_dict.items():
    #     name = k[7:]
    #     new_state_dict[name] = v

    model.load_state_dict(torch.load(args.resume + '/' + files[file_index]))

    ######################################################
    # Begin the computation
    ######################################################

    # turn model to eval mode
    model.eval()

    if batch_num == 1:
        hessian_comp = hessian(model,
                               criterion,
                               data=hessian_dataloader,
                               cuda=args.cuda)
    else:
        hessian_comp = hessian(model,
                               criterion,
                               dataloader=hessian_dataloader,
                               cuda=args.cuda)

    print(
        '********** finish data loading and begin Hessian computation **********'
    )
    print('now computing for the whole model')

    top_eigenvalues, _ = hessian_comp.eigenvalues()
    trace = hessian_comp.trace()
    density_eigen, density_weight = hessian_comp.density()
Example #7
###################
# Get model checkpoint, get saving folder
###################
if args.resume == '':
    raise Exception("please choose the trained model")
model.load_state_dict(torch.load(args.resume))

######################################################
# Begin the computation
######################################################

# turn model to eval mode
model.eval()
if batch_num == 1:
    hessian_comp = hessian(model,
                           criterion,
                           data=hessian_dataloader,
                           cuda=args.cuda)
else:
    hessian_comp = hessian(model,
                           criterion,
                           dataloader=hessian_dataloader,
                           cuda=args.cuda)

print(
    '********** finish data loading and begin Hessian computation **********')

top_eigenvalues, _ = hessian_comp.eigenvalues()
trace = hessian_comp.trace()
density_eigen, density_weight = hessian_comp.density()

print('\n***Top Eigenvalues: ', top_eigenvalues)
Example #8
def _hard_training_step(nets,
                        nets_weights,
                        net_optimizers,
                        net_data_loaders,
                        criterion,
                        weight_type,
                        var_noise=None,
                        curr_step=0,
                        writer=None,
                        device=None,
                        hard_train_eps=None):
    """Does update step on all networks and computes the weights.
    If wanting to do a random walk, set learning rate of net_optimizer to zero and set var_noise to noise level."""
    taking_step = True
    steps_taken = 0

    mean_loss = 0
    curr_loss = float("inf")

    assert not (hard_train_eps and (len(nets) > 0))
    continue_training = True

    for idx_net in range(len(nets)):

        # get net and optimizer
        net = nets[idx_net]
        optimizer = net_optimizers[idx_net]

        # get the inputs; data is a list of [inputs, labels]
        try:
            data = next(net_data_loaders[idx_net])
        except StopIteration:
            taking_step = False
            break
        inputs, labels = data
        if device is not None:
            inputs, labels = inputs.to(device).type(
                torch.cuda.FloatTensor), labels.to(device).type(
                    torch.cuda.LongTensor)

        while continue_training:

            # Compute gradients for input.
            inputs.requires_grad = True

            # zero the parameter gradients
            optimizer.zero_grad()

            # forward + backward + optimize
            outputs = net(inputs)
            loss = criterion(outputs, labels)
            loss.backward(retain_graph=True)
            optimizer.step()

            if var_noise is not None:
                with torch.no_grad():
                    for param in net.parameters():
                        noise = torch.randn(param.size()) * var_noise
                        if device is not None:
                            noise = noise.to(device)
                        param.add_(noise)

            # update weights
            param_grads = get_grad_params_vec(net)
            curr_weight = torch.norm(param_grads)
            nets_weights[idx_net] += curr_weight

            # store metrics for each net
            if writer is not None:
                writer.add_scalar('Loss/train/net_{}'.format(idx_net), loss,
                                  curr_step)
                writer.add_scalar('Potential/curr/net_{}'.format(idx_net),
                                  curr_weight, curr_step)
                writer.add_scalar('Norm/net_{}'.format(idx_net),
                                  torch.norm(get_params_vec(net)), curr_step)
                if (curr_step % 50) == 0:
                    # a = time.time()
                    is_gpu = device is not None
                    trace = np.mean(
                        hessian(net,
                                criterion,
                                data=(inputs, labels),
                                cuda=is_gpu).trace())
                    writer.add_scalar('Trace/net_{}'.format(idx_net), trace,
                                      curr_step)
                    # print("Getting trace took {}".format(time.time() - a))

            mean_loss += float(loss)
            steps_taken += 1
            curr_step += 1
            continue_training = float(loss) > hard_train_eps

    assert taking_step or (idx_net == 0)

    return nets, nets_weights, steps_taken, mean_loss / steps_taken
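The np.mean(hessian(...).trace()) pattern above works because PyHessian's trace() returns a list of Hutchinson samples v^T H v for random Rademacher vectors v, whose mean estimates tr(H). A minimal sketch of that estimator, assuming a Hessian-vector-product callable hvp:

def hutchinson_trace(hvp, params, n_samples=100):
    # tr(H) = E[v^T H v] when v has i.i.d. +/-1 (Rademacher) entries
    estimates = []
    for _ in range(n_samples):
        vs = [torch.randint_like(p, high=2) * 2.0 - 1.0 for p in params]
        hvs = hvp(vs)  # hypothetical: returns the list of H @ v blocks, shaped like params
        estimates.append(sum(torch.sum(v * hv) for v, hv in zip(vs, hvs)).item())
    return sum(estimates) / len(estimates)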
Example #9
def training_step(nets,
                  nets_weights,
                  net_optimizers,
                  net_data_loaders,
                  criterion,
                  weight_type,
                  var_noise=None,
                  curr_step=0,
                  writer=None,
                  device=None):
    """Does update step on all networks and computes the weights.
    If wanting to do a random walk, set learning rate of net_optimizer to zero and set var_noise to noise level."""
    taking_step = True
    steps_taken = 0

    mean_loss = 0

    for idx_net in range(len(nets)):

        # get net and optimizer
        net = nets[idx_net]
        optimizer = net_optimizers[idx_net]

        # get the inputs; data is a list of [inputs, labels]
        try:
            data = next(net_data_loaders[idx_net])
        except StopIteration:
            taking_step = False
            break
        inputs, labels = data
        if device is not None:
            inputs, labels = inputs.to(device).type(
                torch.cuda.FloatTensor), labels.to(device).type(
                    torch.cuda.LongTensor)

        # Compute gradients for input.
        inputs.requires_grad = True

        # zero the parameter gradients
        optimizer.zero_grad()

        # forward + backward + optimize
        outputs = net(inputs)
        loss = criterion(outputs, labels)
        loss.backward(retain_graph=True)
        optimizer.step()

        if var_noise is not None:
            with torch.no_grad():
                for param in net.parameters():
                    noise = torch.randn(param.size()) * var_noise
                    if device is not None:
                        noise = noise.to(device)
                    param.add_(noise)

        # update weights
        if weight_type == "input_output_forbenius":
            # zero the parameter
            optimizer.zero_grad()

            # get input gradients
            output_forb = torch.norm(outputs)
            output_forb.backward()
            input_grads = inputs.grad

            curr_weight = weight_function_input_jacobian(input_grads)
            nets_weights[idx_net] += curr_weight
        elif weight_type == "loss_gradient_weights":

            param_grads = get_grad_params_vec(net)
            curr_weight = torch.norm(param_grads)
            nets_weights[idx_net] += curr_weight
        elif weight_type is None:
            curr_weight = 0.0  # no weighting; keeps the writer logging below well-defined
        else:
            raise NotImplementedError()

        # store metrics for each net
        if writer is not None:
            writer.add_scalar('Loss/train/net_{}'.format(idx_net), loss,
                              curr_step)
            writer.add_scalar('Potential/curr/net_{}'.format(idx_net),
                              curr_weight, curr_step)
            writer.add_scalar('Potential/total/net_{}'.format(idx_net),
                              nets_weights[idx_net], curr_step)
            writer.add_scalar('Norm/net_{}'.format(idx_net),
                              torch.norm(get_params_vec(net)), curr_step)
            if (curr_step % 50) == 0:
                # a = time.time()
                is_gpu = device is not None
                trace = np.mean(
                    hessian(net, criterion, data=(inputs, labels),
                            cuda=is_gpu).trace())
                writer.add_scalar('Trace/net_{}'.format(idx_net), trace,
                                  curr_step)
                # print("Getting trace took {}".format(time.time() - a))

        mean_loss += float(loss)

    assert taking_step or (idx_net == 0)

    return nets, nets_weights, 1 * taking_step, mean_loss / len(nets)
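Per the docstring, a random walk is obtained by zeroing the learning rate so that the injected Gaussian noise is the only source of parameter motion; a hypothetical setup (note that training_step calls next() on the loaders, so iterators must be passed):

optimizers = [torch.optim.SGD(net.parameters(), lr=0.0) for net in nets]
loaders = [iter(dl) for dl in data_loaders]
nets, nets_weights, took_step, loss = training_step(
    nets, nets_weights, optimizers, loaders, nn.CrossEntropyLoss(),
    weight_type="loss_gradient_weights", var_noise=1e-3)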