def get_model(learning_rate=1e-3, num_classes=200):
    """Build a SqueezeNet together with its loss and an Adam optimizer.

    The weight of ``features[0]`` is frozen and is not handed to the
    optimizer. The remaining parameters are split into five groups, in
    this order:

    1. the weight of ``classifier[1]`` (the only group with weight decay),
    2. every other non-batch-norm weight except those of ``features.0.``
       and the classifier (the weights intended for quantization),
    3. the bias of ``classifier[1]``,
    4. batch-norm weights,
    5. batch-norm biases.

    A parameter is treated as belonging to batch norm when its name
    contains ``'bn'`` or ``'features.1.'``.

    Args:
        learning_rate: Learning rate passed to ``optim.Adam``.
        num_classes: Forwarded to the ``SqueezeNet`` constructor.

    Returns:
        A ``(model, criterion, optimizer)`` tuple; the model and the
        criterion have been moved with ``.cuda()``.
    """
    net = SqueezeNet(num_classes=num_classes)

    # The first layer stays fixed during training.
    net.features[0].weight.requires_grad = False

    # Single pass over the named parameters; each bucket keeps the order
    # in which ``named_parameters()`` yields its members.
    quantizable = []
    norm_weights = []
    norm_biases = []
    for name, param in net.named_parameters():
        has_weight = 'weight' in name
        if 'bn' in name or 'features.1.' in name:
            if has_weight:
                norm_weights.append(param)
            if 'bias' in name:
                norm_biases.append(param)
        elif (has_weight
              and 'classifier' not in name
              and 'features.0.' not in name):
            quantizable.append(param)

    # The last layer gets its own groups.
    head = net.classifier[1]
    param_groups = [
        {'params': [head.weight], 'weight_decay': 1e-4},
        {'params': quantizable},
        {'params': [head.bias]},
        {'params': norm_weights},
        {'params': norm_biases},
    ]
    optimizer = optim.Adam(param_groups, lr=learning_rate)

    # Loss function and model both go to the GPU.
    criterion = nn.CrossEntropyLoss().cuda()
    net = net.cuda()
    return net, criterion, optimizer
def get_model():
    """Build a SqueezeNet together with its loss and an SGD optimizer.

    Parameters are split into four optimizer groups, in this order:
    non-batch-norm weights (weight decay ``1e-3``), the bias of
    ``classifier[1]``, batch-norm weights, and batch-norm biases. A
    parameter is treated as belonging to batch norm when its name
    contains ``'bn'`` or ``'features.1.'``.

    Before the optimizer is created, batch-norm weights are set to 1.0
    and batch-norm biases to 0.0 via ``constant``.

    Returns:
        A ``(model, criterion, optimizer)`` tuple; the model and the
        criterion have been moved with ``.cuda()``.
    """

    def is_norm(name):
        # Name-based test for batch-norm parameters.
        return 'bn' in name or 'features.1.' in name

    net = SqueezeNet()
    named = list(net.named_parameters())

    # Create the different parameter groups.
    decayed = [
        param for name, param in named
        if 'weight' in name and not is_norm(name)
    ]
    head_bias = [net.classifier[1].bias]
    norm_weights = [
        param for name, param in named
        if is_norm(name) and 'weight' in name
    ]
    norm_biases = [
        param for name, param in named
        if is_norm(name) and 'bias' in name
    ]

    # Initialize batch norm params.
    for param in norm_weights:
        constant(param, 1.0)
    for param in norm_biases:
        constant(param, 0.0)

    param_groups = [
        {'params': decayed, 'weight_decay': 1e-3},
        {'params': head_bias},
        {'params': norm_weights},
        {'params': norm_biases},
    ]
    optimizer = optim.SGD(
        param_groups, lr=4e-2, momentum=0.95, nesterov=True
    )

    # Loss function and model both go to the GPU.
    criterion = nn.CrossEntropyLoss().cuda()
    net = net.cuda()
    return net, criterion, optimizer
def get_model():
    """Build a SqueezeNet together with its loss and an SGD optimizer.

    Parameters are bucketed by name into four optimizer groups, in this
    order: non-batch-norm weights (weight decay ``3e-4``), the bias of
    ``classifier[1]``, batch-norm weights, and batch-norm biases. Names
    containing ``'bn'`` or ``'features.1.'`` are treated as batch-norm
    parameters.

    Batch-norm weights are filled with 1.0 and batch-norm biases with
    0.0 (via ``constant``) before the optimizer is constructed.

    Returns:
        A ``(model, loss, optimizer)`` tuple; the model and the loss
        have been moved with ``.cuda()``.
    """
    net = SqueezeNet()

    # Single pass over the named parameters; each bucket keeps the order
    # in which ``named_parameters()`` yields its members.
    decayed = []
    norm_weights = []
    norm_biases = []
    for name, param in net.named_parameters():
        has_weight = 'weight' in name
        if 'bn' in name or 'features.1.' in name:
            if has_weight:
                norm_weights.append(param)
            if 'bias' in name:
                norm_biases.append(param)
        elif has_weight:
            decayed.append(param)

    # Constant initialisation: all weights first, then all biases.
    for bucket, fill_value in ((norm_weights, 1.0), (norm_biases, 0.0)):
        for param in bucket:
            constant(param, fill_value)

    param_groups = [
        {'params': decayed, 'weight_decay': 3e-4},
        {'params': [net.classifier[1].bias]},
        {'params': norm_weights},
        {'params': norm_biases},
    ]
    optimizer = optim.SGD(
        param_groups, lr=4e-2, momentum=0.95, nesterov=True
    )

    loss = nn.CrossEntropyLoss().cuda()
    net = net.cuda()  # move the model to gpu
    return net, loss, optimizer
new[k][0] = valid['.'.join(parts[:-1] + ['weight'])].max() / 2 elif parts[-1] == 'W_n': if not k in new: new[k] = torch.Tensor(1) new[k][0] = -valid['.'.join(parts[:-1] + ['weight'])].min() / 2 return new #dct = torch.load('resnet.pth') #dct = fix_state(model.state_dict(), dct) #model.load_state_dict(dct) if use_cuda: model.cuda() criterion = F.cross_entropy optimizer = optim.Adam(model.parameters(), lr=args.lr, weight_decay=args.weight_decay) def train(epoch): model.train() running_loss = 0 running_total = 0 correct = 0 for i, (inputs, labels) in enumerate(trainloader): if use_cuda: inputs, labels = inputs.cuda(), labels.cuda()