def get_model(learning_rate=1e-3, num_classes=200):
    """Build a SqueezeNet, its loss and an Adam optimizer with grouped params.

    The weight of ``model.features[0]`` is frozen (``requires_grad = False``)
    and is not placed in any optimizer group.

    Optimizer parameter groups, in order:
        0. weight of ``model.classifier[1]`` (weight decay 1e-4)
        1. remaining non-batch-norm weights, excluding names containing
           ``'classifier'`` or ``'features.0.'`` (the weights to be quantized)
        2. bias of ``model.classifier[1]``
        3. batch-norm weights
        4. batch-norm biases

    A parameter is treated as batch-norm when its name contains ``'bn'`` or
    ``'features.1.'``.

    Args:
        learning_rate: learning rate passed to ``optim.Adam``.
        num_classes: forwarded to the ``SqueezeNet`` constructor.

    Returns:
        Tuple ``(model, criterion, optimizer)``; ``.cuda()`` has been called
        on both the model and the criterion, so a CUDA device is expected.
    """
    model = SqueezeNet(num_classes=num_classes)

    # Freeze the first layer's weight.
    first_layer = model.features[0]
    first_layer.weight.requires_grad = False

    # Sort every named parameter into its group in a single pass; iteration
    # order of named_parameters() is preserved within each list.
    quantizable_weights = []
    batch_norm_weights = []
    batch_norm_biases = []
    for name, param in model.named_parameters():
        if 'bn' in name or 'features.1.' in name:
            if 'weight' in name:
                batch_norm_weights.append(param)
            if 'bias' in name:
                batch_norm_biases.append(param)
            continue
        if 'weight' not in name:
            continue
        # Skip the first and the last layer: only the layers in between
        # are collected as weights to be quantized.
        if 'classifier' in name or 'features.0.' in name:
            continue
        quantizable_weights.append(param)

    # The last layer gets its own groups.
    last_layer = model.classifier[1]

    param_groups = [
        {'params': [last_layer.weight], 'weight_decay': 1e-4},
        {'params': quantizable_weights},
        {'params': [last_layer.bias]},
        {'params': batch_norm_weights},
        {'params': batch_norm_biases},
    ]
    optimizer = optim.Adam(param_groups, lr=learning_rate)

    # Loss function, then the model itself, are moved to the GPU.
    criterion = nn.CrossEntropyLoss().cuda()
    model = model.cuda()

    return model, criterion, optimizer
def get_model():
    """Build a SqueezeNet, its loss and a Nesterov SGD optimizer.

    Parameters are split into four optimizer groups, in order:
        0. all non-batch-norm weights (weight decay 1e-3)
        1. bias of ``model.classifier[1]``
        2. batch-norm weights
        3. batch-norm biases

    A parameter is treated as batch-norm when its name contains ``'bn'`` or
    ``'features.1.'``. Batch-norm weights are initialised to 1.0 and
    batch-norm biases to 0.0 via ``constant``.

    Returns:
        Tuple ``(model, criterion, optimizer)``; ``.cuda()`` has been called
        on both the model and the criterion, so a CUDA device is expected.
    """
    model = SqueezeNet()

    # Classify every named parameter once; per-list ordering follows
    # named_parameters().
    decayed_weights = []
    batch_norm_weights = []
    batch_norm_biases = []
    for name, param in model.named_parameters():
        has_weight = 'weight' in name
        if 'bn' in name or 'features.1.' in name:
            if has_weight:
                batch_norm_weights.append(param)
            if 'bias' in name:
                batch_norm_biases.append(param)
        elif has_weight:
            decayed_weights.append(param)

    classifier_biases = [model.classifier[1].bias]

    # Initialise batch norm params: scale to one, shift to zero.
    for scale in batch_norm_weights:
        constant(scale, 1.0)
    for shift in batch_norm_biases:
        constant(shift, 0.0)

    param_groups = [
        {'params': decayed_weights, 'weight_decay': 1e-3},
        {'params': classifier_biases},
        {'params': batch_norm_weights},
        {'params': batch_norm_biases},
    ]
    optimizer = optim.SGD(
        param_groups, lr=4e-2, momentum=0.95, nesterov=True
    )

    # Loss function, then the model itself, are moved to the GPU.
    criterion = nn.CrossEntropyLoss().cuda()
    model = model.cuda()

    return model, criterion, optimizer
def get_model():
    """Create a SqueezeNet together with its loss and SGD optimizer.

    Optimizer parameter groups, in order:
        0. all non-batch-norm weights (weight decay 3e-4)
        1. bias of ``model.classifier[1]``
        2. batch-norm weights
        3. batch-norm biases

    Names containing ``'bn'`` or ``'features.1.'`` are treated as batch-norm
    parameters. Their weights are set to 1.0 and their biases to 0.0 via
    ``constant`` before the optimizer is built.

    Returns:
        Tuple ``(model, loss, optimizer)``; ``.cuda()`` has been called on
        both the model and the loss, so a CUDA device is expected.
    """

    def _is_batch_norm(param_name):
        # Name-based test for batch-norm parameters.
        return 'bn' in param_name or 'features.1.' in param_name

    model = SqueezeNet()
    named = list(model.named_parameters())

    # Create the different parameter groups.
    conv_weights = [
        p for n, p in named if 'weight' in n and not _is_batch_norm(n)
    ]
    head_biases = [model.classifier[1].bias]
    norm_scales = [p for n, p in named if _is_batch_norm(n) and 'weight' in n]
    norm_shifts = [p for n, p in named if _is_batch_norm(n) and 'bias' in n]

    # Batch-norm initialisation: scales to one, then shifts to zero.
    for tensor in norm_scales:
        constant(tensor, 1.0)
    for tensor in norm_shifts:
        constant(tensor, 0.0)

    optimizer = optim.SGD(
        [
            {'params': conv_weights, 'weight_decay': 3e-4},
            {'params': head_biases},
            {'params': norm_scales},
            {'params': norm_shifts},
        ],
        lr=4e-2,
        momentum=0.95,
        nesterov=True,
    )

    criterion = nn.CrossEntropyLoss().cuda()
    # Move the model to the GPU (after the optimizer has been constructed).
    model = model.cuda()

    return model, criterion, optimizer