def get_model(learning_rate=1e-3, num_classes=200):
    """Build a SqueezeNet with its loss function and an Adam optimizer.

    The weight of the first layer (``features[0]``) is frozen and is not
    given to the optimizer. The remaining parameters are split into five
    optimizer groups, in this order:

    0. weight of the last layer (``classifier[1]``), with weight decay 1e-4
    1. weights of all conv layers except the first and the last
    2. bias of the last layer
    3. batch-norm weights
    4. batch-norm biases

    Parameters are classified by name: a name containing ``'bn'`` or
    ``'features.1.'`` is treated as belonging to a batch-norm layer.

    Args:
        learning_rate: learning rate passed to Adam.
        num_classes: forwarded to the ``SqueezeNet`` constructor.

    Returns:
        Tuple ``(model, criterion, optimizer)``. The model and the
        criterion have been moved to the GPU with ``.cuda()``.
    """
    # move the model to gpu *before* the optimizer is created, as the
    # torch.nn.Module.cuda / torch.optim documentation recommends
    model = SqueezeNet(num_classes=num_classes).cuda()

    # set the first layer not trainable
    model.features[0].weight.requires_grad = False

    # walk the parameters once and reuse the result for every group
    named_params = list(model.named_parameters())

    def is_bn(name):
        # name-based test for parameters of batch_norm layers
        return 'bn' in name or 'features.1.' in name

    # all conv layers except the first and the last
    weights_to_be_quantized = [
        p for n, p in named_params
        if 'weight' in n and not is_bn(n)
        and 'classifier' not in n and 'features.0.' not in n
    ]

    # the last layer
    last_layer = model.classifier[1]
    weights = [last_layer.weight]
    biases = [last_layer.bias]

    # parameters of batch_norm layers
    bn_weights = [p for n, p in named_params if is_bn(n) and 'weight' in n]
    bn_biases = [p for n, p in named_params if is_bn(n) and 'bias' in n]

    # NOTE(review): the group order is kept exactly as before in case the
    # training code indexes optimizer.param_groups -- confirm with callers.
    params = [
        {'params': weights, 'weight_decay': 1e-4},
        {'params': weights_to_be_quantized},
        {'params': biases},
        {'params': bn_weights},
        {'params': bn_biases},
    ]
    optimizer = optim.Adam(params, lr=learning_rate)

    # loss function
    criterion = nn.CrossEntropyLoss().cuda()
    return model, criterion, optimizer
# Example no. 2
# 0
def get_model():
    """Build a SqueezeNet with its loss function and an SGD optimizer.

    Parameters are split into four optimizer groups, in this order:

    0. every non-batch-norm ``weight`` parameter, with weight decay 1e-3
    1. bias of the last layer (``classifier[1]``)
    2. batch-norm weights (initialized to 1.0)
    3. batch-norm biases (initialized to 0.0)

    Parameters are classified by name: a name containing ``'bn'`` or
    ``'features.1.'`` is treated as belonging to a batch-norm layer.

    Returns:
        Tuple ``(model, criterion, optimizer)``. The model and the
        criterion have been moved to the GPU with ``.cuda()``; the
        optimizer is SGD with lr=4e-2, momentum=0.95 and Nesterov momentum.
    """
    # move the model to gpu *before* the optimizer is created, as the
    # torch.nn.Module.cuda / torch.optim documentation recommends
    model = SqueezeNet().cuda()

    # walk the parameters once and reuse the result for every group
    named_params = list(model.named_parameters())

    def is_bn(name):
        # name-based test for parameters of batch_norm layers
        return 'bn' in name or 'features.1.' in name

    # create different parameter groups
    weights = [p for n, p in named_params if 'weight' in n and not is_bn(n)]
    biases = [model.classifier[1].bias]
    bn_weights = [p for n, p in named_params if is_bn(n) and 'weight' in n]
    bn_biases = [p for n, p in named_params if is_bn(n) and 'bias' in n]

    # initialize batch norm params
    # NOTE(review): `constant` is presumably torch.nn.init.constant, which
    # newer PyTorch deprecates in favor of constant_ -- confirm the import.
    for p in bn_weights:
        constant(p, 1.0)
    for p in bn_biases:
        constant(p, 0.0)

    # only the first group is regularized; the others use SGD's default
    params = [
        {'params': weights, 'weight_decay': 1e-3},
        {'params': biases},
        {'params': bn_weights},
        {'params': bn_biases},
    ]
    optimizer = optim.SGD(params, lr=4e-2, momentum=0.95, nesterov=True)

    # loss function
    criterion = nn.CrossEntropyLoss().cuda()
    return model, criterion, optimizer
def get_model():
    """Build a SqueezeNet with its loss function and an SGD optimizer.

    Parameters are split into four optimizer groups, in this order:

    0. every non-batch-norm ``weight`` parameter, with weight decay 3e-4
    1. bias of the last layer (``classifier[1]``)
    2. batch-norm weights (initialized to 1.0)
    3. batch-norm biases (initialized to 0.0)

    Parameters are classified by name: a name containing ``'bn'`` or
    ``'features.1.'`` is treated as belonging to a batch-norm layer.

    Returns:
        Tuple ``(model, loss, optimizer)``. The model and the loss have
        been moved to the GPU with ``.cuda()``; the optimizer is SGD with
        lr=4e-2, momentum=0.95 and Nesterov momentum.
    """
    # move the model to gpu *before* the optimizer is created, as the
    # torch.nn.Module.cuda / torch.optim documentation recommends
    model = SqueezeNet().cuda()

    # walk the parameters once and reuse the result for every group
    named_params = list(model.named_parameters())

    def is_bn(name):
        # name-based test for parameters of batch_norm layers
        return 'bn' in name or 'features.1.' in name

    # create different parameter groups
    weights = [p for n, p in named_params if 'weight' in n and not is_bn(n)]
    biases = [model.classifier[1].bias]
    bn_weights = [p for n, p in named_params if is_bn(n) and 'weight' in n]
    bn_biases = [p for n, p in named_params if is_bn(n) and 'bias' in n]

    # initialize batch norm params
    # NOTE(review): `constant` is presumably torch.nn.init.constant, which
    # newer PyTorch deprecates in favor of constant_ -- confirm the import.
    for p in bn_weights:
        constant(p, 1.0)
    for p in bn_biases:
        constant(p, 0.0)

    # only the first group is regularized; the others use SGD's default
    params = [
        {'params': weights, 'weight_decay': 3e-4},
        {'params': biases},
        {'params': bn_weights},
        {'params': bn_biases},
    ]
    optimizer = optim.SGD(params, lr=4e-2, momentum=0.95, nesterov=True)

    loss = nn.CrossEntropyLoss().cuda()
    return model, loss, optimizer