Example #1
def getMnistPIModel(rng, srng):
    '''
    Constructs a real-valued fully connected model (permutation-invariant MNIST setting).

    @param rng: Numpy rng object
    @param srng: rng object used by Theano
    @return layer: The output layer of the constructed model
    @return p_dict: Dictionary containing the shared variables with the model parameters. This makes it easy to store
        the parameters to a file during/after training
    '''
    weight_decay = 1e-4
    p_dict = {}

    # Layer 1: FC1200
    layer = LayerInput((784, ))
    layer = LayerDropout(layer, 0.2, srng)
    layer = LayerFC(layer,
                    1200,
                    rng,
                    srng,
                    weight_type='real',
                    regularizer='l2',
                    regularizer_weight=weight_decay,
                    initial_parameters=None)
    p_dict = addParametersToDict(layer, 0, p_dict)
    layer = LayerBatchnorm(layer, alpha=0.1, initial_parameters=None)
    p_dict = addParametersToDict(layer, 0, p_dict)
    layer = LayerActivationTanh(layer)

    # Layer 2: FC1200
    layer = LayerDropout(layer, 0.4, srng)
    layer = LayerFC(layer,
                    1200,
                    rng,
                    srng,
                    weight_type='real',
                    regularizer='l2',
                    regularizer_weight=weight_decay,
                    initial_parameters=None)
    p_dict = addParametersToDict(layer, 1, p_dict)
    layer = LayerBatchnorm(layer, alpha=0.1, initial_parameters=None)
    p_dict = addParametersToDict(layer, 1, p_dict)
    layer = LayerActivationTanh(layer)

    # Layer 3: FC10
    layer = LayerDropout(layer, 0.4, srng)
    layer = LayerFC(layer,
                    10,
                    rng,
                    srng,
                    weight_type='real',
                    regularizer='l2',
                    regularizer_weight=weight_decay,
                    enable_bias=True,
                    bias_type='real',
                    initial_parameters=None)
    p_dict = addParametersToDict(layer, 2, p_dict)
    layer = LayerOutput(layer, 10, objective='crossentropy')

    return layer, p_dict
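
A minimal usage sketch for the example above. The concrete rng classes are assumptions: the docstring only asks for a "Numpy rng object" and a Theano rng, so numpy.random.RandomState and MRG_RandomStreams are plausible choices, not confirmed by the source.

import numpy as np
from theano.sandbox.rng_mrg import MRG_RandomStreams

rng = np.random.RandomState(1234)    # numpy rng used for weight initialization
srng = MRG_RandomStreams(seed=4711)  # Theano random streams, e.g. for dropout masks

layer, p_dict = getMnistPIModel(rng, srng)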

Example #2
def getMnistModel(rng, srng):
    '''
    Constructs a real-valued CNN model.

    @param rng: Numpy rng object
    @param srng: rng object used by Theano
    @return layer: The output layer of the constructed CNN model
    @return p_dict: Dictionary containing the shared variables with the model parameters. This makes it easy to store
        the parameters to a file during/after training
    '''
    weight_decay = 1e-6
    p_dict = {}

    # Layer 1: 32C5-P2
    layer = LayerInput((1,28,28))
    layer = LayerConv(layer, 32, (5,5), (1,1), 'half', rng, srng, weight_type='real', regularizer='l2',
                      regularizer_weight=weight_decay, initial_parameters=None)
    p_dict = addParametersToDict(layer, 0, p_dict)
    layer = LayerBatchnorm(layer, alpha=0.1, initial_parameters=None)
    p_dict = addParametersToDict(layer, 0, p_dict)
    layer = LayerActivationTanh(layer)
    layer = LayerPooling(layer, (2,2), mode='max')

    # Layer 2: 64C5-P2
    layer = LayerDropout(layer, 0.2, srng)
    layer = LayerConv(layer, 64, (5,5), (1,1), 'half', rng, srng, weight_type='real', regularizer='l2',
                      regularizer_weight=weight_decay, initial_parameters=None)
    p_dict = addParametersToDict(layer, 1, p_dict)
    layer = LayerBatchnorm(layer, alpha=0.1, initial_parameters=None)
    p_dict = addParametersToDict(layer, 1, p_dict)
    layer = LayerActivationTanh(layer)
    layer = LayerPooling(layer, (2,2), mode='max')
    layer = LayerFlatten(layer)

    # Layer 3: FC512
    layer = LayerDropout(layer, 0.3, srng)
    layer = LayerFC(layer, 512, rng, srng, weight_type='real', regularizer='l2',
                    regularizer_weight=weight_decay, initial_parameters=None)
    p_dict = addParametersToDict(layer, 2, p_dict)
    layer = LayerBatchnorm(layer, alpha=0.1, initial_parameters=None)
    p_dict = addParametersToDict(layer, 2, p_dict)
    layer = LayerActivationTanh(layer)

    # Layer 4: FC10
    layer = LayerFC(layer, 10, rng, srng, weight_type='real', regularizer='l2',
                    regularizer_weight=weight_decay, enable_bias=True, bias_type='real', initial_parameters=None)
    p_dict = addParametersToDict(layer, 3, p_dict)
    layer = LayerOutput(layer, 10, objective='crossentropy')
    
    return layer, p_dict
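
The p_dict returned by these constructors maps names to Theano shared variables. A hedged sketch of how they could be persisted and restored with numpy follows; the exact key layout is produced by addParametersToDict and is not reproduced here.

import numpy as np

def save_parameters(p_dict, path):
    # Each entry is assumed to be a Theano shared variable.
    np.savez(path, **{name: var.get_value() for name, var in p_dict.items()})

def load_parameters(path):
    # Returns a plain dict of numpy arrays, e.g. to use as init_p_dict below.
    with np.load(path) as data:
        return {name: data[name] for name in data.files}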

Example #3
def getMnistPIModel(init_p_dict, rng, srng):
    '''
    Constructs a fully connected model with ternary weights and sign activation functions
    (permutation-invariant MNIST setting).

    @param init_p_dict: Dict containing the initial parameters.
    @param rng: Numpy rng object
    @param srng: rng object used by Theano
    @return layer: The output layer of the constructed model
    @return p_dict: Dictionary containing the shared variables with the model parameters. This makes it easy to store
        the parameters to a file during/after training
    '''
    regularization_weight = 1e-10
    weight_type = 'ternary'
    p_dict = {}

    # Layer 1: FC1200
    layer = LayerInput((784,))
    layer = LayerDropout(layer, 0.1, srng)
    layer = LayerFC(layer, 1200, rng, srng,
                    weight_type=weight_type,
                    weight_parameterization='logits',
                    weight_initialization_method='probability',
                    regularizer='shayer',
                    regularizer_weight=regularization_weight,
                    logit_bounds=(-5., 5.),
                    initial_parameters=getInitialParametersFromDict(init_p_dict, 0, 'linearforward'))
    p_dict = addParametersToDict(layer, 0, p_dict)
    layer = LayerBatchnorm(layer, alpha=0.1, average_statistics_over_predictions=True,
                           initial_parameters=getInitialParametersFromDict(init_p_dict, 0, 'batchnorm'))
    p_dict = addParametersToDict(layer, 0, p_dict)
    layer = LayerActivationSign(layer)
    layer = LayerLocalReparameterization(layer, srng)

    # Layer 2: FC1200
    layer = LayerDropout(layer, 0.2, srng)
    layer = LayerFC(layer, 1200, rng, srng,
                    weight_type=weight_type,
                    weight_parameterization='logits',
                    weight_initialization_method='probability',
                    regularizer='shayer',
                    regularizer_weight=regularization_weight,
                    logit_bounds=(-5., 5.),
                    initial_parameters=getInitialParametersFromDict(init_p_dict, 1, 'linearforward'))
    p_dict = addParametersToDict(layer, 1, p_dict)
    layer = LayerBatchnorm(layer, alpha=0.1, average_statistics_over_predictions=True,
                           initial_parameters=getInitialParametersFromDict(init_p_dict, 1, 'batchnorm'))
    p_dict = addParametersToDict(layer, 1, p_dict)
    layer = LayerActivationSign(layer)
    layer = LayerLocalReparameterization(layer, srng)

    # Layer 3: FC10
    layer = LayerDropout(layer, 0.3, srng)
    layer = LayerFC(layer, 10, rng, srng,
                    weight_type=weight_type,
                    weight_parameterization='logits',
                    weight_initialization_method='probability',
                    regularizer='shayer',
                    regularizer_weight=regularization_weight,
                    enable_bias=True,
                    bias_type='real',
                    enable_activation_normalization=True,
                    logit_bounds=(-5., 5.),
                    initial_parameters=getInitialParametersFromDict(init_p_dict, 2, 'linearforward'))
    p_dict = addParametersToDict(layer, 2, p_dict)
    layer = LayerLocalReparameterization(layer, srng)
    layer = LayerOutput(layer, 10, objective='crossentropy')
    
    return layer, p_dict
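
An illustrative sketch, not the library's implementation: with the 'logits' parameterization and logit_bounds=(-5., 5.) used above, each ternary weight can be described by three clipped logits that a softmax turns into probabilities over the values {-1, 0, +1}, from which a per-weight mean and variance follow.

import numpy as np

TERNARY_VALUES = np.array([-1.0, 0.0, 1.0])

def ternary_weight_stats(logits, bounds=(-5.0, 5.0)):
    # logits has shape (..., 3): one unconstrained score per ternary value.
    # Clipping to `bounds` mirrors the logit_bounds argument above.
    logits = np.clip(logits, *bounds)
    e = np.exp(logits - logits.max(axis=-1, keepdims=True))
    probs = e / e.sum(axis=-1, keepdims=True)
    mean = probs @ TERNARY_VALUES
    var = probs @ TERNARY_VALUES ** 2 - mean ** 2
    return mean, var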
Example #4
def getSvhnModel(init_p_dict, rng, srng):
    '''
    Constructs a CNN model with ternary weights and sign activation functions.

    @param init_p_dict: Dict containing the initial parameters.
    @param rng: Numpy rng object
    @param srng: rng object used by Theano
    @return layer: The output layer of the constructed model
    @return p_dict: Dictionary containing the shared variables with the model parameters. This makes it easy to store
        the parameters to a file during/after training
    '''
    regularization_weight = 1e-10
    weight_type = 'ternary'
    p_dict = {}

    # Layer 1: 64C3
    layer = LayerInput((3, 32, 32))
    layer = LayerConv(layer,
                      64, (3, 3), (1, 1),
                      'half',
                      rng,
                      srng,
                      weight_type=weight_type,
                      weight_parameterization='logits',
                      weight_initialization_method='probability',
                      regularizer='shayer',
                      regularizer_weight=regularization_weight,
                      logit_bounds=(-5., 5.),
                      initial_parameters=getInitialParametersFromDict(
                          init_p_dict, 0, 'linearforward'))
    p_dict = addParametersToDict(layer, 0, p_dict)
    layer = LayerBatchnorm(layer,
                           alpha=0.1,
                           average_statistics_over_predictions=True,
                           initial_parameters=getInitialParametersFromDict(
                               init_p_dict, 0, 'batchnorm'))
    p_dict = addParametersToDict(layer, 0, p_dict)
    layer = LayerActivationSign(layer)
    layer = LayerLocalReparameterization(layer, srng)

    # Layer 2: 64C3-P2
    layer = LayerDropout(layer, 0.2, srng)
    layer = LayerConv(layer,
                      64, (3, 3), (1, 1),
                      'half',
                      rng,
                      srng,
                      weight_type=weight_type,
                      weight_parameterization='logits',
                      weight_initialization_method='probability',
                      regularizer='shayer',
                      regularizer_weight=regularization_weight,
                      logit_bounds=(-5., 5.),
                      initial_parameters=getInitialParametersFromDict(
                          init_p_dict, 1, 'linearforward'))
    p_dict = addParametersToDict(layer, 1, p_dict)
    layer = LayerPooling(layer, (2, 2), mode='max')
    layer = LayerBatchnorm(layer,
                           alpha=0.1,
                           average_statistics_over_predictions=True,
                           initial_parameters=getInitialParametersFromDict(
                               init_p_dict, 1, 'batchnorm'))
    p_dict = addParametersToDict(layer, 1, p_dict)
    layer = LayerActivationSign(layer)
    layer = LayerLocalReparameterization(layer, srng)

    # Layer 3: 128C3
    layer = LayerDropout(layer, 0.2, srng)
    layer = LayerConv(layer,
                      128, (3, 3), (1, 1),
                      'half',
                      rng,
                      srng,
                      weight_type=weight_type,
                      weight_parameterization='logits',
                      weight_initialization_method='probability',
                      regularizer='shayer',
                      regularizer_weight=regularization_weight,
                      logit_bounds=(-5., 5.),
                      initial_parameters=getInitialParametersFromDict(
                          init_p_dict, 2, 'linearforward'))
    p_dict = addParametersToDict(layer, 2, p_dict)
    layer = LayerBatchnorm(layer,
                           alpha=0.1,
                           average_statistics_over_predictions=True,
                           initial_parameters=getInitialParametersFromDict(
                               init_p_dict, 2, 'batchnorm'))
    p_dict = addParametersToDict(layer, 2, p_dict)
    layer = LayerActivationSign(layer)
    layer = LayerLocalReparameterization(layer, srng)

    # Layer 4: 128C3-P2
    layer = LayerDropout(layer, 0.3, srng)
    layer = LayerConv(layer,
                      128, (3, 3), (1, 1),
                      'half',
                      rng,
                      srng,
                      weight_type=weight_type,
                      weight_parameterization='logits',
                      weight_initialization_method='probability',
                      regularizer='shayer',
                      regularizer_weight=regularization_weight,
                      logit_bounds=(-5., 5.),
                      initial_parameters=getInitialParametersFromDict(
                          init_p_dict, 3, 'linearforward'))
    p_dict = addParametersToDict(layer, 3, p_dict)
    layer = LayerPooling(layer, (2, 2), mode='max')
    layer = LayerBatchnorm(layer,
                           alpha=0.1,
                           average_statistics_over_predictions=True,
                           initial_parameters=getInitialParametersFromDict(
                               init_p_dict, 3, 'batchnorm'))
    p_dict = addParametersToDict(layer, 3, p_dict)
    layer = LayerActivationSign(layer)
    layer = LayerLocalReparameterization(layer, srng)

    # Layer 5: 256C3
    layer = LayerDropout(layer, 0.3, srng)
    layer = LayerConv(layer,
                      256, (3, 3), (1, 1),
                      'half',
                      rng,
                      srng,
                      weight_type=weight_type,
                      weight_parameterization='logits',
                      weight_initialization_method='probability',
                      regularizer='shayer',
                      regularizer_weight=regularization_weight,
                      logit_bounds=(-5., 5.),
                      initial_parameters=getInitialParametersFromDict(
                          init_p_dict, 4, 'linearforward'))
    p_dict = addParametersToDict(layer, 4, p_dict)
    layer = LayerBatchnorm(layer,
                           alpha=0.1,
                           average_statistics_over_predictions=True,
                           initial_parameters=getInitialParametersFromDict(
                               init_p_dict, 4, 'batchnorm'))
    p_dict = addParametersToDict(layer, 4, p_dict)
    layer = LayerActivationSign(layer)
    layer = LayerLocalReparameterization(layer, srng)

    # Layer 6: 256C3-P2
    layer = LayerDropout(layer, 0.3, srng)
    layer = LayerConv(layer,
                      256, (3, 3), (1, 1),
                      'half',
                      rng,
                      srng,
                      weight_type=weight_type,
                      weight_parameterization='logits',
                      weight_initialization_method='probability',
                      regularizer='shayer',
                      regularizer_weight=regularization_weight,
                      logit_bounds=(-5., 5.),
                      initial_parameters=getInitialParametersFromDict(
                          init_p_dict, 5, 'linearforward'))
    p_dict = addParametersToDict(layer, 5, p_dict)
    layer = LayerPooling(layer, (2, 2), mode='max')
    layer = LayerBatchnorm(layer,
                           alpha=0.1,
                           average_statistics_over_predictions=True,
                           initial_parameters=getInitialParametersFromDict(
                               init_p_dict, 5, 'batchnorm'))
    p_dict = addParametersToDict(layer, 5, p_dict)
    layer = LayerActivationSign(layer)
    layer = LayerLocalReparameterization(layer, srng)
    layer = LayerFlatten(layer)

    # Layer 7: FC1024
    layer = LayerDropout(layer, 0.4, srng)
    layer = LayerFC(layer,
                    1024,
                    rng,
                    srng,
                    weight_type=weight_type,
                    weight_parameterization='logits',
                    weight_initialization_method='probability',
                    regularizer='shayer',
                    regularizer_weight=regularization_weight,
                    logit_bounds=(-5., 5.),
                    initial_parameters=getInitialParametersFromDict(
                        init_p_dict, 6, 'linearforward'))
    p_dict = addParametersToDict(layer, 6, p_dict)
    layer = LayerBatchnorm(layer,
                           alpha=0.1,
                           average_statistics_over_predictions=True,
                           initial_parameters=getInitialParametersFromDict(
                               init_p_dict, 6, 'batchnorm'))
    p_dict = addParametersToDict(layer, 6, p_dict)
    layer = LayerActivationSign(layer)
    layer = LayerLocalReparameterization(layer, srng)

    # Layer 8: FC10
    layer = LayerFC(layer,
                    10,
                    rng,
                    srng,
                    weight_type=weight_type,
                    weight_parameterization='logits',
                    weight_initialization_method='probability',
                    regularizer='shayer',
                    regularizer_weight=regularization_weight,
                    enable_bias=True,
                    bias_type='real',
                    enable_activation_normalization=True,
                    logit_bounds=(-5., 5.),
                    initial_parameters=getInitialParametersFromDict(
                        init_p_dict, 7, 'linearforward'))
    p_dict = addParametersToDict(layer, 7, p_dict)
    layer = LayerLocalReparameterization(layer, srng)
    layer = LayerOutput(layer, 10, objective='crossentropy')

    return layer, p_dict
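
For context, a hedged numpy sketch of the local reparameterization trick that LayerLocalReparameterization presumably implements for a linear layer: instead of sampling each discrete weight, the approximately Gaussian pre-activations are sampled directly from the per-weight means and variances (such as those returned by ternary_weight_stats above).

import numpy as np

def sample_preactivations(x, w_mean, w_var, rng):
    # x: (batch, in); w_mean, w_var: (in, out) per-weight mean and variance.
    mu = x @ w_mean            # mean of the pre-activation
    sigma2 = (x ** 2) @ w_var  # variance of the pre-activation
    eps = rng.standard_normal(mu.shape)
    return mu + np.sqrt(sigma2) * eps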